<font size=6 face="黑体">**SmartQ因子检验**</font>

# 导入相应模块

In [None]:
%pylab inline --no-import-all
import pandas as pd
import numpy as np
import alphalens
import datetime
import os
import tushare as ts

# 定义相关函数

In [None]:
class Cache(object):
    """Fixed-capacity cache that evicts the least recently used entry.

    ``cache`` maps keys to values and ``order`` lists the keys from least to
    most recently used.  ``get`` returns ``None`` on a miss, so a stored
    ``None`` cannot be told apart from a missing key.
    """

    def __init__(self, maxsize=100):
        """Create an empty cache holding at most ``maxsize`` entries.

        A ``maxsize`` of zero or less disables storage entirely.
        """
        self.cache = {}
        self.order = []     # least recently used first
        self.maxsize = maxsize

    def get(self, key):
        """Return the value stored for ``key``, or ``None`` if it is absent.

        A hit moves ``key`` to the most-recently-used end of ``order``.
        """
        try:
            item = self.cache[key]  # KeyError if not present
        except KeyError:
            return None
        self.order.remove(key)
        self.order.append(key)
        return item

    def set(self, key, value):
        """Store ``value`` under ``key`` and mark it most recently used.

        When the cache is full and ``key`` is new, the least recently used
        entry is discarded first.
        """
        if self.maxsize <= 0:
            # Nothing may be stored. Without this guard the eviction branch
            # would pop from an empty ``order`` list and raise IndexError.
            return
        if key in self.cache:
            self.order.remove(key)
        elif len(self.cache) >= self.maxsize:
            # discard least recently used item
            del self.cache[self.order.pop(0)]
        self.cache[key] = value
        self.order.append(key)

    def size(self):
        """Return the number of entries currently stored."""
        return len(self.cache)

In [None]:
# Trading-day calendar, built from the index of the frame that ts.bar returns
# for code '000001' with asset='INDEX' and sorted in ascending order.
# The empty placeholder keeps the name defined if the download below fails.
utils_trading_days = pd.Series(dtype='datetime64[ns]')
ts_conn = ts.get_apis()
try:
    df_SZZS = ts.bar(code='000001', conn=ts_conn, asset='INDEX')
finally:
    # Always release the connection, even when the download raises.
    ts.close_apis(ts_conn)
utils_trading_days = pd.Series(df_SZZS.index).sort_values()

def get_trading_days(start=None, end=None, ndays=None, ascending=True):
    """
    Return trading days selected from the module-level ``utils_trading_days``.

    The selection depends on which arguments are given:
    (1) start and end: every trading day in [start, end]; ndays is ignored
    (2) start and ndays (end is None): the first ndays trading days on or
        after start
    (3) end and ndays (start is None): the last ndays trading days on or
        before end
    (4) start only: every trading day on or after start
    (5) end only: every trading day on or before end
    (6) ndays only: the last ndays trading days of the calendar
    (7) no argument: the whole calendar
    --------
    :param start: datetime-like or str
        start date, format: YYYY-MM-DD
    :param end: datetime-like or str
        end date, format: YYYY-MM-DD
    :param ndays: int
        number of trading days; 0 yields an empty result
    :param ascending: bool, default True
        whether to return the days in ascending order
    :return:
    --------
        Series of pandas.Timestamp with a fresh 0..n-1 index, in ascending
        order by default (this relies on ``utils_trading_days`` being sorted
        ascending)
    """
    days = utils_trading_days
    if start is not None:
        days = days[days >= start]
    if end is not None:
        days = days[days <= end]
    if ndays is not None and (start is None or end is None):
        # head/tail are positional like iloc slicing, but tail(0) is empty
        # whereas iloc[-0:] would return every row.
        days = days.head(ndays) if start is not None else days.tail(ndays)
    if not ascending:
        days = days.sort_values(ascending=False)
    # Reset after sorting so label 0 is always the first returned day.
    return days.reset_index(drop=True)
# ------------------------------------
def to_date(date_like):
    """
    Convert a date given as a string into a datetime.

    :param date_like: datetime.date, datetime.datetime or str
        strings must be YYYYMMDD, optionally with '-' separators
    :return: date_like itself when it is already a date or datetime,
        otherwise the parsed datetime.datetime
    """
    # datetime.datetime subclasses datetime.date, so one check covers both.
    if isinstance(date_like, datetime.date):
        return date_like
    compact = date_like.replace('-', '')
    return datetime.datetime.strptime(compact, '%Y%m%d')
# -------------------------------------
def is_month_end(trading_day):
    """
    Tell whether a date is the last trading day of its month.

    :param trading_day: datetime-like, str
    :return: bool
        True when trading_day is a trading day and the next trading day
        falls in a different month. False when trading_day is not a trading
        day, or when the calendar holds no later trading day to compare with.
    """
    trading_day = to_date(trading_day)
    trading_days = get_trading_days(start=trading_day, ndays=2)
    if len(trading_days) < 2:
        # Without a following trading day the month end cannot be confirmed;
        # indexing the missing entry would raise instead of answering.
        return False
    if trading_day != trading_days.iloc[0]:
        return False
    return trading_day.month != trading_days.iloc[1].month
# ------------------------------------
# Column names applied to the per-stock daily market-data CSV files.
MKT_DAILY_FQ_HEADER = [
    'code', 'date', 'open', 'high', 'low', 'close',
    'vol', 'amount', 'turnover1', 'turnover2', 'factor',
]
# Shared cache with room for 3000 entries.
DataCache = Cache(maxsize=3000)
def is_normal_traded(code, trading_day):
    """
    Tell whether a stock has a daily market-data row for a trading day.

    :param code: stock code, also the base name of the stock's CSV file
    :param trading_day: datetime-like, str
    :return: bool
        True when the stock's CSV file exists and contains a row dated
        trading_day, otherwise False
    """
    daily_mkt_path = '/Users/davidyujun/Dropbox/FactorDB/ElementaryFactor/mkt_daily_FQ/%s.csv' % code
    if not os.path.isfile(daily_mkt_path):
        return False
    day = to_date(trading_day)
    # The cached frame holds every date of the stock, so it is keyed by code
    # only; adding the date to the key would re-read the file and store a
    # duplicate frame for each new day.
    key = '%s_daily_mkt' % code
    df_daily_mkt = DataCache.get(key)
    if df_daily_mkt is None:
        df_daily_mkt = pd.read_csv(daily_mkt_path,
                                   names=MKT_DAILY_FQ_HEADER,
                                   parse_dates=[1],
                                   header=0)
        DataCache.set(key, df_daily_mkt)

    return bool((df_daily_mkt.date == day).any())

# 构建数据结构

## 构建SmartQ因子数据结构

In [None]:
start_date = '2012-12-31'
end_date = '2016-09-30'
# Get the trading days between the start and end dates
trading_days = get_trading_days(start_date, end_date)
# Iterate over the trading days; on month ends, read the SmartQ factor loadings
factor_data = pd.DataFrame()
