In [4]:
# coding=utf-8
import string

import alphalens
import pandas as pd

# An IC observation counts towards IC_lgtratio when it is greater than this value.
IC_LGT_STANDARD = 0.02

# Scoring thresholds: get_result() adds one point to the score for each criterion met.
MEAN_FACTOR_RETURN_STANDARD = 0.001  # minimum |mean factor return|
IC_MEAN_STANDARD= 0.015 # minimum |mean IC|
IC_STD_STANDARD = 0.1 # maximum IC standard deviation
IC_LGTRATIO_STANDARD = 0.5 # minimum share of IC observations above IC_LGT_STANDARD
IR_STANDARD = 0.15 # minimum |IR|

# Sample window and stock universe shared by every cell below.
# NOTE(review): industry() is neither defined nor imported in this notebook --
# presumably injected by the research platform, with "C27" an industry code; confirm.
start_date = "20170101"
end_date = "20180101"
stock_list = industry("C27")

# Hand-picked factors evaluated by get_all_result(). Each entry is a
# "<table>.<field>" path that get_cls() resolves against `fundamentals`.
factor_list = [
    "eod_derivative_indicator.pe_ratio",
    "eod_derivative_indicator.pcf_ratio",
    "eod_derivative_indicator.pb_ratio",
    "eod_derivative_indicator.market_cap",
    "eod_derivative_indicator.ps_ratio",
    "financial_indicator.return_on_invested_capital",
    "financial_indicator.du_return_on_equity",
    "financial_indicator.return_on_asset_net_profit",
    "financial_indicator.return_on_equity",
    "financial_indicator.return_on_asset",
    "financial_indicator.earnings_per_share",
    "financial_indicator.net_profit_to_revenue",
    "financial_indicator.inc_revenue",
    "financial_indicator.inc_total_asset",
    "financial_indicator.inc_net_profit",
    "financial_indicator.inc_earnings_per_share",
    "financial_indicator.inc_operating_revenue",
]




def get_cls(factor_str):
    """Resolve a "<table>.<field>" string to the matching attribute of ``fundamentals``.

    :param factor_str: dotted factor path containing exactly one "." separator,
        e.g. "eod_derivative_indicator.pe_ratio"
    :return: the object found at ``fundamentals.<table>.<field>``
    :raises ValueError: if ``factor_str`` does not split into exactly two parts
    """
    table_name, field_name = factor_str.split(".")
    return getattr(getattr(fundamentals, table_name), field_name)

def build_factor_list():
    """Enumerate candidate factors from four ``fundamentals`` tables.

    Every public (non-underscore) attribute of the tables
    eod_derivative_indicator, income_statement, financial_indicator and
    income_statement_TTM is turned into a "<table>.<field>" string, in that
    table order. Entries are then dropped when the string ends with one of the
    excluded suffixes below or with a decimal digit.

    Note: this notebook defines build_factor_list() in several cells with
    different exclusion lists; whichever cell ran last is the one in effect.

    :return: list of "<table>.<field>" strings that survived the filter
    """
    tables = (
        "eod_derivative_indicator",
        "income_statement",
        "financial_indicator",
        "income_statement_TTM",
    )
    # Suffixes of entries that must not be treated as factors, plus every digit
    # so that numbered variants such as "pe_ratio_1" are skipped.
    excluded_suffixes = (
        "rpt_quarter",
        "rpt_year",
        "stockcode",
        "date",
        "metadata",
        "enterprise_expansion_reserve",
        "exchange_gains_or_losses",
    ) + tuple(string.digits)

    candidates = [
        "{}.{}".format(table, field)
        for table in tables
        for field in dir(getattr(fundamentals, table))
        if not field.startswith("_")
    ]
    # str.endswith accepts a tuple, which replaces the manual flag loop.
    return [name for name in candidates if not name.endswith(excluded_suffixes)]

# 1. 准备factor
def prepar_factor(start_date, end_date, factor, stock_list):
    """Fetch one fundamentals field for a stock universe over a date window.

    :param start_date: first day of the window
    :param end_date: last day of the window; also passed to get_fundamentals
        as ``entry_date``
    :param factor: dotted factor path accepted by ``get_cls``
    :param stock_list: stock codes the query is restricted to
    :return: factor data, the stacked slice of the requested field
        (a MultiIndex Series according to the original author's note)
    """
    # The look-back interval is the number of trading days inside the window.
    window_days = len(get_trading_dates(start_date=start_date, end_date=end_date))
    factor_query = query(get_cls(factor)).filter(fundamentals.stockcode.in_(stock_list))
    panel = get_fundamentals(
        factor_query,
        entry_date=end_date,
        interval="{}d".format(window_days),
    )
    # Select the slice named after the field part of "<table>.<field>".
    field_name = factor.split(".")[-1]
    return panel[field_name, :, :].stack()


# 2. 准备price
def prepar_price(start_date, end_date, stock_list):
    """Fetch the "close" price field for ``stock_list`` between the two dates.

    :param start_date: first day of the window
    :param end_date: last day of the window
    :param stock_list: stock codes to fetch
    :return: whatever ``get_price`` returns for fields="close"
    """
    return get_price(
        stock_list,
        start_date=start_date,
        end_date=end_date,
        fields="close",
    )


# 3. 获取计算IC值需要的数据格式，multiIndex的series,计算IC,计算因子收益率
def get_IC(factor, prices):
    """Run the alphalens pipeline for one factor.

    :param factor: factor values, as produced by ``prepar_factor``
    :param prices: price data, as produced by ``prepar_price``
    :return: tuple (factor_data, IC, factor_return) where factor_data is the
        result of ``get_clean_factor_and_forward_returns`` and the other two
        are derived from it by ``factor_information_coefficient`` and
        ``factor_returns``
    """
    clean_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices)
    ic_table = alphalens.performance.factor_information_coefficient(clean_data)
    returns_table = alphalens.performance.factor_returns(clean_data)
    return clean_data, ic_table, returns_table


def get_result(IC, factor_return):
    """Summarise one factor's IC and return series and score it.

    Only the first column of each input is used.

    :param IC: the factor's information coefficients
    :param factor_return: the factor's returns
    :return: tuple (mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score);
        score is the number of the five threshold checks that passed
    """
    return_series = factor_return.iloc[:, 0]
    ic_series = IC.iloc[:, 0]

    mean_factor_return = return_series.mean()
    IC_mean = ic_series.mean()
    IC_std = ic_series.std()
    # Share of observations with IC above IC_LGT_STANDARD. The comparison is
    # signed (no abs()), unlike the mean-IC and IR checks below.
    IC_lgtratio = len(ic_series[ic_series > IC_LGT_STANDARD]) / len(ic_series)
    IR = IC_mean / IC_std

    # One point per criterion met.
    checks = (
        abs(mean_factor_return) >= MEAN_FACTOR_RETURN_STANDARD,
        abs(IC_mean) >= IC_MEAN_STANDARD,
        IC_std <= IC_STD_STANDARD,
        IC_lgtratio >= IC_LGTRATIO_STANDARD,
        abs(IR) >= IR_STANDARD,
    )
    score = sum(1 for passed in checks if passed)

    return mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score


# 计算所有的因子的结果
def get_all_result():
    """Evaluate every factor in ``factor_list`` and collect its summary statistics.

    Reads the module-level ``factor_list``, ``start_date``, ``end_date`` and
    ``stock_list``. Each factor name is printed as a progress log. A factor
    whose IC computation or scoring raises is skipped (best effort), and the
    skip is reported together with the error instead of being dropped silently.

    :return: DataFrame with one row per successfully evaluated factor and the
        columns facotr, mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score
    """
    all_result_list = []  # final shape: [[stats of factor 1], [stats of factor 2], ...]
    # Prices do not depend on the factor, so fetch them once instead of once per factor.
    prices = prepar_price(start_date, end_date, stock_list)
    for factor_str in factor_list:
        print(factor_str)
        # 1. Factor values for this factor.
        factor = prepar_factor(start_date, end_date, factor_str, stock_list)
        try:
            # 2. IC and factor returns.
            factor_data, IC, factor_return = get_IC(factor, prices)
            # 3. Summary statistics and score.
            mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score = get_result(IC, factor_return)
        except Exception as exc:
            # Keep going on failure, but say which factor was dropped and why.
            # Catching Exception (not a bare except) lets KeyboardInterrupt through.
            print("  skipped {}: {!r}".format(factor_str, exc))
            continue
        all_result_list.append([factor_str, mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score])

    # "facotr" is a misspelling of "factor"; kept so the column name seen by
    # existing consumers of this DataFrame does not change.
    all_result_df = pd.DataFrame(
        all_result_list,
        columns=["facotr", "mean_factor_return", "IC_mean", "IC_std", "IC_lgtratio", "IR", "score"],
    )
    return all_result_df

In [5]:
# Run the full evaluation. The factor names printed below are the progress log
# emitted by get_all_result().
df = get_all_result()

eod_derivative_indicator.pe_ratio
eod_derivative_indicator.pcf_ratio
eod_derivative_indicator.pb_ratio
eod_derivative_indicator.market_cap
eod_derivative_indicator.ps_ratio
financial_indicator.return_on_invested_capital
financial_indicator.du_return_on_equity
financial_indicator.return_on_asset_net_profit
financial_indicator.return_on_equity
financial_indicator.return_on_asset
financial_indicator.earnings_per_share
financial_indicator.net_profit_to_revenue
financial_indicator.inc_revenue
financial_indicator.inc_total_asset
financial_indicator.inc_net_profit
financial_indicator.inc_earnings_per_share
financial_indicator.inc_operating_revenue


In [6]:
# Show the per-factor summary table.
# NOTE(review): the recorded output lists 10 rows although 17 factors were
# requested -- the others presumably hit the except branch in get_all_result(); confirm.
df

Unnamed: 0,facotr,mean_factor_return,IC_mean,IC_std,IC_lgtratio,IR,score
0,eod_derivative_indicator.pe_ratio,-0.000616,-0.023112,0.117749,0.367521,-0.196285,2
1,eod_derivative_indicator.pcf_ratio,-0.000724,-0.015745,0.094981,0.371795,-0.165773,3
2,eod_derivative_indicator.pb_ratio,0.000125,-0.013038,0.116275,0.363248,-0.112128,0
3,eod_derivative_indicator.market_cap,0.000312,0.000623,0.216232,0.448718,0.00288,0
4,eod_derivative_indicator.ps_ratio,-0.000159,-0.01426,0.115399,0.423077,-0.123567,0
5,financial_indicator.return_on_invested_capital,0.001221,0.018168,0.134614,0.470085,0.134966,2
6,financial_indicator.du_return_on_equity,0.001258,0.016976,0.135526,0.5,0.125257,3
7,financial_indicator.return_on_asset_net_profit,0.001122,0.016129,0.13253,0.508547,0.121704,3
8,financial_indicator.return_on_equity,0.001258,0.016976,0.135526,0.5,0.125257,3
9,financial_indicator.return_on_asset,0.001201,0.017158,0.133128,0.517094,0.128884,3


In [None]:
def build_factor_list():
    """Enumerate candidate factors from four ``fundamentals`` tables.

    Every public (non-underscore) attribute of the tables
    eod_derivative_indicator, income_statement, financial_indicator and
    income_statement_TTM is turned into a "<table>.<field>" string, in that
    table order. Entries are then dropped when the string ends with one of the
    excluded suffixes below or with a decimal digit.

    Note: this cell re-defines build_factor_list(); other cells in this
    notebook define it with different exclusion lists, and whichever cell ran
    last is the one in effect.

    :return: list of "<table>.<field>" strings that survived the filter
    """
    tables = (
        "eod_derivative_indicator",
        "income_statement",
        "financial_indicator",
        "income_statement_TTM",
    )
    # Suffixes of entries that must not be treated as factors, plus every digit
    # so that numbered variants such as "pe_ratio_1" are skipped.
    excluded_suffixes = (
        "rpt_quarter",
        "rpt_year",
        "stockcode",
        "announce_date",
        "metadata",
    ) + tuple(string.digits)

    candidates = [
        "{}.{}".format(table, field)
        for table in tables
        for field in dir(getattr(fundamentals, table))
        if not field.startswith("_")
    ]
    # str.endswith accepts a tuple, which replaces the manual flag loop.
    return [name for name in candidates if not name.endswith(excluded_suffixes)]

In [19]:
# Inspect the raw data of a single factor. (The original note here named
# "eod_derivative_indicator.announce_date"; the code below actually inspects
# income_statement.exchange_gains_or_losses.)
factor_str=  "income_statement.exchange_gains_or_losses"
factor = prepar_factor(start_date, end_date, factor_str, stock_list)
prices = prepar_price(start_date, end_date, stock_list)
# Display the raw factor values.
factor
# Disabled follow-up step: factor_data, IC, factor_return = get_IC(factor, prices)

2017-12-29  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            000518.XSHE         0
2017-12-28  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            000518.XSHE         0
2017-12-27  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            000518.XSHE         0
2017-12-26  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            000518.XSHE         0
2017-12-25  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            000518.XSHE         0
2017-12-22  002030.XSHE         0
            002317.XSHE         0
            002219.XSHE   -151804
            000756.XSHE         0
            00

In [72]:
# Display whatever `factor` currently holds.
# NOTE(review): execution count 72 is far from the In [19] cell and the values
# differ from that cell's output, so this output presumably comes from a
# different factor or run -- confirm.
factor

2017-10-30  000538.XSHE     70.495
            600196.XSHG    11.7849
            600568.XSHG   -17.2253
            000423.XSHE     119.21
            002107.XSHE    47.5695
            600329.XSHG   -1224.16
            000756.XSHE    88.7603
            600781.XSHG    24.4708
            603229.XSHG     6.8758
            603707.XSHG   -23.7961
            000566.XSHE     6.4079
            600079.XSHG    41.8378
            000919.XSHE      26.35
            600488.XSHG    -14.542
            603387.XSHG      329.9
            600267.XSHG    -24.848
            600613.XSHG    43.7376
            000931.XSHE     9.5587
            600085.XSHG    53.5314
            002393.XSHE   -42.9231
2017-10-27  000538.XSHE    67.9972
            300497.XSHE    18.5069
            300363.XSHE   -18.0558
            600196.XSHG    12.1692
            600568.XSHG   -17.2945
            000423.XSHE    119.906
            600385.XSHG   -665.397
            002107.XSHE    48.7864
            600222.X

In [50]:
# Manual, step-by-step version of prepar_factor() for the announce_date field.
# Unlike prepar_factor(), the slice is selected with key 0 rather than the
# field name. Note that this rebinds the global `factor` to a string.
factor=  "eod_derivative_indicator.announce_date"
trading_dates = get_trading_dates(start_date=start_date, end_date=end_date)
q = query(get_cls(factor)).filter(fundamentals.stockcode.in_(stock_list))
fund = get_fundamentals(q, entry_date=end_date, interval="{}d".format(len(trading_dates)))
fund = fund[0, :, :].stack()
# Display the stacked result.
fund

2017-12-29  000538.XSHE     2.0171e+07
            300497.XSHE     2.0171e+07
            002826.XSHE     2.0171e+07
            002680.XSHE     2.0171e+07
            600201.XSHG     2.0171e+07
            603880.XSHG     2.0171e+07
            300119.XSHE     2.0171e+07
            600276.XSHG     2.0171e+07
            300363.XSHE     2.0171e+07
            002773.XSHE     2.0171e+07
            002365.XSHE     2.0171e+07
            600196.XSHG     2.0171e+07
            603456.XSHG     2.0171e+07
            000952.XSHE     2.0171e+07
            300558.XSHE     2.0171e+07
            603538.XSHG     2.0171e+07
            300009.XSHE     2.0171e+07
            600572.XSHG     2.0171e+07
            000661.XSHE     2.0171e+07
            600568.XSHG     2.0171e+07
            002675.XSHE     2.0171e+07
            603567.XSHG     2.0171e+07
            000423.XSHE     2.0171e+07
            000153.XSHE     2.0171e+07
            600867.XSHG     2.0171e+07
            000590.XSHE  

### 如何把所有的因子计算一遍

In [28]:
# Public (non-underscore) attribute names of each fundamentals table.
eod_derivative_indicator_ = [
    name for name in dir(fundamentals.eod_derivative_indicator) if name[:1] != "_"
]
income_statement_ = [
    name for name in dir(fundamentals.income_statement) if name[:1] != "_"
]
financial_indicator_ = [
    name for name in dir(fundamentals.financial_indicator) if name[:1] != "_"
]
income_statement_TTM_ = [
    name for name in dir(fundamentals.income_statement_TTM) if name[:1] != "_"
]

In [29]:
# Show the attribute names collected for the income_statement table.
income_statement_

['announce_date',
 'asset_depreciation',
 'asset_impairment',
 'basic_earnings_per_share',
 'cost_of_goods_sold',
 'disposal_loss_on_asset',
 'end_date',
 'enterprise_expansion_reserve',
 'exchange_gains_or_losses',
 'fair_value_change_income',
 'financing_expense',
 'ga_expense',
 'gross_profit',
 'income_tax',
 'interest_expense',
 'interest_income',
 'inventory_shrinkage',
 'invest_income_associates',
 'investment_income',
 'metadata',
 'net_operating_revenue',
 'net_profit',
 'net_profit_parent_company',
 'non_operating_expense',
 'non_operating_net_profit',
 'non_operating_revenue',
 'operating_expense',
 'operating_revenue',
 'ordinary_stock_dividends',
 'other_income',
 'other_operating_cost',
 'other_operating_income',
 'other_operating_revenue',
 'other_surplus_reserve',
 'pnl_adjustment',
 'preferred_stock_dividends',
 'profit_available_for_distribution',
 'profit_available_for_owner_distribution',
 'profit_before_tax',
 'profit_from_ma',
 'profit_from_operation',
 'retained_

In [30]:
# Show the attribute names collected for the financial_indicator table.
financial_indicator_

['account_payable_turnover_days',
 'account_payable_turnover_rate',
 'account_receivable_turnover_days',
 'account_receivable_turnover_rate',
 'adjusted_diluted_earnings_per_share',
 'adjusted_earnings_per_share',
 'adjusted_fully_diluted_earnings_per_share',
 'adjusted_net_profit',
 'adjusted_profit_to_total_profit',
 'adjusted_return_on_equity_average',
 'adjusted_return_on_equity_diluted',
 'adjusted_return_on_equity_weighted_average',
 'announce_date',
 'annual_return_on_asset',
 'annual_return_on_asset_net_profit',
 'annual_return_on_equity',
 'book_value_per_share',
 'capital_reserve_per_share',
 'cash_flow_from_operations_per_share',
 'cost_to_sales',
 'current_asset_to_total_asset',
 'current_asset_turnover',
 'current_debt_to_total_debt',
 'current_ratio',
 'debt_to_asset_ratio',
 'debt_to_equity_ratio',
 'depreciation_and_amortization',
 'diluted_earnings_per_share',
 'dividend_per_share',
 'du_asset_turnover_ratio',
 'du_equity_multiplier',
 'du_profit_margin',
 'du_return_o

In [34]:
# NOTE(review): income_statement_TTM_ is a list, so dir() here lists list
# methods (as the output shows), not the TTM table's fields -- presumably a
# bare `income_statement_TTM_` was intended; confirm.
dir(income_statement_TTM_)
# Disabled type check: print(income_statement_TTM_.__class__)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [65]:
def build_factor_list():
    """Enumerate candidate factors from four ``fundamentals`` tables.

    Every public (non-underscore) attribute of eod_derivative_indicator,
    income_statement, financial_indicator and income_statement_TTM becomes a
    "<table>.<field>" string, in that table order. Strings ending in "date" or
    "metadata" are left out; nothing else is filtered in this version.

    :return: list of "<table>.<field>" strings
    """
    table_names = (
        "eod_derivative_indicator",
        "income_statement",
        "financial_indicator",
        "income_statement_TTM",
    )
    collected = []
    for table in table_names:
        for field in dir(getattr(fundamentals, table)):
            if field.startswith("_"):
                continue
            qualified = table + "." + field
            if qualified.endswith("date") or qualified.endswith("metadata"):
                continue
            collected.append(qualified)
    return collected

In [66]:
# Rebuild the module-level factor_list from the fundamentals tables; this
# replaces the hand-picked list defined in the first cell.
factor_list = build_factor_list()

In [67]:
# Show the generated list. As the output shows, it still contains rpt_quarter,
# rpt_year, stockcode and digit-suffixed variants.
factor_list

['eod_derivative_indicator.a_share_market_val',
 'eod_derivative_indicator.a_share_market_val_2',
 'eod_derivative_indicator.dividend_yield',
 'eod_derivative_indicator.enterprise_value',
 'eod_derivative_indicator.enterprise_value_2',
 'eod_derivative_indicator.market_cap',
 'eod_derivative_indicator.market_cap_2',
 'eod_derivative_indicator.pb_ratio',
 'eod_derivative_indicator.pcf_ratio',
 'eod_derivative_indicator.pcf_ratio_1',
 'eod_derivative_indicator.pcf_ratio_2',
 'eod_derivative_indicator.pcf_ratio_3',
 'eod_derivative_indicator.pe_ratio',
 'eod_derivative_indicator.pe_ratio_1',
 'eod_derivative_indicator.pe_ratio_2',
 'eod_derivative_indicator.peg_ratio',
 'eod_derivative_indicator.ps_ratio',
 'eod_derivative_indicator.rpt_quarter',
 'eod_derivative_indicator.rpt_year',
 'eod_derivative_indicator.stockcode',
 'eod_derivative_indicator.val_of_stk_right',
 'income_statement.asset_depreciation',
 'income_statement.asset_impairment',
 'income_statement.basic_earnings_per_share',

In [38]:
# Number of candidate factors.
# NOTE(review): execution count 38 predates In [65]-In [67], so 224 may
# reflect an earlier factor_list -- confirm.
len(factor_list)

224