In [215]:
import numpy as np
import pandas as pd

from Analysis_Funs import get_rank_ic

In [248]:
class FactorWeight():
    """Build factor weights and combine several factors into one composite.

    All methods are static; the class is only used as a namespace.

    ``factor_df`` has one column per factor. Its first index level is the key
    used to look up time-varying weights (the ``date`` level in this
    notebook).
    """
    def __init__(self):
        pass

    @staticmethod
    def _normalize_rows(score_df):
        """Divide every row of ``score_df`` by its own (NaN-skipping) row sum."""
        return score_df.div(score_df.sum(axis=1), axis=0)

    @staticmethod
    def weighted(factor_df, weight):
        """Combine the factor columns into one series using ``weight``.

        Note: passing a single factor is meaningless.

        :param factor_df: DataFrame, one column per factor.
        :param weight: either
            * a Series indexed by factor name - one fixed weight per column; or
            * a DataFrame indexed by the first index level of ``factor_df``
              with one column per factor - weights that change per row key.
        :return: Series indexed like ``factor_df`` holding the weighted sum of
            each row. NaN products are skipped, so a row whose weights are all
            NaN yields 0.0.
        :raises KeyError: if a row key of ``factor_df`` is missing from a
            time-varying ``weight``.
        """
        # Fixed per-column weights (what equal_weighted produces). The original
        # code looked these up by date and raised KeyError.
        if isinstance(weight, pd.Series) and weight.index.isin(factor_df.columns).all():
            return factor_df.mul(weight, axis=1).sum(axis=1)

        if isinstance(weight, pd.DataFrame):
            # Vectorized form of the row-wise lookup: fetch the weight row for
            # every factor row at once, align the columns, multiply by position.
            row_keys = factor_df.index.get_level_values(0)
            per_row = weight.loc[row_keys].reindex(columns=factor_df.columns)
            return factor_df.mul(per_row.to_numpy()).sum(axis=1)

        # Any other weight container: keep the original row-by-row lookup.
        return factor_df.apply(lambda x: (x * weight.loc[x.name[0]]).sum(), axis=1)

    @staticmethod
    def equal_weighted(factor_df):
        """Combine the factors with the same weight ``1 / n_factors`` each.

        :param factor_df: DataFrame, one column per factor.
        :return: Series indexed like ``factor_df``.
        """
        n_factors = len(factor_df.columns)
        weight = pd.Series([1. / n_factors] * n_factors, index=factor_df.columns)
        return FactorWeight.weighted(factor_df, weight)

    @staticmethod
    def ic_weighted(factor_df, forward_return, window=12):
        """Combine factors weighted by their rolling mean IC.

        ``factor_df`` must be standardized beforehand.

        :param factor_df: DataFrame, one column per factor.
        :param forward_return: passed unchanged to ``get_rank_ic`` together
            with each factor column.
        :param window: number of IC observations in the rolling mean.
        :return: Series indexed like ``factor_df``.
        """
        ic = factor_df.apply(lambda x: get_rank_ic(x, forward_return), axis=0)
        # shift(1): the weights of a row are built from earlier rows' ICs only.
        ic_mean = ic.rolling(window).mean().shift(1)
        weight = FactorWeight._normalize_rows(ic_mean)
        return FactorWeight.weighted(factor_df, weight)

    @staticmethod
    def ic_ir_weighted(factor_df, forward_return, window=12):
        """Combine factors weighted by their rolling IC information ratio.

        The ratio is rolling mean IC divided by rolling IC standard deviation.
        ``factor_df`` must be standardized beforehand.

        :param factor_df: DataFrame, one column per factor.
        :param forward_return: passed unchanged to ``get_rank_ic`` together
            with each factor column.
        :param window: number of IC observations in the rolling statistics.
        :return: Series indexed like ``factor_df``.
        """
        ic = factor_df.apply(lambda x: get_rank_ic(x, forward_return), axis=0)
        # Both statistics come from the locally computed IC table; the original
        # read an undefined global for the standard deviation.
        rolling_ic = ic.rolling(window)
        ic_ir = (rolling_ic.mean() / rolling_ic.std()).shift(1)
        weight = FactorWeight._normalize_rows(ic_ir)
        return FactorWeight.weighted(factor_df, weight)

    @staticmethod
    def make_half_life_ind(period):
        """Return the per-step decay base ``b`` such that ``b ** period == 0.5``."""
        return np.exp(np.log(0.5) / period)

    @staticmethod
    def weight_half_life(T, period):
        '''Half-life decay weight, rounded to 8 decimals.

           :param T: current round number (exponent applied to the decay base)
           :param period: half-life length
           :return: ``make_half_life_ind(period) ** T`` rounded to 8 decimals
           Example:
               list(map(lambda x: FactorWeight.weight_half_life(x, 9), np.linspace(1, 20, 20)))
        '''
        # Must be qualified with the class: a bare make_half_life_ind is not in
        # scope inside a static method and raised NameError.
        return np.round(np.power(FactorWeight.make_half_life_ind(period), T), 8)

In [217]:
import QUANTAXIS as QA
import base.JuUnits as u
from QUANTAXIS.QAUtil.QAParameter import MARKET_TYPE, RUNNING_ENVIRONMENT, ORDER_DIRECTION
# Fetch daily quotes for five stock codes over 2019-12-25 .. 2020-12-31
# (source flag: QA.DATASOURCE.MONGO) and show the underlying frame.
data = QA.QA_quotation(
    ['000001', '000002', '000004', '000005', '000006'],
    '2019-12-25',
    '2020-12-31',
    source=QA.DATASOURCE.MONGO,
    frequence='day',
    market=MARKET_TYPE.STOCK_CN,
    output=QA.OUTPUT_FORMAT.DATASTRUCT,
)
data.data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,amount
date,code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-12-25,000001,16.45,16.56,16.24,16.30,414918.0,6.796646e+08
2019-12-25,000002,30.40,30.63,30.18,30.29,685037.0,2.082008e+09
2019-12-25,000004,21.69,21.87,21.28,21.71,6989.0,1.515990e+07
2019-12-25,000005,3.05,3.05,3.01,3.03,47741.0,1.445555e+07
2019-12-25,000006,5.27,5.27,5.19,5.24,72595.0,3.800551e+07
...,...,...,...,...,...,...,...
2020-12-31,000001,19.21,19.58,19.02,19.34,924503.0,1.781736e+09
2020-12-31,000002,28.29,28.80,28.24,28.70,651991.0,1.862538e+09
2020-12-31,000004,20.69,21.09,20.31,20.70,22304.0,4.619703e+07
2020-12-31,000005,2.50,2.53,2.50,2.53,67806.0,1.701545e+07


In [250]:
%%time
# Next-period simple return per code, aligned to the current row:
# (close[t+1] - close[t]) / close[t].
forward_return = (
    data.close
    .groupby(level=1, group_keys=False)
    .apply(lambda px: ((px - px.shift(1)) / px.shift(1)).shift(-1))
)

# Three toy factors built from volume and close, used only to exercise the weighting code.
test_factor = pd.DataFrame({
    "a": data.volume,
    "b": data.close,
    "c": np.sqrt(data.close) + np.log(data.volume),
})
# data['close']

# Standardization (z-score)
def standardize(data, multi_code=False):
    """Z-score ``data``: subtract the mean and divide by the standard deviation.

    :param data: pandas Series or DataFrame (a DataFrame is scaled column by column).
    :param multi_code: when True, group by index level 1 (the ``code`` level in
        this notebook) and z-score each group separately; when False, z-score
        ``data`` as a whole.
    :return: object shaped like ``data`` holding the standardized values.
    """
    def _zscore(block):
        return (block - block.mean()) / block.std()

    if not multi_code:
        return _zscore(data)

    per_code = data.groupby(level=1, group_keys=False)
    return per_code.apply(_zscore)



# Z-score every factor column separately within each code's own history.
test_factor_stda = standardize(data=test_factor, multi_code=True)
print(test_factor_stda)



# print("mean,std,t,tt,p:",get_ic_desc(ic_datas))
# print('ir',get_ic_ir(ic_datas))

# test_df = pd.DataFrame({"a":test_factor, "b":test_factor*2})
# print(test_df)
# # print(FactorWeight.equal_weight(test_df))
# print(test_df.columns)


# equal_weight 
# weight = pd.Series([1. / len(test_factor_stda.columns)] * len(test_factor_stda.columns),index=test_factor_stda.columns)
# print(weight)
# print(test_factor_stda.apply(lambda x: (x*weight).sum(),axis=1))

# print(FactorWeight.equal_weighted(test_factor_stda))


# Manual walk-through of IC weighting, printed step by step so it can be
# compared with FactorWeight.ic_weighted at the end of the cell.

# 1) IC of every factor column against the forward return.
ic_datas = test_factor_stda.apply(lambda col: get_rank_ic(col, forward_return), axis=0)
print(ic_datas)

# 2) 12-observation rolling mean IC, shifted one row so each row only sees earlier ICs.
ic_datas_mean = ic_datas.rolling(12).mean().shift(1)
# print(ic_datas_mean)

# 3) Normalize each row of mean ICs so the weights sum to 1.
weight = ic_datas_mean.apply(lambda row: row / row.sum(), axis=1)
print(weight)

# 4) Weighted sum of the factor columns, using the weight row of each row's date.
print(test_factor_stda.apply(lambda row: (row * weight.loc[row.name[0]]).sum(), axis=1))

# Same computation through the class method.
print(FactorWeight.ic_weighted(test_factor_stda, forward_return))


# ic_ir_datas = ic_datas.rolling(12).mean()/ ic_datas.rolling(12).std()
# ic_ir_datas = ic_ir_datas.shift(1)
# print(ic_ir_datas)
# weight = ic_ir_datas.apply(lambda x: x / x.sum(), axis=1)
# print(weight)
# print(test_factor_stda.apply(lambda x: (x*weight).sum(),axis=1))

# print(FactorWeight.ic_ir_weight(test_factor_stda, forward_return))

                          a         b         c
date       code                                
2019-12-25 000001 -1.211333  0.505250 -1.483098
           000002 -0.361248  1.173286  0.240396
           000004 -0.968436 -1.293610 -2.134791
           000005 -0.634221  1.243630 -0.753569
           000006 -0.703299 -0.499816 -0.971731
...                     ...       ...       ...
2020-12-31 000001 -0.345097  1.993560  0.791049
           000002 -0.456381  0.286439 -0.227461
           000004 -0.543730 -1.505309 -1.089650
           000005 -0.397100 -1.362151 -0.438649
           000006 -0.715385 -0.167065 -0.936981

[1240 rows x 3 columns]
               a     b     c
date                        
2019-12-25   0.2  -0.1  -0.1
2019-12-26  -0.3   0.3  -0.5
2019-12-27  -0.8   0.2     0
2019-12-30   0.1   0.5   0.3
2019-12-31   0.6   0.6   0.7
...          ...   ...   ...
2020-12-25  -0.6   0.7   0.2
2020-12-28   0.7   0.3   0.2
2020-12-29     0   0.3  -0.1
2020-12-30  -0.3  -0.7  -0.3
202