In [2]:
# 因子的定义和计算

import pandas as pd
import numpy as np
import logging
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

In [3]:
class Factor:

# 只展示一部分了，因子间加减乘除可以自己定义，同一个模式很简单
    
    def __init__(self, data, expr=''):
        # 两个方法，一个用来返回数据，一个用来返回公式
        self.data = data
        self.expr = expr
    
    def where(self, condition, x, y):
        # 封装一个三元处理函数
        result_data = pd.DataFrame(np.where(condition, x.data, y.data), index=self.data.index, columns=self.data.columns)
        result_expr = f"(condition ? {x.expr} : {y.expr})"
        
        return Factor(result_data, result_expr)

def factor(name, data_source):

    return Factor(data_source, name)

def stddev(self, n):
    
    std_data = self.data.rolling(n).std()
    std_expr = f"stddev({self.expr}, {n})"

    return Factor(std_data, std_expr)

def SignedPower(self, power):
    
    sp_data = np.sign(self.data) * np.power(np.abs(self.data), power)
    sp_expr = f"SignedPower({self.expr}, {power})"
    
    return Factor(sp_data, sp_expr)

def Ts_ArgMax(self, window):
    max_values = pd.DataFrame(index=self.data.index, columns=self.data.columns)
    
    for i in range(len(self.data)):
        if i < window - 1:
            max_values.iloc[i] = np.nan
        else:
            max_values.iloc[i] = self.data.iloc[i-window+1:i+1].max()
            
    expr = f"Ts_ArgMax({self.expr}, {window})"
    
    return Factor(max_values, expr)

def rank(self):
    ranked_data = self.data.rank(axis=1, ascending=False, method='min')
    ranked_data.index = pd.to_datetime(ranked_data.index)
    
    ranked_expr = f"rank({self.expr})"
    
    return Factor(ranked_data, ranked_expr)

In [4]:
close_ = pd.read_hdf('/Users/syesw/Desktop/single_factor_research/data/hs300/hs300-20100101-20220101_price.h5', key='data')
returns_ = close_.pct_change()

# 进行封装
close = factor('close', close_)
returns = factor('returns', returns_)

In [12]:
price = close.data
close.data

order_book_id,000001.XSHE,000002.XSHE,000009.XSHE,000012.XSHE,000021.XSHE,000024.XSHE,000027.XSHE,000031.XSHE,000039.XSHE,000046.XSHE,...,601872.XSHG,601898.XSHG,601899.XSHG,601918.XSHG,601919.XSHG,601939.XSHG,601958.XSHG,601988.XSHG,601991.XSHG,601998.XSHG
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,7.2969,6.4640,4.6810,5.9280,7.6948,16.0009,4.6634,9.9592,5.3788,5.6604,...,3.5720,10.2336,4.7198,11.3000,7.2399,2.9058,15.1265,1.9673,6.6924,4.5115
2010-01-05,7.1707,6.3177,4.5601,5.8273,7.8710,15.2800,4.6600,9.7503,5.4528,5.4641,...,3.6296,10.6214,4.8180,11.7370,7.4300,2.9485,15.7055,1.9902,6.8567,4.5674
2010-01-06,7.0476,6.3177,4.5557,6.0075,7.8182,15.3795,4.6530,9.7685,5.5514,5.4682,...,3.6871,10.7202,4.7493,11.6990,7.4458,2.9010,15.6412,1.9673,6.8343,4.4388
2010-01-07,6.9706,6.2689,4.6421,5.6898,7.9180,15.3484,4.5487,9.8956,5.4651,5.3946,...,3.5976,10.4313,4.6659,11.2937,7.2240,2.8583,15.5125,1.9306,6.6774,4.2879
2010-01-08,6.9553,6.3116,4.8666,5.6287,7.9063,15.5970,4.6008,9.9773,5.4733,5.4396,...,3.6296,10.4997,4.6266,11.0910,7.1923,2.8678,15.1828,1.9398,6.6401,4.3270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,16.5085,17.5668,13.8251,8.8987,15.5371,,8.0435,3.6800,10.3690,1.9200,...,3.8913,6.1054,9.2355,4.6047,12.9815,5.1826,6.5506,2.6849,3.3551,4.0644
2021-12-28,16.4605,17.6205,14.1732,8.8797,15.7429,,7.7769,3.7000,10.3011,1.8800,...,3.8913,5.9025,9.3608,4.4220,13.6063,5.2180,6.5795,2.6937,3.2166,4.0910
2021-12-29,16.0579,17.3519,14.1931,9.2881,15.4979,,7.6722,3.6400,10.3258,1.8900,...,3.8722,5.8749,9.2740,4.4797,13.5022,5.1826,6.5699,2.6761,3.1176,4.0999
2021-12-30,16.1250,17.1191,14.1036,9.2406,15.5273,,7.6817,3.6800,10.3134,1.8800,...,3.8817,5.8657,9.2933,4.4220,12.9815,5.1826,6.5699,2.6761,3.1671,4.0821


In [6]:
returns.data

order_book_id,000001.XSHE,000002.XSHE,000009.XSHE,000012.XSHE,000021.XSHE,000024.XSHE,000027.XSHE,000031.XSHE,000039.XSHE,000046.XSHE,...,601872.XSHG,601898.XSHG,601899.XSHG,601918.XSHG,601919.XSHG,601939.XSHG,601958.XSHG,601988.XSHG,601991.XSHG,601998.XSHG
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,,,,,,,,,,,...,,,,,,,,,,
2010-01-05,-0.017295,-0.022633,-0.025828,-0.016987,0.022899,-0.045054,-0.000729,-0.020976,0.013758,-0.034680,...,0.016125,0.037895,0.020806,0.038673,0.026257,0.014695,0.038277,0.011640,0.024550,0.012391
2010-01-06,-0.017167,0.000000,-0.000965,0.030923,-0.006708,0.006512,-0.001502,0.001867,0.018082,0.000750,...,0.015842,0.009302,-0.014259,-0.003238,0.002127,-0.016110,-0.004094,-0.011506,-0.003267,-0.028156
2010-01-07,-0.010926,-0.007724,0.018965,-0.052884,0.012765,-0.002022,-0.022416,0.013011,-0.015546,-0.013460,...,-0.024274,-0.026949,-0.017560,-0.034644,-0.029789,-0.014719,-0.008228,-0.018655,-0.022958,-0.033996
2010-01-08,-0.002195,0.006811,0.048362,-0.010739,-0.001478,0.016197,0.011454,0.008256,0.001500,0.008342,...,0.008895,0.006557,-0.008423,-0.017948,-0.004388,0.003324,-0.021254,0.004765,-0.005586,0.009119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,-0.005200,-0.001018,-0.013486,-0.008468,-0.008133,0.000000,0.029239,0.013774,0.002989,0.000000,...,0.002473,-0.017809,-0.011347,0.002089,0.018516,-0.006784,-0.001479,0.003288,-0.011665,-0.004336
2021-12-28,-0.002908,0.003057,0.025179,-0.002135,0.013246,0.000000,-0.033145,0.005435,-0.006548,-0.020833,...,0.000000,-0.033233,0.013567,-0.039677,0.048130,0.006831,0.004412,0.003278,-0.041280,0.006545
2021-12-29,-0.024459,-0.015244,0.001404,0.045993,-0.015563,0.000000,-0.013463,-0.016216,0.002398,0.005319,...,-0.004908,-0.004676,-0.009273,0.013048,-0.007651,-0.006784,-0.001459,-0.006534,-0.030778,0.002176
2021-12-30,0.004179,-0.013416,-0.006306,-0.005114,0.001897,0.000000,0.001238,0.010989,-0.001201,-0.005291,...,0.002453,-0.001566,0.002081,-0.012880,-0.038564,0.000000,0.000000,0.000000,0.015878,-0.004342


In [7]:
# 直接计算alpha1（alpha101中的第一个因子）
alpha1 = rank(Ts_ArgMax(SignedPower(returns.where(returns.data < 0, stddev(returns, 5), close), 2.), 5))

In [8]:
# 计算后的因子数据
alpha1.data

order_book_id,000001.XSHE,000002.XSHE,000009.XSHE,000012.XSHE,000021.XSHE,000024.XSHE,000027.XSHE,000031.XSHE,000039.XSHE,000046.XSHE,...,601872.XSHG,601898.XSHG,601899.XSHG,601918.XSHG,601919.XSHG,601939.XSHG,601958.XSHG,601988.XSHG,601991.XSHG,601998.XSHG
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,,,,,,,,,,,...,,,,,,,,,,
2010-01-05,,,,,,,,,,,...,,,,,,,,,,
2010-01-06,,,,,,,,,,,...,,,,,,,,,,
2010-01-07,,,,,,,,,,,...,,,,,,,,,,
2010-01-08,158.0,173.0,228.0,183.0,143.0,43.0,237.0,110.0,201.0,196.0,...,269.0,95.0,229.0,80.0,154.0,286.0,44.0,295.0,168.0,244.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,65.0,57.0,75.0,120.0,69.0,,134.0,230.0,107.0,281.0,...,232.0,166.0,116.0,207.0,85.0,194.0,162.0,273.0,253.0,228.0
2021-12-28,287.0,60.0,73.0,120.0,70.0,,134.0,239.0,109.0,278.0,...,232.0,166.0,115.0,206.0,82.0,193.0,162.0,268.0,251.0,226.0
2021-12-29,287.0,60.0,79.0,120.0,70.0,,134.0,236.0,109.0,278.0,...,229.0,281.0,115.0,204.0,82.0,191.0,163.0,266.0,247.0,223.0
2021-12-30,67.0,60.0,79.0,117.0,70.0,,134.0,233.0,108.0,277.0,...,228.0,280.0,116.0,202.0,84.0,189.0,164.0,265.0,242.0,220.0


In [9]:
# 计算方式
alpha1.expr

'rank(Ts_ArgMax(SignedPower((condition ? stddev(returns, 5) : close), 2.0), 5))'

In [10]:
# 为了便于后面的因子分析，还需要把因子数据转化为alphalens能接受的格式
factor = alpha1.data.stack().reset_index()
factor.columns = ['date', 'asset', 'Alpha1']
factor.set_index(['date', 'asset'], inplace=True)
factor

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha1
date,asset,Unnamed: 2_level_1
2010-01-08,000001.XSHE,158.0
2010-01-08,000002.XSHE,173.0
2010-01-08,000009.XSHE,228.0
2010-01-08,000012.XSHE,183.0
2010-01-08,000021.XSHE,143.0
...,...,...
2021-12-31,601939.XSHG,188.0
2021-12-31,601958.XSHG,161.0
2021-12-31,601988.XSHG,265.0
2021-12-31,601991.XSHG,249.0


In [11]:
factor.to_hdf('/Users/syesw/Desktop/single_factor_research/temp/hs300_alpha1_factor.h5', key='data', mode='w', complevel=9, complib='blosc')

In [13]:
price.to_hdf('/Users/syesw/Desktop/single_factor_research/temp/hs300_price.h5', key='data', mode='w', complevel=9, complib='blosc')