In [1]:
import pandas as pd
# Lift pandas' display truncation so every row/column of a displayed frame is rendered.
# NOTE(review): the frames in this notebook have millions of rows — always display them
# through .head()/.shape, never as a bare frame.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import numpy as np
import warnings
# Silence every warning for the session.
# NOTE(review): this also hides pandas SettingWithCopyWarning / FutureWarning messages;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Per-stock daily indicators, without the indicator columns that are not used below.
daily_basic = pd.read_csv("./raw_data/股票每日指标.csv").drop(
    columns=['pe_ttm', 'volume_ratio', 'turnover_rate', 'ps', 'ps_ttm',
             'dv_ratio', 'dv_ttm', 'total_share', 'free_share', 'circ_mv'],
)
# Daily price bars. 'close' is dropped on this side, so the merged frame keeps a single
# 'close' column (the one coming from daily_basic).
daily = pd.read_csv("./raw_data/股票日线行情.csv").drop(
    columns=['close', 'change', 'pct_chg', 'pre_close', 'amount'],
)
# Default (inner) join: only (ts_code, trade_date) pairs present in both files survive.
data = daily_basic.merge(daily, on=['ts_code', 'trade_date'])
data.head()

Unnamed: 0,ts_code,trade_date,close,turnover_rate_f,pe,pb,float_share,total_mv,lclose,lreturn,open,high,low,vol
0,000001.SZ,20130104,15.99,1.7178,7.9702,1.0087,310535.8672,8192237.0,2.771964,-0.001874,16.32,16.45,15.92,443851.37
1,000001.SZ,20130107,16.3,1.3823,8.1247,1.0283,310535.8672,8351061.0,2.791165,0.019202,15.98,16.35,15.88,357169.25
2,000001.SZ,20130108,16.0,1.2093,7.9751,1.0094,310535.8672,8197361.0,2.772589,-0.018576,16.3,16.37,15.86,312479.12
3,000001.SZ,20130109,15.86,0.9727,7.9054,1.0005,310535.8672,8125634.0,2.7638,-0.008789,15.96,16.02,15.8,251329.15
4,000001.SZ,20130110,15.87,0.929,7.9104,1.0012,310535.8672,8130757.0,2.764431,0.00063,15.86,16.1,15.79,240030.27


In [16]:
# (rows, columns) of the daily-bar frame after the column drop in the load cell.
# NOTE(review): the recorded execution count is In [16], i.e. this cell was run out of
# order relative to its neighbours — re-run the notebook top to bottom before sharing.
daily.shape

(6141655, 6)

In [10]:
# Look-back horizons in months. The rolling helpers below turn each horizon n into a
# window of n*20 rows (presumably ~20 trading days per month).
n_list = [1,2,3,6,12]

###### Momentum factors
def rolling_sum(data,n):
    """Add the trailing sum of ``lreturn`` over ``n*20`` rows as ``return_{n}m``.

    Rows that do not yet have a full window are NaN. ``data`` is modified in
    place and the same object is returned.
    """
    window = n * 20
    column = 'return_{}m'.format(n)
    data[column] = data['lreturn'].rolling(window).sum()
    return data

def weighted_rolling_sum(data,n):
    """Add the inverse-turnover-weighted average return as ``wgt_return_{n}m``.

    Steps, all over a window of ``n*20`` rows:
      1. weight = (1 / turnover_rate_f) divided by its own trailing sum;
      2. weight * lreturn, summed over the trailing window;
      3. divided by the window length.

    Because both rolling passes need a full window, the first ``2*n*20 - 2``
    rows are NaN. ``data`` is modified in place and returned.
    """
    window = n * 20
    inv_turnover = 1 / data['turnover_rate_f']
    weights = inv_turnover / inv_turnover.rolling(window).sum()
    weighted_return = weights * data['lreturn']
    data['wgt_return_{}m'.format(n)] = weighted_return.rolling(window).sum() / window
    return data

def momentum_factor(data):
    """Add daily log returns and the momentum factor columns.

    Columns added:
      * ``lclose``            - natural log of ``close``;
      * ``lreturn``           - per-stock first difference of ``lclose``,
                                clipped to [-0.1, 0.1];
      * ``adjustment_return`` - the next row's *unclipped* ``lreturn`` of the
                                same stock;
      * ``return_{n}m`` / ``wgt_return_{n}m`` for every n in ``n_list``
        (see ``rolling_sum`` / ``weighted_rolling_sum``).

    The input frame is not modified: ``sort_values`` returns a new frame that
    all later assignments write to. The result is sorted by
    (ts_code, trade_date) and keeps the original row index.
    """
    # Chronological order within each stock, so diff/shift/rolling see adjacent days.
    data = data.sort_values(['ts_code','trade_date'])

    # Daily log return per stock; the first row of every stock is NaN.
    data['lclose'] = np.log(data['close'])
    data['lreturn'] = data.groupby('ts_code')['lclose'].diff()

    # Next-day return, taken before clipping so it keeps the raw value.
    data['adjustment_return'] = data.groupby('ts_code')['lreturn'].shift(-1)

    # Winsorise daily returns. A single column assignment replaces the chained
    # `data.lreturn[mask] = ...`, which is not guaranteed to write through.
    data['lreturn'] = data['lreturn'].clip(lower=-0.1, upper=0.1)

    # group_keys=False keeps the original row index. Without it, pandas >= 2.0
    # prepends ts_code as an index level and the next groupby('ts_code') is
    # ambiguous (ts_code would be both an index level and a column).
    # Cumulative return over the last 1, 2, 3, 6, 12 months.
    for i in n_list:
        data = data.groupby('ts_code', group_keys=False).apply(rolling_sum,i)
    # Turnover-weighted average daily return over the last 1, 2, 3, 6, 12 months.
    for i in n_list:
        data = data.groupby('ts_code', group_keys=False).apply(weighted_rolling_sum,i)
    return data


###### Volatility factors
def rolling_max(data,n):
    """Add the trailing high/low price ratio over ``n*20`` rows as ``high_low_{n}m``.

    The ratio is the window's maximum ``high`` divided by the window's minimum
    ``low``; rows without a full window are NaN. ``data`` is modified in place
    and returned.
    """
    window = n * 20
    highest = data['high'].rolling(window).max()
    lowest = data['low'].rolling(window).min()
    data['high_low_{}m'.format(n)] = highest / lowest
    return data

def rolling_std(data,n):
    """Add the trailing standard deviation of ``lreturn`` over ``n*20`` rows as ``std_{n}m``.

    Rows without a full window are NaN. ``data`` is modified in place and
    returned.
    """
    window = n * 20
    column = 'std_{}m'.format(n)
    data[column] = data['lreturn'].rolling(window).std()
    return data

def volatility_factor(data):
    """Add the volatility factor columns for every horizon in ``n_list``.

    Columns added: ``high_low_{n}m`` (see ``rolling_max``), ``std_{n}m`` (see
    ``rolling_std``) and ``ln_price`` (natural log of ``close``).

    ``rolling_std`` reads the ``lreturn`` column, so the frame must already
    carry it (``momentum_factor`` creates it).
    """
    # group_keys=False keeps the original row index. Without it, pandas >= 2.0
    # prepends ts_code as an index level and the next groupby('ts_code') is
    # ambiguous (ts_code would be both an index level and a column).
    # Highest high / lowest low over the last 1, 2, 3, 6, 12 months.
    for i in n_list:
        data = data.groupby('ts_code', group_keys=False).apply(rolling_max,i)
    # Standard deviation of daily returns over the last 1, 2, 3, 6, 12 months.
    for i in n_list:
        data = data.groupby('ts_code', group_keys=False).apply(rolling_std,i)
    # Log of the closing price.
    data['ln_price'] = np.log(data['close'])
    return data


########## Turnover factors
def rolling_turnover(data,n):
    """Add the trailing turnover over ``n*20`` rows as ``turnover_{n}m``.

    Computed as the window sum of ``vol`` divided by the row's ``float_share``
    and then by 100. Rows without a full window are NaN. ``data`` is modified
    in place and returned.
    """
    window = n * 20
    traded_volume = data['vol'].rolling(window).sum()
    data['turnover_{}m'.format(n)] = traded_volume / data['float_share'] / 100
    return data

def turnover_factor(data):
    """Add ``turnover_{n}m`` for every horizon in ``n_list`` (see ``rolling_turnover``)."""
    # Turnover over the last 1, 2, 3, 6, 12 months.
    # group_keys=False keeps the original row index. Without it, pandas >= 2.0
    # prepends ts_code as an index level and the next groupby('ts_code') is
    # ambiguous (ts_code would be both an index level and a column).
    for i in n_list:
        data = data.groupby('ts_code', group_keys=False).apply(rolling_turnover,i)
    return data
    

########## Size factor
def size_factor(data):
    """Add ``size_factor``, the natural log of ``total_mv``.

    ``data`` is modified in place and returned.
    """
    market_value = data['total_mv']
    # Log market value.
    data['size_factor'] = np.log(market_value)
    return data
    

######### Technical factors
def rolling_correlation(data,d):
    """Add ``Alpha1``: minus the rolling Spearman correlation of ``open`` and ``vol``.

    For every row the correlation is computed over the trailing ``d`` rows
    (including the current one); the first ``d - 1`` rows are NaN, as are
    windows in which either column is constant. ``data`` is modified in place
    and returned.

    ``Rolling.corr`` has no ``method`` argument (its first positional
    parameter is ``other``), so the rank correlation is evaluated window by
    window with ``DataFrame.corr(method='spearman')``.

    NOTE: this runs one Python-level call per row, which is slow on very
    large frames.
    """
    pair = data[['open','vol']].astype(float)

    def _window_spearman(window_pos):
        # window_pos holds the integer row positions of the current window.
        window = pair.iloc[window_pos.astype(int)]
        return window.corr(method='spearman').iloc[0, 1]

    # Roll over row positions so each window can pull both columns at once;
    # Rolling.apply itself only ever sees a single column.
    positions = pd.Series(np.arange(len(data), dtype=float), index=data.index)
    data['Alpha1'] = -1 * positions.rolling(d).apply(_window_spearman, raw=True)
    return data

def technical_factor(data):
    """Add the ``Alpha1`` column per stock, using a 10-row window (see ``rolling_correlation``)."""
    # group_keys=False keeps the original row index; without it pandas >= 2.0
    # returns a (ts_code, row) MultiIndex, unlike the other factor builders.
    data = data.groupby('ts_code', group_keys=False).apply(rolling_correlation,10)
    return data

def cal_factor(data):
    """Run the factor pipeline and persist the result.

    Applies, in order, ``momentum_factor``, ``volatility_factor``,
    ``turnover_factor`` and ``size_factor``, drops the raw price/volume inputs,
    writes the table to ``./processed_data/raw_factor.csv`` (without the index)
    and returns it.

    Notes:
      * ``technical_factor`` is not part of this pipeline.
      * ``lclose`` (created by ``momentum_factor``) is not in the drop list and
        therefore stays in the output.
    """
    import os  # local import: used only to make sure the output folder exists

    data = momentum_factor(data)
    data = volatility_factor(data)
    data = turnover_factor(data)
    data = size_factor(data)

    # Raw inputs are no longer needed once the factors have been derived.
    data = data.drop(columns=['total_mv','float_share','vol','turnover_rate_f','close','high','low','open'])

    out_path = './processed_data/raw_factor.csv'
    # Create the target folder up front so a long run is not lost to a missing directory.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    data.to_csv(out_path,index=False)
    return data

In [11]:
# Run the full factor pipeline on the merged frame; as a side effect this also writes
# ./processed_data/raw_factor.csv.
# NOTE(review): the recorded output has no 'lclose' column, although momentum_factor
# creates one and cal_factor does not drop it — the saved output may be stale; re-run
# to confirm.
df = cal_factor(data)
df.head()


Unnamed: 0,ts_code,trade_date,pe,pb,lreturn,adjustment_return,return_1m,return_2m,return_3m,return_6m,return_12m,wgt_return_1m,wgt_return_2m,wgt_return_3m,wgt_return_6m,wgt_return_12m,high_low_1m,high_low_2m,high_low_3m,high_low_6m,high_low_12m,std_1m,std_2m,std_3m,std_6m,std_12m,ln_price,turnover_1m,turnover_2m,turnover_3m,turnover_6m,turnover_12m,size_factor
0,000001.SZ,20130104,7.9702,1.0087,,0.019202,,,,,,,,,,,,,,,,,,,,,2.771964,,,,,,15.918698
1,000001.SZ,20130107,8.1247,1.0283,0.019202,-0.018576,,,,,,,,,,,,,,,,,,,,,2.791165,,,,,,15.937899
2,000001.SZ,20130108,7.9751,1.0094,-0.018576,-0.008789,,,,,,,,,,,,,,,,,,,,,2.772589,,,,,,15.919323
3,000001.SZ,20130109,7.9054,1.0005,-0.008789,0.00063,,,,,,,,,,,,,,,,,,,,,2.7638,,,,,,15.910534
4,000001.SZ,20130110,7.9104,1.0012,0.00063,-0.021013,,,,,,,,,,,,,,,,,,,,,2.764431,,,,,,15.911165


In [12]:
# (rows, columns) of the final factor table.
df.shape

(3703018, 33)