In [1]:
import sys
sys.path.append(r'D:/projects/singletrader/')
# import singletrader
# from singletrader.datautils.qlibapi.constructor.base import MultiFactor
import pandas as pd
import warnings
from workflow import MultiFactorTesting,bar_resample
from pprint import pprint
import numpy as np
warnings.filterwarnings('ignore')

In [2]:


# Each pair is (field expression handed to the data source, column name used
# in the rest of the notebook). Keeping them side by side makes the
# position-by-position correspondence between `fields` and `names` explicit.
_FIELD_COLUMN_PAIRS = [
    ('$close', 'close'),
    ('$open', 'open'),
    ('$high', 'high'),
    ('$low', 'low'),
    ('$avg', 'avg'),
    ('$volume', 'volume'),
    ('$circulating_market_cap', 'market_cap'),
    ('$turnover_ratio', 'turnover_ratio'),
]

fields = [expression for expression, _ in _FIELD_COLUMN_PAIRS]
bars = [column for _, column in _FIELD_COLUMN_PAIRS]
# `names` is a separate list so later additions to it do not alter `bars`.
names = list(bars)


In [None]:
# `MultiFactor` is only used by this raw-data cell. Its import in the first
# cell is commented out, so without this local import the cell raises
# NameError on a fresh kernel.
from singletrader.datautils.qlibapi.constructor.base import MultiFactor

# Request the daily fields for 2009-01-01 .. 2022-12-31 under the column
# names collected in `names`.
mf = MultiFactor(field=fields, name=names, start_date='2009-01-01', end_date='2022-12-31')

# Swap the two index levels and name them ('date', 'asset'), the level names
# the later groupby('asset') / droplevel('asset') calls rely on.
# NOTE(review): this reads the private attribute `_data`; presumably the frame
# is indexed (asset, date) before the swap — confirm against MultiFactor.
data = mf._data.swaplevel(0, 1)
data.index = data.index.set_names(['date', 'asset'])

In [None]:
## raw data (daily)
# Computing `distance` uses a 252-day rolling high, which consumes the first year of history, so the raw data starts in 2009.

In [None]:
# # Raw data example
# data.head()

In [None]:
def _monthly_return_skew(asset_bars):
    """Skewness of one asset's daily close-to-close returns within each month."""
    daily_close = asset_bars['close'].droplevel('asset')
    daily_return = daily_close.pct_change()
    return daily_return.resample('M').apply(lambda month: month.skew())


def _monthly_distance_from_high(asset_bars):
    """Last value in each month of 1 - close / max(high over the trailing 252 rows)."""
    trailing_high = asset_bars['high'].rolling(252).max()
    gap = 1 - asset_bars['close'] / trailing_high
    return gap.droplevel('asset').resample('M').last()


def _three_month_momentum(asset_monthly):
    """Close divided by the close three rows earlier, minus one."""
    close = asset_monthly['close']
    return close / close.shift(3) - 1


def _three_month_turnover(asset_monthly):
    """Rolling three-row sum of the monthly turnover ratio."""
    return asset_monthly['turnover_ratio'].rolling(3).sum()


# Monthly skewness of returns. The per-asset results are stacked and the index
# levels swapped so that the date level comes first.
# (assumes groupby.apply returned an asset-by-month table — TODO confirm)
skew = data.groupby('asset').apply(_monthly_return_skew).stack().swaplevel(0, 1)
skew.name = 'skew'

# Distance from the one-year high: 1 - close / Max(high, 252)
distance = data.groupby('asset').apply(_monthly_distance_from_high).stack().swaplevel(0, 1)
distance.name = 'distance'

# Down-sample the price bars to monthly frequency
bar_monthly = bar_resample(data[bars], frequency='M')

# 3-month momentum; droplevel(0) removes the extra group-key level added by apply
mom3M = bar_monthly.groupby('asset').apply(_three_month_momentum).droplevel(0)
mom3M.name = 'mom3M'

# 3-month turnover
turnover3M = bar_monthly.groupby('asset').apply(_three_month_turnover).droplevel(0)
turnover3M.name = 'turnover3M'

# Merge and align bars and factors column-wise on the shared index
factor_columns = [bar_monthly, skew, distance, mom3M, turnover3M]
merged_data = pd.concat(factor_columns, axis=1)

## Notes
universe: all A share

period: 2010.01~2022.12 (156 months)

$ forward\_return = \frac{close_{next\_month}}{open_{next\_month}}-1$

$ distance =  1 - close / MAX(high,252days)$

mom3M: current close / close 3 months ago - 1 (3-month price return)

turnover3M: sum of last 3 months turnover ratio

market_cap: market capitalization in circulation

benchmark for equal weighted: equal-weighted average of all stocks' forward returns

benchmark for market cap weighted: market-cap-weighted average of all stocks' forward returns

## merged data (monthly)
处理和合并后行情和因子值的月度数据，

In [3]:
# Load the pre-computed monthly price/factor table and preview it.
# (`MultiFactorTesting` is already imported in the notebook's first cell, so
# the duplicate import that used to live here is dropped.)
from pathlib import Path

# Notebook location on the original author's machine. It is kept in an
# ordinary variable: the previous version assigned it to `__file__`, which
# shadows a module dunder that tools inspecting `__main__` may read.
file = Path(r'D:\projects\singletrader\samples\double_sort_framework\workflow.ipynb')
parent_path = file.parent
data_file = parent_path / 'data' / 'price_and_factor_data.csv'
if not data_file.exists():
    # On any other machine the absolute path above does not exist; fall back
    # to ./data under the kernel's working directory so the notebook can run
    # wherever the CSV sits in a `data` folder beside it.
    data_file = Path.cwd() / 'data' / 'price_and_factor_data.csv'

merged_data = (
    pd.read_csv(data_file)
    # the date column must be datetime
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    # the frame must carry a two-level (date, asset) index
    .set_index(['date', 'asset'])
    # keep only rows where every bar and factor value is present
    .dropna()
)
merged_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,close,open,high,low,avg,volume,market_cap,turnover_ratio,skew,distance,mom3M,turnover3M
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-31,000001.XSHE,848.17,958.39,960.73,805.17,858.33,24294088.0,634.5328,32.4735,0.232038,0.188175,-0.03469,78.139599
2010-02-28,000001.XSHE,877.48,848.56,884.13,820.81,875.92,10662390.0,656.4637,14.2521,0.88619,0.160121,-0.074613,67.740299
2010-03-31,000001.XSHE,906.8,877.48,948.62,866.54,908.75,15706730.0,678.3945,20.9947,0.091379,0.132058,-0.048009,67.720299
2010-04-30,000001.XSHE,803.61,909.53,932.59,754.75,798.92,20433608.0,601.1979,27.3131,-1.859854,0.230826,-0.052537,62.5599
2010-05-31,000001.XSHE,684.4,785.63,793.45,665.24,694.56,18574160.0,512.0124,24.8277,-0.454606,0.344928,-0.220039,73.1355


## forward return & factor quantile

In [4]:
# Bucket names for the five quantiles of each factor.
distance_skew_labels = {
    'distance': ['Near', 'Moderate Near', 'Med', 'Moderate Far', 'Far'],
    'skew': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['distance', 'skew']],
    add_shift=0,
)
# An earlier, commented-out variant of this call passed per-factor quantiles
# instead: quantiles={'distance':3,'skew':3}
clean_factor_return = mft.get_clean_factor_return(quantiles=5, labels=distance_skew_labels)

# Preview of the factor buckets and next-period return
clean_factor_return.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,next_return,distance,skew
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-31,000001.XSHE,0.034081,Moderate Far,Moderate High
2010-01-31,000002.XSHE,0.013977,Far,Moderate Low
2010-01-31,000004.XSHE,0.035992,Near,Medium
2010-01-31,000005.XSHE,0.035326,Far,High
2010-01-31,000006.XSHE,0.040107,Far,Moderate High


# Part I skew & distance (5x5)

In [5]:
from workflow import MultiFactorTesting

# Bucket names for the 5x5 skew / distance double sort.
skew_distance_labels = {
    'distance': ['Near', 'Moderate Near', 'Med', 'Moderate Far', 'Far'],
    'skew': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['skew', 'distance']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=skew_distance_labels)

# Print every summary entry under its key, separated by blank lines.
for metric in summary:
    print(metric)
    pprint(summary[metric])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.14%
distance       Near  Moderate Near   Med  Moderate Far   Far
skew                                                        
Low            1.33           3.58  2.77          4.10  4.69
Moderate Low   4.77           2.88  2.93          3.79  6.28
Medium         3.79           0.57 -1.61          2.06  2.98
Moderate High -3.70          -5.67 -5.58         -3.86 -1.45
High          -0.54          -7.15 -8.93         -8.26 -5.75


avg_anual_excess_return_cap(%)_with_mkt=6.18%
distance       Near  Moderate Near   Med  Moderate Far   Far
skew                                                        
Low            0.86           4.36  2.92          2.47  2.59
Moderate Low   4.89           2.72  1.20          4.00  2.69
Medium         1.15           3.46 -3.15          0.94  2.13
Moderate High  0.15          -3.99 -5.04         -2.04 -0.45
High           5.60          -4.48 -4.93         -6.98 -4.78


hit_rate_eq(%)
distance        Near  Moderate Nea

# Part II Mom3M & turnover3M (5x5)

In [6]:
# Bucket names for the 5x5 momentum / turnover double sort.
mom_turnover_labels = {
    'mom3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
    'turnover3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['mom3M', 'turnover3M']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=mom_turnover_labels)

# Print every summary entry under its key, separated by blank lines.
for metric in summary:
    print(metric)
    pprint(summary[metric])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.14%
turnover3M      Low  Moderate Low  Medium  Moderate High   High
mom3M                                                          
Low            8.90          4.56    2.90           2.44   0.03
Moderate Low   6.43          2.42    2.27           0.86  -0.26
Medium         4.59          4.50    1.54           1.95  -4.33
Moderate High  4.18         -0.04    0.50          -2.24  -5.33
High          -1.15         -3.77   -3.59          -5.22 -16.66


avg_anual_excess_return_cap(%)_with_mkt=6.18%
turnover3M      Low  Moderate Low  Medium  Moderate High   High
mom3M                                                          
Low            2.95          5.22    4.68           3.36  -1.43
Moderate Low   6.24          1.72    5.90          -0.36  -1.33
Medium         4.56          4.89    3.08           2.12  -4.32
Moderate High  1.83          0.34    2.30          -1.23  -6.03
High          -0.69         -3.27   -0.13          -5.93 -12.49


hit_rat

# Part III turnover3M & distance (5x5)

In [7]:
# Bucket names for the 5x5 turnover / distance double sort.
# Note: unlike the skew / distance cell, distance buckets are labelled
# Low..High here rather than Near..Far.
turnover_distance_labels = {
    'turnover3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
    'distance': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['turnover3M', 'distance']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=turnover_distance_labels)

# Print every summary entry under its key, separated by blank lines.
for metric in summary:
    print(metric)
    pprint(summary[metric])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.14%
distance         Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                      
Low            11.12          0.95    3.28           2.81  10.68
Moderate Low    2.51          2.49    0.28           2.53   2.64
Medium          2.51          0.40   -2.32           0.89   3.47
Moderate High  -0.53         -1.42   -1.60          -0.16   0.75
High          -13.62         -8.38   -8.88          -5.94  -2.22


avg_anual_excess_return_cap(%)_with_mkt=6.18%
distance         Low  Moderate Low  Medium  Moderate High  High
turnover3M                                                     
Low             5.40         -0.86   -0.59          -0.65  2.18
Moderate Low    1.80          1.78   -0.14           3.20  0.67
Medium          5.05          1.92   -3.79           0.71  5.36
Moderate High  -3.32          0.57   -2.94          -1.96  0.56
High          -11.75         -6.20   -9.10          -5.46 -3.32




# Part IV turnover3M & skew (5x5)

In [8]:
# Bucket names for the 5x5 turnover / skew double sort.
turnover_skew_labels = {
    'turnover3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
    'skew': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['turnover3M', 'skew']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=turnover_skew_labels)

# Print every summary entry under its key, separated by blank lines.
for metric in summary:
    print(metric)
    pprint(summary[metric])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.14%
skew            Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                     
Low            0.95          8.65    7.07          -0.42   1.29
Moderate Low   4.46          5.09    2.25           0.13  -2.04
Medium         4.96          4.43    1.18          -0.79  -4.59
Moderate High  4.85          4.09    0.70          -3.27  -7.06
High           0.84         -2.64   -6.09         -11.38 -15.24


avg_anual_excess_return_cap(%)_with_mkt=6.18%
skew            Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                     
Low            0.29          1.63    0.08           1.13   4.56
Moderate Low   3.70          4.95    1.28          -0.82   1.52
Medium         6.64          7.02   -0.12           3.75  -3.59
Moderate High  4.38          2.16   -0.11          -3.56  -5.31
High           0.68         -2.28   -6.26         -12.00 -12.59


hit_rat

In [None]:
# NOTE(review): `performance_indicator` is not imported in any cell visible in
# this notebook — confirm where it comes from before running this cell.

# Rows with any missing bar or factor value are excluded before testing.
complete_rows = merged_data.dropna()

# Bucket names for the two 5x5 double sorts evaluated below.
ds_labels = {
    'distance': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
    'skew': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}
mt_labels = {
    'mom3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
    'turnover3M': ['Low', 'Moderate Low', 'Medium', 'Moderate High', 'High'],
}

# Per-cell returns for the distance x skew sort.
ds_tester = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['distance', 'skew']],
    add_shift=0,
)
cell_return_ds = ds_tester.compute_cell_return_eq(quantiles=5, labels=ds_labels)

# Per-cell returns for the mom3M x turnover3M sort.
mt_tester = MultiFactorTesting(
    bar_data=complete_rows[bars],
    factor_data=complete_rows[['mom3M', 'turnover3M']],
    add_shift=0,
)
cell_return_mt = mt_tester.compute_cell_return_eq(quantiles=5, labels=mt_labels)

# Compound the cell returns into cumulative value series, compute the
# performance table with freq=12, and transpose it.
per_ds = performance_indicator((1 + cell_return_ds).cumprod(), freq=12).T
per_mt = performance_indicator((1 + cell_return_mt).cumprod(), freq=12).T

# Keep only the rows whose Sharpe-ratio column is at least 0.4.
# (the 0.02 in the column name is presumably the risk-free rate — TODO confirm)
sharpe_column = '夏普比(0.02)'
ds_portfolio = per_ds[per_ds[sharpe_column] >= 0.4]
mt_portfolio = per_mt[per_mt[sharpe_column] >= 0.4]