In [2]:
import sys
sys.path.append(r'D:/projects/singletrader/')
# import singletrader
# from singletrader.datautils.qlibapi.constructor.base import MultiFactor
import pandas as pd
import warnings
from samples.double_sort_framework.workflow import MultiFactorTesting,bar_resample
from pprint import pprint
import numpy as np
warnings.filterwarnings('ignore')

In [3]:


# Each pair is (field expression requested from the data source, output column name).
_BAR_FIELD_TABLE = [
    ('$close', 'close'),
    ('$open', 'open'),
    ('$high', 'high'),
    ('$low', 'low'),
    ('$avg', 'avg'),
    ('$volume', 'volume'),
    ('$circulating_market_cap', 'market_cap'),
    ('$turnover_ratio', 'turnover_ratio'),
]

# Field expressions, in request order.
fields = [expression for expression, _ in _BAR_FIELD_TABLE]
# Column names of the bar (price/volume) data, reused by later cells.
bars = [column for _, column in _BAR_FIELD_TABLE]
# Output names for every requested field; kept as a separate list object from `bars`.
names = list(bars)


In [None]:
# Load the raw daily data for every requested field.
# MultiFactor is a project-local class whose import in the first cell is commented out,
# so it is imported here; otherwise this cell raises NameError on a fresh kernel.
# NOTE(review): module path taken from the commented-out import — confirm it is current.
from singletrader.datautils.qlibapi.constructor.base import MultiFactor

mf = MultiFactor(field=fields,name=names,start_date='2009-01-01',end_date='2022-12-31')
# Swap the two index levels and name them (date, asset), the layout the later cells group on.
data = mf._data.swaplevel(0,1)
data.index = data.index.set_names(['date','asset'])

In [None]:
## raw data (daily)
# 因为计算distance需要损失过去一年数据，所以原始数据从2009年开始

In [None]:
# #原始数据示例
# data.head()

In [None]:
# Monthly return skewness: per asset, take the pct_change of close, bucket it with
# resample('M'), and compute Series.skew() inside each bucket.
skew = data.groupby('asset').apply(lambda x:x['close'].droplevel('asset').pct_change().resample('M').apply(lambda x:x.skew()))
# Reshape to a Series indexed by (date, asset).
# NOTE(review): .stack() assumes the apply above returned an asset-by-date frame — confirm.
skew = skew.stack().swaplevel(0,1)
skew.name = 'skew'

# Distance from the trailing high: 1 - close / max(high over the last 252 rows),
# keeping the last value in each resample('M') bucket.
distance = data.groupby('asset').apply(lambda x:(1-x['close'] / x['high'].rolling(252).max()).droplevel('asset').resample('M').last())
distance =  distance.stack().swaplevel(0,1)
distance.name = 'distance'


# Downsample the bar columns to monthly frequency with the project helper bar_resample.
bar_monthly = bar_resample(data[bars],frequency='M')

# 3-month momentum: close divided by close shifted 3 rows, minus 1, per asset
# (rows are presumably months after the resample above).
mom3M =  bar_monthly.groupby('asset').apply(lambda x:x['close']/x['close'].shift(3)-1).droplevel(0)
mom3M.name = 'mom3M'


# 3-month turnover: rolling 3-row sum of turnover_ratio, per asset.
turnover3M =  bar_monthly.groupby('asset').apply(lambda x:x['turnover_ratio'].rolling(3).sum()).droplevel(0)
turnover3M.name = 'turnover3M'


# Merge the monthly bars and all factors column-wise, aligned on the shared index.
merged_data = pd.concat([bar_monthly, skew, distance, mom3M, turnover3M],axis=1)

## Notes
universe: all A share

period: 2010.01~2022.12 (156 months)

$ forward\_return = \frac{close_{next\_month}}{open_{next\_month}}-1$

$ distance =  1 - close / MAX(high,252days)$

mom3M: current close / close 3 months ago - 1

turnover3M: sum of last 3 months turnover ratio

market_cap: market capitalization in circulation

benchmark for equal weighted: equal-weighted average of all stocks' forward returns

benchmark for market-cap weighted: market-cap-weighted average of all stocks' forward returns

## merged data (monthly)
处理和合并后行情和因子值的月度数据，

In [4]:
# Load the merged monthly price-and-factor table from disk.
from pathlib import Path

# The notebook location is set by hand here and used to derive the parent folder.
__file__ = r'D:\projects\singletrader\samples\double_sort_framework\workflow.ipynb'
file = Path(__file__)
parent_path = file.parent
# Path of the first-report dataset; defined here but not read in this cell.
data_file = r'D:\projects\singletrader\samples\double_sort_framework\report_1st\price_and_factor_data.csv'

# The third-report dataset is the one actually loaded.
merged_data = (
    pd.read_csv(r'D:\projects\singletrader\samples\double_sort_framework\report3rd\third_data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))  # the date field must be datetime
    .set_index(['date', 'asset'])  # the index must be the (date, asset) pair
    .dropna()
)
# Preview of the cleaned table (only displayed when it is the final expression of a cell).
merged_data.dropna().head()

# Replace amount3M and ep with their cross-sectional percentile rank within each date
# (index level 0): rank divided by the number of non-null values in that date.
# GroupBy.rank(pct=True) returns a result on the original (date, asset) index, so the
# assignment aligns directly. The earlier apply(...).droplevel(0) form only aligned when
# groupby.apply prepended the group key, which is not the case in every pandas version.
merged_data['amount3M'] = merged_data.groupby(level=0)['amount3M'].rank(pct=True)
merged_data['ep'] = merged_data.groupby(level=0)['ep'].rank(pct=True)

## forward return & factor quantile

In [5]:
# Rows with no missing values feed both the bar data and the factor data.
_preview_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=_preview_rows[bars],
    factor_data=_preview_rows[['distance','skew']],
    add_shift=0,
)

# Five quantile buckets per factor, each with its own display labels.
clean_factor_return = mft.get_clean_factor_return(
    quantiles=5,
    labels={
        'distance': ['Near','Moderate Near','Med','Moderate Far','Far'],
        'skew': ['Low','Moderate Low','Medium','Moderate High','High'],
    },
)

# Sample of the factor buckets together with the next-period return.
clean_factor_return.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,next_return,distance,skew
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-31,000001.XSHE,0.034081,Moderate Far,Moderate High
2010-01-31,000002.XSHE,0.013977,Far,Moderate Low
2010-01-31,000004.XSHE,0.035992,Near,Medium
2010-01-31,000005.XSHE,0.035326,Far,High
2010-01-31,000006.XSHE,0.040107,Far,Moderate High


# double sort performance
avg.ep: 当前cell成分股票平均earning/price ratio的分位数，譬如0.44表示当前cell的平均ep水平在全市场44%分位附近；

avg.liquidity: 当前cell成分股票平均近三个月成交额中位数的分位数，释义同上；

- Part I skew & distance (5x5)

In [6]:
from samples.double_sort_framework.workflow import MultiFactorTesting


def _skew_distance_labels():
    """Return a fresh quintile-label mapping for the distance and skew factors."""
    return {
        'distance': ['Near','Moderate Near','Med','Moderate Far','Far'],
        'skew': ['Low','Moderate Low','Medium','Moderate High','High'],
    }


# Part I: 5x5 double sort on skew and distance.
_part1_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=_part1_rows[bars],
    factor_data=_part1_rows[['skew','distance']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=_skew_distance_labels())
clean_factor_return = mft.get_clean_factor_return(quantiles=5, labels=_skew_distance_labels())

# Attach the percentile-ranked liquidity and ep columns, then average them inside each cell.
clean_factor_return = pd.concat(
    [clean_factor_return, merged_data[['amount3M','ep']]],
    axis=1,
).dropna()
_part1_cells = clean_factor_return.groupby(['skew','distance'])
summary['avg.ep'] = _part1_cells.apply(lambda cell: cell['ep'].mean()).unstack().round(4)
summary['avg.liquidity'] = _part1_cells.apply(lambda cell: cell['amount3M'].mean()).unstack().round(4)

# Print every table in the summary under its own heading.
for table_name in summary:
    print(table_name)
    pprint(summary[table_name])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.2%
distance       Near  Moderate Near   Med  Moderate Far   Far
skew                                                        
Low            1.28           3.58  2.72          4.06  4.77
Moderate Low   4.86           2.95  2.93          3.78  6.24
Medium         3.89           0.66 -1.56          2.07  3.58
Moderate High -3.84          -5.80 -5.69         -3.88 -1.56
High          -0.75          -7.09 -8.93         -8.42 -5.70


hit_rate_eq(%)
distance        Near  Moderate Near    Med  Moderate Far    Far
skew                                                           
Low            56.13          56.77  63.87         58.71  53.55
Moderate Low   54.84          59.35  55.48         51.61  58.06
Medium         56.13          49.03  41.94         52.90  50.97
Moderate High  49.03          42.58  40.65         35.48  45.81
High           51.61          35.48  25.81         32.26  36.77


avg.ep
distance         Near  Moderate Near     Med  Moderat

- Part II Mom3M & turnover3M (5x5)

In [9]:
def _turnover_mom_labels():
    """Return a fresh quintile-label mapping for the mom3M and turnover3M factors."""
    return {
        'mom3M': ['Low','Moderate Low','Medium','Moderate High','High'],
        'turnover3M': ['Low','Moderate Low','Medium','Moderate High','High'],
    }


# Part II: 5x5 double sort on turnover3M and mom3M.
_part2_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=_part2_rows[bars],
    factor_data=_part2_rows[['turnover3M','mom3M']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=_turnover_mom_labels())
clean_factor_return = mft.get_clean_factor_return(quantiles=5, labels=_turnover_mom_labels())

# Attach the percentile-ranked liquidity and ep columns, then average them inside each cell.
clean_factor_return = pd.concat(
    [clean_factor_return, merged_data[['amount3M','ep']]],
    axis=1,
).dropna()
_part2_cells = clean_factor_return.groupby(['turnover3M','mom3M'])
summary['avg.ep'] = _part2_cells.apply(lambda cell: cell['ep'].mean()).unstack().round(4)
summary['avg.liquidity'] = _part2_cells.apply(lambda cell: cell['amount3M'].mean()).unstack().round(4)

# Print every table in the summary under its own heading.
for table_name in summary:
    print(table_name)
    pprint(summary[table_name])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.2%
mom3M           Low  Moderate Low  Medium  Moderate High  High
turnover3M                                                    
Low            7.00          2.94    2.01           1.60  7.54
Moderate Low   0.46          1.89    2.76           4.64  3.85
Medium        -0.69          1.46    3.14           1.97 -0.29
Moderate High -1.78          0.70   -1.24           1.40 -0.18
High          -8.16         -8.96   -8.40          -7.52 -8.41


hit_rate_eq(%)
mom3M            Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                      
Low            53.55         49.68   53.55          51.61  59.35
Moderate Low   50.97         57.42   52.90          56.77  52.90
Medium         47.74         55.48   61.29          50.97  49.68
Moderate High  44.52         47.74   47.10          46.45  50.32
High           38.06         34.19   38.06          36.13  38.06


avg.ep
mom3M             Low  Moderate 

- Part III turnover3M & distance (5x5)

In [8]:
def _turnover_distance_labels():
    """Return a fresh quintile-label mapping for the turnover3M and distance factors."""
    return {
        'turnover3M': ['Low','Moderate Low','Medium','Moderate High','High'],
        'distance': ['Low','Moderate Low','Medium','Moderate High','High'],
    }


# Part III: 5x5 double sort on turnover3M and distance.
_part3_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=_part3_rows[bars],
    factor_data=_part3_rows[['turnover3M','distance']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=_turnover_distance_labels())
clean_factor_return = mft.get_clean_factor_return(quantiles=5, labels=_turnover_distance_labels())

# Attach the percentile-ranked liquidity and ep columns, then average them inside each cell.
clean_factor_return = pd.concat(
    [clean_factor_return, merged_data[['amount3M','ep']]],
    axis=1,
).dropna()
_part3_cells = clean_factor_return.groupby(['turnover3M','distance'])
summary['avg.ep'] = _part3_cells.apply(lambda cell: cell['ep'].mean()).unstack().round(4)
summary['avg.liquidity'] = _part3_cells.apply(lambda cell: cell['amount3M'].mean()).unstack().round(4)

# Print every table in the summary under its own heading.
for table_name in summary:
    print(table_name)
    pprint(summary[table_name])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.2%
distance         Low  Moderate Low  Medium  Moderate High  High
turnover3M                                                     
Low            11.27          1.93    3.36           4.35  9.40
Moderate Low    5.45          2.36    0.74           2.49  3.36
Medium          1.92          0.50   -1.82           0.74  3.26
Moderate High   0.95         -0.08   -3.13          -1.07  0.95
High          -14.30         -9.81   -8.98          -7.00 -2.83


hit_rate_eq(%)
distance         Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                      
Low            61.29         54.19   56.13          54.19  60.00
Moderate Low   59.35         62.58   52.90          50.32  45.81
Medium         52.26         53.55   43.87          49.03  49.68
Moderate High  47.10         50.32   39.35          45.16  47.10
High           33.55         35.48   34.19          40.00  45.16


avg.ep
distance          Low  Mo

- Part IV turnover3M & skew (5x5)

In [10]:
def _turnover_skew_labels():
    """Return a fresh quintile-label mapping for the turnover3M and skew factors."""
    return {
        'turnover3M': ['Low','Moderate Low','Medium','Moderate High','High'],
        'skew': ['Low','Moderate Low','Medium','Moderate High','High'],
    }


# Part IV: 5x5 double sort on turnover3M and skew.
_part4_rows = merged_data.dropna()
mft = MultiFactorTesting(
    bar_data=_part4_rows[bars],
    factor_data=_part4_rows[['turnover3M','skew']],
    add_shift=0,
)
summary = mft.summary(quantiles=5, labels=_turnover_skew_labels())
clean_factor_return = mft.get_clean_factor_return(quantiles=5, labels=_turnover_skew_labels())

# Attach the percentile-ranked liquidity and ep columns, then average them inside each cell.
clean_factor_return = pd.concat(
    [clean_factor_return, merged_data[['amount3M','ep']]],
    axis=1,
).dropna()
_part4_cells = clean_factor_return.groupby(['turnover3M','skew'])
summary['avg.ep'] = _part4_cells.apply(lambda cell: cell['ep'].mean()).unstack().round(4)
summary['avg.liquidity'] = _part4_cells.apply(lambda cell: cell['amount3M'].mean()).unstack().round(4)

# Print every table in the summary under its own heading.
for table_name in summary:
    print(table_name)
    pprint(summary[table_name])
    print('\n')

avg_annual_excess_return_eq(%)_with_mkt=13.2%
skew            Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                     
Low            0.98          9.72    7.47          -0.19   0.94
Moderate Low   4.40          4.53    3.56           0.69  -1.07
Medium         4.50          4.25    0.73          -1.63  -2.99
Moderate High  5.24          3.60    0.67          -3.20  -6.25
High           0.36         -3.23   -6.70         -11.06 -17.07


hit_rate_eq(%)
skew             Low  Moderate Low  Medium  Moderate High   High
turnover3M                                                      
Low            55.48         60.00   56.77          54.19  52.90
Moderate Low   63.87         56.77   58.71          54.84  54.84
Medium         57.42         64.52   49.03          46.45  42.58
Moderate High  60.00         56.77   47.74          40.65  31.61
High           47.74         43.87   38.06          33.55  26.45


avg.ep
skew              Low  Mo