In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm,trange
import matplotlib.pyplot as plt
import matplotlib
import time
import statsmodels.api as sm
from scipy import stats
from typing import Union
import re
from copy import deepcopy
import pickle
from utils import *
import matplotlib.dates as mdate
import datetime

plt.rcParams['font.sans-serif'] = ['KaiTi'] # default sans-serif font (the plots below use Chinese titles and labels)
plt.rcParams['axes.unicode_minus'] = False # keep the minus sign '-' from being drawn as a box in saved figures
%config InlineBackend.figure_format = 'retina'

In [None]:
# Trading calendar: the sorted values of the first column of the index workbook.
# NOTE(review): later cells compare these entries with 'YYYY-MM-DD' strings, so the
# column presumably holds date strings — confirm.
tradedays_list = list(np.sort(pd.read_excel('data\\stock_index\\shse.xlsx').iloc[:,0].values))
# Every calendar day from 2000-01-04 up to the last trading day, as 'YYYY-MM-DD' strings.
naturedays_list = [str(i)[:10] for i in pd.date_range('2000-01-04',tradedays_list[-1])]

In [None]:
## Stock index class
class Stock_index():
    '''
    Price/volume history of a stock index plus, optionally, its per-day constituent sets.

    Attributes:
        market: DataFrame indexed by date with columns
            date, open, high, low, close, change, returns, volume, amount
            ('returns' is divided by 100 on load).
        constituent: None, or a dict mapping each trading date to the set of
            constituent codes on that date.
        constituent_date: list of the dates covered by `constituent`
            (empty when no constituent data was supplied).
    '''
    def __init__(self, market, constituent=None, tradedays_list=tradedays_list, naturedays_list=naturedays_list) -> None:
        '''
        Args:
            market: raw index quote table; columns 0-6, 9 and 10 are used, in that order.
            constituent: optional raw constituent-change table; columns 0, 1 and 6 are
                used as date, code and in/out flag (the literal '纳入' marks an inclusion,
                anything else is treated as a removal).
            tradedays_list: trading calendar used to expand constituent changes to one
                entry per trading day.
            naturedays_list: accepted for interface compatibility; not used here.
        '''
        # .copy() so the renames/assignments below act on an independent frame
        # instead of a slice of the caller's DataFrame.
        market = market.iloc[:,[0,1,2,3,4,5,6,9,10]].copy()
        market.columns = ['date','open','high','low','close','change','returns','volume','amount']
        market.index = market['date'].values
        market = market.sort_index()

        def parse_number(x):
            # Strings such as '1,234.5' become floats; everything else is kept as is.
            if type(x) is str and not pd.isna(x):
                return np.float64(x.replace(',',''))
            return x
        # Replace column by column (Series.map) rather than DataFrame.applymap,
        # which is deprecated in recent pandas.
        for column in market.columns[1:]:
            market[column] = market[column].map(parse_number)
        market['returns'] = market['returns']/100
        self.market = market

        if constituent is None:
            self.constituent = None
            self.constituent_date = []
            return

        constituent = constituent.iloc[:,[0,1,6]].copy()
        constituent.columns = ['date','code','inout']
        constituent['inout'] = constituent['inout'].apply(lambda x: 'in' if x=='纳入' else 'out')
        # One set of codes per (date, direction). After sorting, 'in' precedes 'out'
        # within a date, so inclusions are applied before removals.
        changes = constituent.groupby(['date','inout'])['code'].apply(set).sort_index()
        change_list = [(key[0], key[1], codes) for key, codes in changes.items()]

        constituent_dict = dict()
        start_date = change_list[0][0]
        members = set()
        position = 0
        # Single pass: apply every change dated on or before the current trading day,
        # then snapshot the membership for that day.
        for date in tqdm(tradedays_list[tradedays_list.index(start_date):]):
            while position < len(change_list) and change_list[position][0] <= date:
                _, direction, codes = change_list[position]
                if direction == 'in':
                    members = members | codes
                else:
                    members = members - codes
                position += 1
            constituent_dict[date] = set(members)  # independent copy per day
        self.constituent = constituent_dict
        self.constituent_date = list(self.constituent.keys())

    def get_constituent(self, date):
        '''
        Return the set of constituent codes on `date`, or None when the index was
        built without constituent data.

        Raises:
            AssertionError: if `date` is outside the covered range or is not one of
                the covered trading days.
        '''
        if self.constituent is None:
            return None
        assert self.constituent_date[0]<=date<=self.constituent_date[-1], '日期在指数成分日期范围外'
        assert date in self.constituent_date, '日期是非交易日'
        return self.constituent[date]

In [None]:
## szse is the Shenzhen Composite Index, loaded with its quote history and constituent changes
szse = Stock_index(pd.read_excel('data\\stock_index\\szse.xlsx'),
                    constituent=pd.read_excel('data\\stock_index\\szse_constituent.xlsx'))

In [None]:
## Check whether each stock in Inc_data was an index constituent on its pre-plan announcement date
Inc_data = pd.read_pickle('data preprocess\\Inc_data.pkl')
Asharedescription = pd.read_pickle('data preprocess\\AShareDescription.pkl')

# Keep one row per (stock, announcement date).
Inc_data = Inc_data.drop_duplicates(['S_INFO_WINDCODE','PREPLAN_ANN_DATE'])
# Drop events announced before the first date that has constituent data.
Inc_data = Inc_data[Inc_data['PREPLAN_ANN_DATE']>=szse.constituent_date[0]]
# NOTE(review): the filter above uses PREPLAN_ANN_DATE while the lookup below uses
# PREPLAN_ANN_DATE_to_trade; get_constituent asserts on dates it does not cover —
# confirm the two columns cannot fall on different sides of the boundary.
temp = Inc_data.apply(lambda x: x['S_INFO_WINDCODE'] in szse.get_constituent(x['PREPLAN_ANN_DATE_to_trade']), axis=1)
# Fraction of events whose stock was in the index on that date.
temp.sum()/temp.shape[0]

## 事件研究法

In [None]:
# Daily price table, sorted and re-indexed by (code, date).
data = pd.read_pickle('data\\AShareEODPrices.pkl')
data = data.sort_values(['code','date']).reset_index(drop=True)
data.index = pd.MultiIndex.from_arrays([data.code.values, data.date.values], names=['code','date'])
# Drop the first two columns.
# NOTE(review): presumably these are code and date (now in the index) — confirm column order.
data = data.iloc[:,2:]
# Simple return from adjusted prices. Adding and then subtracting volume leaves the value
# unchanged (up to floating-point rounding) but makes the return NaN wherever volume is NaN.
data['returns'] = data['adjclose']/data['adjpreclose']-1+data['volume']-data['volume']

In [None]:
# Window boundaries in trading days relative to the event day:
# estimation window is [T0, T1), event window is [T1, T2).
T0,T1,T2=-220,-20,61

# 1 = observation present, 0 = stock return missing on that relative day.
est_nanloc = pd.DataFrame(1, index=Inc_data.index, columns=np.arange(T0,T1))
abnormal_return = pd.DataFrame(np.nan, index=Inc_data.index, columns=np.arange(T1,T2))
normal_return = pd.DataFrame(np.nan, index=Inc_data.index, columns=np.arange(T1,T2))
event_nonloc = pd.DataFrame(1, index=Inc_data.index, columns=np.arange(T1,T2))
sigma_hat = pd.Series(np.nan, index=Inc_data.index) # residual variance estimate per event
V = []
for i in trange(Inc_data.shape[0]):
    ## Fit the normal-return (market) model on the estimation window
    x = Inc_data.iloc[i]
    code = x['S_INFO_WINDCODE']
    PREPLAN_ANN_DATE_to_trade_index = tradedays_list.index(x.loc['PREPLAN_ANN_DATE_to_trade'])
    estimate_window = tradedays_list[PREPLAN_ANN_DATE_to_trade_index+T0:PREPLAN_ANN_DATE_to_trade_index+T1]
    X = market_return = szse.market['returns'].loc[estimate_window]
    y = stock_return = data['returns'].loc[code].loc[estimate_window]
    X, y = np.log(1+X), np.log(1+y) # use log returns
    # Mark missing estimation-window observations with 0. A single .iloc[row, mask]
    # assignment is used because chained `df.iloc[i][mask] = 0` may write to a temporary.
    est_nanloc.iloc[i, y.isna().values] = 0
    X = X[~(y.isna())]
    y = y.dropna()
    X = sm.add_constant(X)
    # No weights are passed, so statsmodels applies its default weights.
    model = sm.WLS(y,X)
    results = model.fit()
    event_window = tradedays_list[PREPLAN_ANN_DATE_to_trade_index+T1:PREPLAN_ANN_DATE_to_trade_index+T2]
    ## Abnormal returns over the event window
    X_new = market_return_new = szse.market['returns'].loc[event_window]
    y_new = stock_return = data['returns'].loc[code].loc[event_window]
    X_new, y_new = np.log(1+X_new), np.log(1+y_new) # use log returns
    X_new = sm.add_constant(X_new)
    # Mark missing event-window observations with 0 (same single-step assignment as above).
    event_nonloc.iloc[i, y_new.isna().values] = 0
    # The predictions and y_new are indexed by date while the target rows are indexed by
    # relative day, so assign positionally via plain arrays.
    normal_return.iloc[i] = np.asarray(results.predict(X_new))
    abnormal_return.iloc[i] = np.asarray(y_new)-normal_return.iloc[i].values
    sigma_hat.iloc[i] = results.mse_resid
    # H = X_new (X'X)^+ X_new'; V_i = I + H is later scaled by sigma_hat to get the
    # variance of (cumulative) abnormal returns.
    H = ((X_new@(np.linalg.pinv(X.T@X)))@(X_new.T.values)).values
    V.append(H+np.eye(H.shape[0]))
V=np.array(V)

In [None]:
# Cross-event mean abnormal return for each relative day of the event window.
abnormal_return.mean().plot(title='平均异常收益率')
plt.xlabel('第T日')
plt.ylabel('收益率')
plt.savefig('show_data\\事件研究法\\平均异常收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()
# Running sum of the daily mean abnormal return (cumulative average abnormal return).
abnormal_return.mean().cumsum().plot(title='累计平均异常收益率')
plt.xlabel('第T日')
plt.ylabel('累计收益率')
plt.savefig('show_data\\事件研究法\\累计平均异常收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

In [None]:
## J1 statistic for every single day of the event window
def f(Inc_data,abnormal_return,event_nonloc,V):
    '''
    Per-day mean abnormal return and its volatility estimate.

    Reads the notebook-level T1, T2 (event-window bounds) and sigma_hat
    (per-event residual variance), in addition to its arguments.

    Args:
        Inc_data: event table; only its index and row count are used.
        abnormal_return: events x relative-day abnormal returns.
        event_nonloc: events x relative-day 0/1 mask (0 = missing observation).
        V: per-event (T2-T1, T2-T1) matrices, indexed positionally.

    Returns:
        DataFrame with rows 'mu', 'sigma', 'J1', 'pvalue' and one column per
        relative day; only 'mu' and 'sigma' are filled here.
        'sigma' is the square root of the cross-event mean of sigma2.
    '''
    event_days = np.arange(T1,T2)
    n_events = Inc_data.shape[0]
    statistics = pd.DataFrame(np.nan, index=['mu','sigma','J1','pvalue'], columns=event_days)
    for t in tqdm(event_days):
        t1,t2 = t,t+1
        window = np.arange(t1,t2)
        mu_sigma2_hat = pd.DataFrame(np.nan, index=Inc_data.index, columns=['mu','sigma2'])
        mu_sigma2_hat['mu'] = abnormal_return[window].sum(axis=1)
        # Events with no observation at all inside the window are excluded below.
        nanloc_bools_temp = (event_nonloc[window].sum(axis=1)==0).values
        sigma2 = np.empty(n_events)
        for i in range(n_events):
            # gamma selects the window days, zeroed where the observation is missing.
            gamma = np.zeros((T2-T1,1))
            gamma[t1-T1:t2-T1] = 1
            gamma = gamma*event_nonloc.iloc[[i]].values.T
            # sigma_hat is read positionally (.iloc) to stay consistent with V[i] and
            # event_nonloc.iloc[[i]]; plain sigma_hat[i] would be a label lookup.
            sigma2[i] = (gamma.T@V[i]@gamma*sigma_hat.iloc[i])[0,0]
        # Assign the whole column at once; chained `.iloc[i]['sigma2'] = ...` may write
        # to a temporary and leave the column as NaN.
        mu_sigma2_hat['sigma2'] = sigma2
        mu_sigma2_hat = mu_sigma2_hat[~nanloc_bools_temp]
        mu_sigma2_hat = mu_sigma2_hat.mean()
        ## inputs of the J1 statistic
        statistics.loc[['mu','sigma'],t] = [mu_sigma2_hat['mu'],np.sqrt(mu_sigma2_hat['sigma2'])]
    return statistics
statistics = f(Inc_data,abnormal_return,event_nonloc,V)
# J1 = mean abnormal return divided by the 'sigma' row returned by f.
# NOTE(review): 'sigma' is sqrt(mean of per-event variances) with no 1/N scaling for the
# number of events — confirm this is the intended variance of the cross-event mean.
statistics.loc['J1']=statistics.loc['mu']/statistics.loc['sigma']
# Two-sided p-value under a standard normal; algebraically 2*(1 - Phi(|J1|)).
statistics.loc['pvalue'] = 1-(stats.norm.cdf(statistics.loc['J1'].abs())-0.5)*2
# After the transpose there is one row per relative day.
statistics = statistics.T

In [None]:
# Format mu/sigma as percentage strings and round J1/pvalue, then export.
# Note: this overwrites `statistics` in place, so re-running the cell without re-running
# the previous one would try to format strings.
statistics[['mu','sigma']] = statistics[['mu','sigma']].applymap(lambda x: format(x, '.2%'))
statistics[['J1','pvalue']] = statistics[['J1','pvalue']].round(3)
statistics.to_excel('show_data\\事件研究法\\每日平均异常收益率.xlsx')

In [None]:
## J1 statistic for the windows before and after the event
def f(Inc_data,abnormal_return,event_nonloc,V):
    '''
    Mean cumulative abnormal return and its volatility estimate over the
    pre-event window (-20, -1) and the post-event window (0, 60), both inclusive.

    Reads the notebook-level T1, T2 (event-window bounds) and sigma_hat
    (per-event residual variance), in addition to its arguments.

    Args:
        Inc_data: event table; only its index and row count are used.
        abnormal_return: events x relative-day abnormal returns.
        event_nonloc: events x relative-day 0/1 mask (0 = missing observation).
        V: per-event (T2-T1, T2-T1) matrices, indexed positionally.

    Returns:
        DataFrame with rows 'mu', 'sigma', 'J1', 'pvalue' and one column per
        interval (labelled with str(interval)); only 'mu' and 'sigma' are filled here.
    '''
    time_intervel_list = [(-20,-1),(0,60)]
    n_events = Inc_data.shape[0]
    statistics = pd.DataFrame(np.nan, index=['mu','sigma','J1','pvalue'],
                              columns=[str(t) for t in time_intervel_list])
    for t in tqdm(time_intervel_list):
        t1,t2 = t[0],t[1]+1  # half-open [t1, t2)
        window = np.arange(t1,t2)
        mu_sigma2_hat = pd.DataFrame(np.nan, index=Inc_data.index, columns=['mu','sigma2'])
        mu_sigma2_hat['mu'] = abnormal_return[window].sum(axis=1)
        # Events with no observation at all inside the window are excluded below.
        nanloc_bools_temp = (event_nonloc[window].sum(axis=1)==0).values
        sigma2 = np.empty(n_events)
        for i in range(n_events):
            # gamma selects the window days, zeroed where the observation is missing.
            gamma = np.zeros((T2-T1,1))
            gamma[t1-T1:t2-T1] = 1
            gamma = gamma*event_nonloc.iloc[[i]].values.T
            # sigma_hat is read positionally (.iloc) to stay consistent with V[i] and
            # event_nonloc.iloc[[i]]; plain sigma_hat[i] would be a label lookup.
            sigma2[i] = (gamma.T@V[i]@gamma*sigma_hat.iloc[i])[0,0]
        # Assign the whole column at once; chained `.iloc[i]['sigma2'] = ...` may write
        # to a temporary and leave the column as NaN.
        mu_sigma2_hat['sigma2'] = sigma2
        mu_sigma2_hat = mu_sigma2_hat[~nanloc_bools_temp]
        mu_sigma2_hat = mu_sigma2_hat.mean()
        ## inputs of the J1 statistic
        statistics.loc[['mu','sigma'],str(t)] = [mu_sigma2_hat['mu'],np.sqrt(mu_sigma2_hat['sigma2'])]
    return statistics
statistics = f(Inc_data,abnormal_return,event_nonloc,V)
# J1 = mean cumulative abnormal return divided by the 'sigma' row returned by f.
# NOTE(review): 'sigma' is sqrt(mean of per-event variances) with no 1/N scaling for the
# number of events — confirm this is the intended variance of the cross-event mean.
statistics.loc['J1']=statistics.loc['mu']/statistics.loc['sigma']
# Two-sided p-value under a standard normal; algebraically 2*(1 - Phi(|J1|)).
statistics.loc['pvalue'] = 1-(stats.norm.cdf(statistics.loc['J1'].abs())-0.5)*2
# After the transpose there is one row per interval.
statistics = statistics.T

In [None]:
# Format mu/sigma as percentage strings and round J1/pvalue, then export.
# Note: this overwrites `statistics` in place, so re-running the cell without re-running
# the previous one would try to format strings.
statistics[['mu','sigma']] = statistics[['mu','sigma']].applymap(lambda x: format(x, '.2%'))
statistics[['J1','pvalue']] = statistics[['J1','pvalue']].round(3)
statistics.to_excel('show_data\\事件研究法\\事件发生前后累计异常收益率.xlsx')

## 单因子测试

In [None]:
market_value = pd.read_pickle('data\\market_value\\market_value_ts.pkl')
market_value = market_value['float_MV']
## Standardize the market-value factor: log transform, then normalize within each date
market_value = np.log(market_value)
# RobustCSZScoreNorm is not defined in this notebook; presumably it comes from
# `from utils import *` — confirm its clipping/normalization semantics there.
market_value = market_value.groupby("date").apply(lambda x: RobustCSZScoreNorm(x, clip_outlier=True))

In [None]:
# Reload the preprocessed incentive tables. Note that Inc_data is re-read here, so the
# de-duplication and date filter applied to it in the constituent-check cell are discarded.
Inc_data = pd.read_pickle('data preprocess\\Inc_data.pkl')
Inc_label = pd.read_pickle('data preprocess\\Inc_label.pkl')
Inc_financial_and_market = pd.read_pickle('data preprocess\\Inc_financial_and_market.pkl')
Inc_concentration = pd.read_pickle('data preprocess\\Inc_concentration.pkl')
Inc_text_data = pd.read_pickle('data preprocess\\Inc_text_data.pkl')

In [None]:
## Grouped returns use the excess return: 60-day absolute return minus the 60-day benchmark return
interval = 60
feature_label = Inc_label[['S_INFO_WINDCODE','S_INC_SEQUENCE','PREPLAN_ANN_DATE','PREPLAN_ANN_DATE_to_trade','Sell_day_real_'+str(interval)]].copy()
# Year = first four characters of the announcement date. Assigned by index alignment,
# so Inc_data and Inc_label are assumed to share the same index — TODO confirm.
feature_label['Year'] = Inc_data['PREPLAN_ANN_DATE'].apply(lambda x: x[:4])
# Label = log stock return over the holding period minus the log return of the szse
# close between Buy_day_real and the sell day.
feature_label['Label'] = np.log(1+Inc_label[str(interval)+'_days_return'])-np.log(1+Inc_label.apply(lambda x: szse.market.loc[x['Sell_day_real_'+str(interval)],'close']/szse.market.loc[x['Buy_day_real'],'close']-1, axis=1))
# Normalized market value looked up at (stock code, trade date).
feature_label['MV'] = Inc_data.apply(lambda x: market_value.loc[(x['S_INFO_WINDCODE'],x['PREPLAN_ANN_DATE_to_trade'])], axis=1)
feature_label['Corporate_type'] = Inc_data.apply(lambda x: Asharedescription.loc[x['S_INFO_WINDCODE'],'Corporate_type'], axis=1)
# Two-class version: '民营企业' (private enterprise) versus everything else ('其他企业').
feature_label['Corporate_type2'] = feature_label['Corporate_type'].apply(lambda x: x if x=='民营企业' else '其他企业')
feature_label['S_INFO_LISTBOARDNAME'] = Inc_data.apply(lambda x: Asharedescription.loc[x['S_INFO_WINDCODE'],'S_INFO_LISTBOARDNAME'], axis=1)
def f(x, ST_range_list):
    '''
    Tell whether the row's announcement date falls inside any of the given ranges.

    Args:
        x: mapping/row exposing 'PREPLAN_ANN_DATE'.
        ST_range_list: sized iterable of ranges; element 0 of each range is the
            lower bound and element 1 the upper bound (both inclusive).

    Returns:
        True if the date lies within at least one range, otherwise False
        (an empty list always yields False).
    '''
    if len(ST_range_list)==0:
        return False
    announce_date = x['PREPLAN_ANN_DATE']
    return any(
        announce_date>=ST_range[0] and announce_date<=ST_range[1]
        for ST_range in ST_range_list
    )
# NOTE(review): f expects a list of (start, end) ranges, but the value passed here is the
# 'S_INFO_LISTBOARDNAME' entry (the same column used for the listing board above).
# This looks like the wrong column; confirm which Asharedescription column holds the ST ranges.
feature_label['Is_ST'] = Inc_data.apply(lambda x: f(x, Asharedescription.loc[x['S_INFO_WINDCODE'],'S_INFO_LISTBOARDNAME']), axis=1)
# Incentive instrument: 1 -> '期权' (option), 2 -> '限制性股票' (restricted stock),
# 3 -> '股票增值权' (stock appreciation right).
feature_label['S_INC_SUBJECT'] = Inc_data['S_INC_SUBJECT'].copy().apply(lambda x: {1:'期权', 2:'限制性股票', 3:'股票增值权'}[x])
# Two-class version: code 3 is folded into '期权'. Assigned positionally (.values),
# unlike the index-aligned assignment on the previous line.
feature_label['S_INC_SUBJECT2'] = Inc_data['S_INC_SUBJECT'].copy().apply(lambda x: {1:'期权', 2:'限制性股票', 3:'期权'}[x]).values
feature_label['S_INC_TYPE'] = Inc_data['S_INC_TYPE'].copy()
# Merged type codes: 5 -> 1 and 6 -> 3, all other codes unchanged.
feature_label['S_INC_TYPE2'] = feature_label['S_INC_TYPE'].copy().apply(lambda x: {5:1, 6:3}[x] if x in [5,6] else x)
feature_label['INC_NUMBERS_RATE'] = Inc_data['INC_NUMBERS_RATE'].copy() # 11 missing values
def f(x):
    '''
    Clamp x to the range [3, 7]: values >= 7 become 7, values <= 3 become 3,
    anything in between is returned unchanged.
    '''
    return 7 if x>=7 else (3 if x<=3 else x)
# Expiry clamped to [3, 7] by f and rounded; NaN is passed through untouched.
feature_label['S_INC_EXPIRYDATE'] = Inc_data['S_INC_EXPIRYDATE'].copy().apply(lambda x: round(f(x)) if not pd.isna(x) else x) # 34 missing values
# Left-join the concentration measures on (stock code, incentive sequence).
# Note: merge returns a frame with a fresh index, which is why later assignments use .values.
feature_label = feature_label.merge(Inc_concentration[['S_INFO_WINDCODE', 'S_INC_SEQUENCE', 'Management_Inc_ratio','Management_ratio','Topk_Inc_ratio','Inc_entropy', 'Inc_entropy_all', 'Inc_gini_ml',
                                                                            'Inc_gini_ml_all', 'Inc_gini_eco', 'Inc_gini_eco_all']], on=['S_INFO_WINDCODE', 'S_INC_SEQUENCE'], how='left') # missing-value counts, in order: 912, 945, 1093, 912, 1093, 912, 1093, 912, 1093
feature_label = feature_label.merge(Inc_financial_and_market[['S_INFO_WINDCODE', 'S_INC_SEQUENCE','ROE_Norm','Price_gap']], on=['S_INFO_WINDCODE', 'S_INC_SEQUENCE'], how='left') # missing-value counts, in order: 5, 0
# Excess price gap: log(1 + Price_gap) minus the log of (index open on the buy day /
# index close on the previous trading day). Assigned positionally, so the row order of
# Inc_financial_and_market is assumed to match feature_label — TODO confirm.
feature_label['Price_gap_ex'] = (np.log(1+Inc_financial_and_market['Price_gap'])-np.log(Inc_financial_and_market['Buy_day_real'].apply(lambda x: szse.market.loc[x,'open']/szse.market.loc[tradedays_list[tradedays_list.index(x)-1],'close']))).values
feature_label = feature_label.merge(Inc_text_data[['S_INFO_WINDCODE', 'S_INC_SEQUENCE','S_INC_INITEXECPRI','Exercise_num','Time_from_preplan_to_target']], on=['S_INFO_WINDCODE', 'S_INC_SEQUENCE'], how='left') # missing-value counts, in order: 17, 14, 14
# Initial exercise price relative to the stock's vwap on the trade date.
feature_label['S_INC_INITEXECPRI_to_vwap'] = feature_label.apply(lambda x: x['S_INC_INITEXECPRI']/data.loc[(x['S_INFO_WINDCODE'],x['PREPLAN_ANN_DATE_to_trade']),'vwap'] ,axis=1).values
# The ratio is doubled for restricted stock.
# NOTE(review): presumably to put restricted-stock grant prices on the same scale as
# option exercise prices — confirm the rationale for the factor 2.
feature_label['S_INC_INITEXECPRI_to_vwap'] = feature_label.apply(lambda x: x['S_INC_INITEXECPRI_to_vwap']*2 if x['S_INC_SUBJECT2']=='限制性股票' else x['S_INC_INITEXECPRI_to_vwap'],axis=1)

In [None]:
## Keep events from 2011 onwards, then split into train/test by the sell date
# 'Year' is a 4-character string, so this is a string comparison.
feature_label_2011plus = feature_label[feature_label['Year']>='2011']
# Train: sell date before 2019; test: sell date in 2019 or later.
# NOTE(review): comparing against the string '2019' assumes the sell-day column holds
# 'YYYY-MM-DD' strings — confirm.
feature_label_2011plus_train = feature_label_2011plus[feature_label_2011plus['Sell_day_real_'+str(interval)]<'2019']
feature_label_2011plus_test = feature_label_2011plus[feature_label_2011plus['Sell_day_real_'+str(interval)]>='2019']

In [None]:
# Number of missing values per column in the 2011+ sample.
feature_label_2011plus.isna().sum()

In [None]:
## Descriptive statistics
def describe_analysis_discrete(feature_label_2011plus, feature_name):  # unordered categorical factor
    '''
    Count samples per category of `feature_name`, overall and per year, and save
    two bar charts under show_data\\因子的描述性统计分析\\<feature_name>.

    Args:
        feature_label_2011plus: sample table with 'Year', 'S_INFO_WINDCODE' and
            the `feature_name` column.
        feature_name: name of the categorical column to summarize.

    Returns:
        (count, count2): `count` has two rows, the sample count (as integer strings)
        and the share (as '.2%' strings), with one column per category; `count2` is
        the year x category table of counts.
    '''
    out_dir = 'show_data\\因子的描述性统计分析\\'+feature_name
    os.makedirs(out_dir, exist_ok=True)

    count = feature_label_2011plus.groupby(feature_name)[['S_INFO_WINDCODE']].count().rename(columns={'S_INFO_WINDCODE':'样本数'})
    count['比例'] = count['样本数']/count['样本数'].sum()
    (count['样本数']).plot(kind='bar',figsize=(5,3))
    plt.xlabel(feature_name)
    plt.ylabel('数目')
    plt.title(feature_name+'各类的样本数目')
    plt.grid()  # draw grid lines
    plt.savefig(out_dir+'\\数目.png', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()

    count2 = feature_label_2011plus.groupby(['Year',feature_name])['S_INFO_WINDCODE'].count()
    # Columns are reordered to match the category order of `count`.
    count2 = count2.apply(lambda x: int(x)).unstack()[count.index]
    count2.plot(kind='bar',figsize=(12,3))
    plt.ylabel('数目')
    plt.title(''+feature_name+'各类每年的样本数目')
    plt.legend(loc='best')
    plt.grid()  # draw grid lines
    plt.savefig(out_dir+'\\分年度数目.png', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()

    # Cast to object before writing formatted strings: assigning strings into float
    # rows relies on an implicit dtype upcast that recent pandas warns about.
    count = count.T.astype(object)
    count.iloc[0] = (count.iloc[0]).apply(lambda x: str(int(x)))
    count.iloc[1] = count.iloc[1].apply(lambda x: format(x, '.2%'))
    return count, count2
def describe_analysis_continuous(feature_label_2011plus, feature_name, rows=3, columns=4, figsize=(12,7), 
                                bins1=30, bins2=20, x_lim=None, y_lim=None, xticks=None):  # continuous factor
    '''
    Plot histograms of `feature_name` (all years plus one panel per year 2011-2021)
    and export per-year descriptive statistics to
    show_data\\因子的描述性统计分析\\<feature_name>.

    Args:
        feature_label_2011plus: sample table with a string 'Year' column and the
            `feature_name` column.
        feature_name: name of the continuous column to summarize.
        rows, columns: subplot grid shape; panel 0 is the pooled histogram and the
            year panels 2011-2021 follow in row-major order, so the grid needs at
            least 12 cells.
        figsize: figure size.
        bins1: bin count of the pooled histogram; bins2: bin count of yearly panels.
        x_lim, xticks: optional x-axis limits/ticks applied to every panel.
        y_lim: optional y-axis limits applied to the yearly panels only.

    Returns:
        DataFrame of describe() statistics per year plus a final '全部年份' row,
        rounded to 3 decimals, with 'count' as integer strings.
    '''
    out_dir = 'show_data\\因子的描述性统计分析\\'+feature_name
    os.makedirs(out_dir, exist_ok=True)
    fig, ax = plt.subplots(rows,columns, figsize=figsize,constrained_layout=True)
    fig.suptitle(feature_name+'因子每年的直方图')
    feature_label_2011plus[feature_name].hist(bins=bins1, ax=ax[0,0])
    if x_lim is not None:
        ax[0,0].set_xlim(x_lim)
    if xticks is not None:
        ax[0,0].set_xticks(xticks)
    ax[0,0].set_xlabel('因子值')
    ax[0,0].set_ylabel('数目')
    ax[0,0].set_title('全部年份')
    for year in range(2011,2022):
        # Panel position: 2011 goes to cell 1, 2012 to cell 2, ... (cell 0 is the pooled plot).
        row = (year-2010)//columns
        column = (year-2010)%columns
        year = str(year)
        temp = feature_label_2011plus[feature_label_2011plus['Year']==year]
        temp[feature_name].hist(bins=bins2, ax=ax[row,column])
        if x_lim is not None:
            ax[row,column].set_xlim(x_lim)
        if y_lim is not None:
            ax[row,column].set_ylim(y_lim)
        if xticks is not None:
            ax[row,column].set_xticks(xticks)
        ax[row,column].set_xlabel('因子值')
        ax[row,column].set_ylabel('数目')
        ax[row,column].set_title(year+'年')
    plt.savefig(out_dir+'\\直方图.png', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()
    describe_stats = feature_label_2011plus.groupby('Year')[feature_name].describe()
    # DataFrame.append was removed in pandas 2.0; concatenate the pooled statistics
    # as one extra row instead.
    overall_stats = feature_label_2011plus[feature_name].describe().to_frame().T
    describe_stats = pd.concat([describe_stats, overall_stats])
    describe_stats.index = list(describe_stats.index[:-1])+['全部年份']
    describe_stats = describe_stats.round(3)
    describe_stats['count'] = describe_stats['count'].apply(lambda x: str(int(x)))
    describe_stats.to_excel(out_dir+'\\描述性统计量.xlsx')
    return describe_stats

In [None]:
# Categorical factors: draw the count charts and export the per-category count table.
discrete_factor_name_list = ['Corporate_type','Corporate_type2','S_INFO_LISTBOARDNAME','S_INC_SUBJECT','S_INC_TYPE','S_INC_TYPE2','Exercise_num']
for feature_name in discrete_factor_name_list:
    count, count2 = describe_analysis_discrete(feature_label_2011plus, feature_name)
    count.to_excel('show_data\\因子的描述性统计分析\\'+feature_name+'\\各类的样本数目.xlsx')

In [None]:
# Continuous factors: draw the yearly histograms and export descriptive statistics
# (the export happens inside describe_analysis_continuous).
continuous_factor_name_list = ['MV','INC_NUMBERS_RATE','Management_Inc_ratio','Topk_Inc_ratio','Inc_entropy','Inc_entropy_all','Inc_gini_ml',
                            'Inc_gini_ml_all','Inc_gini_eco','Inc_gini_eco_all','ROE_Norm','Price_gap_ex','S_INC_INITEXECPRI_to_vwap',
                            'Time_from_preplan_to_target']
for feature_name in continuous_factor_name_list:
    describe_analysis_continuous(feature_label_2011plus, feature_name, rows=3, columns=4, figsize=(12,7), 
                                bins1=30, bins2=20)

In [None]:
## Grouped returns (categorical factors), latest version
def group_return_method_performance_discrete(feature_label):
    '''
    Performance of the excess return within each category of a categorical factor.

    Args:
        feature_label: DataFrame with a 'label' column (excess return) and a
            'feature' column (category).

    Returns:
        DataFrame indexed by str(category) with columns
        '超额收益率均值' (mean), '超额收益率标准差' (std), '超额收益率夏普比率' (mean/std),
        '胜率' (share of rows with label > 0) and '数目' (row count).
    '''
    # Aggregate the 'label' column explicitly instead of relying on the frame
    # containing exactly one non-key column.
    grouped = feature_label.groupby('feature')['label']
    group_return = pd.DataFrame({
        '超额收益率均值': grouped.mean(),
        '超额收益率标准差': grouped.std(),
    })
    group_return['超额收益率夏普比率'] = group_return['超额收益率均值']/group_return['超额收益率标准差']
    # Denominator is the full group size, so missing labels count as non-wins.
    group_return['胜率'] = grouped.apply(lambda x: (x>0).sum()/x.shape[0])
    group_return['数目'] = grouped.size()
    group_return.index = [str(i) for i in group_return.index]
    return group_return
def group_return_method_event_return_seq_discrete(feature_label):
    '''
    Split the per-event excess returns by category.

    Returns a dict mapping every distinct value of the 'feature' column (in order
    of first appearance) to the 'label' Series of the rows in that category.
    '''
    categories = feature_label['feature']
    labels = feature_label['label']
    return {level: labels[(categories==level)] for level in categories.unique()}
def group_return_method_discrete_value_wrapper(feature_label_2011plus_train, feature_name ,figsize=(12,3.5)):
    '''
    Run the grouped-return test for one categorical factor: plot the mean excess
    return per category and export the performance table, both under
    show_data\\单因子测试\\<feature_name>.

    Args:
        feature_label_2011plus_train: sample table with a 'Label' column and the
            `feature_name` column.
        feature_name: name of the categorical factor.
        figsize: figure size.

    Returns:
        The performance table from group_return_method_performance_discrete, with
        every column except the last (the count) formatted as '.2%' strings.
    '''
    out_dir = 'show_data\\单因子测试\\'+feature_name
    os.makedirs(out_dir, exist_ok=True)
    fig, ax = plt.subplots(1,1,figsize=figsize,constrained_layout=True)
    fig.suptitle(feature_name+'因子分组收益测试', x=0.5, y=0, size=14)
    group_return = group_return_method_performance_discrete(feature_label_2011plus_train[['Label',feature_name]].rename(columns={'Label':'label',feature_name:'feature'}))
    group_return['超额收益率均值'].plot(kind='bar', ax=ax, title='各组平均超额收益率', xlabel='组名', ylabel='收益率')
    plt.savefig(out_dir+'\\因子分组收益测试.png', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()
    # Replace each numeric column with its formatted version. Whole-column replacement
    # avoids writing strings into float columns through .iloc, which depends on an
    # implicit dtype upcast.
    for column in group_return.columns[:-1]:
        group_return[column] = group_return[column].map(lambda x: format(x, '.2%'))
    group_return.to_excel(out_dir+'\\分组收益绩效评价.xlsx')
    return group_return

In [None]:
## Grouped returns (continuous factors), latest version
def group_return_method_performance(feature_label, N=5):
    '''
    Performance of the excess return within N quantile groups of a continuous factor.

    Group k (1-based) holds the rows whose factor lies in
    (quantile((k-1)/N), quantile(k/N)]; the first group also includes its lower
    bound. Group numbers increase with the factor value.

    Args:
        feature_label: DataFrame with a 'label' column (excess return) and a
            'feature' column (factor value).
        N: number of quantile groups.

    Returns:
        DataFrame indexed 'Group1'..'Group<N>' with columns
        '超额收益率均值' (mean), '超额收益率标准差' (std), '超额收益率夏普比率' (mean/std)
        and '胜率' (share of rows with label > 0).
    '''
    group_return = pd.DataFrame(np.nan, index=['Group'+str(i+1) for i in range(N)], columns=['超额收益率均值','超额收益率标准差','超额收益率夏普比率','胜率'])
    feature = feature_label['feature']
    label = feature_label['label']
    for i in range(N):
        lower = feature.quantile(i/N)
        upper = feature.quantile((i+1)/N)
        # Only the lowest group is closed on the left so the minimum is not dropped.
        above_lower = (feature>=lower) if i==0 else (feature>lower)
        label_select = label[above_lower & (feature<=upper)]
        mean = np.float64(label_select.mean())
        std = np.float64(label_select.std())
        win_rate = (label_select>0).sum()/label_select.shape[0]
        # Write the whole row in one step; chained `df[col].iloc[i] = ...` may
        # assign to a temporary and leave the table filled with NaN.
        group_return.iloc[i] = [mean, std, mean/std, win_rate]
    return group_return
def group_return_method_event_return_seq(feature_label, N=5):
    '''
    Split the per-event excess returns into N quantile groups of the factor.

    Returns a dict 'Group1'..'Group<N>' -> 'label' Series of the rows whose
    'feature' lies in (quantile((k-1)/N), quantile(k/N)]; the first group also
    includes its lower bound.
    '''
    factor = feature_label['feature']
    group_return = {}
    for idx in range(N):
        lower_cut = factor.quantile(idx/N)
        upper_cut = factor.quantile((idx+1)/N)
        if idx==0:
            lower_mask = factor>=lower_cut
        else:
            lower_mask = factor>lower_cut
        group_return['Group'+str(idx+1)] = feature_label['label'][lower_mask & (factor<=upper_cut)]
    return group_return
def group_return_method_wrapper(feature_label_2011plus_train, feature_name ,figsize=(12,3.5)):
    '''
    Run the grouped-return test for one continuous factor.

    Draws three panels (mean excess return for 5 groups, for 10 groups, and the
    per-event cumulative excess return of the 5 groups), saves the figure and the
    5-group performance table under show_data\\单因子测试\\<feature_name>, and returns
    that table with every value formatted as a '.2%' string.
    '''
    out_dir = 'show_data\\单因子测试\\'+feature_name
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # The helpers expect the columns to be called 'label' and 'feature'.
    renamed = feature_label_2011plus_train[['Label',feature_name]].rename(columns={'Label':'label',feature_name:'feature'})

    fig, ax = plt.subplots(1,3,figsize=figsize,constrained_layout=True)
    fig.suptitle(feature_name+'因子分组收益测试', x=0.5, y=0, size=14)

    group_return5 = group_return_method_performance(renamed, N=5)
    group_return5['超额收益率均值'].plot(kind='bar', ax=ax[0], title='各组平均超额收益率（分5组）', xlabel='组号', ylabel='收益率')

    group_return10 = group_return_method_performance(renamed, N=10)
    group_return10['超额收益率均值'].plot(kind='bar', ax=ax[1], title='各组平均超额收益率（分10组）', xlabel='组号', ylabel='收益率')

    group_return5_seq = group_return_method_event_return_seq(renamed, N=5)
    for number in range(1,6):
        group_name = 'Group'+str(number)
        cumulative = group_return5_seq[group_name].reset_index(drop=True).cumsum()
        cumulative.plot(label=group_name, ax=ax[2], title='各组逐事件累计超额收益率（分5组）', xlabel='各组事件序号（按预案公告日先后顺序）', ylabel='累计收益率')
    plt.legend(loc='best')
    plt.savefig(out_dir+'\\因子分组收益测试.png', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()

    group_return5 = group_return5.applymap(lambda x: format(x, '.2%'))
    group_return5.to_excel(out_dir+'\\分组收益绩效评价_5组.xlsx')
    return group_return5

In [None]:
# Grouped-return test for the categorical factors, on the training sample only.
# Note: this rebinds discrete_factor_name_list to a shorter list than the one used
# in the descriptive-statistics cell.
discrete_factor_name_list = ['Corporate_type2','S_INFO_LISTBOARDNAME','S_INC_SUBJECT','Exercise_num']
for feature_name in discrete_factor_name_list:
    group_return = group_return_method_discrete_value_wrapper(feature_label_2011plus_train, feature_name ,figsize=(5,3.5))
    print(group_return)

In [None]:
# Grouped-return test for the continuous factors, on the training sample only.
continuous_factor_name_list = ['MV','INC_NUMBERS_RATE','Management_Inc_ratio','Topk_Inc_ratio','Inc_entropy','Inc_entropy_all','Inc_gini_ml',
                            'Inc_gini_ml_all','Inc_gini_eco','Inc_gini_eco_all','ROE_Norm','Price_gap_ex','S_INC_INITEXECPRI_to_vwap',
                            'Time_from_preplan_to_target']
for feature_name in continuous_factor_name_list:
    group_return = group_return_method_wrapper(feature_label_2011plus_train, feature_name)
    print(group_return)

## 选股回测

In [None]:
# NOTE(review): EPS_position is not used in the part of the notebook visible here;
# presumably a position tolerance for the backtest below — confirm.
EPS_position = 1e-4
## Stock-selection strategy class
class NholdIncStrategy:
    '''
    Equal-weight strategy that holds every stock with an open incentive event,
    capped at `holding_num_max` names.
    '''
    def __init__(self, Inc_label, tradedays_list, holding_num_max=1000, holding_interval=None):
        '''
        tradedays_list: list of trading dates.
        Inc_label: DataFrame of the events selected after scoring; must contain
            'S_INFO_WINDCODE', 'Buy_day_real' and 'Sell_day_real_<holding_interval>'.
        holding_num_max: maximum number of stocks held; 1000 effectively means "hold all".
        holding_interval: holding period used to pick the sell-date column. When None
            (the default), the notebook-level `interval` is read at call time, which
            is the original behavior.
        '''
        self.tradedays_list = tradedays_list
        self.Inc_label = Inc_label
        self.holding_num_max = holding_num_max
        self.holding_interval = holding_interval
    ## Stocks to hold on the next trading day `date_next`, with their weights
    def get_target_position(self, date_next):
        '''
        Return a float Series named 'target_position' (index: stock code, value: weight).

        An event is open on `date_next` when Buy_day_real <= date_next < sell day.
        The last N distinct codes among the open events each get weight 1/N, where
        N = min(holding_num_max, number of distinct codes). An empty Series is
        returned when nothing can be held.
        '''
        holding_interval = interval if self.holding_interval is None else self.holding_interval
        sell_column = 'Sell_day_real_'+str(holding_interval)
        is_open = (self.Inc_label['Buy_day_real']<=date_next)&(self.Inc_label[sell_column]>date_next)
        stock_array = self.Inc_label[is_open]['S_INFO_WINDCODE'].unique()
        N = min(self.holding_num_max, len(stock_array))
        # N <= 0 also covers a non-positive holding_num_max, which would otherwise
        # produce a division by zero or negative weights.
        if N<=0:
            return pd.Series({}, name='target_position', dtype=np.float64)
        return pd.Series(1/N, index=stock_array[-N:], name='target_position', dtype=np.float64)
## 定义回测类
class Backtest:
    """Day-by-day portfolio simulator driven by a target-weight strategy.

    Each trading day the orders generated on the previous day are executed at the
    day's ``vwap``, positions are marked at ``close``, and the strategy is asked for
    the next day's target weights, which are turned into the next batch of orders.
    """
    def __init__(self, price:pd.DataFrame, strategy:'NholdIncStrategy', tradedays_list, start_date='2011-01-04', end_date='2021-07-30', init_cash=1e7, commission_rate=0.003, tax_rate=0.001):
        '''
        price: price/volume table indexed by (date, stock_code). This class reads
            the columns 'adjfactor', 'NOCB', 'vwap', 'close' and 'volume'.
        strategy: object exposing get_target_position(date_next) -> Series of weights.
        tradedays_list: list of trading-day date strings.
        start_date / end_date: first and last simulated day (both must be in tradedays_list).
        init_cash: starting cash.
        commission_rate: commission charged on both buys and sells. A minimum
            commission of 5 per trade applies when the rate is positive, 0 otherwise
            (stored as self.commission_min; it is not a constructor argument).
        tax_rate: tax charged on sell proceeds only.

        self.account maps every simulated date to a dict with the keys
            'position'   : DataFrame indexed by stock code, columns value / volume / held_days
            'Buy_info'   : DataFrame indexed by stock code, columns value / volume bought that day
            'Sell_info'  : DataFrame indexed by stock code, columns value / volume sold that day
            'commission' : commission paid that day
            'tax'        : tax paid that day
            'cash'       : cash after the day's trades
            'stock_value': value of the holdings at the close
            'total_value': cash + stock_value
        '''
        self.price = price
        self.strategy = strategy
        self.tradedays_list = tradedays_list
        self.start_date = start_date
        self.end_date = end_date
        self.account = {tradedays_list[i]:{} for i in range(tradedays_list.index(start_date), tradedays_list.index(end_date)+1, 1)}
        self.account[start_date] = {'position':pd.DataFrame({'value':np.array([],dtype=np.float64),'volume':np.array([],dtype=np.int64),'held_days':np.array([],dtype=np.int64)}),
                                    'Buy_info':pd.DataFrame({'value':np.array([],dtype=np.float64),'volume':np.array([],dtype=np.int64)}),
                                    'Sell_info':pd.DataFrame({'value':np.array([],dtype=np.float64),'volume':np.array([],dtype=np.int64)}),
                                    'commission':np.float64(0), 'tax':np.float64(0), 'cash':np.float64(init_cash), 'stock_value':np.float64(0), 'total_value':np.float64(init_cash)}
        self.commission_rate = np.float64(commission_rate)
        if commission_rate>0:
            self.commission_min = 5 ## minimum commission per trade
        else:
            self.commission_min = 0 ## no minimum when trading is free
        self.tax_rate = np.float64(tax_rate) ## charged on the sell side only

    ## Main entry point: run the whole backtest
    def run(self):
        """Simulate every day from start_date to end_date, filling self.account."""
        to_buy = pd.Series(name='buy_value',dtype=np.float64) # cash amount to buy, per stock
        to_sell = pd.Series(name='sell_volume',dtype=np.int64) # share count to sell, per stock
        first = self.tradedays_list.index(self.start_date)
        last = self.tradedays_list.index(self.end_date)
        for i in trange(first, last+1, 1):
            date = self.tradedays_list[i]
            ## Execute the orders generated on the previous day
            self.update(date, to_buy, to_sell)
            ## Nothing to plan when the calendar has no following trading day
            if i+1 >= len(self.tradedays_list):
                break
            date_next = self.tradedays_list[i+1]
            ## Target weights for the next day
            target_position = self.strategy.get_target_position(date_next)
            ## Turn the targets into next-day sell volumes and (theoretical) buy amounts;
            ## update() converts the buy amounts into whole lots
            to_buy, to_sell = self.get_bsh_from_target_position(target_position, date, date_next)

    def update(self, date, to_buy=None, to_sell=None):
        """Execute ``to_sell`` then ``to_buy`` on ``date`` and refresh the account.

        to_buy: Series stock_code -> cash amount to spend (defaults to no buys).
        to_sell: Series stock_code -> number of shares to sell (defaults to no sells).
            The caller's Series is not modified.
        """
        if to_buy is None:
            to_buy = pd.Series(name='buy_value',dtype=np.float64)
        if to_sell is None:
            to_sell = pd.Series(name='sell_volume',dtype=np.int64)
        else:
            to_sell = to_sell.copy() # volumes may be rescaled below
        i = self.tradedays_list.index(date)
        # NOTE(review): for i == 0 this wraps around to the last calendar day
        date_last = self.tradedays_list[i-1]
        ## Start the day from yesterday's closing state
        if len(self.account[date])==0:
            self.account[date] = deepcopy(self.account[date_last])
            self.account[date]['Buy_info'] = pd.DataFrame({'value':np.zeros(len(to_buy.index),dtype=np.float64),'volume':np.zeros(len(to_buy.index),dtype=np.int64)}, index=to_buy.index)
            self.account[date]['Sell_info'] = pd.DataFrame({'value':np.zeros(len(to_sell.index),dtype=np.float64),'volume':np.zeros(len(to_sell.index),dtype=np.int64)}, index=to_sell.index)
        day = self.account[date]

        ## Rescale share counts when the adjustment factor went up since yesterday.
        ## Unchanged factors are skipped so that round-lot volumes are not disturbed
        ## by floating-point noise.
        factor_change = self.price.loc[pd.MultiIndex.from_product([[date],to_sell.index]),'adjfactor'].values/self.price.loc[pd.MultiIndex.from_product([[date_last],to_sell.index]),'adjfactor'].values
        temp_bool = (factor_change-1>1e-3)
        if np.sum(temp_bool)>0:
            to_sell[temp_bool] = np.int64(to_sell[temp_bool].values*factor_change[temp_bool])
        factor_change = self.price.loc[pd.MultiIndex.from_product([[date],day['position'].index]),'adjfactor'].values/self.price.loc[pd.MultiIndex.from_product([[date_last],day['position'].index]),'adjfactor'].values
        temp_bool = (factor_change-1>1e-3)
        if np.sum(temp_bool)>0:
            # .loc instead of chained indexing so the write reaches the stored frame
            day['position'].loc[temp_bool,'volume'] = np.int64(day['position'].loc[temp_bool,'volume'].values*factor_change[temp_bool])

        ## Sells: collect cash, drop fully sold stocks, shrink partially sold ones
        commission = 0
        cash_of_sell = 0
        day['Sell_info']['volume'] = to_sell.values # record the requested sell volumes
        for stock_code,volume_to_sell in to_sell.items():
            if self.price.loc[(date,stock_code),'NOCB']: # flagged one-price bar: no trading
                continue
            assert self.get_stock_volume(date, stock_code)>=volume_to_sell, '卖出量比持仓量大' # cannot sell more than held
            value_to_sell = np.float64(volume_to_sell*self.price.loc[(date,stock_code),'vwap']) # sold at vwap
            cash_of_sell += value_to_sell
            day['Sell_info'].loc[stock_code, 'value'] = value_to_sell
            # Commission applies to every executed sell, including full liquidations
            # (previously it was skipped whenever the whole position was sold).
            if volume_to_sell>0:
                commission += max(round(value_to_sell*self.commission_rate,2), self.commission_min)
            # Whole position sold: remove the stock from the holdings
            if self.get_stock_volume(date, stock_code)==volume_to_sell:
                day['position'] = day['position'].drop(stock_code)
                continue
            # Partial sell: shrink the position and mark it at the close
            day['position'].loc[stock_code,'volume'] = day['position'].loc[stock_code,'volume']-volume_to_sell
            day['position'].loc[stock_code,'value'] = np.float64(day['position'].loc[stock_code,'volume']*self.price.loc[(date,stock_code),'close'])
            # NOTE(review): held_days only advances for stocks traded that day
            day['position'].loc[stock_code,'held_days'] += 1
        # Tax, commission and cash after the sells
        day['tax'] = round(cash_of_sell*self.tax_rate,2)
        day['commission'] = np.float64(commission)
        day['cash'] += (cash_of_sell-day['tax']-commission)

        ## Buys: spend cash in whole lots; newly bought stocks start with held_days 0
        commission = 0
        for stock_code,value_to_buy in to_buy.items():
            if self.price.loc[(date,stock_code),'NOCB']: # flagged one-price bar: no trading
                continue
            assert day['cash']>=0, '当前现金为负' # cash must never go negative
            # Add an empty row for a stock that is not held yet
            if stock_code not in self.get_stock_list(date):
                new_row = pd.DataFrame({'value':np.array([0],dtype=np.float64),'volume':np.array([0],dtype=np.int64),'held_days':np.array([0],dtype=np.int64)},index=[stock_code])
                day['position'] = pd.concat([day['position'], new_row])
            # Reserve twice a conservative commission estimate so the purchase stays affordable
            commission_temp = np.float64(max(round(value_to_buy*self.commission_rate+0.01,2), self.commission_min))
            value_to_buy = max(min(day['cash'], value_to_buy)-2*commission_temp,0) # affordable amount
            volume_to_buy = np.int64(value_to_buy/self.price.loc[(date,stock_code),'vwap']/100)*100 # whole lots of 100 shares
            day['Buy_info'].loc[stock_code, 'volume'] = volume_to_buy
            # Update the position and mark it at the close
            day['position'].loc[stock_code,'volume'] += volume_to_buy
            value_to_buy = round(np.float64(volume_to_buy*self.price.loc[(date,stock_code),'vwap']),2) # bought at vwap
            day['Buy_info'].loc[stock_code, 'value'] = value_to_buy
            day['position'].loc[stock_code,'value'] = np.float64(day['position'].loc[stock_code,'volume']*self.price.loc[(date,stock_code),'close'])
            day['position'].loc[stock_code,'held_days'] += 1
            if volume_to_buy>0:
                commission_temp = np.float64(max(round(value_to_buy*self.commission_rate,2), self.commission_min))
                commission += commission_temp
                day['cash'] -= np.float64(value_to_buy+commission_temp)
        # Add the buy-side commission to the day's total
        day['commission'] += commission
        ## Refresh stock_value and total_value
        day['stock_value'] = self.get_total_stock_value(date)
        day['total_value'] = day['stock_value']+day['cash']

    # Convert next-day target weights into orders
    def get_bsh_from_target_position(self, target_position:pd.Series, date, date_next):
        """Return (to_buy, to_sell) for ``date_next`` given target weights.

        Targets are sized on today's total account value. Orders for stocks whose
        'volume' is NaN on ``date_next`` are dropped.
        """
        total_value = self.account[date]['total_value']
        target_stock_value = target_position*total_value

        to_buy = self.get_to_buy_from_target_position(target_stock_value, date)
        index_temp = pd.MultiIndex.from_product([[date_next],to_buy.index])
        stock_array = self.price.loc[index_temp, 'volume'].dropna().index.get_level_values(1)
        to_buy = to_buy.loc[stock_array]    ## drop stocks with no volume on the next day

        to_sell = self.get_to_sell_from_target_position(target_stock_value, date)
        index_temp = pd.MultiIndex.from_product([[date_next],to_sell.index])
        stock_array = self.price.loc[index_temp, 'volume'].dropna().index.get_level_values(1)
        to_sell = to_sell.loc[stock_array]    ## drop stocks with no volume on the next day
        return to_buy, to_sell

    def get_to_sell_from_target_position(self, target_stock_value, date):
        """Return a Series stock_code -> share count to sell to reach the targets."""
        to_sell = {}
        ## Held stocks that are no longer targeted are sold completely
        stock_list_sell_all = list(set(self.get_stock_list(date))-set(target_stock_value.index))
        for stock_code in stock_list_sell_all:
            to_sell[stock_code] = self.get_stock_volume(date,stock_code)
        ## Over-weight holdings are trimmed (rebalancing); volume is sized at the close, in whole lots
        stock_list_holding = list(set(self.get_stock_list(date))&set(target_stock_value.index))
        stock_list_sell_part = [stock_code for stock_code in stock_list_holding if self.get_stock_value(date,stock_code)>target_stock_value[stock_code]]
        for stock_code in stock_list_sell_part:
            to_sell[stock_code] = np.int64((self.get_stock_value(date,stock_code)-target_stock_value[stock_code])/self.price.loc[(date,stock_code),'close']/100)*100
            assert to_sell[stock_code]<self.get_stock_volume(date,stock_code)
        return pd.Series(to_sell, name='sell_volume', dtype=np.int64)

    def get_to_buy_from_target_position(self, target_stock_value,date):
        """Return a Series stock_code -> cash amount to buy to reach the targets."""
        to_buy = {}
        ## Targeted stocks that are not held yet are bought for their full target value
        stock_list_buy_new = list(set(target_stock_value.index)-set(self.get_stock_list(date)))
        for stock_code in stock_list_buy_new:
            to_buy[stock_code] = np.float64(target_stock_value[stock_code])
        ## Under-weight holdings are topped up (rebalancing)
        stock_list_holding = list(set(self.get_stock_list(date))&set(target_stock_value.index))
        # strict "<" (was "<=" in an earlier revision; revert if this causes problems)
        stock_list_buy_part = [stock_code for stock_code in stock_list_holding if self.get_stock_value(date,stock_code)<target_stock_value[stock_code]]
        for stock_code in stock_list_buy_part:
            to_buy[stock_code] = target_stock_value[stock_code]-self.get_stock_value(date,stock_code)
        return pd.Series(to_buy,name='buy_value', dtype=np.float64)

    def get_stock_list(self, date):
        """Return the stock codes held on ``date`` (unordered, de-duplicated)."""
        return list(set(self.account[date]['position'].index))

    def get_stock_value(self, date, stock_code):
        """Return the recorded value of ``stock_code`` in the holdings on ``date``."""
        return self.account[date]['position'].loc[stock_code,'value']

    def get_stock_volume(self, date, stock_code):
        """Return the number of shares of ``stock_code`` held on ``date``."""
        return self.account[date]['position'].loc[stock_code,'volume']

    def get_total_stock_value(self, date):
        """Return the summed recorded value of all holdings on ``date``."""
        value = 0
        for stock_code in self.get_stock_list(date):
            value += self.get_stock_value(date, stock_code)
        return value

In [None]:
# Price table indexed by (date, stock_code), restricted to 2010 onwards.
price = data.swaplevel().sort_index()['2010':]
# NOCB flags bars where high == low on a day that has a volume value; the backtest
# refuses to trade on such bars. Built as one boolean column rather than through
# chained assignment, which does not reliably write back into the frame.
price['NOCB'] = (price['high']==price['low'])&(~price['volume'].isna())

In [None]:
## Baseline 1: no stock screening, no commission, no tax. The result is pickled for later cells.
strategy = NholdIncStrategy(Inc_label, tradedays_list)
backtest = Backtest(price, strategy, tradedays_list, start_date='2011-01-04', end_date='2021-07-30', commission_rate=0, tax_rate=0)
backtest.run()
# Drop the price table before pickling so it is not stored with the result.
del backtest.price
# Context manager guarantees the file is closed even if pickling fails.
with open('show_data\\选股回测\\NoSelect_NoCommission_NoTax.pkl','wb') as file:
    pickle.dump(backtest, file)

In [None]:
## Baseline 2: no stock screening, commission 0.003 and tax 0.001. The result is pickled for later cells.
strategy = NholdIncStrategy(Inc_label, tradedays_list)
backtest = Backtest(price, strategy, tradedays_list, start_date='2011-01-04', end_date='2021-07-30', commission_rate=0.003, tax_rate=0.001)
backtest.run()
# Drop the price table before pickling so it is not stored with the result.
del backtest.price
# Context manager guarantees the file is closed even if pickling fails.
with open('show_data\\选股回测\\NoSelect_3Commission_1Tax.pkl','wb') as file:
    pickle.dump(backtest, file)

In [None]:
def S_INFO_LISTBOARDNAME_score_func(x):
    """Score the listing board: 0 for a missing value or '主板', 1 for anything else."""
    return 0 if (pd.isna(x) or x=='主板') else 1
def MV_score_func(x, threshold_low=feature_label_2011plus_train['MV'].quantile(0.2), threshold_high=feature_label_2011plus_train['MV'].quantile(0.4)):
    """Score 'MV': 0 if missing or below threshold_low, 1 inside [threshold_low, threshold_high], -1 otherwise.

    The default thresholds are the 20% and 40% quantiles of the training sample,
    evaluated once when this function is defined.
    """
    if pd.isna(x) or x<threshold_low:
        return 0
    if threshold_low<=x<=threshold_high:
        return 1
    return -1
def Inc_entropy_all_score_func(x, threshold_low=feature_label_2011plus_train['Inc_entropy_all'].quantile(0.2), threshold_high=feature_label_2011plus_train['Inc_entropy_all'].quantile(0.4)):
    """Score 'Inc_entropy_all': 0 if missing or below threshold_low, -1 inside [threshold_low, threshold_high], 1 otherwise.

    The default thresholds are the 20% and 40% quantiles of the training sample,
    evaluated once when this function is defined.
    """
    if pd.isna(x) or x<threshold_low:
        return 0
    if threshold_low<=x<=threshold_high:
        return -1
    return 1
def Price_gap_ex_score_func(x, threshold_low=feature_label_2011plus_train['Price_gap_ex'].quantile(0.2), threshold_high=feature_label_2011plus_train['Price_gap_ex'].quantile(0.6)):
    """Score 'Price_gap_ex': 0 if missing or below threshold_low, -1 inside [threshold_low, threshold_high], 1 otherwise.

    The default thresholds are the 20% and 60% quantiles of the training sample,
    evaluated once when this function is defined.
    """
    if pd.isna(x) or x<threshold_low:
        return 0
    if threshold_low<=x<=threshold_high:
        return -1
    return 1
def S_INC_INITEXECPRI_to_vwap_score_func(x, threshold_low=feature_label_2011plus_train['S_INC_INITEXECPRI_to_vwap'].quantile(0.2), threshold_high=feature_label_2011plus_train['S_INC_INITEXECPRI_to_vwap'].quantile(0.8)):
    """Score 'S_INC_INITEXECPRI_to_vwap': 0 if missing or below threshold_low, 1 otherwise.

    The default thresholds are the 20% and 80% quantiles of the training sample,
    evaluated once when this function is defined.
    """
    if pd.isna(x) or x<threshold_low:
        return 0
    # NOTE(review): both the in-band and the above-band case score 1, so
    # threshold_high has no effect on the result - confirm this is intended.
    if threshold_low<=x<=threshold_high:
        return 1
    return 1
def Time_from_preplan_to_target_score_func(x, threshold_low=feature_label_2011plus_train['Time_from_preplan_to_target'].quantile(0.4), threshold_high=feature_label_2011plus_train['Time_from_preplan_to_target'].quantile(0.8)):
    """Score 'Time_from_preplan_to_target': 0 if missing or below threshold_low, 1 inside [threshold_low, threshold_high], -1 otherwise.

    The default thresholds are the 40% and 80% quantiles of the training sample,
    evaluated once when this function is defined.
    """
    if pd.isna(x) or x<threshold_low:
        return 0
    if threshold_low<=x<=threshold_high:
        return 1
    return -1

In [None]:
## Selection rule: the sample is split into a training and a test set, thresholds are tuned
## on the training set, and each factor is scored -1 / 0 / 1 against its thresholds.
## Factors currently used: 'S_INFO_LISTBOARDNAME','MV','Inc_entropy_all','Price_gap_ex','S_INC_INITEXECPRI_to_vwap','Time_from_preplan_to_target'
feature_label_score_2011plus = pd.DataFrame(index=feature_label_2011plus.index)
score_func_list = [
    S_INFO_LISTBOARDNAME_score_func,
    MV_score_func,
    Inc_entropy_all_score_func,
    Price_gap_ex_score_func,
    S_INC_INITEXECPRI_to_vwap_score_func,
    Time_from_preplan_to_target_score_func,
]
feature_names_list = [
    'S_INFO_LISTBOARDNAME',
    'MV',
    'Inc_entropy_all',
    'Price_gap_ex',
    'S_INC_INITEXECPRI_to_vwap',
    'Time_from_preplan_to_target',
]
# One '<factor>_score' column per factor, then the row-wise total as the final Score.
for i, (scored_feature, score_func) in enumerate(zip(feature_names_list, score_func_list)):
    feature_label_score_2011plus[scored_feature+'_score'] = feature_label_2011plus[scored_feature].apply(score_func)
feature_label_2011plus['Score'] = feature_label_score_2011plus.sum(axis=1).values

In [None]:
# Average the Score within each (announcement date, stock) pair.
temp = feature_label_2011plus.groupby(['PREPLAN_ANN_DATE','S_INFO_WINDCODE'])[['Score']].apply(lambda x: x.mean()).reset_index()
# Summary statistics of the averaged score, shown as one row.
print(pd.DataFrame(temp['Score'].describe()).T.round(2))
# Bar chart of how many samples fall on each (rounded) score; saved to disk and displayed.
temp.round(2).groupby('Score')['S_INFO_WINDCODE'].count().plot(kind='bar',title='不同最终得分的样本数目')
plt.savefig('show_data\\选股回测\\最终得分的样本数目.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

In [None]:
## Strategy 1: trade only the events whose (averaged) Score is at least 1
Inc_label_select = Inc_label.merge(feature_label_2011plus[['S_INFO_WINDCODE','S_INC_SEQUENCE','Score']], on=['S_INFO_WINDCODE','S_INC_SEQUENCE'])
def f(x):
    """Return the group with 'Score' replaced by the group mean (single-row groups are returned unchanged)."""
    if x.shape[0]==1:
        return x
    else:
        x_copy = x.copy()
        x_copy['Score'] = x_copy['Score'].mean()
        return x_copy
# Average the score per (stock, announcement date) and keep one row per pair.
Inc_label_select = Inc_label_select.groupby(['S_INFO_WINDCODE','PREPLAN_ANN_DATE']).apply(lambda x: f(x)).drop_duplicates(['S_INFO_WINDCODE','PREPLAN_ANN_DATE'])
Inc_label_select = Inc_label_select[Inc_label_select['Score']>=1]
strategy = NholdIncStrategy(Inc_label_select, tradedays_list)
# Run without costs; the price table is dropped before pickling so it is not stored with the result.
backtest = Backtest(price, strategy, tradedays_list, commission_rate=0, tax_rate=0)
backtest.run()
del backtest.price
# Context managers guarantee the files are closed even if pickling fails.
with open('show_data\\选股回测\\AllFactorSelect_NoCommission_NoTax_1.pkl','wb') as file:
    pickle.dump(backtest, file)
# Run again with commission 0.003 and tax 0.001.
backtest = Backtest(price, strategy, tradedays_list, commission_rate=0.003, tax_rate=0.001)
backtest.run()
del backtest.price
with open('show_data\\选股回测\\AllFactorSelect_3Commission_1Tax_1.pkl','wb') as file:
    pickle.dump(backtest, file)

In [None]:
## Strategy 2: trade only the events whose (averaged) Score is at least 2
Inc_label_select = Inc_label.merge(feature_label_2011plus[['S_INFO_WINDCODE','S_INC_SEQUENCE','Score']], on=['S_INFO_WINDCODE','S_INC_SEQUENCE'])
def f(x):
    """Return the group with 'Score' replaced by the group mean (single-row groups are returned unchanged)."""
    if x.shape[0]==1:
        return x
    else:
        x_copy = x.copy()
        x_copy['Score'] = x_copy['Score'].mean()
        return x_copy
# Average the score per (stock, announcement date) and keep one row per pair.
Inc_label_select = Inc_label_select.groupby(['S_INFO_WINDCODE','PREPLAN_ANN_DATE']).apply(lambda x: f(x)).drop_duplicates(['S_INFO_WINDCODE','PREPLAN_ANN_DATE'])
Inc_label_select = Inc_label_select[Inc_label_select['Score']>=2]
strategy = NholdIncStrategy(Inc_label_select, tradedays_list)
# Run without costs; the price table is dropped before pickling so it is not stored with the result.
backtest = Backtest(price, strategy, tradedays_list, commission_rate=0, tax_rate=0)
backtest.run()
del backtest.price
# Context managers guarantee the files are closed even if pickling fails.
with open('show_data\\选股回测\\AllFactorSelect_NoCommission_NoTax_2.pkl','wb') as file:
    pickle.dump(backtest, file)
# Run again with commission 0.003 and tax 0.001.
backtest = Backtest(price, strategy, tradedays_list, commission_rate=0.003, tax_rate=0.001)
backtest.run()
del backtest.price
with open('show_data\\选股回测\\AllFactorSelect_3Commission_1Tax_2.pkl','wb') as file:
    pickle.dump(backtest, file)

In [None]:
## Daily account summary: net value, number of holdings, cash, tax, commission, total value, turnover, ...
def account_summary(account):
    """Flatten a Backtest.account dict into one row per date.

    account: dict date -> daily record with the keys 'position', 'Buy_info',
        'Sell_info', 'commission', 'tax', 'cash', 'stock_value', 'total_value'.
        Dates are strings starting with 'YYYY-MM'.

    Returns a DataFrame indexed by date (in the dict's order) with the columns
    Year, NV, Stock_num, Cash, Tax, Commission, Total_value, Stock_value,
    Buy_value, Sell_value, Buy_turnover_rate, Sell_turnover_rate, Month.
    NV is total value relative to the first date. Turnover is the day's traded
    value divided by the previous date's total value (the first date is divided
    by its own total value). An empty account yields an empty frame with the
    same columns.
    """
    columns = ['Year', 'NV', 'Stock_num', 'Cash', 'Tax', 'Commission', 'Total_value', 'Stock_value',
               'Buy_value', 'Sell_value', 'Buy_turnover_rate', 'Sell_turnover_rate', 'Month']
    tradedays_list = list(account.keys())
    if not tradedays_list:
        return pd.DataFrame(columns=columns)
    init_value = account[tradedays_list[0]]['total_value']
    previous_total_value = init_value
    # Collect plain records and build the frame once, instead of writing it cell by cell.
    records = []
    for date in tradedays_list:
        day = account[date]
        buy_value = day['Buy_info']['value'].sum()
        sell_value = day['Sell_info']['value'].sum()
        records.append({
            'Year': date[:4],
            'NV': day['total_value']/init_value,
            'Stock_num': day['position'].shape[0],
            'Cash': day['cash'],
            'Tax': day['tax'],
            'Commission': day['commission'],
            'Total_value': day['total_value'],
            'Stock_value': day['stock_value'],
            'Buy_value': buy_value,
            'Sell_value': sell_value,
            'Buy_turnover_rate': buy_value/previous_total_value,
            'Sell_turnover_rate': sell_value/previous_total_value,
            'Month': date[:7],
        })
        previous_total_value = day['total_value']
    return pd.DataFrame(records, index=tradedays_list, columns=columns)
## The summary from account_summary, plus the daily holdings / buys / sells stacked into long tables
def get_summary_buy_sell_df(backtest, price=price):
    """Return (summary, position, buy_info, sell_info) for a finished backtest.

    summary is account_summary() cut at '2021-08', with an extra 'NV_ex' column:
    NV divided by the szse close normalised to its first value. The other three
    frames stack the per-day tables under a (date, stock_code) index; the trade
    tables additionally get 'TradeAmount_to_TotalAmount', the traded value
    divided by price['amount']*1000.
    """
    def _stack_daily_frames(field):
        # One copy of the day's table per date, re-indexed by (date, stock_code).
        frames = []
        for date in tqdm(list(backtest.account.keys())):
            daily = backtest.account[date][field].copy()
            daily.index = pd.MultiIndex.from_product([[date],daily.index])
            frames.append(daily)
        return pd.concat(frames)

    summary = account_summary(backtest.account)
    summary = summary.loc[:'2021-08']
    benchmark_close = szse.market.loc[summary.index]['close']
    benchmark_NV = (benchmark_close/benchmark_close.iloc[0])
    summary['NV_ex'] = summary['NV']/benchmark_NV

    position = _stack_daily_frames('position')

    buy_info = _stack_daily_frames('Buy_info')
    buy_info['TradeAmount_to_TotalAmount'] = buy_info['value']/(price.loc[buy_info.index,'amount']*1000)

    sell_info = _stack_daily_frames('Sell_info')
    sell_info['TradeAmount_to_TotalAmount'] = sell_info['value']/(price.loc[sell_info.index,'amount']*1000)
    return summary, position, buy_info, sell_info
## Length of the period between two dates in years, used for annualised metrics
def get_interval_year(start_date:str, end_date, tradedays_list)->float:
    """Return the number of years between start_date and end_date.

    Whole calendar years in between count as 1 each; the first and last year
    count as the fraction of that year's trading days that is covered. A year
    that is the first or last year of tradedays_list is assumed to have 250
    trading days.
    """
    start_year = start_date[:4]
    end_year = end_date[:4]
    starts_in_first_listed_year = start_year==tradedays_list[0][:4]
    # Same-year period inside an incomplete calendar year: plain day count over 250.
    if start_year==end_year and (starts_in_first_listed_year or end_year==tradedays_list[-1][:4]):
        return trade_day_interval(start_date, end_date,tradedays_list)/250

    covered_days_start_year = trade_day_interval(start_date, start_year+'-12-31',tradedays_list)
    start_year_length = 250 if starts_in_first_listed_year else trade_day_interval(start_year+'-01-01',start_year+'-12-31',tradedays_list)-1

    covered_days_end_year = trade_day_interval(end_year+'-01-01',end_date,tradedays_list)
    end_year_length = 250 if end_year==tradedays_list[-1][:4] else trade_day_interval(end_year+'-01-01',end_year+'-12-31',tradedays_list)-1

    return int(end_year)-int(start_year)-1+covered_days_start_year/start_year_length+covered_days_end_year/end_year_length
## Drawdown: takes a net-value series (expected to start at 1) and returns the drawdown series (values <= 0)
def max_backward(x:pd.Series)->pd.Series:
    """Return the running drawdown of a net-value series.

    For each point the result is ``x / peak - 1``, where ``peak`` is the highest
    value seen so far and never less than 1 (the assumed starting net value).
    The result has the same index as ``x`` and float dtype; its minimum is the
    maximum drawdown.
    """
    values = x.astype(np.float64)
    # Running peak, floored at the initial net value of 1. Computed in float so
    # no fractional value is ever written into an integer Series.
    running_peak = values.cummax().clip(lower=1)
    drawdown = values/running_peak-1
    drawdown.name = None
    return drawdown
## Input: a net-value series and, optionally, a benchmark net-value series
## Output: performance metrics
def get_performance_analysis(net_value:pd.Series, tradedays_list, benchmark_NV:Union[pd.Series,None]=None):
    """Return a Series of performance metrics for ``net_value``.

    Always filled: annualised return, annualised standard deviation, their ratio
    (labelled Sharpe ratio) and maximum drawdown. When ``benchmark_NV`` is given,
    the same metrics are computed on ``net_value/benchmark_NV`` together with the
    share of days with a positive excess log return; otherwise those entries
    stay NaN.
    """
    metric_names = ['年化收益率','年化标准差','夏普比率','最大回撤',
                    '超额年化收益率','超额年化标准差','超额夏普比率','胜率','超额最大回撤']
    performance_analysis_df = pd.Series(np.nan, index=metric_names)

    returns = net_value.pct_change().fillna(0)
    first_day, last_day = returns.index[0], returns.index[-1]
    num_of_interval_year = get_interval_year(first_day,last_day,tradedays_list)
    num_of_interval_tradedays = trade_day_interval(first_day, last_day,tradedays_list)
    # Square root of the average number of trading days per year in the sample.
    annualisation = np.sqrt(num_of_interval_tradedays/num_of_interval_year)

    performance_analysis_df['年化收益率'] = (net_value.iloc[-1]/net_value.iloc[0])**(1/num_of_interval_year)-1
    performance_analysis_df['年化标准差'] = returns.std()*annualisation
    performance_analysis_df['夏普比率'] = performance_analysis_df['年化收益率']/performance_analysis_df['年化标准差']
    performance_analysis_df['最大回撤'] = max_backward(net_value).min()
    if benchmark_NV is None:
        return performance_analysis_df

    ex_net_value = net_value/benchmark_NV
    ex_returns = (np.log(ex_net_value)).diff().fillna(0)
    performance_analysis_df['超额年化收益率'] = (ex_net_value.iloc[-1]/ex_net_value.iloc[0])**(1/num_of_interval_year)-1
    performance_analysis_df['超额年化标准差'] = ex_returns.std()*annualisation
    performance_analysis_df['超额夏普比率'] = performance_analysis_df['超额年化收益率']/performance_analysis_df['超额年化标准差']
    performance_analysis_df['胜率'] = (ex_returns>0).sum()/ex_returns.shape[0]
    performance_analysis_df['超额最大回撤'] = max_backward(ex_net_value).min()
    return performance_analysis_df

In [None]:
# Load the cost-free baseline run. pickle.load executes code embedded in the file,
# so only load pickles produced by this notebook.
with open('show_data\\选股回测\\NoSelect_NoCommission_NoTax.pkl','rb') as file:
    backtest = pickle.load(file)
summary,position,buy_info,sell_info = get_summary_buy_sell_df(backtest)

In [None]:
# Load the baseline run with commission and tax. pickle.load executes code embedded
# in the file, so only load pickles produced by this notebook.
with open('show_data\\选股回测\\NoSelect_3Commission_1Tax.pkl','rb') as file:
    backtest = pickle.load(file)
summary_fee,position_fee,buy_info_fee,sell_info_fee = get_summary_buy_sell_df(backtest)

In [None]:
# Benchmark net value: szse close on the summary dates, normalised to its first value.
benchmark_NV = (szse.market.loc[summary.index]['close']/szse.market.loc[summary.index]['close'].iloc[0])
def get_performance_analysis_wrapper(summary, benchmark_NV):
    """Build the performance table: one row per calendar year, then '2011-2018', '2019-2021' and '全部'.

    summary: frame with an 'NV' column indexed by date strings.
    benchmark_NV: benchmark net value on the same index.
    Ratio-type metrics are formatted as percentage strings; the two Sharpe
    columns are rounded to 3 decimals.
    """
    percent_columns = ['年化收益率','年化标准差','最大回撤','超额年化收益率','超额年化标准差','胜率','超额最大回撤']
    NV = summary['NV'].astype(np.float64)
    NV_df = pd.DataFrame([NV,benchmark_NV]).T
    NV_df.columns = ['策略零','基准']
    NV_df['Year'] = pd.to_datetime(NV_df.index).strftime('%Y')
    yearly_rows = NV_df.groupby('Year').apply(lambda x: get_performance_analysis(x['策略零'], tradedays_list, x['基准']))
    # The index holds date strings, so ':"2019"' stops before the first 2019 date.
    period_rows = [
        get_performance_analysis(NV.loc[:'2019'], tradedays_list, benchmark_NV.loc[:'2019']).to_frame('2011-2018').T,
        get_performance_analysis(NV.loc['2019':], tradedays_list, benchmark_NV.loc['2019':]).to_frame('2019-2021').T,
        get_performance_analysis(NV, tradedays_list, benchmark_NV).to_frame('全部').T,
    ]
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    performance_df = pd.concat([yearly_rows]+period_rows)
    performance_df[percent_columns] = performance_df[percent_columns].apply(lambda column: column.map(lambda x: format(x, '.2%')))
    performance_df[['夏普比率','超额夏普比率']] = performance_df[['夏普比率','超额夏普比率']].round(3)
    return performance_df
performance_df = get_performance_analysis_wrapper(summary, benchmark_NV)
performance_df.to_excel('show_data\\选股回测\\NoSelect_NoCommission_NoTax\\performance.xlsx')
performance_df

In [None]:
# Benchmark net value: szse close on the summary dates, normalised to its first value.
benchmark_NV = (szse.market.loc[summary.index]['close']/szse.market.loc[summary.index]['close'].iloc[0])
def get_performance_analysis_wrapper(summary, benchmark_NV):
    """Build the performance table: one row per calendar year, then '2011-2018', '2019-2021' and '全部'.

    summary: frame with an 'NV' column indexed by date strings.
    benchmark_NV: benchmark net value on the same index.
    Ratio-type metrics are formatted as percentage strings; the two Sharpe
    columns are rounded to 3 decimals.
    """
    percent_columns = ['年化收益率','年化标准差','最大回撤','超额年化收益率','超额年化标准差','胜率','超额最大回撤']
    NV = summary['NV'].astype(np.float64)
    NV_df = pd.DataFrame([NV,benchmark_NV]).T
    NV_df.columns = ['策略零','基准']
    NV_df['Year'] = pd.to_datetime(NV_df.index).strftime('%Y')
    yearly_rows = NV_df.groupby('Year').apply(lambda x: get_performance_analysis(x['策略零'], tradedays_list, x['基准']))
    # The index holds date strings, so ':"2019"' stops before the first 2019 date.
    period_rows = [
        get_performance_analysis(NV.loc[:'2019'], tradedays_list, benchmark_NV.loc[:'2019']).to_frame('2011-2018').T,
        get_performance_analysis(NV.loc['2019':], tradedays_list, benchmark_NV.loc['2019':]).to_frame('2019-2021').T,
        get_performance_analysis(NV, tradedays_list, benchmark_NV).to_frame('全部').T,
    ]
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    performance_df = pd.concat([yearly_rows]+period_rows)
    performance_df[percent_columns] = performance_df[percent_columns].apply(lambda column: column.map(lambda x: format(x, '.2%')))
    performance_df[['夏普比率','超额夏普比率']] = performance_df[['夏普比率','超额夏普比率']].round(3)
    return performance_df
performance_df = get_performance_analysis_wrapper(summary_fee, benchmark_NV)
performance_df.to_excel('show_data\\选股回测\\NoSelect_3Commission_1Tax\\performance.xlsx')
performance_df

In [None]:
# Excess return inside each calendar bucket: last 'NV_ex' value of the bucket
# divided by its first value, minus one. One column per fee scenario.
fee_scenarios = [summary, summary_fee]
scenario_labels = ['不考虑手续费与税','考虑手续费与税']
period_tables = {}
for period in ('Year', 'Month'):
    per_scenario = [
        scenario.groupby(period)['NV_ex'].apply(lambda nv: nv.iloc[-1]/nv.iloc[0]) - 1
        for scenario in fee_scenarios
    ]
    table = pd.concat(per_scenario, axis=1)
    table.columns = scenario_labels
    period_tables[period] = table
returns_year = period_tables['Year']
returns_month = period_tables['Month']

In [None]:
## Strategy zero: cumulative net value and excess net value, with and without fees
fig,ax = plt.subplots(1,2,figsize=(15,5))
nv_ax, excess_ax = ax
zero_runs = [(summary, '策略零-不考虑手续费和税'), (summary_fee, '策略零-考虑手续费和税')]
# Left panel: net value of both runs plus the benchmark.
for run_summary, run_label in zero_runs:
    run_summary['NV'].plot(ax=nv_ax, label=run_label)
benchmark_NV.plot(ax=nv_ax, label='基准')
nv_ax.xaxis.set_major_locator(plt.MultipleLocator(250))  # one major tick every 250 x-units
nv_ax.legend()
nv_ax.set(xlabel='日期', ylabel='累计净值', title='累计净值曲线')
# Right panel: excess net value ('NV_ex') of the same two runs.
for run_summary, run_label in zero_runs:
    run_summary['NV_ex'].plot(ax=excess_ax, label=run_label)
excess_ax.xaxis.set_major_locator(plt.MultipleLocator(250))
excess_ax.legend()
excess_ax.set(xlabel='日期', ylabel='超额累计净值', title='超额累计净值曲线')
plt.savefig('show_data\\选股回测\\NoSelect_pic\\超额累计净值曲线.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Bar chart of strategy zero's excess return per year, one bar per fee scenario.
fig,ax = plt.subplots(1,1)
returns_year.plot.bar(ax=ax)
ax.set(xlabel='年份', ylabel='超额收益率', title='策略零各年度超额收益率')
plt.savefig('show_data\\选股回测\\NoSelect_pic\\策略零各年度超额收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Bar chart of strategy zero's excess return per month; a major tick is placed
# every 12 bars to keep the axis readable.
fig,ax = plt.subplots(1,1,figsize=(15,5))
returns_month.plot.bar(ax=ax)
ax.xaxis.set_major_locator(plt.MultipleLocator(12))
ax.set(xlabel='月份', ylabel='超额收益率', title='策略零各月超额收益率')
plt.savefig('show_data\\选股回测\\NoSelect_pic\\策略零各月超额收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Line plot of the 'Stock_num' column (stocks held per day) for the no-fee run.
fig,ax = plt.subplots(1,1)
daily_holdings = summary['Stock_num']
daily_holdings.plot(ax=ax)
ax.set(xlabel='日期', ylabel='持仓股票数目', title='每日持仓股票数目')
plt.savefig('show_data\\选股回测\\NoSelect_pic\\每日持仓股票数目.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Left: monthly turnover, computed as the mean of the summed buy-side and
# sell-side turnover rates. Right: cumulative commission and tax paid.
fig,ax = plt.subplots(1,2,figsize=(15,5))
turnover_ax, fee_ax = ax
monthly_sides = summary_fee.groupby('Month')[['Buy_turnover_rate','Sell_turnover_rate']].sum()
monthly_turnover = monthly_sides.mean(axis=1)
monthly_turnover.plot(ax=turnover_ax)
turnover_ax.set(xlabel='月份', ylabel='换手率', title='双边月换手率')
for cost_column, cost_label in [('Commission', '手续费'), ('Tax', '税费')]:
    summary_fee[cost_column].cumsum().plot(ax=fee_ax,label=cost_label)
fee_ax.legend()
fee_ax.set(xlabel='日期', ylabel='费用', title='手续费和税费')
plt.savefig('show_data\\选股回测\\NoSelect_pic\\手续费和税费.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

In [None]:
## Strategy one and strategy two
# Each pickle is opened read-only inside a context manager so the handle is
# closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted dumps.
with open('show_data\\选股回测\\AllFactorSelect_3Commission_1Tax_1.pkl','rb') as file:
    backtest = pickle.load(file)
summary_AllSelect1_fee,position_AllSelect1_fee,buy_info_AllSelect1_fee,sell_info_AllSelect1_fee = get_summary_buy_sell_df(backtest)
# 'NV_ex2': net value relative to the fee-inclusive strategy-zero run.
summary_AllSelect1_fee['NV_ex2'] = summary_AllSelect1_fee['NV']/summary_fee['NV']
with open('show_data\\选股回测\\AllFactorSelect_3Commission_1Tax_2.pkl','rb') as file:
    backtest = pickle.load(file)
summary_AllSelect2_fee,position_AllSelect2_fee,buy_info_AllSelect2_fee,sell_info_AllSelect2_fee = get_summary_buy_sell_df(backtest)
summary_AllSelect2_fee['NV_ex2'] = summary_AllSelect2_fee['NV']/summary_fee['NV']

In [None]:
# Performance table for strategy one (fee-inclusive run); the full table is
# exported and only its last three rows are displayed, transposed.
performance_df = get_performance_analysis_wrapper(summary_AllSelect1_fee, benchmark_NV)
# The separator before 'performance.xlsx' is escaped ('\p' is an invalid escape sequence).
performance_df.to_excel('show_data\\选股回测\\AllSelect_3Commission_1Tax_1\\performance.xlsx')
performance_df.iloc[-3:].T

In [None]:
# Performance table for strategy two (fee-inclusive run); the full table is
# exported and only its last three rows are displayed, transposed.
performance_df = get_performance_analysis_wrapper(summary_AllSelect2_fee, benchmark_NV)
# The separator before 'performance.xlsx' is escaped ('\p' is an invalid escape sequence).
performance_df.to_excel('show_data\\选股回测\\AllSelect_3Commission_1Tax_2\\performance.xlsx')
performance_df.iloc[-3:].T

In [None]:
# Per-bucket excess return (last value / first value - 1) for strategies one and
# two, measured against the benchmark ('NV_ex') and against strategy zero ('NV_ex2').
strategy_summaries = [summary_AllSelect1_fee, summary_AllSelect2_fee]
strategy_labels = ['策略一-考虑手续费与税','策略二-考虑手续费与税']
excess_tables = {}
for period in ('Year', 'Month'):
    for nv_column in ('NV_ex', 'NV_ex2'):
        per_strategy = [
            strategy.groupby(period)[nv_column].apply(lambda nv: nv.iloc[-1]/nv.iloc[0]) - 1
            for strategy in strategy_summaries
        ]
        table = pd.concat(per_strategy, axis=1)
        table.columns = strategy_labels
        excess_tables[(period, nv_column)] = table
returns_year = excess_tables[('Year', 'NV_ex')]
returns_year2 = excess_tables[('Year', 'NV_ex2')]
returns_month = excess_tables[('Month', 'NV_ex')]
returns_month2 = excess_tables[('Month', 'NV_ex2')]

In [None]:
# Cumulative net value of strategies one, two and zero (all fee-inclusive) and the benchmark.
fig,ax = plt.subplots(1,1,figsize=(8,5))
nv_lines = [
    (summary_AllSelect1_fee['NV'], '策略一-考虑手续费和税'),
    (summary_AllSelect2_fee['NV'], '策略二-考虑手续费和税'),
    (summary_fee['NV'], '策略零-考虑手续费和税'),
    (benchmark_NV, '基准'),
]
for nv_series, nv_label in nv_lines:
    nv_series.plot(ax=ax, label=nv_label)
ax.xaxis.set_major_locator(plt.MultipleLocator(250))  # one major tick every 250 x-units
ax.legend()
ax.set(xlabel='日期', ylabel='累计净值', title='累计净值曲线')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\累计净值曲线.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()
# Left: excess net value relative to the benchmark ('NV_ex') for strategies one, two and zero.
# Right: excess net value relative to strategy zero ('NV_ex2') for strategies one and two.
fig,ax = plt.subplots(1,2,figsize=(15,5))
vs_benchmark_ax, vs_zero_ax = ax
selected_runs = [
    (summary_AllSelect1_fee, '策略一-考虑手续费和税'),
    (summary_AllSelect2_fee, '策略二-考虑手续费和税'),
]
for run_summary, run_label in selected_runs + [(summary_fee, '策略零-考虑手续费和税')]:
    run_summary['NV_ex'].plot(ax=vs_benchmark_ax, label=run_label)
vs_benchmark_ax.xaxis.set_major_locator(plt.MultipleLocator(250))
vs_benchmark_ax.legend()
vs_benchmark_ax.set(xlabel='日期', ylabel='超额累计净值', title='相对于基准的超额累计净值曲线')
for run_summary, run_label in selected_runs:
    run_summary['NV_ex2'].plot(ax=vs_zero_ax, label=run_label)
vs_zero_ax.xaxis.set_major_locator(plt.MultipleLocator(250))
vs_zero_ax.legend()
vs_zero_ax.set(xlabel='日期', ylabel='超额累计净值', title='相对于策略零的超额累计净值曲线')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\超额累计净值曲线.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Monthly excess return of strategies one and two relative to the benchmark;
# a major tick is placed every 12 bars.
fig,ax = plt.subplots(1,1,figsize=(15,5))
returns_month.plot.bar(ax=ax)
ax.xaxis.set_major_locator(plt.MultipleLocator(12))
ax.set(xlabel='月份', ylabel='超额收益率', title='策略一和二各月相对于基准的超额收益率')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\策略一和二各月相对于基准的超额收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()
# Monthly excess return of strategies one and two relative to strategy zero;
# a major tick is placed every 12 bars.
fig,ax = plt.subplots(1,1,figsize=(15,5))
returns_month2.plot.bar(ax=ax)
ax.xaxis.set_major_locator(plt.MultipleLocator(12))
ax.set(xlabel='月份', ylabel='超额收益率', title='策略一和二各月相对于策略零的超额收益率')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\策略一和二各月相对于策略零的超额收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Yearly excess return of strategies one and two: versus the benchmark (left)
# and versus strategy zero (right).
fig,ax = plt.subplots(1,2,figsize=(15,5))
yearly_panels = [
    (ax[0], returns_year, '策略一和二各年度相对于基准的超额收益率'),
    (ax[1], returns_year2, '策略一和二各年度相对于策略零的超额收益率'),
]
for panel_ax, yearly_table, panel_title in yearly_panels:
    yearly_table.plot.bar(ax=panel_ax)
    panel_ax.set(xlabel='年份', ylabel='超额收益率', title=panel_title)
plt.savefig('show_data\\选股回测\\AllSelect_pic\\策略一和二各年度的超额收益率.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Line plot of the 'Stock_num' column (stocks held per day) for strategies one and two.
fig,ax = plt.subplots(1,1)
for run_summary, run_label in [
    (summary_AllSelect1_fee, '策略一-考虑手续费和税'),
    (summary_AllSelect2_fee, '策略二-考虑手续费和税'),
]:
    run_summary['Stock_num'].plot(ax=ax, label=run_label)
plt.legend()
ax.set(xlabel='日期', ylabel='持仓股票数目', title='每日持仓股票数目')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\每日持仓股票数目.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

# Left: monthly turnover per strategy, computed as the mean of the summed
# buy-side and sell-side turnover rates. Right: cumulative commission and tax.
fig,ax = plt.subplots(1,2,figsize=(15,5))
turnover_ax, fee_ax = ax
for run_summary, run_label in [
    (summary_AllSelect1_fee, '策略一-考虑手续费和税'),
    (summary_AllSelect2_fee, '策略二-考虑手续费和税'),
]:
    monthly_sides = run_summary.groupby('Month')[['Buy_turnover_rate','Sell_turnover_rate']].sum()
    monthly_sides.mean(axis=1).plot(ax=turnover_ax, label=run_label)
turnover_ax.legend()
turnover_ax.set(xlabel='月份', ylabel='换手率', title='双边月换手率')
cost_lines = [
    (summary_AllSelect1_fee, 'Commission', '策略一-手续费'),
    (summary_AllSelect1_fee, 'Tax', '策略一-税费'),
    (summary_AllSelect2_fee, 'Commission', '策略二-手续费'),
    (summary_AllSelect2_fee, 'Tax', '策略二-税费'),
]
for run_summary, cost_column, cost_label in cost_lines:
    run_summary[cost_column].cumsum().plot(ax=fee_ax,label=cost_label)
fee_ax.legend()
fee_ax.set(xlabel='日期', ylabel='费用', title='手续费和税费')
plt.savefig('show_data\\选股回测\\AllSelect_pic\\手续费和税费.png', dpi=300, bbox_inches='tight', transparent=True)
plt.show()