In [1]:
import numpy as np
import pandas as pd
from pathmgmt import pathmgmt as myPath
import os

In [2]:
# Combine all alphas
def get_stat():
    """Collect the per-alpha statistics CSV files into a single DataFrame.

    Every entry of ``myPath.PLOT_DIR`` that contains a ``statistics``
    sub-folder is scanned; each CSV in that sub-folder is read and its row is
    labelled ``'<folderName>;<fileName without its last 15 characters>'``.

    Returns
    -------
    pandas.DataFrame
        The rows of all statistics files, in sorted folder/file order.
        An empty DataFrame is returned when no statistics file is found.

    Raises
    ------
    ValueError
        Propagated from pandas when a file does not hold exactly one data
        row, because a single label is assigned as that file's index.
    """
    frames = []
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order is reproducible across machines and runs.
    for folderName in sorted(os.listdir(myPath.PLOT_DIR)):
        folder = myPath.PLOT_DIR/folderName/'statistics'
        if not os.path.isdir(folder):
            # Stray entries (hidden files, folders without statistics) are
            # skipped instead of aborting the whole scan.
            continue
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(folder/fileName)
            # NOTE(review): presumably drops a fixed 15-character file-name
            # suffix (e.g. '-statistics.csv') -- confirm against the writer.
            df.index = [f'{folderName};{fileName[:-15]}']
            frames.append(df)
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame per file.
    return pd.concat(frames)

In [3]:
# Load the combined statistics table built by get_stat() (one row per statistics file).
stats = get_stat()

In [4]:
# Summary statistics of each metric column across all collected rows.
stats.describe()

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
count,456.0,456.0,456.0,456.0,456.0,456.0,456.0,456.0
mean,0.013859,-0.084055,0.237436,-1.514155,0.00128,0.185202,0.357324,0.158366
std,0.112514,0.069393,1.430174,1.519164,0.00931,0.165463,0.148527,0.180124
min,-0.251808,-0.270461,-2.642853,-5.404325,-0.0186,0.01159,0.134477,0.014491
25%,-0.080879,-0.129499,-1.084638,-2.470939,-0.007177,0.071641,0.23061,0.047563
50%,0.023654,-0.064384,0.536066,-0.832745,0.002605,0.104247,0.334948,0.092456
75%,0.101126,-0.033454,1.380419,-0.3437,0.008921,0.275125,0.474661,0.190962
max,0.251808,0.017203,2.858043,0.224382,0.0186,0.772929,0.762976,1.236467


> We first select alphas with IR larger than 2

In [5]:
# Row labels of the alphas whose IR value is strictly greater than 2.
good_alphas = stats.index[(stats['IR'] > 2).to_numpy()].tolist()

In [6]:
# Show the labels that passed the IR filter.
good_alphas

['alpha.YaoReV001-10days-zz1000;20180101-20201231-Rank-Industry-holding20days',
 'alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding10days',
 'alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding1days',
 'alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding20days',
 'alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding3days',
 'alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding5days',
 'alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding10days',
 'alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding1days',
 'alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days',
 'alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding3days',
 'alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding5days',
 'alpha.YaoReV002-3days-20days-zz1000;20180101-20201231-Rank-Industry-holding10days',
 'alp

In [7]:
# combine daily pnl
def get_pnl():
    """Combine the daily PnL series of every alpha into one wide DataFrame.

    Every entry of ``myPath.PLOT_DIR`` that contains a ``PnL_results``
    sub-folder is scanned. From each CSV the ``time`` and ``pnl`` columns are
    kept, ``pnl`` is renamed to
    ``'<folderName>;<fileName without its last 13 characters>'`` and the
    frames are outer-merged on ``time``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (sorted ascending), one column per PnL file in
        sorted folder/file order; times missing from a file are NaN.
        When no PnL file is found, an empty frame whose index is named
        ``time`` is returned.

    Raises
    ------
    KeyError
        Propagated from pandas when a file lacks a ``time`` or ``pnl`` column.
    """
    res = None
    # os.listdir returns entries in arbitrary order; sort so that the column
    # order is reproducible across machines and runs.
    for folderName in sorted(os.listdir(myPath.PLOT_DIR)):
        folder = myPath.PLOT_DIR/folderName/'PnL_results'
        if not os.path.isdir(folder):
            # Stray entries (hidden files, folders without PnL results) are
            # skipped instead of aborting the whole scan.
            continue
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(folder/fileName)
            # rename() returns a new frame; renaming in place on a column
            # subset is a chained-assignment hazard.
            # NOTE(review): presumably drops a fixed 13-character file-name
            # suffix -- confirm against the code that writes these files.
            df = df[['time', 'pnl']].rename(
                columns={'pnl': f'{folderName};{fileName[:-13]}'})
            # Use None as the "nothing merged yet" sentinel: testing
            # res.empty would discard an already-merged frame that happens
            # to have zero rows.
            if res is None:
                res = df
            else:
                res = res.merge(df, on='time', how='outer')
    if res is None:
        return pd.DataFrame(index=pd.Index([], name='time'))
    return res.set_index('time').sort_index()

In [8]:
# Load the wide PnL table built by get_pnl() (indexed by time, one column per PnL file).
pnls = get_pnl()

> Compute correlation matrix on good alpha candidates

In [11]:
# Pairwise correlation (pandas default method) of the PnL columns of the selected alphas.
corr = pnls.loc[:, good_alphas].corr()

> We want to select alphas that are less correlated with each other

In [12]:
# For each alpha, count the other alphas whose absolute correlation with it is below 0.6
# (the diagonal is blanked out so an alpha is never compared with itself).
corr.where(~np.eye(len(corr), dtype=bool)).abs().lt(0.6).sum()

alpha.YaoReV001-10days-zz1000;20180101-20201231-Rank-Industry-holding20days            0
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding10days            2
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding1days             0
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding20days           10
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding3days             0
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding5days             0
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding10days     1
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding1days      0
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days    10
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding3days      0
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding5days      0
alpha.YaoReV002-3days

In [13]:
# Boolean flag per alpha: True when at least 5 other alphas have an absolute
# correlation below 0.6 with it (diagonal excluded).
# NOTE(review): this keeps alphas that have many weakly correlated peers; it does
# not guarantee that the kept alphas are weakly correlated with each other.
small_corr = corr.where(~np.eye(len(corr), dtype=bool)).abs().lt(0.6).sum().ge(5)

In [14]:
# Correlation sub-matrix restricted (rows and columns) to the flagged alphas.
corr.loc[small_corr].loc[:, small_corr]

Unnamed: 0,alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-3days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-3days-60days-zz1000;20180101-20201231-Rank-Industry-holding3days,alpha.YaoReV002-5days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV005-60days-zz1000;20180101-20201231-Rank-Industry-holding10days,alpha.YaoReV006-10days-zz1000;20180101-20201231-Rank-Industry-holding10days,alpha.YaoReV006-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,alpha.YaoReV006-3days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV006-5days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV006-60days-zz1000;20180101-20201231-Rank-Industry-holding10days
alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,1.0,0.989684,0.970377,0.913313,0.978528,0.448049,0.760198,0.713269,0.36876,0.437984,0.396142,0.459438,0.719257
alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,0.989684,1.0,0.978912,0.883641,0.988948,0.448566,0.775247,0.695081,0.36019,0.423826,0.389715,0.458792,0.695891
alpha.YaoReV002-3days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,0.970377,0.978912,1.0,0.923491,0.998108,0.577566,0.789593,0.717064,0.503482,0.548094,0.522473,0.585413,0.721767
alpha.YaoReV002-3days-60days-zz1000;20180101-20201231-Rank-Industry-holding3days,0.913313,0.883641,0.923491,1.0,0.913506,0.598182,0.673572,0.750694,0.558541,0.582163,0.524703,0.566691,0.760907
alpha.YaoReV002-5days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,0.978528,0.988948,0.998108,0.913506,1.0,0.540043,0.78883,0.712844,0.463983,0.511086,0.483432,0.550061,0.715939
alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,0.448049,0.448566,0.577566,0.598182,0.540043,1.0,0.68146,0.542831,0.857526,0.944673,0.895601,0.886852,0.55093
alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,0.760198,0.775247,0.789593,0.673572,0.78883,0.68146,1.0,0.755617,0.544589,0.62305,0.627764,0.685222,0.734134
alpha.YaoReV005-60days-zz1000;20180101-20201231-Rank-Industry-holding10days,0.713269,0.695081,0.717064,0.750694,0.712844,0.542831,0.755617,1.0,0.521636,0.559079,0.521383,0.571908,0.95908
alpha.YaoReV006-10days-zz1000;20180101-20201231-Rank-Industry-holding10days,0.36876,0.36019,0.503482,0.558541,0.463983,0.857526,0.544589,0.521636,1.0,0.89775,0.90962,0.918747,0.541238
alpha.YaoReV006-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,0.437984,0.423826,0.548094,0.582163,0.511086,0.944673,0.62305,0.559079,0.89775,1.0,0.939895,0.930818,0.576991


> Our final choice for this batch is as follows:

In [18]:
# Labels of the flagged alphas, in the column order of the correlation matrix.
batch1 = corr.columns[small_corr.to_numpy()]

In [27]:
# Statistics of the chosen batch, shown with metrics as rows and alphas as columns.
stats.loc[batch1].T

Unnamed: 0,alpha.YaoReV001-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-10days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-3days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV002-3days-60days-zz1000;20180101-20201231-Rank-Industry-holding3days,alpha.YaoReV002-5days-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,alpha.YaoReV005-20days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV005-60days-zz1000;20180101-20201231-Rank-Industry-holding10days,alpha.YaoReV006-10days-zz1000;20180101-20201231-Rank-Industry-holding10days,alpha.YaoReV006-20days-zz1000;20180101-20201231-Rank-Industry-holding1days,alpha.YaoReV006-3days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV006-5days-zz1000;20180101-20201231-Rank-Industry-holding20days,alpha.YaoReV006-60days-zz1000;20180101-20201231-Rank-Industry-holding10days
Annualized Return,0.147113,0.131892,0.140714,0.208355,0.137529,0.194895,0.137809,0.154871,0.115211,0.209624,0.054651,0.073755,0.144673
Annualized Excess Return,0.011869,0.011695,0.01094,-0.026198,0.010764,-0.048801,-0.024408,-0.015412,-0.013454,-0.040287,-0.007956,-0.01378,-0.022322
IR,2.079685,2.064194,2.139603,2.02194,2.107409,2.302861,2.566186,2.264423,2.024323,2.631168,2.00125,2.209709,2.288077
IR long only,0.134,0.120457,0.109224,-0.379529,0.108987,-0.988425,-0.319535,-0.302688,-0.181443,-0.848528,-0.046004,-0.091049,-0.456257
IC,0.013303,0.012681,0.014148,0.014265,0.01361,0.014082,0.01205,0.010312,0.011159,0.015257,0.012315,0.012777,0.010034
Max Drawdown,0.074698,0.077714,0.0682,0.081591,0.072295,0.061203,0.041734,0.056275,0.082843,0.060668,0.033408,0.038804,0.052272
Max Drawdown long only,0.197889,0.23321,0.236261,0.165863,0.233724,0.163988,0.180407,0.137665,0.20307,0.154523,0.418526,0.358049,0.1451
daily Turnover,0.068344,0.067906,0.067729,0.106496,0.067797,0.297382,0.070054,0.071096,0.134312,0.299691,0.066295,0.066613,0.070201
