In [1]:
import numpy as np
import pandas as pd
from pathmgmt import pathmgmt as myPath
import os

In [2]:
# Combine the summary statistics of all alphas into one table
def get_stat(plot_dir=None, required_tags=('zz9999', 'Tec')):
    """Collect the per-run statistics files into a single DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains *all* of
    ``required_tags`` is scanned; each file found in its ``statistics``
    sub-folder is read with ``pd.read_csv`` and becomes one row of the result.

    Parameters
    ----------
    plot_dir : path-like, optional
        Root directory to scan. Defaults to ``myPath.PLOT_DIR``.
    required_tags : iterable of str, optional
        Substrings that must all appear in a folder name for it to be used.

    Returns
    -------
    pandas.DataFrame
        One row per statistics file, labelled ``"<folder>;<file label>"``.
        An empty DataFrame is returned when nothing matches.

    Raises
    ------
    ValueError
        If a statistics file does not contain exactly one data row (the
        single-label index assignment fails).
    """
    if plot_dir is None:
        plot_dir = myPath.PLOT_DIR

    frames = []
    # sorted() makes the row order independent of the file system's listing order.
    for folderName in sorted(os.listdir(plot_dir)):
        if not all(tag in folderName for tag in required_tags):
            continue
        folder = os.path.join(plot_dir, folderName, 'statistics')
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(os.path.join(folder, fileName))
            # The file label is the file name without its last 15 characters
            # (presumably a fixed-length suffix plus extension -- verify
            # against the code that writes these files).
            df.index = [f'{folderName};{fileName[:-15]}']
            frames.append(df)

    # Concatenate once instead of growing the frame inside the loop.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [3]:
# Load the summary statistics table: one row per statistics file found by get_stat().
stats = get_stat()

In [4]:
# Summary (count / mean / std / quantiles) of each statistic across all loaded rows.
stats.describe()

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
count,456.0,456.0,456.0,456.0,456.0,456.0,456.0,456.0
mean,0.078597,-0.118891,1.21154,-0.860347,0.006682,0.084118,0.421369,0.162667
std,0.092897,0.046564,1.336737,0.342296,0.00691,0.076905,0.103783,0.178302
min,-0.250842,-0.346628,-2.986703,-2.372647,-0.01814,0.009147,0.196674,0.013546
25%,0.021354,-0.146822,0.552832,-1.054695,0.003166,0.046711,0.363998,0.047704
50%,0.071549,-0.109151,1.311538,-0.807867,0.007313,0.068209,0.420933,0.098157
75%,0.132627,-0.086123,2.236784,-0.606932,0.011495,0.086906,0.465666,0.211961
max,0.373127,-0.033167,3.996524,-0.283971,0.022099,0.640058,0.918585,1.19667


In [17]:
# Shortlist the alphas with IR above 2.5 and daily turnover below 0.2.
is_good = (stats['IR'] > 2.5) & (stats['daily Turnover'] < 0.2)
good_alphas = stats.index[is_good.to_numpy()].tolist()


In [18]:
# Number of alphas that passed the IR / turnover filter.
len(good_alphas)

35

In [20]:
# Combine the daily PnL of all alphas into one wide table
def get_pnl(plot_dir=None, required_tags=('zz9999', 'Tec')):
    """Collect the per-run PnL files into a single wide DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains *all* of
    ``required_tags`` is scanned (the same selection rule ``get_stat`` uses,
    so both tables cover the same runs). Each file in its ``PnL_results``
    sub-folder contributes its ``pnl`` column, aligned on ``time``.

    Parameters
    ----------
    plot_dir : path-like, optional
        Root directory to scan. Defaults to ``myPath.PLOT_DIR``.
    required_tags : iterable of str, optional
        Substrings that must all appear in a folder name for it to be used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (sorted ascending), one column per PnL file named
        ``"<folder>;<file label>"``. Times missing from a file are NaN in its
        column. An empty frame (index named ``time``) is returned when nothing
        matches.

    Notes
    -----
    ``time`` values are expected to be unique within each file; duplicated
    times make the column-wise alignment fail instead of silently producing
    a cross product of rows.
    """
    if plot_dir is None:
        plot_dir = myPath.PLOT_DIR

    columns = []
    # sorted() makes the column order independent of the file system's listing order.
    for folderName in sorted(os.listdir(plot_dir)):
        if not all(tag in folderName for tag in required_tags):
            continue
        folder = os.path.join(plot_dir, folderName, 'PnL_results')
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(os.path.join(folder, fileName), usecols=['time', 'pnl'])
            # The file label is the file name without its last 13 characters
            # (presumably a fixed-length suffix plus extension -- verify
            # against the code that writes these files).
            label = f'{folderName};{fileName[:-13]}'
            columns.append(df.set_index('time')['pnl'].rename(label))

    if not columns:
        return pd.DataFrame(index=pd.Index([], name='time'))

    # A single outer-aligned concat replaces the repeated outer merges.
    return pd.concat(columns, axis=1).rename_axis('time').sort_index()


In [21]:
# Load the wide PnL table: indexed by time, one column per PnL file found by get_pnl().
pnls = get_pnl()

In [22]:
# Pairwise correlation (pandas default method) between the PnL columns of the shortlisted alphas.
corr = pnls[good_alphas].corr()

In [23]:
from itertools import combinations
import random

# Enumerate every group of `num` alphas whose pairwise |correlation| never
# exceeds MAX_ABS_CORR; `res` collects the correlation sub-matrix of each group.
num = 4
MAX_ABS_CORR = 0.6

# conflict[i, j] is True when alphas i and j (i != j) are too correlated to be
# held together. NaN correlations compare False and therefore never conflict.
off_diag = ~np.eye(len(corr), dtype=bool)
conflict = (corr.abs().to_numpy() > MAX_ABS_CORR) & off_diag

# An alpha can only belong to a valid group if at least num - 1 *other* alphas
# are compatible with it. Select the labels where that holds (taking `.index`
# of the boolean Series would return every label, True or False alike).
n_compatible = (~conflict & off_diag).sum(axis=0)
good_candidates = [
    name for name, n_ok in zip(corr.columns, n_compatible) if n_ok >= num - 1
]
# Seeded shuffle: the enumeration order is randomized but reproducible, and the
# global `random` state is left untouched.
random.Random(0).shuffle(good_candidates)

# Check each combination against the precomputed boolean matrix; the labelled
# sub-matrix is only built for combinations that pass.
position = {name: pos for pos, name in enumerate(corr.columns)}
res = []
for cols in combinations(good_candidates, num):
    idx = [position[name] for name in cols]
    if not conflict[np.ix_(idx, idx)].any():
        res.append(corr.loc[list(cols), list(cols)])

In [24]:
# Find the combination in `res` whose members have the largest summed IR.
# Ties keep the earliest combination; max_idx stays -1 when `res` is empty.
max_IR, max_idx = float('-inf'), -1
for i, candidate in enumerate(res):
    # Look up each member's IR by its label in `stats` and add them up.
    total_IR = sum(stats.IR[stats.index == name].sum() for name in candidate.index)
    if total_IR > max_IR:
        max_IR, max_idx = total_IR, i

In [25]:
# Correlation sub-matrix of the combination with the highest total IR.
res[max_idx]

Unnamed: 0,alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days
alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,1.0,0.19317,0.567049,0.470313
alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.19317,1.0,0.440218,0.113529
alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.567049,0.440218,1.0,0.171366
alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.470313,0.113529,0.171366,1.0


In [26]:
# Summary statistics of the alphas in the selected combination.
stats.loc[stats.index.isin(res[max_idx].index)]

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.3295,-0.054146,3.291217,-0.427668,0.017708,0.069035,0.200819,0.197056
alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.124721,-0.087197,2.631417,-0.698189,0.013729,0.032391,0.335938,0.132477
alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.171024,-0.078445,3.003937,-0.567547,0.012488,0.047727,0.241926,0.173329
alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.07782,-0.044824,2.676651,-0.321325,0.011783,0.026886,0.383843,0.133177
