In [1]:
import os
import random
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd

from pathmgmt import pathmgmt as myPath

In [2]:
# Combine all alphas
def get_stat(plot_dir=None):
    """Collect the per-alpha statistics CSVs into one DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains both ``'zz9999'``
    and ``'Vol'`` is scanned, and each file inside its ``statistics``
    directory is read with ``pandas.read_csv``. Each file must hold exactly
    one data row, because a single-element index is assigned to it.

    Parameters
    ----------
    plot_dir : str or path-like, optional
        Root directory to scan. Defaults to ``myPath.PLOT_DIR``.

    Returns
    -------
    pandas.DataFrame
        One row per statistics file, labelled
        ``'<folderName>;<fileName without its last 15 characters>'``.
        An empty DataFrame when no matching file exists.

    Raises
    ------
    FileNotFoundError
        If a matching folder has no ``statistics`` sub-directory.
    """
    root = myPath.PLOT_DIR if plot_dir is None else Path(plot_dir)

    frames = []
    # Sorted listings make the row order reproducible across machines;
    # os.listdir returns entries in arbitrary order.
    for folderName in sorted(os.listdir(root)):
        if 'zz9999' not in folderName or 'Vol' not in folderName:
            continue
        folder = root / folderName / 'statistics'
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(folder / fileName)
            # The last 15 characters are presumably a fixed file suffix
            # (e.g. '-statistics.csv') -- TODO confirm against the writer.
            df.index = [f'{folderName};{fileName[:-15]}']
            frames.append(df)

    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the result on every file.
    return pd.concat(frames)


In [3]:
# Combined statistics table: one row per statistics file, labelled
# '<folder>;<file prefix>'.
stats = get_stat()

In [4]:
# Distribution of every statistic across all loaded alphas.
stats.describe()

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
count,372.0,372.0,366.0,372.0,366.0,372.0,372.0,372.0
mean,0.116773,-0.102999,1.867542,-0.795522,0.006638,0.059086,0.376955,0.147417
std,0.085956,0.040702,1.035233,0.309199,0.004462,0.029368,0.101609,0.180126
min,-0.019162,-0.221433,-1.096069,-1.615433,-0.001334,-0.0,0.191635,0.0
25%,0.039429,-0.122903,1.400462,-0.953671,0.003554,0.039116,0.299868,0.038376
50%,0.118469,-0.096284,1.868106,-0.764785,0.006077,0.055874,0.363979,0.082344
75%,0.178668,-0.078087,2.463512,-0.579251,0.008744,0.084306,0.460906,0.188414
max,0.409374,0.003246,6.09859,0.02483,0.02611,0.112945,0.66785,1.370488


In [5]:
# Labels of the alphas whose IR is strictly above 3 (rows with a missing IR
# compare False and are dropped).
good_alphas = stats.index[(stats.IR > 3).to_numpy()].tolist()

In [6]:
# Number of alphas that passed the IR filter.
len(good_alphas)

37

In [7]:
# combine daily pnl
def get_pnl(plot_dir=None):
    """Collect the daily PnL series of every alpha into one wide DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains both ``'zz9999'``
    and ``'Vol'`` is scanned (the same selection rule as ``get_stat``, so the
    column labels line up with the statistics row labels). Each file in its
    ``PnL_results`` directory must provide ``time`` and ``pnl`` columns.

    Parameters
    ----------
    plot_dir : str or path-like, optional
        Root directory to scan. Defaults to ``myPath.PLOT_DIR``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (sorted ascending, outer-joined over all files),
        with one column per PnL file labelled
        ``'<folderName>;<fileName without its last 13 characters>'``.
        Dates missing from a file are NaN in that column. An empty DataFrame
        with an index named ``time`` when no matching file exists.

    Raises
    ------
    FileNotFoundError
        If a matching folder has no ``PnL_results`` sub-directory.
    KeyError
        If a file lacks the ``time`` or ``pnl`` column.

    Notes
    -----
    ``time`` values are assumed to be unique within each file; the
    column-wise concat raises on a duplicated index.
    """
    root = myPath.PLOT_DIR if plot_dir is None else Path(plot_dir)

    pnl_series = []
    # Sorted listings make the column order reproducible; os.listdir returns
    # entries in arbitrary order.
    for folderName in sorted(os.listdir(root)):
        if 'zz9999' not in folderName or 'Vol' not in folderName:
            continue
        folder = root / folderName / 'PnL_results'
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(folder / fileName)[['time', 'pnl']]
            # The last 13 characters are presumably a fixed file suffix --
            # TODO confirm against the code that writes these files.
            label = f'{folderName};{fileName[:-13]}'
            pnl_series.append(df.set_index('time')['pnl'].rename(label))

    if not pnl_series:
        return pd.DataFrame(index=pd.Index([], name='time'))
    # A single outer-join concat on the time index replaces the repeated
    # pairwise merges, which re-copied the growing table for every file.
    return pd.concat(pnl_series, axis=1).rename_axis('time').sort_index()


In [8]:
# Daily PnL table: indexed by 'time', one column per PnL file labelled
# '<folder>;<file prefix>'.
pnls = get_pnl()

In [9]:
# Pairwise correlation of the daily PnL of the good alphas. This relies on the
# statistics row labels being identical to the PnL column labels; a mismatch
# raises KeyError here.
corr = pnls[good_alphas].corr()

In [10]:
# Enumerate every `num`-sized set of good alphas in which no pair has an
# absolute PnL correlation above `corr_threshold`.
num = 4               # alphas per combination
corr_threshold = 0.6  # a pair with |corr| above this cannot share a set
random_seed = 0       # fixes the enumeration order between runs

# True where an off-diagonal pair is too correlated. The diagonal is masked to
# NaN first, and NaN > threshold is False, so an alpha never blocks itself.
too_correlated = corr.mask(np.eye(len(corr), dtype=bool)).abs() > corr_threshold

# An alpha can only appear in a valid set if at least `num - 1` OTHER alphas
# are compatible with it. This is a necessary condition, so pruning on it
# never removes a valid combination. Boolean indexing is required here:
# `.index` of the boolean Series alone would return every label.
n_compatible = (~too_correlated).sum() - 1  # minus the alpha itself
is_candidate = n_compatible >= num - 1
good_candidates = list(is_candidate[is_candidate].index)

# Seeded shuffle: the order of `res` (and therefore tie-breaking downstream)
# is reproducible after a kernel restart.
random.Random(random_seed).shuffle(good_candidates)

res = []
for cols in combinations(good_candidates, num):
    cols = list(cols)
    if not too_correlated.loc[cols, cols].to_numpy().any():
        res.append(corr.loc[cols, cols])

In [11]:
# Pick the combination whose member alphas have the largest summed IR.
# The first combination wins on ties; max_idx stays -1 when `res` is empty.
max_IR = float('-inf')
max_idx = -1
for i, candidate in enumerate(res):
    # One label-based lookup per combination, summed in the combination's order.
    total_IR = stats.loc[candidate.index, 'IR'].to_numpy().sum()
    if total_IR > max_IR:
        max_IR, max_idx = total_IR, i

In [12]:
# Correlation matrix of the combination with the highest summed IR.
res[max_idx]

Unnamed: 0,alpha.YaoVol002-60days-zz9999;20180101-20201231-Rank-Industry-holding3days,alpha.YaoVol006-3days-zz9999;20180101-20201231-Rank-Industry-holding1days,alpha.YaoVol005-20days-zz9999;20180101-20201231-Rank-Industry-holding60days,alpha.YaoVol007-10days-zz9999;20180101-20201231-Rank-Industry-holding1days
alpha.YaoVol002-60days-zz9999;20180101-20201231-Rank-Industry-holding3days,1.0,0.366266,0.253826,0.191765
alpha.YaoVol006-3days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.366266,1.0,0.461368,0.166075
alpha.YaoVol005-20days-zz9999;20180101-20201231-Rank-Industry-holding60days,0.253826,0.461368,1.0,0.071407
alpha.YaoVol007-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.191765,0.166075,0.071407,1.0


In [18]:
# Statistics rows of the alphas in the selected combination, in `stats` order.
stats[stats.index.isin(res[max_idx].index)]

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
alpha.YaoVol002-60days-zz9999;20180101-20201231-Rank-Industry-holding3days,0.147306,-0.104327,3.240497,-0.772919,0.010048,0.042189,0.285442,0.078172
alpha.YaoVol005-20days-zz9999;20180101-20201231-Rank-Industry-holding60days,0.023955,-0.108468,3.054662,-0.680418,0.005021,0.008389,0.462555,0.021801
alpha.YaoVol006-3days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.382751,-0.023038,6.09859,-0.166761,0.024658,0.036727,0.191635,0.770609
alpha.YaoVol007-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.098923,-0.102236,3.54732,-0.738922,0.007169,0.022099,0.405913,0.565616
