In [1]:
import os
import random
from itertools import combinations

import numpy as np
import pandas as pd

from pathmgmt import pathmgmt as myPath

In [2]:
# Combine all alphas
def get_stat(plot_dir=None, tags=('zz9999', 'Lqd')):
    """Collect the per-alpha statistics CSVs into one DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains at least one of
    ``tags`` is scanned; each file inside its ``statistics`` sub-folder is
    read with ``pd.read_csv`` and becomes one row of the result.

    The folder filter keeps a folder when ANY tag matches, which is the same
    rule ``get_pnl`` applies. The previous ``or``-based test required every
    tag to be present at once, so the two loaders selected different folders.

    Parameters
    ----------
    plot_dir : path-like, optional
        Root folder to scan. Defaults to ``myPath.PLOT_DIR``.
    tags : iterable of str, optional
        Substrings that mark a folder as relevant.

    Returns
    -------
    pandas.DataFrame
        One row per statistics file, labelled
        ``'<folderName>;<fileName minus its last 15 characters>'``.
        An empty DataFrame when nothing matches.

    Raises
    ------
    ValueError
        If a statistics file does not contain exactly one row (a single
        index label is assigned to each file).
    """
    if plot_dir is None:
        plot_dir = myPath.PLOT_DIR

    frames = []
    # sorted() makes the row order reproducible; os.listdir order is arbitrary.
    for folderName in sorted(os.listdir(plot_dir)):
        if not any(tag in folderName for tag in tags):
            continue
        folder = os.path.join(plot_dir, folderName, 'statistics')
        if not os.path.isdir(folder):
            # Matching folder without a statistics sub-folder: nothing to load.
            continue
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(os.path.join(folder, fileName))
            # The last 15 characters are dropped from the file name --
            # presumably a fixed suffix such as '_statistics.csv'; TODO confirm.
            df.index = [f'{folderName};{fileName[:-15]}']
            frames.append(df)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame per file.
    return pd.concat(frames)

In [3]:
# Load the summary statistics: one row per statistics file found by get_stat().
stats = get_stat()

In [4]:
# Distribution of every statistic across all loaded alphas.
stats.describe()

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
count,562.0,562.0,562.0,562.0,562.0,562.0,562.0,562.0
mean,0.145569,0.008578,2.706276,0.06951,0.012195,0.069305,0.17188,0.231212
std,0.150991,0.02721,1.669375,0.303862,0.008595,0.066746,0.060973,0.325789
min,-0.086086,-0.085206,-0.848314,-0.931348,-0.003659,0.001911,0.074962,0.008278
25%,0.045773,-0.009246,1.324191,-0.104194,0.00542,0.022031,0.127691,0.031577
50%,0.101186,0.013834,2.990833,0.130986,0.012212,0.052657,0.154376,0.08964
75%,0.200956,0.027107,3.883308,0.214906,0.017637,0.08647,0.193618,0.270888
max,0.867974,0.072142,6.474684,0.982315,0.037569,0.358252,0.335895,1.377221


In [7]:
# Screen the alphas: keep those with IR above 3 and daily turnover below 0.2.
passes_screen = stats['IR'].gt(3) & stats['daily Turnover'].lt(0.2)
good_alphas = stats[passes_screen].index.tolist()

In [8]:
# Number of alphas that passed the IR / turnover screen.
len(good_alphas)

125

In [10]:
# combine daily pnl
def get_pnl(plot_dir=None, tags=('zz9999', 'Lqd')):
    """Combine the daily PnL series of every alpha into one wide DataFrame.

    Every sub-folder of ``plot_dir`` whose name contains at least one of
    ``tags`` is scanned; each file inside its ``PnL_results`` sub-folder is
    read with ``pd.read_csv`` and its ``pnl`` column becomes one column of
    the result, aligned on ``time``.

    Parameters
    ----------
    plot_dir : path-like, optional
        Root folder to scan. Defaults to ``myPath.PLOT_DIR``.
    tags : iterable of str, optional
        Substrings that mark a folder as relevant.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (sorted ascending), one column per PnL file named
        ``'<folderName>;<fileName minus its last 13 characters>'``. Dates
        missing from a file are NaN in that column. When nothing matches, an
        empty frame with an empty ``time`` index is returned (previously this
        raised ``KeyError``).

    Raises
    ------
    KeyError
        If a file lacks a ``time`` or ``pnl`` column.
    """
    if plot_dir is None:
        plot_dir = myPath.PLOT_DIR

    pnl_columns = []
    # sorted() makes the column order reproducible; os.listdir order is arbitrary.
    for folderName in sorted(os.listdir(plot_dir)):
        if not any(tag in folderName for tag in tags):
            continue
        folder = os.path.join(plot_dir, folderName, 'PnL_results')
        if not os.path.isdir(folder):
            # Matching folder without a PnL_results sub-folder: nothing to load.
            continue
        for fileName in sorted(os.listdir(folder)):
            df = pd.read_csv(os.path.join(folder, fileName))
            df = df[['time', 'pnl']]
            # NOTE(review): alignment below assumes 'time' is unique within a
            # file -- confirm against the PnL export.
            column = df.set_index('time')['pnl'].rename(
                f'{folderName};{fileName[:-13]}')
            pnl_columns.append(column)

    if not pnl_columns:
        return pd.DataFrame(index=pd.Index([], name='time'))
    # One outer-aligned concat replaces the repeated outer merges, which
    # re-copied the accumulated frame for every file.
    return pd.concat(pnl_columns, axis=1).sort_index()


In [11]:
# Load the daily PnL table: indexed by 'time', one column per PnL file.
pnls = get_pnl()

In [12]:
# Pairwise correlation of the daily PnL columns belonging to the screened alphas.
good_pnls = pnls.loc[:, good_alphas]
corr = good_pnls.corr()

In [13]:
# Enumerate every group of `num` alphas in which no pair has an absolute PnL
# correlation above MAX_ABS_CORR. Each hit is stored in `res` as the group's
# correlation sub-matrix.
# (`random` and `combinations` are imported in the notebook's first cell.)
num = 4
MAX_ABS_CORR = 0.6
SHUFFLE_SEED = 0  # fixed so the order of `res` is reproducible across runs

# compatible[i, j] is True when alphas i and j may share a group. A NaN
# correlation compares False against the threshold and is therefore accepted,
# matching the `> 0.6` rejection test this cell has always applied.
compatible = ~(corr.abs().to_numpy() > MAX_ABS_CORR)
np.fill_diagonal(compatible, True)

# Pre-filter: a member of a valid group needs at least `num - 1` compatible
# partners. The earlier pre-filter took `.index` of a boolean Series, which
# returns every label regardless of the mask, so it never removed anything.
alpha_names = list(corr.index)
partner_count = compatible.sum(axis=1) - 1  # minus the diagonal itself
candidate_pos = [pos for pos in range(len(alpha_names))
                 if partner_count[pos] >= num - 1]
# A private Random instance keeps the global random state untouched.
random.Random(SHUFFLE_SEED).shuffle(candidate_pos)
good_candidates = [alpha_names[pos] for pos in candidate_pos]

# Plain nested lists make the per-pair lookup cheap; a DataFrame is only built
# for groups that actually pass, not for every one of the C(n, num) candidates.
pair_ok = compatible.tolist()
res = []
for positions in combinations(candidate_pos, num):
    if all(pair_ok[a][b] for a, b in combinations(positions, 2)):
        picked = list(positions)
        res.append(corr.iloc[picked, picked])

In [24]:
# Pick the group in `res` with the largest summed IR. Ties keep the earliest
# group because the comparison is strict. If `res` is empty, max_idx stays -1
# and max_IR stays -inf.
ir_by_alpha = stats['IR']  # hoisted: looked up by label for every group below
max_IR = float('-inf')
max_idx = -1
for i, group_corr in enumerate(res):
    # Label lookup replaces a boolean scan of the whole index per alpha name;
    # the plain numpy sum lets a NaN IR propagate instead of being skipped.
    total_IR = ir_by_alpha.loc[list(group_corr.index)].to_numpy().sum()
    if total_IR > max_IR:
        max_IR = total_IR
        max_idx = i

In [25]:
# Correlation sub-matrix of the group with the highest summed IR.
res[max_idx]

Unnamed: 0,alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days
alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,1.0,0.19317,0.567049,0.470313
alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.19317,1.0,0.440218,0.113529
alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.567049,0.440218,1.0,0.171366
alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.470313,0.113529,0.171366,1.0


In [26]:
# Summary statistics of the alphas that make up the selected group.
selected_names = res[max_idx].index
stats[stats.index.isin(selected_names)]

Unnamed: 0,Annualized Return,Annualized Excess Return,IR,IR long only,IC,Max Drawdown,Max Drawdown long only,daily Turnover
alpha.YaoTec003-10days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.3295,-0.054146,3.291217,-0.427668,0.017708,0.069035,0.200819,0.197056
alpha.YaoTec004-5days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.124721,-0.087197,2.631417,-0.698189,0.013729,0.032391,0.335938,0.132477
alpha.YaoTec007-60days-zz9999;20180101-20201231-Rank-Industry-holding1days,0.171024,-0.078445,3.003937,-0.567547,0.012488,0.047727,0.241926,0.173329
alpha.YaoTec011-3days-20days-zz9999;20180101-20201231-Rank-Industry-holding10days,0.07782,-0.044824,2.676651,-0.321325,0.011783,0.026886,0.383843,0.133177
