In [1]:
import pandas as pd
import numpy as np

In [2]:
def change_features(df):
    df['AVG_returns'] = (df['AVG_returns'] + 1) ** 252 - 1
    df['Risk'] = (df['Risk'] + 1) ** np.sqrt(252) - 1
    df['Alpha'] = df['Alpha'].round(4)
    df['Drawdown'] = -df['Drawdown']
    return df 

In [3]:
dict_category2emb = {
    'topological' : [
        'PersImage_2',
        'PersLandscape_1'
        'PersImage_1',
        'PersLandscape_2',
        'topo_features'                 
      ],
    
    'dim_reduction' : ['umap', 'pca', 'fastica', 'tnse'],
    
    'transformers' : [ 'transformer_embds1',
 'transformer_embds2',
 'transformer_embds3',
 'transformer_embds_sum_23',
 'transformer_embds_sum_all',],
    
    'autoencoders' : [ 'autoencoder_conv',
 'autoencoder_lstm',
 'autoencoder_mlp',],
    
    'baselines' : ['sp500', 'sectors'],
    
    'interpretable' : [ 'tsfresh', 'table_finance_features'],
    
    'time_clustering' : ['KShape',
 'MSTcorr',
 'TimeSeriesKMeans'],
    
    'time_embedding' : ['ts2vec', 'signal2vec_embds']
}

dict_emb2cat = {method:categ for categ, list_methods in dict_category2emb.items() for method in list_methods }

# Data loading

In [4]:
fin_results_2012 = pd.read_csv('results/usa_2012/financial_metric_False.csv', index_col=0)
fin_results_2015 = pd.read_csv('results/usa_2015/financial_metric_False.csv', index_col=0)
fin_results_2018 = pd.read_csv('results/usa_2018/financial_metric_False.csv', index_col=0)

clust_results_2012 = pd.read_csv('results/usa_2012/clust_metric_False.csv', index_col=0)
clust_results_2015 = pd.read_csv('results/usa_2012/clust_metric_False.csv', index_col=0)
clust_results_2018 = pd.read_csv('results/usa_2012/clust_metric_False.csv', index_col=0)
fin_results_2012.head()

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.000517,0.016028,0.329642,0.000345,0.024955,-0.026447,-37.24377,385.0,KMeans,ts2vec
port,0.001062,0.014736,0.38423,0.000881,0.064101,-0.024314,-27.391429,259.0,Agg,ts2vec
port,0.000656,0.007743,0.752471,0.000414,0.069636,-0.012776,-11.716343,185.0,KMeans,PersImage_2
port,0.000659,0.008137,0.736411,0.00042,0.066596,-0.013426,-11.172857,183.0,Agg,PersImage_2
port,0.000633,0.015816,0.339472,0.000459,0.032596,-0.026096,-38.037391,385.0,KMeans,PersLandscape_1


In [5]:
def load_fin_clust(exp_name):
    dict_data = {
        "finance":pd.read_csv(f'results/{exp_name}/financial_metric_False.csv', index_col=0),
        "clustering":pd.read_csv(f'results/{exp_name}/clust_metric_False.csv', index_col=0)
    }
    return dict_data


In [6]:
experiments = ['usa_2012', 'usa_2015', 'usa_2018']
dict_results = {exp:load_fin_clust(exp) for exp in experiments}

In [7]:
def build_entire_table(fin_results, clust_results, dict_emb2cat, sort_metric='Risk'):
    fin_results = change_features(fin_results)

    df_to_rebuild = fin_results
    df_to_rebuild = pd.merge(clust_results.drop(columns=['clust_model']), df_to_rebuild, on='emb_model', how='outer')

    df_best_clustering = df_to_rebuild.sort_values([sort_metric], ascending=False)
    df_best_clustering = df_best_clustering.groupby('emb_model', as_index=False, sort=True).first()

    df_best_clustering['category'] = df_best_clustering['emb_model'].map(dict_emb2cat)
    return df_best_clustering.sort_values(sort_metric)

In [8]:
dict_results_selected = {exp:build_entire_table(
        dict_results[exp]['finance'], 
        dict_results[exp]['clustering'], 
        dict_emb2cat
        ) 
    for exp in experiments
}

In [9]:
all_experiments = pd.DataFrame([])

for exp_name, exp_data in dict_results_selected.items():
    exp_data['exp_name'] = exp_name
    
    all_experiments = pd.concat([all_experiments, exp_data])
    
print(all_experiments.shape)
all_experiments.head()

(63, 16)


Unnamed: 0,emb_model,DB,HC,Sil,hom,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,category,exp_name
1,PersImage_2,1.7243,61.553752,0.145029,0.099462,0.180643,0.137287,0.736411,0.0004,0.066596,-0.013426,11.172857,183.0,Agg,topological,usa_2012
10,sp500,,,,,0.073929,0.142009,1.0,0.0,0.019734,-0.01386,14.183248,253.0,sp500,baselines,usa_2012
0,PersImage_1,2.113002,38.591573,0.112135,0.085452,0.230194,0.143126,0.790175,0.0006,0.08333,-0.013962,11.145127,128.0,Agg,,usa_2012
11,table_finance_features,1.243502,76.237093,0.159563,0.134707,0.293567,0.185312,0.500845,0.0008,0.084022,-0.017765,14.913612,184.0,KMeans,interpretable,usa_2012
9,sectors,4.01921,7.379893,-0.02987,1.0,0.304948,0.204896,0.517192,0.0009,0.079542,-0.019488,25.386463,256.0,sectors,baselines,usa_2012


In [10]:
all_exp_pivot = all_experiments.pivot(index='emb_model', columns='exp_name', values=['Risk', 'Sharpe'])
all_exp_pivot.sort_values([('Risk', 'usa_2018')])

Unnamed: 0_level_0,Risk,Risk,Risk,Sharpe,Sharpe,Sharpe
exp_name,usa_2012,usa_2015,usa_2018,usa_2012,usa_2015,usa_2018
emb_model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
sp500,0.142009,0.136343,0.289158,0.019734,0.043638,0.014858
PersImage_2,0.137287,0.142336,0.335643,0.066596,0.128478,0.063872
PersImage_1,0.143126,0.147628,0.337164,0.08333,0.059114,0.050032
fastica,0.279785,0.22557,0.361338,0.044366,0.078007,0.037374
topo_features,0.301772,0.17931,0.367651,0.041673,0.099889,0.04989
pca,0.279785,0.22557,0.38238,0.044366,0.078007,0.025618
ts2vec,0.287141,0.290068,0.390234,0.024955,0.098511,0.072554
transformer_embds2,0.211621,0.169785,0.390733,0.076999,0.147098,0.068911
table_finance_features,0.185312,0.176772,0.39202,0.084022,0.132535,0.051444
transformer_embds5,0.211621,0.160075,0.393402,0.076999,0.158944,0.066072
