In [12]:
from datetime import datetime
from datetime import timedelta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import os
import json
from tqdm.notebook import tqdm
import plotly.express as px
import plotly

from comet_ml import Experiment

In [2]:
# Main settings; later cells read 'random_state', 'financial_metric_path'
# and 'clust_metric_path' from this dict.
with open('config/config.json', 'r') as file:
    config = json.load(file)
    
# Second config file; a later cell calls str.format(year_start) on every value,
# so the values are expected to be format templates.
with open('config/config_emb_path.json', 'r') as file:
    config_emb = json.load(file)
    
# Seed value taken from the config (not referenced again in the cells shown here).
rs = config['random_state']

In [5]:
def change_features(df):
    """Return a rescaled copy of a metrics table; the input is left untouched.

    The original frame is copied first, so calling this function repeatedly on
    the same table (for example by re-running a notebook cell) always yields
    the same result instead of compounding the transformation.

    Transformations applied to the copy:
      * ``AVG_returns`` -> ``(x + 1) ** 252 - 1``
      * ``Risk``        -> ``(x + 1) ** sqrt(252) - 1``
      * ``Alpha``       -> rounded to 4 decimals
      * ``Drawdown``    -> sign flipped

    252 is presumably the number of trading days per year (daily -> annual
    figures) -- TODO confirm.
    NOTE(review): volatility is more commonly annualized as ``x * sqrt(252)``;
    the compounding form is kept so previously reported numbers do not change.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``AVG_returns``, ``Risk``, ``Alpha`` and
        ``Drawdown``. Other columns are carried over unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame with the four columns transformed.

    Raises
    ------
    KeyError
        If one of the four required columns is missing.
    """
    out = df.copy()

    out['AVG_returns'] = (out['AVG_returns'] + 1) ** 252 - 1
    out['Risk'] = (out['Risk'] + 1) ** np.sqrt(252) - 1
    out['Alpha'] = out['Alpha'].round(4)
    out['Drawdown'] = -out['Drawdown']

    return out

# Visualization

In [3]:
# Run parameters for this section. They have to be assigned before their first
# use so the notebook survives Restart & Run All; the values are the ones that
# reproduce the path recorded in this cell's output (2021 / False).
year_start = 2021
fine_tune = False

# Preview of the metrics file path for these parameters.
config['financial_metric_path'].format(year_start, fine_tune)

'results/financial_metric_2021_False.csv'

In [4]:
# One metrics table per start year, keyed by that year.
# `fine_tune` is read from the notebook's global state and must already be
# defined when this cell runs.
financial_metrics = {
    year: pd.read_csv(
        config['financial_metric_path'].format(year, fine_tune),
        index_col=0,
    )
    for year in [2020, 2021]
}

In [6]:
# Hand change_features its own copy of each table so the raw frames stored in
# `financial_metrics` are never modified and re-running this cell always gives
# the same numbers.
df_2020 = change_features(financial_metrics[2020].copy())
df_2021 = change_features(financial_metrics[2021].copy())

In [7]:
def filter_risk_return(df, filter_baselines=['sp500', 'sectors'], save_methods=['sectors', 'sp500']):
    """Keep the rows that are at least as good as every baseline, plus protected rows.

    For each name in ``filter_baselines`` the first row whose ``emb_model``
    equals that name supplies three thresholds. A row survives that baseline
    when its ``Risk`` is <= the baseline's, its ``AVG_returns`` is >= the
    baseline's and its ``Drawdown`` is <= the baseline's. Rows whose
    ``emb_model`` is listed in ``save_methods`` always survive. Afterwards,
    rows with non-positive ``AVG_returns`` are dropped (protected rows
    excepted) and the result is sorted by ``Sharpe``, highest first.

    Thresholds are always read from the frame as it was passed in. A baseline
    that is itself removed by an earlier baseline's filter (and is not listed
    in ``save_methods``) therefore still contributes its thresholds instead of
    aborting the call with an IndexError.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``emb_model``, ``Risk``, ``AVG_returns``,
        ``Drawdown`` and ``Sharpe``. It is not modified.
    filter_baselines : list of str
        ``emb_model`` values used as reference rows. The default list is only
        read, never mutated.
    save_methods : list of str
        ``emb_model`` values that are kept regardless of the thresholds. The
        default list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, sorted by ``Sharpe`` in descending order.

    Raises
    ------
    IndexError
        If a name in ``filter_baselines`` does not occur in ``df['emb_model']``.
    """
    # Unfiltered input: the only place baseline thresholds are looked up.
    source = df

    for baseline in filter_baselines:
        baseline_rows = source[source['emb_model'] == baseline]
        if baseline_rows.empty:
            raise IndexError(f"baseline {baseline!r} not found in df['emb_model']")

        risk_baseline = baseline_rows['Risk'].values[0]
        return_baseline = baseline_rows['AVG_returns'].values[0]
        drd_baseline = baseline_rows['Drawdown'].values[0]

        # A row passes when it is no worse than the baseline on all three axes.
        mask = (
            (df['Risk'] <= risk_baseline)
            & (df['AVG_returns'] >= return_baseline)
            & (df['Drawdown'] <= drd_baseline)
        )
        # Protected methods are kept even when they fail the thresholds.
        mask = mask | df['emb_model'].isin(save_methods)

        df = df[mask]

    mask_save = df['emb_model'].isin(save_methods)
    df = df[(df['AVG_returns'] > 0) | mask_save]
    return df.sort_values('Sharpe', ascending=False)

In [8]:
# 2021 start year: filter against the 'sectors' baseline only, while always
# keeping both baseline rows in the table for reference.
df_2021_filtered = filter_risk_return(
    df_2021,
    filter_baselines=['sectors'],
    save_methods=['sectors', 'sp500'],
)
df_2021_filtered

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.672739,0.270982,0.501459,0.0021,0.126564,-0.025113,12.020327,65.0,KMeans,table_finance_features
port,0.414883,0.208473,0.605803,0.0014,0.105069,-0.0198,11.293906,78.0,Agg,table_finance_features
port,0.491682,0.315458,0.468096,0.0016,0.084426,-0.028746,16.850101,73.0,Agg,tsfresh
port,0.354555,0.328862,0.421275,0.0012,0.060186,-0.029819,20.176808,88.0,sectors,sectors
port,-0.042238,0.203246,1.0,0.0,-0.024612,-0.019344,25.081503,179.0,sp500,sp500


In [9]:
# 2020 start year: same filter settings as for 2021 ('sectors' as the only
# baseline, both baseline rows always kept).
df_2020_filtered = filter_risk_return(
    df_2020,
    filter_baselines=['sectors'],
    save_methods=['sectors', 'sp500'],
)
df_2020_filtered

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.600734,0.339103,0.701615,0.0016,0.094339,-0.030631,33.63109,57.0,KMeans,takens_mult_PI_2_sigma=0.0005
port,0.505332,0.286336,0.717754,0.0014,0.094262,-0.026381,14.655753,83.0,KMeans,takens_one_PL_2
port,0.521516,0.329129,0.709373,0.0014,0.085684,-0.02984,21.298497,115.0,Agg,takens_one_PL_2
port,0.581317,0.368728,0.664694,0.0016,0.085272,-0.03295,28.935546,50.0,KMeans,takens_mult_BC_0
port,0.4492,0.304558,0.787807,0.0012,0.080292,-0.027867,31.327309,180.0,Agg,takens_mult_PI_2_sigma=0.0005
port,0.454484,0.324734,0.742873,0.0013,0.076683,-0.02949,29.773179,115.0,Agg,takens_one_BC_2
port,0.501222,0.381811,0.64785,0.0014,0.072699,-0.033958,31.645513,182.0,KMeans,takens_one_PL_1
port,0.389792,0.296542,0.79497,0.0011,0.07213,-0.027215,37.944779,69.0,Agg,takens_one_PI_1_sigma=0.0005
port,0.464639,0.366876,0.692306,0.0013,0.070321,-0.032806,32.784684,89.0,KMeans,transformer_embds3
port,0.433002,0.342955,0.72437,0.0012,0.069945,-0.030936,26.039232,83.0,KMeans,takens_one_BC_2


In [10]:
# Drop the rows whose clust_model is 'Agg'; every other row (KMeans and any
# row carrying a different clust_model label) stays.
df_2020_filtered.loc[df_2020_filtered['clust_model'] != 'Agg']

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.600734,0.339103,0.701615,0.0016,0.094339,-0.030631,33.63109,57.0,KMeans,takens_mult_PI_2_sigma=0.0005
port,0.505332,0.286336,0.717754,0.0014,0.094262,-0.026381,14.655753,83.0,KMeans,takens_one_PL_2
port,0.581317,0.368728,0.664694,0.0016,0.085272,-0.03295,28.935546,50.0,KMeans,takens_mult_BC_0
port,0.501222,0.381811,0.64785,0.0014,0.072699,-0.033958,31.645513,182.0,KMeans,takens_one_PL_1
port,0.464639,0.366876,0.692306,0.0013,0.070321,-0.032806,32.784684,89.0,KMeans,transformer_embds3
port,0.433002,0.342955,0.72437,0.0012,0.069945,-0.030936,26.039232,83.0,KMeans,takens_one_BC_2
port,0.423275,0.369481,0.680248,0.0012,0.0642,-0.033008,32.500412,119.0,KMeans,transformer_embds1
port,0.377516,0.342339,0.688975,0.0011,0.061672,-0.030887,40.743809,190.0,KMeans,umap
port,0.373594,0.34136,0.705352,0.001,0.06122,-0.03081,36.36942,91.0,KMeans,table_finance_features
port,0.387615,0.358618,0.683558,0.0011,0.060713,-0.032164,30.531414,182.0,KMeans,transformer_embds2


In [11]:
# Drop the rows whose clust_model is 'KMeans'; every other row ('Agg' and any
# row carrying a different clust_model label) stays.
df_2020_filtered.loc[df_2020_filtered['clust_model'] != 'KMeans']

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.521516,0.329129,0.709373,0.0014,0.085684,-0.02984,21.298497,115.0,Agg,takens_one_PL_2
port,0.4492,0.304558,0.787807,0.0012,0.080292,-0.027867,31.327309,180.0,Agg,takens_mult_PI_2_sigma=0.0005
port,0.454484,0.324734,0.742873,0.0013,0.076683,-0.02949,29.773179,115.0,Agg,takens_one_BC_2
port,0.389792,0.296542,0.79497,0.0011,0.07213,-0.027215,37.944779,69.0,Agg,takens_one_PI_1_sigma=0.0005
port,0.458292,0.373351,0.690987,0.0013,0.068409,-0.033307,32.183046,90.0,Agg,takens_one_PL_1
port,0.428275,0.352416,0.712236,0.0012,0.067619,-0.031679,35.920772,137.0,Agg,takens_one_BC_1
port,0.464833,0.385091,0.65109,0.0013,0.067461,-0.03421,30.864434,122.0,Agg,transformer_embds_sum_23
port,0.380717,0.335222,0.696776,0.0011,0.063318,-0.030324,31.249841,75.0,Agg,transformer_embds2
port,0.410354,0.370762,0.673659,0.0011,0.062202,-0.033107,38.04148,100.0,Agg,ts2vec
port,0.407141,0.368544,0.68549,0.0011,0.062072,-0.032935,30.835164,119.0,Agg,transformer_embds1


## COMET_ML

In [78]:
# Parameters of the run that is logged to Comet below.
year_start = 2020
fine_tune = True

# Fill the start year into the embedding-path templates. The templates are
# re-read from the config file instead of being formatted in place: once a
# placeholder has been substituted there is nothing left for a second
# .format() call to replace, so changing `year_start` and re-running this cell
# would otherwise silently keep the previous year's paths.
with open('config/config_emb_path.json', 'r') as file:
    config_emb = {
        key: template.format(year_start)
        for key, template in json.load(file).items()
    }

In [79]:
# Financial metrics for the selected run, passed through change_features
# right after loading.
df_finance = change_features(
    pd.read_csv(
        config['financial_metric_path'].format(year_start, fine_tune),
        index_col=0,
    )
)

# Clustering-quality metrics for the same run.
df_clust = pd.read_csv(
    config['clust_metric_path'].format(year_start, fine_tune),
    index_col=0,
)

In [80]:
# Quick look at the first five rows of the transformed financial metrics.
df_finance.head(n=5)

Unnamed: 0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,clust_model,emb_model
port,0.495915,0.41104,0.601512,0.0014,0.067593,-0.03618,43.955665,75.0,KMeans,ts2vec
port,0.191167,0.363129,0.640662,0.0005,0.029286,-0.032515,36.463147,154.0,Agg,ts2vec
port,0.404731,0.356168,0.696695,0.0011,0.06359,-0.031973,35.514546,115.0,KMeans,takens_one_BC_2
port,-0.065147,0.247098,0.941967,-0.0005,-0.027456,-0.023112,31.13078,642.0,Agg,takens_one_BC_2
port,0.606091,0.355021,0.674516,0.0017,0.091323,-0.031883,24.775282,57.0,KMeans,takens_one_PL_2


In [81]:
# Clustering algorithm whose results are kept. It has to be assigned here:
# nothing earlier in the notebook defines it, so a fresh kernel would stop
# with a NameError. 'Agg' reproduces the table recorded in this cell's output.
clust_model_name = 'Agg'
model_del = 'Agg' if clust_model_name == 'KMeans' else 'KMeans'

# NOTE: this cell rebinds df_finance / df_clust (rows removed, 'emb_model'
# moved into the index), so re-run the loading cell before re-running it.
df_finance = df_finance[df_finance.clust_model != model_del]
df_clust = df_clust[df_clust.clust_model != model_del]

df_finance = df_finance.set_index('emb_model').drop(columns=['clust_model'])
df_clust = df_clust.set_index('emb_model').drop(columns=['clust_model'])

# Left join on emb_model: financial metrics plus the clustering metrics.
df_data = df_finance.join(df_clust)

# The join must not duplicate rows, and df_finance is expected to hold exactly
# one row more than df_clust (presumably a baseline without clustering
# metrics -- TODO confirm).
assert len(df_data) == len(df_clust) + 1 == len(df_finance)
df_data.head()

Unnamed: 0_level_0,AVG_returns,Risk,Beta,Alpha,Sharpe,VaR,Drawdown,Recovery,DB,HC,Sil,hom
emb_model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ts2vec,0.191167,0.363129,0.640662,0.0005,0.029286,-0.032515,36.463147,154.0,0.320173,18.543424,0.51673,0.028243
takens_one_BC_2,-0.065147,0.247098,0.941967,-0.0005,-0.027456,-0.023112,31.13078,642.0,0.45379,6.411142,0.273747,0.0289
takens_one_PL_2,0.156138,0.317826,0.740148,0.0003,0.02615,-0.028937,28.214303,182.0,0.311802,10.268861,0.379736,0.023011
transformer_embds_sum_23,0.330917,0.359422,0.667278,0.0009,0.052108,-0.032227,36.297709,180.0,1.142738,130.819923,0.216855,0.051649
transformer_embds3,0.350557,0.34313,0.696339,0.001,0.057361,-0.030949,34.166709,180.0,1.202075,129.118784,0.223701,0.054524


In [None]:
# Log one Comet experiment per embedding model (one row of df_data).
# `df_data` is the joined metrics table built above; the name used here
# before (`df_model`) is not defined anywhere in the notebook.
for model_name, data in tqdm(df_data.iterrows(), total=len(df_data)):

    experiment = Experiment(
        # Never commit the key to the notebook: read it from the environment.
        # When the variable is unset, None lets comet_ml fall back to its own
        # configuration lookup. The key that used to be hard-coded here should
        # be treated as leaked and rotated.
        api_key=os.environ.get("COMET_API_KEY"),
        project_name="stock-clustering-with-time-series-embeddings",
        workspace="petrsokerin",
    )

    try:
        experiment.set_name(model_name)

        hyper_params = {
            "model": model_name,
            "include_2020": year_start == 2021,
            "fine_tune": fine_tune,
        }

        experiment.log_parameters(hyper_params)

        print(model_name)
        metrics = data.to_dict()
        experiment.log_metrics(metrics)
    finally:
        # Each Experiment starts background threads. Without end() they pile
        # up across iterations until the process hits
        # "RuntimeError: can't start new thread" and nothing is logged.
        experiment.end()

0it [00:00, ?it/s]

COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


ts2vec


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_one_BC_2


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_one_PL_2


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


transformer_embds_sum_23


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


transformer_embds3


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


transformer_embds2


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_mult_PI_0_sigma=0.0005


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_mult_PL_2


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_one_BC_1


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


table_finance_features


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


takens_mult_PI_1_sigma=0.0005


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


signal2vec_embds


COMET ERROR: Run will not be logged 
Traceback (most recent call last):
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/experiment.py", line 946, in _start
    self.alive = self._setup_streamer()
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 328, in _setup_streamer
    self._initialize_streamer(
  File "/cephfs/projects/psoker/.local/lib/python3.8/site-packages/comet_ml/_online.py", line 433, in _initialize_streamer
    self.ws_connection.start()
  File "/opt/anaconda3/envs/cuda10/lib/python3.8/threading.py", line 852, in start
    _start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread


autoencoder_mlp
