<a href="https://colab.research.google.com/github/leonardobocci/ml-stock-market/blob/main/2.master_thesis_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style
plt.style.use('seaborn')
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
from sklearn import metrics
import plotly.io as pio
pio.templates.default = "seaborn"
import plotly.express as px
import plotly.figure_factory as ff
from datetime import datetime
from dateutil.relativedelta import relativedelta
pd.options.display.float_format = '{:,.2%}'.format

In [None]:
# Model identifiers; later cells combine them with suffixes to form column names
# such as f'{model}_predicted', f'{model}_fitted' and f'{model}_error'.
models = ['last_price', 'ols', 'ridge', 'lasso', 'elastic_net', 'decision_tree', 'random_forest', 'gradient_boost', 'xgboost', 'sv_rbf', 'lgbm', 'arima', 'ff_nn', 'lstm_nn']
# ETF tickers; each one names a per-ETF CSV file / worksheet and keys the dictionaries built below.
keys = ['EWC', 'EWQ', 'EWG', 'EWI', 'EWJ', 'EWU', 'SPY']

In [None]:
%%capture
# Mount Google Drive so the CSV inputs under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

# Authenticate the Colab user and build a gspread client from the default
# Google credentials; `gc` is used below to open the Google Sheets workbooks.
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
creds, _ = default()
gc = gspread.authorize(creds)

# Raw per-ETF data, keyed by ticker and indexed by the parsed `date` column.
etfs = {}
for etf in keys:
  etfs[etf] = (
      pd.read_csv(f'/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/{etf}.csv')
      .assign(date=lambda raw: pd.to_datetime(raw['date'], format="%Y/%m/%d"))
      .set_index('date')
  )

# Results table, with an `id` column formed by concatenating the ETF and model names.
results_path='/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/results.csv'
results = pd.read_csv(results_path).assign(id=lambda table: table['etf'] + table['model'])

# Full descriptive statistics per ETF, keyed by ticker.
descriptive_stats = {}
for etf in keys:
  # The path must be built inside the loop: formatting it once beforehand bakes in
  # whatever ticker `etf` last held, so every entry would load the same file.
  descriptive_stats_path = f'/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/full_describe_{etf}.csv'
  descriptive_stats[etf] = pd.read_csv(descriptive_stats_path)

def _load_workbook_frames(workbook_name):
  """Load one date-indexed DataFrame per ETF from a Google Sheets workbook.

  Opens `workbook_name` with the gspread client `gc`, reads the worksheet named
  after each ticker in `keys`, parses its `date` column (format %Y/%m/%d) and
  uses it as the index. Returns a dict mapping ticker -> DataFrame.
  """
  workbook = gc.open(workbook_name)
  frames = {}
  for etf in keys:
    frame = pd.DataFrame(workbook.worksheet(f'{etf}').get_all_records())
    frame['date'] = pd.to_datetime(frame['date'], format="%Y/%m/%d")
    frames[etf] = frame.set_index('date')
  return frames


# In-sample values from the 'fits' workbook.
fits = _load_workbook_frames('fits')

# Out-of-sample values from the 'predictions' workbook.
predictions = _load_workbook_frames('predictions')

# Feature table: all records of 'Sheet1' in the 'features' workbook.
features = pd.DataFrame(
    gc.open('features').worksheet('Sheet1').get_all_records()
)

# Base descriptive statistics per ETF; the unnamed first CSV column becomes
# the index, and the index is renamed to the ticker.
desc_stats = {}
for etf in keys:
  summary = pd.read_csv(f'/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/base_describe_{etf}.csv')
  desc_stats[etf] = summary.set_index('Unnamed: 0').rename_axis(etf)

# Absolute metrics

## Descriptive Statistics

In [None]:
# One row per ETF with the mean and standard deviation of its log returns,
# read from the 'mean' and 'std' rows of the describe() table.
# Built from a list of records in a single constructor call: concatenating onto
# an empty frame inside the loop is quadratic and leaves object-dtype columns.
desc_df = pd.DataFrame(
    [
        {
            'etf': etf,
            'mean returns': desc_stats[etf].loc['mean', 'log_returns'],
            'stdev returns': desc_stats[etf].loc['std', 'log_returns'],
        }
        for etf in keys
    ],
    columns=['etf', 'mean returns', 'stdev returns'],
)

# Render floats as two-decimal percentages from here on.
pd.options.display.float_format = '{:,.2%}'.format
desc_df

Unnamed: 0,etf,mean returns,stdev returns
0,EWC,0.01%,1.48%
1,EWQ,0.00%,1.61%
2,EWG,-0.00%,1.66%
3,EWI,-0.01%,1.75%
4,EWJ,0.00%,1.40%
5,EWU,-0.01%,1.47%
6,SPY,0.02%,1.24%


In [None]:
# Switch float rendering to plain two-decimal numbers, then print the full
# describe() table of every ETF.
pd.options.display.float_format = '{:,.2f}'.format
for etf in keys:
  print(desc_stats[etf])

       log_returns  outlier     open     high      low    close        volume
EWC                                                                          
count     6,265.00 6,265.00 6,265.00 6,265.00 6,265.00 6,265.00      6,265.00
mean          0.00    -0.01    23.64    23.80    23.46    23.64  1,667,124.40
std           0.01     0.12     7.74     7.77     7.70     7.74  1,913,628.83
min          -0.26    -1.00     8.00     8.30     8.00     8.11        100.00
25%          -0.01     0.00    16.66    16.79    16.50    16.69    127,400.00
50%           0.00     0.00    26.04    26.20    25.79    25.98  1,278,171.00
75%           0.01     0.00    28.87    28.99    28.71    28.86  2,386,601.00
max           0.12     1.00    40.74    41.12    40.53    40.66 18,747,223.00
       log_returns  outlier     open     high      low    close        volume
EWQ                                                                          
count     6,265.00 6,265.00 6,265.00 6,265.00 6,265.00 6,265.00 

In [None]:
# Count rows per value of the `outlier` flag for each ETF (the count is taken on
# the `day` column), collecting the per-ETF tables and concatenating them once
# instead of growing a frame inside the loop.
outlier_counts = []
for etf in keys:
  counts = etfs[etf][['outlier', 'day']].groupby('outlier').count().reset_index()
  outlier_counts.append(counts.assign(etf=etf))
outliers = pd.concat(outlier_counts)

# Split by flag value: -1 rows and +1 rows, each sorted by ticker.
neg_outliers = outliers.loc[outliers.outlier==-1, ['etf', 'day']].sort_values('etf')
pos_outliers = outliers.loc[outliers.outlier==1, ['etf', 'day']].sort_values('etf')

In [None]:
# Side-by-side bar charts of outlier counts per ETF.
# Each trace goes in the column whose subplot title describes it: positive
# counts under "Positive" (col 1), negative counts under "Negative" (col 2).
fig = make_subplots(rows=1, cols=2, subplot_titles=("Count of Positive Outliers","Count of Negative Outliers"))
fig.add_trace(go.Bar(x=pos_outliers.etf, y=pos_outliers.day, name='positive'), row=1, col=1)
fig.add_trace(go.Bar(x=neg_outliers.etf, y=neg_outliers.day, name='negative'), row=1, col=2)
fig.show()

In [None]:
# Display the per-ETF count of rows flagged with outlier == -1.
neg_outliers

Unnamed: 0,etf,day
0,EWC,59
0,EWG,56
0,EWI,48
0,EWJ,26
0,EWQ,53
0,EWU,50
0,SPY,57


## RMSE on train and test, by model and etf

In [None]:
def _rmse_table(frames, column_suffix):
  """Build a models-by-ETF table of root-mean-squared errors.

  For every ticker in `keys` and every name in `models`, compares the `true`
  column of `frames[ticker]` with the column f'{model}_{column_suffix}'.
  Returns a DataFrame indexed by model with one column per ETF.
  """
  table = pd.DataFrame()
  for etf in keys:
    frame = frames[etf]
    # np.sqrt(MSE) replaces mean_squared_error(..., squared=False): that keyword
    # is deprecated and removed in recent scikit-learn releases.
    table[f'{etf}'] = pd.Series(
        {
            f'{model}': np.sqrt(metrics.mean_squared_error(frame.loc[:, 'true'], frame.loc[:, f'{model}_{column_suffix}']))
            for model in models
        },
        dtype='float64',
    )
  return table


# Out-of-sample RMSE.
rmse_test = _rmse_table(predictions, 'predicted')

# In-sample RMSE.
rmse_train = _rmse_table(fits, 'fitted')

In [None]:
#Test
fig = make_subplots(rows=7, cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.03,
                    subplot_titles=(keys))
for i in range(1,8):
  rmse_test = rmse_test.sort_values(f'{keys[i-1]}', ascending=False)
  fig.add_trace(go.Bar(x=rmse_test[f'{keys[i-1]}'],
                           y=rmse_test.index, 
                           orientation='h',
                           name=keys[i-1]),
                row=i, col=1)

fig.update_layout(height=3200, width=800, title_text="Out-of-Sample (Test) RMSE")
fig.update_layout(xaxis_showticklabels=True, 
                  xaxis2_showticklabels=True,
                  xaxis3_showticklabels=True,
                  xaxis4_showticklabels=True,
                  xaxis5_showticklabels=True,
                  xaxis6_showticklabels=True,
                  xaxis7_showticklabels=True)
fig.update_xaxes(tickformat=(".1%"))
fig.show()

In [None]:
#Train
fig = make_subplots(rows=7, cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.03,
                    subplot_titles=(keys))
for i in range(1,8):
  rmse_train = rmse_train.sort_values(f'{keys[i-1]}', ascending=False)
  fig.add_trace(go.Bar(x=rmse_train[f'{keys[i-1]}'],
                           y=rmse_train.index, 
                           orientation='h',
                           name=keys[i-1]),
                row=i, col=1)

fig.update_layout(height=3200, width=800, title_text="In-Sample (Train) RMSE")
fig.update_layout(xaxis_showticklabels=True, 
                  xaxis2_showticklabels=True,
                  xaxis3_showticklabels=True,
                  xaxis4_showticklabels=True,
                  xaxis5_showticklabels=True,
                  xaxis6_showticklabels=True,
                  xaxis7_showticklabels=True)
fig.update_xaxes(tickformat=(".1%"))
fig.show()

## Aggregate RMSE, train and test

### By model

In [None]:
# Average RMSE of each model across ETFs, largest first.
mean_rmse_test = (
    rmse_test.mean(axis=1)
    .to_frame(name='rmse')
    .sort_values(by='rmse', ascending=False)
)
mean_rmse_train = (
    rmse_train.mean(axis=1)
    .to_frame(name='rmse')
    .sort_values(by='rmse', ascending=False)
)

In [None]:
#Test
fig = px.bar(mean_rmse_test, y=mean_rmse_test.index, x='rmse', text="rmse", text_auto='.2%')
fig.update_xaxes(tickformat=(".1%"))
fig.update_yaxes(title='model')
fig.update_layout(title_text="Mean Out-of-Sample (Test) RMSE")
fig.show()

In [None]:
#Train
fig = px.bar(mean_rmse_train, y=mean_rmse_train.index, x='rmse', text="rmse", text_auto='.2%')
fig.update_xaxes(tickformat=(".1%"))
fig.update_yaxes(title='model')
fig.update_layout(title_text="Mean In-Sample (Train) RMSE")
fig.show()

### By ETF

In [None]:
# Mean test RMSE of each ETF across models (columns of `rmse_test` are ETFs), largest first.
rmse_etf = rmse_test.mean(axis=0).sort_values(ascending=False)
#Test
fig = px.bar(rmse_etf, text_auto='.2%', orientation='h')
fig.update_xaxes(tickformat=(".1%"))
# The categories on the y axis are ETF tickers, so title the axis accordingly.
fig.update_yaxes(title='etf')
fig.update_layout(title_text="Mean Out-of-Sample (Test) RMSE by ETF")
fig.show()

In [None]:
# Lollipop chart: per ETF, the mean test RMSE against the standard deviation of
# its returns, joined by a line.
lollipop = (
    desc_df.set_index('etf')
    .assign(rmse=rmse_etf)  # aligned on the ETF ticker
    .sort_values('rmse', ascending=False)
)
fig = go.Figure()
fig.add_trace(go.Scatter(x = lollipop["rmse"],
                         y = lollipop.index,
                         mode = 'markers',
                         marker_color = 'darkblue',
                         marker_size = 10,
                         name = 'RMSE'))
fig.add_trace(go.Scatter(x = lollipop["stdev returns"],
                         y = lollipop.index,
                         mode = 'markers',
                         marker_color = 'orange',
                         marker_size = 10,
                         name = 'Stdev'))
# Iterate over the values themselves rather than `series[i]`: integer lookups on a
# ticker-indexed Series rely on the deprecated positional fallback of Series.__getitem__.
# `position` is the row number, used as the y coordinate of the connecting line.
for position, (rmse_value, stdev_value) in enumerate(zip(lollipop["rmse"], lollipop["stdev returns"])):
  fig.add_shape(type='line',
                x0 = rmse_value,
                y0 = position,
                x1 = stdev_value,
                y1 = position,
                line=dict(color='crimson', width = 3))
fig.update_layout(title_text = "RMSE vs Stdev of Returns")
fig.update_xaxes(tickformat=(".1%"))
fig.show()

## RMSE in test subsets

In [None]:
# Test RMSE over growing windows that start at each ETF's first prediction date
# and extend forward by the number of months in the label.
subsets = ['1M', '3M', '6M', '12M', '24M', '36M']

subset_errors = []
for etf in keys:
  etf_predictions = predictions[etf]
  first_date = etf_predictions.index.min()
  for subset in subsets:
    forward_months = int(subset.rstrip('M'))
    # Window end, truncated to midnight of that calendar day.
    end_date = pd.to_datetime((first_date + relativedelta(months = forward_months)).date())
    window = etf_predictions.loc[etf_predictions.index <= end_date]
    for model in models:
      subset_errors.append({
          'etf': etf,
          'model': model,
          'subset': subset,
          # np.sqrt(MSE) instead of the deprecated `squared=False` keyword.
          'error': np.sqrt(metrics.mean_squared_error(window['true'], window[f'{model}_predicted'])),
      })

# Average over ETFs and pivot to models (rows) x subsets (columns).
# Only the numeric `error` column is averaged; the string `etf` column cannot be.
errors_test = (
    pd.DataFrame(subset_errors, columns=['etf', 'model', 'subset', 'error'])
    .groupby(['model', 'subset'])['error']
    .mean()
    .unstack(level=-1)
    .loc[:, subsets]  # restore the chronological column order
)

In [None]:
# Switch float rendering back to two-decimal percentages before displaying the table.
pd.options.display.float_format = '{:,.2%}'.format
# Mean test RMSE per model (rows) and window length (columns), averaged over ETFs.
errors_test

subset,1M,3M,6M,12M,24M,36M
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
arima,1.13%,0.96%,0.78%,1.46%,1.28%,1.28%
decision_tree,1.14%,0.95%,0.77%,1.46%,1.28%,1.28%
elastic_net,1.13%,0.95%,0.77%,2.12%,1.68%,1.58%
ff_nn,1.27%,1.08%,0.89%,2.07%,1.78%,1.72%
gradient_boost,1.14%,0.95%,0.77%,1.50%,1.30%,1.30%
lasso,1.13%,0.95%,0.77%,2.12%,1.68%,1.58%
last_price,1.13%,0.95%,0.77%,2.12%,1.68%,1.58%
lgbm,1.14%,0.95%,0.77%,1.62%,1.37%,1.35%
lstm_nn,1.15%,0.99%,0.83%,2.14%,1.71%,1.60%
ols,1.13%,0.96%,0.78%,1.46%,1.28%,1.28%


In [None]:
#Mean error by subset
fig = px.bar(errors_test.transpose().mean(axis=1), text_auto='.2%')
fig.update_yaxes(tickformat=(".1%"))
fig.update_yaxes(title='model')
fig.update_layout(width = 1000, title_text="Out-of-Sample RMSE by Time Subset")
fig.show()

# Relative metrics

In [None]:
# Median relative absolute error: for every model except 'last_price', the median
# over dates of |true - model| / |true - last_price|.
rae_test = pd.DataFrame()
errors_test = pd.Series(dtype='float64')

for etf in keys:
  actual = predictions[etf]['true']
  benchmark_abs_err = (actual - predictions[etf]['last_price_predicted']).abs()
  for model in models:
    if model == 'last_price':
      continue
    model_abs_err = (actual - predictions[etf][f'{model}_predicted']).abs()
    errors_test[model] = np.median(model_abs_err / benchmark_abs_err)
  rae_test[etf] = errors_test

rae_train = pd.DataFrame()
errors_train = pd.Series(dtype='float64')

for etf in keys:
  actual = fits[etf]['true']
  benchmark_abs_err = (actual - fits[etf]['last_price_fitted']).abs()
  for model in models:
    if model == 'last_price':
      continue
    model_abs_err = (actual - fits[etf][f'{model}_fitted']).abs()
    errors_train[model] = np.median(model_abs_err / benchmark_abs_err)
  rae_train[etf] = errors_train

### Median Relative Absolute Error (md_RAE)

In [None]:
#Train
fig = make_subplots(rows=7, cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.03,
                    subplot_titles=(keys))
for i in range(1,8):
  rae_train = rae_train.sort_values(f'{keys[i-1]}', ascending=False)
  fig.add_trace(go.Bar(x=rae_train[f'{keys[i-1]}'],
                           y=rae_train.index, 
                           orientation='h',
                           name=keys[i-1]),
                row=i, col=1)

fig.update_layout(height=3200, width=800, title_text="In-Sample (Train) Median Relative Absolute Error")
fig.update_layout(xaxis_showticklabels=True, 
                  xaxis2_showticklabels=True,
                  xaxis3_showticklabels=True,
                  xaxis4_showticklabels=True,
                  xaxis5_showticklabels=True,
                  xaxis6_showticklabels=True,
                  xaxis7_showticklabels=True)
fig.update_xaxes(tickformat=(".1f"))
fig.show()

In [None]:
#Test
fig = make_subplots(rows=7, cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.03,
                    subplot_titles=(keys))
for i in range(1,8):
  rae_test = rae_test.sort_values(f'{keys[i-1]}', ascending=False)
  fig.add_trace(go.Bar(x=rae_test[f'{keys[i-1]}'],
                           y=rae_test.index, 
                           orientation='h',
                           name=keys[i-1]),
                row=i, col=1)

fig.update_layout(height=3200, width=800, title_text="Out-of-Sample (Test) Median Relative Absolute Error")
fig.update_layout(xaxis_showticklabels=True, 
                  xaxis2_showticklabels=True,
                  xaxis3_showticklabels=True,
                  xaxis4_showticklabels=True,
                  xaxis5_showticklabels=True,
                  xaxis6_showticklabels=True,
                  xaxis7_showticklabels=True)
fig.update_xaxes(tickformat=(".1f"))
fig.show()

### Aggregate md_RAE

In [None]:
# Average md-RAE of each model across ETFs, largest first.
mean_rae_test = (
    rae_test.mean(axis=1)
    .to_frame(name='rae')
    .sort_values(by='rae', ascending=False)
)
mean_rae_train = (
    rae_train.mean(axis=1)
    .to_frame(name='rae')
    .sort_values(by='rae', ascending=False)
)

In [None]:
#Test
fig = px.bar(mean_rae_test, y=mean_rae_test.index, x='rae', text="rae", text_auto='.4f')
fig.update_xaxes(tickformat=(".1f"))
fig.update_yaxes(title='model')
fig.update_layout(title_text="Mean Out-of-Sample (Test) Md-RAE")
fig.show()

In [None]:
#Train
fig = px.bar(mean_rae_train, y=mean_rae_train.index, x='rae', text="rae", text_auto='.2f')
fig.update_xaxes(tickformat=(".1f"))
fig.update_yaxes(title='model')
fig.update_layout(title_text="Mean In-Sample (Train) Md-RAE")
fig.show()

# Feature selection

## Aggregate count

In [None]:
# Rows of the feature table per ETF, sorted ascending and scaled by 1/9.
# NOTE(review): the divisor 9 is not explained in this notebook — presumably the
# number of models that perform feature selection; confirm.
rows_per_etf = features.groupby('etf').count().sort_values('model', ascending=True)
feature_count = rows_per_etf['model'] / 9
fig = (
    px.bar(feature_count, x='model', y=feature_count.index, text="model", text_auto='.0f')
    .update_xaxes(title='selection frequency')
    .update_traces(textangle=0)
    .update_layout(title_text="Frequency of Selection by ETF")
)
fig.show()

In [None]:
# Rows of the feature table per feature name, sorted ascending and scaled by 1/9
# (NOTE(review): meaning of the divisor 9 is not shown here — confirm); only
# features with a scaled count of at least 3 are kept.
rows_per_feature = features.groupby('features').count().sort_values('etf', ascending=True)
feature_count = rows_per_feature['model'] / 9
feature_count = feature_count[feature_count.ge(3)]
fig = (
    px.bar(feature_count, x='model', y=feature_count.index, text="model", text_auto='.0f')
    .update_xaxes(title='selection frequency')
    .update_traces(textangle=0)
    .update_layout(height= 900, title_text="Frequency of Selection by Feature")
)
fig.show()

# Error sign analysis

In [None]:
def _sign_accuracy(frames, column_suffix):
  """Share of dates on which a model's value has the same sign as the true value.

  For every ticker in `keys` and every model except 'last_price', multiplies the
  column f'{model}_{column_suffix}' by the `true` column of `frames[ticker]` and
  treats a product >= 0 as a correct sign. Returns a DataFrame with columns
  `etf`, `model` and `correct_sign_pct`, one row per (etf, model) pair.
  """
  rows = []
  for etf in keys:
    frame = frames[etf]
    for model_name in models:
      if model_name == 'last_price':
        continue
      same_sign = frame[f'{model_name}_{column_suffix}'] * frame['true'] >= 0
      # The mean of a boolean Series is the fraction of True values. Unlike looking
      # up the True/False group counts, it does not fail when one group is absent.
      rows.append({'etf': etf,
                   'model': model_name,
                   'correct_sign_pct': same_sign.mean()})
  return pd.DataFrame(rows, columns=['etf', 'model', 'correct_sign_pct'])


# In-sample sign accuracy, averaged by model and by ETF. Only the numeric column
# is selected before averaging, because the other columns hold strings.
train_sign_analysis = _sign_accuracy(fits, 'fitted')
train_model_sign_analysis = train_sign_analysis.groupby('model')[['correct_sign_pct']].mean().sort_values('correct_sign_pct' ,ascending=True)
train_etf_sign_analysis = train_sign_analysis.groupby('etf')[['correct_sign_pct']].mean().sort_values('correct_sign_pct' ,ascending=True)
train_sign_analysis.to_csv('/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/train_sign_analysis.csv')

# Out-of-sample sign accuracy, same aggregation.
test_sign_analysis = _sign_accuracy(predictions, 'predicted')
test_model_sign_analysis = test_sign_analysis.groupby('model')[['correct_sign_pct']].mean().sort_values('correct_sign_pct' ,ascending=True)
test_etf_sign_analysis = test_sign_analysis.groupby('etf')[['correct_sign_pct']].mean().sort_values('correct_sign_pct' ,ascending=True)
test_sign_analysis.to_csv('/content/drive/MyDrive/Bocci_Machine_Learning_Returns/Data/test_sign_analysis.csv')

## By model

In [None]:
#Test
fig = px.bar(test_model_sign_analysis, y=test_model_sign_analysis.index, x='correct_sign_pct', text="correct_sign_pct", text_auto='.1%')
fig.update_xaxes(tickformat=(".0%"))
fig.update_yaxes(title='model')
fig.update_traces( textangle=0)
fig.update_layout(title_text="Mean Out-Of-sample (Test) Percentage of correct sign")
fig.show()

In [None]:
#Train
fig = px.bar(train_model_sign_analysis, y=train_model_sign_analysis.index, x='correct_sign_pct', text="correct_sign_pct", text_auto='.1%')
fig.update_xaxes(tickformat=(".0%"))
fig.update_yaxes(title='model')
fig.update_traces(textangle=0)
fig.update_layout(title_text="Mean In-sample (Train) Percentage of correct sign")
fig.show()

## By ETF

In [None]:
#Test
fig = px.bar(test_etf_sign_analysis, y=test_etf_sign_analysis.index, x='correct_sign_pct', text="correct_sign_pct", text_auto='.1%')
fig.update_xaxes(tickformat=(".0%"))
fig.update_yaxes(title='model')
fig.update_traces( textangle=0)
fig.update_layout(title_text="Mean Out-Of-sample (Test) Percentage of correct sign")
fig.show()

In [None]:
#Train
fig = px.bar(train_etf_sign_analysis, y=train_etf_sign_analysis.index, x='correct_sign_pct', text="correct_sign_pct", text_auto='.1%')
fig.update_xaxes(tickformat=(".0%"))
fig.update_yaxes(title='model')
fig.update_traces( textangle=0)
fig.update_layout(title_text="Mean In-sample (Train) Percentage of correct sign")
fig.show()

# Error distribution

## Histogram of errors by model and etf

In [None]:
# For each ETF, the list of per-model `<model>_error` series from its predictions frame.
hist_dict = {}
for etf in keys:
  hist_dict[etf] = [predictions[etf][f'{model_name}_error'] for model_name in models]

In [None]:
# One density plot per ETF, with one curve per model's out-of-sample error series.
for etf in keys:
  # Labels are taken from the series being plotted for this ETF, instead of from
  # whichever ticker an earlier loop happened to leave in `etf`.
  group_labels = [series.name for series in hist_dict[etf]]
  fig = ff.create_distplot(hist_dict[etf], group_labels, show_hist=False)
  # The plotted series are the `_error` columns, so the title says "Error", not "Return".
  fig.update_layout(title_text=f'Out-of-Sample (Test) Error Distribution - {etf}')
  fig.update_xaxes(tickformat=(".1%"))
  fig.show()

## Time plot of errors by model and etf

In [None]:
# Per-date mean of all `_error` columns for each ETF, one column per ticker.
predicted_errors = pd.DataFrame()
for etf in keys:
  predicted_errors[etf] = predictions[etf].filter(like='_error', axis=1).mean(axis=1)

In [None]:
for etf in keys: 
  cum_rets = fits[etf].cumsum()
  cum_rets_fitted = cum_rets.filter(like='_fitted', axis=1)
  cum_rets_fitted['true'] = cum_rets[['true']]

In [None]:
# Cumulative sums of every `_predicted` column plus the `true` column, per ETF.
# Results are kept for all tickers; overwriting one variable inside the loop
# discards everything except the last ETF.
cum_rets_predicted_by_etf = {}
for etf in keys:
  cum_rets = predictions[etf].cumsum()
  # .assign returns a new frame, so no column is written into a filter() result.
  cum_rets_predicted_by_etf[etf] = cum_rets.filter(like='_predicted', axis=1).assign(true=cum_rets['true'])

# Backward-compatible name: holds the frame of the last ticker in `keys`,
# which is what this variable contained before.
cum_rets_predicted = cum_rets_predicted_by_etf[keys[-1]]

In [None]:
# Cumulative sum over time of each model's `_error` column, per ETF.
errs = {
    etf: predictions[etf].cumsum().filter(like='_error', axis=1)
    for etf in keys
}

In [None]:
# Per-date prediction error, averaged over models within each ETF and then over ETFs.
px.line(predicted_errors.mean(axis=1))

In [None]:
# Cumulative errors: running sum over time of each ETF's model-averaged error.
px.line(predicted_errors.cumsum())

In [None]:
# Absolute value of errors: running sum of the absolute model-averaged error per ETF.
px.line(predicted_errors.abs().cumsum())

In [None]:
# Cumulative predicted vs. true series held in `cum_rets_predicted`.
# NOTE(review): that variable appears to hold only the last ETF in `keys` — confirm this is intended.
px.line(cum_rets_predicted)

In [None]:
# Cumulative fitted vs. true series held in `cum_rets_fitted`.
# NOTE(review): that variable appears to hold only the last ETF in `keys` — confirm this is intended.
px.line(cum_rets_fitted)

In [None]:
# For each ETF, average its cumulative per-model errors across models,
# giving one column per ticker.
etf_mean_err = pd.DataFrame()
for etf in keys:
  etf_mean_err[etf] = errs[etf].mean(axis=1)

# One line per ETF over time.
px.line(etf_mean_err)

In [None]:
# Display the per-ETF mean (across models) of the cumulative prediction errors.
etf_mean_err

Unnamed: 0_level_0,EWC,EWQ,EWG,EWI,EWJ,EWU,SPY
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-07-18,0.32%,0.26%,-0.22%,0.93%,-0.52%,0.52%,0.33%
2019-07-19,-0.20%,-0.22%,-0.83%,-1.96%,-0.25%,0.39%,-0.27%
2019-07-22,-0.25%,-0.11%,-0.52%,-1.23%,-0.15%,0.37%,-0.05%
2019-07-23,-0.29%,0.52%,0.63%,-0.32%,0.64%,0.76%,0.62%
2019-07-24,-0.04%,0.40%,1.03%,0.06%,0.73%,0.38%,1.08%
...,...,...,...,...,...,...,...
2022-07-12,-12.23%,-27.08%,-46.47%,4.89%,-9.47%,11.32%,30.72%
2022-07-13,-12.31%,-26.76%,-46.77%,4.94%,-10.05%,11.13%,30.14%
2022-07-14,-15.25%,-28.23%,-48.82%,0.45%,-11.06%,9.07%,29.90%
2022-07-15,-14.27%,-26.21%,-46.36%,2.36%,-10.00%,10.92%,31.59%


In [None]:
# `errs` is a dict of per-ETF frames and has no .mean(); average each ETF's
# cumulative errors across models first, then average those series across ETFs.
px.line(
    pd.DataFrame({etf: etf_errs.mean(axis=1) for etf, etf_errs in errs.items()}).mean(axis=1)
)

AttributeError: ignored

# Cumulative returns by ETF

In [None]:
# Cumulative sum of the out-of-sample `true` column, one column per ETF.
# Only the `true` column is accumulated; selecting it by exact name avoids both the
# cumsum over every other column and a substring match on 'true'.
cum_rets_df = pd.DataFrame()
for etf in keys:
  cum_rets_df[f'{etf}'] = predictions[etf]['true'].cumsum()
px.line(cum_rets_df, title='Out-of-Sample (Test) Cumulative Returns by ETF')

In [None]:
# Cumulative sum of the in-sample `true` column, one column per ETF.
# Only the `true` column is accumulated; selecting it by exact name avoids both the
# cumsum over every other column and a substring match on 'true'.
cum_rets_df_train = pd.DataFrame()
for etf in keys:
  cum_rets_df_train[f'{etf}'] = fits[etf]['true'].cumsum()
px.line(cum_rets_df_train, title='In-Sample (Train) Cumulative Returns by ETF')