# "Reuters Dataset 1 Sentimetre Model 2 Long-Short BackTest Top 5 Long, Top 5 Short Predictions"
> "Reuters Dataset 1 Sentimetre Model 2 Long-Short BackTest Top 5 Long, Top 5 Short Predictions"
- toc: false
- branch: master
- badges: false
- comments: true
- categories: [long, short, longshort]
- hide: false
- search_exclude: true
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2

**Backtest:**

We assume that we are able to buy at market open and liquidate at market close. 
Our backtest does not incorporate:

1. Use of options/derivatives

2. Self-financing portfolios

3. Leverage

4. Optimal sizing of trades: all positions are the same size

5. Transaction costs

6. Slippage

**Top 5 position selection**

For each day, we select the top 5 long positions and the top 5 short positions based on features and discard the remaining positions. 

**Proof-of-Concept 1: Reuters dataset 1 2017-2020**

Data is segmented into training data (2017-2018) and test data (2019-2020). The text data is preprocessed with text normalization, stemming, lemmatization and extraction of stop words.

*Model accuracy on the validation dataset:*

    NLTK VADER Sentiment Analyzer - N/A

    Linear Classifier - 53%

    Sentimetre Model 1 - 53%

    Sentimetre Model 2 - 57%

*Prediction accuracy on the test dataset:*

    NLTK VADER Sentiment Analyzer - 50%

    Linear Classifier - 52%

    Sentimetre Model 1 - 51%

    Sentimetre Model 2 - 55%

In [1]:
#hide_input
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import seaborn as sns
import os
import time
import gc
import glob
import matplotlib.pyplot as plt

In [2]:
#hide_input
# Read the Reuters test CSV into df_tes, the frame the following cells build on.
reuters_test_csv='D:\\Downloads\\Reuters_testwithtrspacy2sentencepcheadlineplustextplustrplustrheadline.csv'
df_tes=pd.read_csv(reuters_test_csv)

In [3]:
#hide_input
# Rows predicted long (pred == 1), restricted to dates that have more than five such rows.
# NOTE(review): df_sort_long is not read by the top-5 selection below, which ranks
# every row of df_tes -- confirm whether this filter was meant to feed it.
long_rows=df_tes.loc[df_tes.pred==1]
long_rows_per_date=long_rows.Date.value_counts()
busy_long_dates=long_rows_per_date.loc[long_rows_per_date>5].index
df_sort_long=long_rows.loc[df_tes['Date'].isin(busy_long_dates)]

# For each date keep the five rows with the largest 'tr_headlineplustext_prof1' value.
df_sort_long_top=(
    df_tes
    .groupby(['Date'])
    .apply(lambda day_rows: day_rows.nlargest(5,['tr_headlineplustext_prof1']))
    .reset_index(drop=True)
)

In [4]:
#hide_input
# Rows predicted short (pred == -1), restricted to dates that have more than five such rows.
# NOTE(review): df_sort_short is not read by the top-5 selection below, which ranks
# every row of df_tes -- confirm whether this filter was meant to feed it.
short_rows=df_tes.loc[df_tes.pred==-1]
short_rows_per_date=short_rows.Date.value_counts()
busy_short_dates=short_rows_per_date.loc[short_rows_per_date>5].index
df_sort_short=short_rows.loc[df_tes['Date'].isin(busy_short_dates)]

# For each date keep the five rows with the largest 'tr_headlineplustext_prof-1' value.
df_sort_short_top=(
    df_tes
    .groupby(['Date'])
    .apply(lambda day_rows: day_rows.nlargest(5,['tr_headlineplustext_prof-1']))
    .reset_index(drop=True)
)

In [5]:

#hide_input
# Stack the long and short top-5 selections into one frame ordered by date.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_tes=pd.concat([df_sort_long_top,df_sort_short_top],ignore_index=True).sort_values('Date')

# VADER direction: +1 when the compound score is positive, otherwise -1
# (a compound score of exactly 0 therefore lands on the short side).
df_tes['vader_score']=np.where(df_tes.compound>0,1,-1)
#hide
# Signed one-day return of each position under each predictor's direction.
df_tes['returnpred']=df_tes['1_day_return']*df_tes['tr_pred']
df_tes['returnpredvader']=df_tes['1_day_return']*df_tes['vader_score']
df_tes['returnpredsgd']=df_tes['1_day_return']*df_tes['linearsgdlog_pred']


# In[266]:


#hide
#df_testp=df_testp.loc[df_testp.label!='nan'].loc[df_testp.Date>=20190101].loc[df_testp.Date<20190201].sort_values(by='Date',ascending=True).copy()
#df_test=df_test.loc[df_test.label!='nan'].loc[df_test.Date>=20190101].loc[df_test.Date<20190201].sort_values(by='Date',ascending=True).copy()
#df_tes=df_tes.loc[df_tes.label!='nan'].loc[df_tes.Date>=20190201].loc[df_tes.Date<20190301].sort_values(by='Date',ascending=True).copy().reset_index(drop=True)
# Split the combined frame by the sign of 'pred'; each half gets a fresh 0..n-1 index.
is_long=df_tes['pred']==1
is_short=df_tes['pred']==-1
df_long=df_tes[is_long].reset_index(drop=True)
df_short=df_tes[is_short].reset_index(drop=True)
#df_testp.reset_index(drop=True, inplace=True)


# In[267]:


#hide
# Per-date growth factor for each predictor: the geometric mean of (1 + r/100)
# over that date's positions, written onto every row of the date.
# A single groupby pass replaces three loops that rescanned the frame per date.
# Missing returns are skipped in the product but still counted in the root.
# NOTE(review): an equally weighted basket grows by the arithmetic mean of
# (1 + r/100); the geometric mean used here is never larger -- confirm intent.
for factor_col,signed_col in (
    ('dailyaveragereturn','returnpred'),
    ('dailyaveragereturnvader','returnpredvader'),
    ('dailyaveragereturnsgd','returnpredsgd'),
):
    df_tes[factor_col]=df_tes.groupby('Date')[signed_col].transform(
        lambda day_returns: np.prod(1+(day_returns/100))**(1/len(day_returns))
    )



# In[268]:


#hide
#df_tes['2dayreturn']=0.0
#df_tes['Daytwo']=df_tes["Date"].shift(1)
#for date in df_tes['Daytwo'].unique():
    #if date>0:
        #df_tes.loc[df_tes['Daytwo']==date,'2dayreturn']=np.prod(1+(df_tes.loc[df_tes['Daytwo']==date]['2_day_return']))**(1/len(df_tes.loc[df_tes['Daytwo']==date]))
        #print(np.prod(1+(df_tes.loc[df_tes['Date']==date]['labelr1']/100))**(1/len(df_tes.loc[df_tes['Date']==date])))


# In[269]:


#hide
# One row per date (the last occurrence); the per-date factor columns are
# constant within a date, so any row of the date carries them.
is_repeat=df_tes.duplicated(subset=["Date"],keep='last')
df_a=df_tes.loc[~is_repeat].reset_index(drop=True)


# In[270]:


#hide
# Cumulative growth of one unit: running product of the daily growth factors.
# cumprod replaces per-row prefix products, which were quadratic in the number
# of dates. Missing factors count as 1.0, matching the NaN-skipping product
# used previously.
for cum_col,factor_col in (
    ('cumreturn1b','dailyaveragereturn'),
    ('cumreturn1d','dailyaveragereturnvader'),
    ('cumreturn1e','dailyaveragereturnsgd'),
):
    df_a[cum_col]=df_a[factor_col].fillna(1.0).cumprod()
# **Long-Short Portfolio**

# In[271]:


#hide
if len(df_a)>0:
    import altair as alt
    import datetime
    # 'Date' holds YYYYMMDD integers; parse them to timestamps so they can be
    # merged with the benchmark files and plotted on a temporal axis.
    df_a['Date']=pd.to_datetime(df_a['Date'].astype(int).astype(str),format='%Y%m%d')

    # Dow Jones benchmark -> 'cumreturndow'.
    # The two benchmark files were previously swapped (GSPCa.csv fed the Dow column
    # and DJIa.csv the S&P 500 column), which mislabels both chart series.
    # NOTE(review): assumes the files are named after the ^DJI / ^GSPC tickers -- confirm.
    df_dow=pd.read_csv('D:\\Downloads\\DJIa.csv')
    df_dow['Date']=pd.to_datetime(df_dow['Date'],format='%Y-%m-%d')
    dow_in_range=(df_dow.Date>=df_a.Date.min())&(df_dow.Date<=df_a.Date.max())
    df_dow=df_dow.loc[dow_in_range].reset_index()
    # Close-to-next-close return; the last row has no next close and is NaN.
    df_dow['return']=(df_dow['Close'].shift(-1)-df_dow['Close'])/df_dow['Close']
    # Running product, with the NaN return counted as no change. This replaces
    # chained assignment (df['col'][l]=...), which does not write back under
    # pandas Copy-on-Write, and a quadratic prefix-product loop.
    df_dow['cumreturndow']=(1+df_dow['return']).fillna(1.0).cumprod()
    df_a=pd.merge(df_a,df_dow[['Date','cumreturndow']],on='Date',how='inner')

    # S&P 500 benchmark -> 'cumreturnsp500', limited to the dates left after the merge above.
    df_sp500=pd.read_csv('D:\\Downloads\\GSPCa.csv')
    df_sp500['Date']=pd.to_datetime(df_sp500['Date'],format='%Y-%m-%d')
    sp500_in_range=(df_sp500.Date>=df_a.Date.min())&(df_sp500.Date<=df_a.Date.max())
    df_sp500=df_sp500.loc[sp500_in_range].reset_index()
    df_sp500['return']=(df_sp500['Close'].shift(-1)-df_sp500['Close'])/df_sp500['Close']
    df_sp500['cumreturnsp500']=(1+df_sp500['return']).fillna(1.0).cumprod()

    df_a=pd.merge(df_a,df_sp500[['Date','cumreturnsp500']],on='Date',how='inner')
    #df_ag['Date']=df_ag['Date'].astype(str).str[:4]+'-'+df_ag['Date'].astype(str).str[4:6]+'-'+df_ag['Date'].astype(str).str[6:]
    #df_ag['Date']=df_ag['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_ag[['Date','cumreturn1a']], right_on='Date', left_on='Date',how='inner')
    #df_a['Date']=df_a['Date'].astype(str).str[:4]+'-'+df_a['Date'].astype(str).str[4:6]+'-'+df_a['Date'].astype(str).str[6:]
    #df_a['Date']=df_a['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_a[['Date','cumreturn1b']], right_on='Date', left_on='Date',how='inner')


# In[272]:


#hide_input
# Cumulative strategy return next to the two benchmark columns, one row per date.
df_a.loc[:,['Date','cumreturn1b','cumreturndow','cumreturnsp500']]


# In[273]:


#hide_input
import altair as alt

# Every layer is drawn from df_a with Date on the x-axis and the same canvas size.
canvas={'width':1000,'height':1000}

# Strategy curve: a trail whose thickness follows the cumulative value; it carries the title.
a1=alt.Chart(df_a).mark_trail(color='steelblue').encode(
    x='Date:T',y='cumreturn1b:Q',size='cumreturn1b',
).properties(
    title='Long-Short Portfolio Backtest: Sentimetre Model 2 (blue) vs Vader (brown) vs Linear Classifier (orange) vs Dow (black) vs SP500 (red) ',
    **canvas
)

# Benchmark columns: black circles for 'cumreturndow', red ticks for 'cumreturnsp500'.
a2=alt.Chart(df_a).mark_circle(color='black',size=40).encode(
    x='Date:T',y='cumreturndow:Q',
).properties(**canvas)

a3=alt.Chart(df_a).mark_tick(color='red').encode(
    x='Date:T',y='cumreturnsp500:Q',
).properties(**canvas)

# Comparison predictors: VADER (brown line) and the linear classifier (orange line).
a4=alt.Chart(df_a).mark_line(color='brown').encode(
    x='Date:T',y='cumreturn1d:Q',
).properties(**canvas)

a5=alt.Chart(df_a).mark_line(color='orange').encode(
    x='Date:T',y='cumreturn1e:Q',
).properties(**canvas)

a1 + a2 + a3 + a4 + a5

In [6]:
#Accuracy
#hide_input
# Fraction of rows whose 'label' equals the prediction column (cast to float).
list_accuracy=['pred']
for acc in list_accuracy:
    hits=df_tes.label==df_tes[acc].astype(float)
    print('Accuracy Long-Short Portfolio',acc,int(hits.sum())/len(df_tes))


Accuracy Long-Short Portfolio pred 0.5111111111111111


In [7]:
#hide_input

# **Long portfolio**

# In[274]:
# Long portfolio: start from the per-date top-5 long selection.
df_long=df_sort_long_top.copy()
# VADER direction: +1 when the compound score is positive, otherwise -1.
df_long['vader_score']=np.where(df_long.compound>0,1,-1)
#hide
# Signed one-day return of each position under each predictor's direction.
df_long['returnpred']=df_long['1_day_return']*df_long['tr_pred']
df_long['returnpredvader']=df_long['1_day_return']*df_long['vader_score']
df_long['returnpredsgd']=df_long['1_day_return']*df_long['linearsgdlog_pred']

#hide
# Per-date growth factor: geometric mean of (1 + r/100) over the date's positions,
# written onto every row of the date. A single groupby pass replaces three loops
# that rescanned the frame per date. Missing returns are skipped in the product
# but still counted in the root.
for factor_col,signed_col in (
    ('dailyaveragereturn','returnpred'),
    ('dailyaveragereturnvader','returnpredvader'),
    ('dailyaveragereturnsgd','returnpredsgd'),
):
    df_long[factor_col]=df_long.groupby('Date')[signed_col].transform(
        lambda day_returns: np.prod(1+(day_returns/100))**(1/len(day_returns))
    )

# One row per date (last occurrence) for the cumulative series.
df_a=df_long.drop_duplicates(subset=["Date"],keep='last').reset_index(drop=True)

# Cumulative growth of one unit: running product of the daily growth factors.
# cumprod replaces per-row prefix products, which were quadratic in the number
# of dates. Missing factors count as 1.0, matching the NaN-skipping product
# used previously.
for cum_col,factor_col in (
    ('cumreturn1b','dailyaveragereturn'),
    ('cumreturn1d','dailyaveragereturnvader'),
    ('cumreturn1e','dailyaveragereturnsgd'),
):
    df_a[cum_col]=df_a[factor_col].fillna(1.0).cumprod()

# In[275]:


#hide
if len(df_a)>0:
    import altair as alt
    import datetime
    # 'Date' holds YYYYMMDD integers; parse them to timestamps so they can be
    # merged with the benchmark files and plotted on a temporal axis.
    df_a['Date']=pd.to_datetime(df_a['Date'].astype(int).astype(str),format='%Y%m%d')

    # Dow Jones benchmark -> 'cumreturndow'.
    # The two benchmark files were previously swapped (GSPCa.csv fed the Dow column
    # and DJIa.csv the S&P 500 column), which mislabels both chart series.
    # NOTE(review): assumes the files are named after the ^DJI / ^GSPC tickers -- confirm.
    df_dow=pd.read_csv('D:\\Downloads\\DJIa.csv')
    df_dow['Date']=pd.to_datetime(df_dow['Date'],format='%Y-%m-%d')
    dow_in_range=(df_dow.Date>=df_a.Date.min())&(df_dow.Date<=df_a.Date.max())
    df_dow=df_dow.loc[dow_in_range].reset_index()
    # Close-to-next-close return; the last row has no next close and is NaN.
    df_dow['return']=(df_dow['Close'].shift(-1)-df_dow['Close'])/df_dow['Close']
    # Running product, with the NaN return counted as no change. This replaces
    # chained assignment (df['col'][l]=...), which does not write back under
    # pandas Copy-on-Write, and a quadratic prefix-product loop.
    df_dow['cumreturndow']=(1+df_dow['return']).fillna(1.0).cumprod()
    df_a=pd.merge(df_a,df_dow[['Date','cumreturndow']],on='Date',how='inner')

    # S&P 500 benchmark -> 'cumreturnsp500', limited to the dates left after the merge above.
    df_sp500=pd.read_csv('D:\\Downloads\\GSPCa.csv')
    df_sp500['Date']=pd.to_datetime(df_sp500['Date'],format='%Y-%m-%d')
    sp500_in_range=(df_sp500.Date>=df_a.Date.min())&(df_sp500.Date<=df_a.Date.max())
    df_sp500=df_sp500.loc[sp500_in_range].reset_index()
    df_sp500['return']=(df_sp500['Close'].shift(-1)-df_sp500['Close'])/df_sp500['Close']
    df_sp500['cumreturnsp500']=(1+df_sp500['return']).fillna(1.0).cumprod()

    df_a=pd.merge(df_a,df_sp500[['Date','cumreturnsp500']],on='Date',how='inner')
    #df_ag['Date']=df_ag['Date'].astype(str).str[:4]+'-'+df_ag['Date'].astype(str).str[4:6]+'-'+df_ag['Date'].astype(str).str[6:]
    #df_ag['Date']=df_ag['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_ag[['Date','cumreturn1a']], right_on='Date', left_on='Date',how='inner')
    #df_a['Date']=df_a['Date'].astype(str).str[:4]+'-'+df_a['Date'].astype(str).str[4:6]+'-'+df_a['Date'].astype(str).str[6:]
    #df_a['Date']=df_a['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_a[['Date','cumreturn1b']], right_on='Date', left_on='Date',how='inner')


# In[276]:


#hide_input
# Cumulative strategy return next to the two benchmark columns, one row per date.
df_a.loc[:,['Date','cumreturn1b','cumreturndow','cumreturnsp500']]


# In[277]:


#hide_input
import altair as alt

# Every layer is drawn from df_a with Date on the x-axis and the same canvas size.
canvas={'width':1000,'height':1000}

# Strategy curve: a trail whose thickness follows the cumulative value; it carries the title.
a1=alt.Chart(df_a).mark_trail(color='steelblue').encode(
    x='Date:T',y='cumreturn1b:Q',size='cumreturn1b',
).properties(
    title='Long Portfolio Backtest: Sentimetre Model 2 (blue) vs Vader (brown) vs Linear Classifier (orange) vs  Dow (black) vs SP500 (red) ',
    **canvas
)

# Benchmark columns: black circles for 'cumreturndow', red ticks for 'cumreturnsp500'.
a2=alt.Chart(df_a).mark_circle(color='black',size=40).encode(
    x='Date:T',y='cumreturndow:Q',
).properties(**canvas)

a3=alt.Chart(df_a).mark_tick(color='red').encode(
    x='Date:T',y='cumreturnsp500:Q',
).properties(**canvas)

# Comparison predictors: VADER (brown line) and the linear classifier (orange line).
a4=alt.Chart(df_a).mark_line(color='brown').encode(
    x='Date:T',y='cumreturn1d:Q',
).properties(**canvas)

a5=alt.Chart(df_a).mark_line(color='orange').encode(
    x='Date:T',y='cumreturn1e:Q',
).properties(**canvas)

a1 + a2 + a3 + a4 + a5


In [8]:
#Accuracy
#hide_input
# Fraction of rows whose 'label' equals the prediction column (cast to float).
list_accuracy=['pred']
for acc in list_accuracy:
    hits=df_long.label==df_long[acc].astype(float)
    print('Accuracy Long Portfolio',acc,int(hits.sum())/len(df_long))


Accuracy Long Portfolio pred 0.5152046783625731


In [9]:
#hide_input


# **Short Portfolio**

# In[278]:

# Short portfolio: start from the per-date top-5 short selection.
df_short=df_sort_short_top.copy()
# VADER direction: +1 when the compound score is positive, otherwise -1.
df_short['vader_score']=np.where(df_short.compound>0,1,-1)
#hide
# Signed one-day return of each position under each predictor's direction.
df_short['returnpred']=df_short['1_day_return']*df_short['tr_pred']
df_short['returnpredvader']=df_short['1_day_return']*df_short['vader_score']
df_short['returnpredsgd']=df_short['1_day_return']*df_short['linearsgdlog_pred']
#hide
# Per-date growth factor: geometric mean of (1 + r/100) over the date's positions,
# written onto every row of the date. A single groupby pass replaces three loops
# that rescanned the frame per date. Missing returns are skipped in the product
# but still counted in the root.
for factor_col,signed_col in (
    ('dailyaveragereturn','returnpred'),
    ('dailyaveragereturnvader','returnpredvader'),
    ('dailyaveragereturnsgd','returnpredsgd'),
):
    df_short[factor_col]=df_short.groupby('Date')[signed_col].transform(
        lambda day_returns: np.prod(1+(day_returns/100))**(1/len(day_returns))
    )

# One row per date (last occurrence) for the cumulative series.
df_a=df_short.drop_duplicates(subset=["Date"],keep='last').reset_index(drop=True)

# Cumulative growth of one unit: running product of the daily growth factors.
# cumprod replaces per-row prefix products, which were quadratic in the number
# of dates. Missing factors count as 1.0, matching the NaN-skipping product
# used previously.
for cum_col,factor_col in (
    ('cumreturn1b','dailyaveragereturn'),
    ('cumreturn1d','dailyaveragereturnvader'),
    ('cumreturn1e','dailyaveragereturnsgd'),
):
    df_a[cum_col]=df_a[factor_col].fillna(1.0).cumprod()

# In[279]:


#hide
if len(df_a)>0:
    import altair as alt
    import datetime
    # 'Date' holds YYYYMMDD integers; parse them to timestamps so they can be
    # merged with the benchmark files and plotted on a temporal axis.
    df_a['Date']=pd.to_datetime(df_a['Date'].astype(int).astype(str),format='%Y%m%d')

    # Dow Jones benchmark -> 'cumreturndow'.
    # The two benchmark files were previously swapped (GSPCa.csv fed the Dow column
    # and DJIa.csv the S&P 500 column), which mislabels both chart series.
    # NOTE(review): assumes the files are named after the ^DJI / ^GSPC tickers -- confirm.
    df_dow=pd.read_csv('D:\\Downloads\\DJIa.csv')
    df_dow['Date']=pd.to_datetime(df_dow['Date'],format='%Y-%m-%d')
    dow_in_range=(df_dow.Date>=df_a.Date.min())&(df_dow.Date<=df_a.Date.max())
    df_dow=df_dow.loc[dow_in_range].reset_index()
    # Close-to-next-close return; the last row has no next close and is NaN.
    df_dow['return']=(df_dow['Close'].shift(-1)-df_dow['Close'])/df_dow['Close']
    # Running product, with the NaN return counted as no change. This replaces
    # chained assignment (df['col'][l]=...), which does not write back under
    # pandas Copy-on-Write, and a quadratic prefix-product loop.
    df_dow['cumreturndow']=(1+df_dow['return']).fillna(1.0).cumprod()
    df_a=pd.merge(df_a,df_dow[['Date','cumreturndow']],on='Date',how='inner')

    # S&P 500 benchmark -> 'cumreturnsp500', limited to the dates left after the merge above.
    df_sp500=pd.read_csv('D:\\Downloads\\GSPCa.csv')
    df_sp500['Date']=pd.to_datetime(df_sp500['Date'],format='%Y-%m-%d')
    sp500_in_range=(df_sp500.Date>=df_a.Date.min())&(df_sp500.Date<=df_a.Date.max())
    df_sp500=df_sp500.loc[sp500_in_range].reset_index()
    df_sp500['return']=(df_sp500['Close'].shift(-1)-df_sp500['Close'])/df_sp500['Close']
    df_sp500['cumreturnsp500']=(1+df_sp500['return']).fillna(1.0).cumprod()

    df_a=pd.merge(df_a,df_sp500[['Date','cumreturnsp500']],on='Date',how='inner')
    #df_ag['Date']=df_ag['Date'].astype(str).str[:4]+'-'+df_ag['Date'].astype(str).str[4:6]+'-'+df_ag['Date'].astype(str).str[6:]
    #df_ag['Date']=df_ag['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_ag[['Date','cumreturn1a']], right_on='Date', left_on='Date',how='inner')
    #df_a['Date']=df_a['Date'].astype(str).str[:4]+'-'+df_a['Date'].astype(str).str[4:6]+'-'+df_a['Date'].astype(str).str[6:]
    #df_a['Date']=df_a['Date'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
    #df_agg=pd.merge(df_agg,df_a[['Date','cumreturn1b']], right_on='Date', left_on='Date',how='inner')


# In[280]:


#hide_input
#df_a[['Date','cumreturn1b','cumreturndow','cumreturnsp500']]


# In[281]:


#hide_input
import altair as alt

# Every layer is drawn from df_a with Date on the x-axis and the same canvas size.
canvas={'width':1000,'height':1000}

# Strategy curve: a trail whose thickness follows the cumulative value; it carries the title.
a1=alt.Chart(df_a).mark_trail(color='steelblue').encode(
    x='Date:T',y='cumreturn1b:Q',size='cumreturn1b',
).properties(
    title='Short Portfolio Backtest: Sentimetre Model 2 (blue) vs Vader (brown) vs Linear Classifier (orange) vs Dow (black) vs SP500 (red) ',
    **canvas
)

# Benchmark columns: black circles for 'cumreturndow', red ticks for 'cumreturnsp500'.
a2=alt.Chart(df_a).mark_circle(color='black',size=40).encode(
    x='Date:T',y='cumreturndow:Q',
).properties(**canvas)

a3=alt.Chart(df_a).mark_tick(color='red').encode(
    x='Date:T',y='cumreturnsp500:Q',
).properties(**canvas)

# Comparison predictors: VADER (brown line) and the linear classifier (orange line).
a4=alt.Chart(df_a).mark_line(color='brown').encode(
    x='Date:T',y='cumreturn1d:Q',
).properties(**canvas)

a5=alt.Chart(df_a).mark_line(color='orange').encode(
    x='Date:T',y='cumreturn1e:Q',
).properties(**canvas)

a1 + a2 + a3 + a4 + a5



In [10]:
#Accuracy
#hide_input
# Fraction of rows whose 'label' equals the prediction column (cast to float).
list_accuracy=['pred']
for acc in list_accuracy:
    hits=df_short.label==df_short[acc].astype(float)
    print('Accuracy Short Portfolio',acc,int(hits.sum())/len(df_short))


Accuracy Short Portfolio pred 0.5070175438596491
