In [104]:
import pandas as pd
import pickle

from helpers import *
from supervised_ensemble import EnsembleObjective, Model


def create_table(data, typ: str = 'mae'):
    """Assemble one metric across all models and add improvement-over-baseline columns.

    Args:
        data: Mapping keyed by model name. Each value is handed to
            ``pd.DataFrame`` and transposed, after which the ``typ`` column
            is extracted.
        typ: Name of the metric column to extract; defaults to ``'mae'``.

    Returns:
        The transposed result of ``merge_dataframes`` over the per-model
        columns, extended with ``'Technical Improvement'``,
        ``'Sentiment Improvement'`` and ``'Combined Improvement'``, each being
        the ``'baseline'`` column minus the corresponding column.
    """
    # One single-column frame per model, with the column named after the model.
    per_model = [
        pd.DataFrame(data[name]).T[typ].to_frame(name=name)
        for name in data
    ]

    # merge_dataframes comes from the project's helpers module (star import).
    table = merge_dataframes(per_model).T

    # (new column label, column subtracted from the baseline)
    improvements = (
        ('Technical Improvement', 'technical'),
        ('Sentiment Improvement', 'sentiment'),
        ('Combined Improvement', 'combined'),
    )
    for label, column in improvements:
        table[label] = table['baseline'] - table[column]

    return table


In [105]:
# Load the pickled results for window sizes 2..6 and build one table per
# metric ('mae', 'mse') for each window: datasets[window][metric] -> DataFrame.
# NOTE: pickle.load can execute arbitrary code; only load files this project wrote.
datasets = {}
for window in range(2, 7):
    filename = f'pickles/results_{window}.pkl'
    with open(filename, 'rb') as handle:
        data = pickle.load(handle)
    # The file is already closed here; building the tables does not need it.
    datasets[window] = {typ: create_table(data, typ) for typ in ('mae', 'mse')}

In [106]:
def convert_altair(dataset: pd.DataFrame):
    """Reshape a wide score table into long form for plotting.

    The frame is stacked so that every (row label, column label, value)
    triple becomes one row with columns ``'Model'``, ``'Dataset'`` and
    ``'Score'``. Scores are clipped to the closed interval [0, 1].

    Args:
        dataset: Wide table to reshape; it is not modified.

    Returns:
        A new long-form ``DataFrame`` with a fresh integer index.
    """
    # stack() already returns a new object, so the input is left untouched.
    long_form = dataset.stack().reset_index()
    long_form.columns = ['Model', 'Dataset', 'Score']
    return long_form.assign(Score=long_form['Score'].clip(lower=0, upper=1))


# Preview the long-form version of the window-2 MAE table.
# Note that convert_altair clips every score to [0, 1].
df = convert_altair(datasets[2]['mae'])
print(df)

     Model                Dataset     Score
0   linear                  dummy  0.681300
1   linear               baseline  0.536120
2   linear              technical  0.433067
3   linear              sentiment  0.519447
4   linear               combined  0.430990
5   linear  Technical Improvement  0.103054
6   linear  Sentiment Improvement  0.016673
7   linear   Combined Improvement  0.105130
8       rf                  dummy  0.681300
9       rf               baseline  0.594096
10      rf              technical  0.466245
11      rf              sentiment  0.514815
12      rf               combined  0.460031
13      rf  Technical Improvement  0.127851
14      rf  Sentiment Improvement  0.079281
15      rf   Combined Improvement  0.134065
16      gb                  dummy  0.681300
17      gb               baseline  0.495833
18      gb              technical  0.434598
19      gb              sentiment  0.489806
20      gb               combined  0.433411
21      gb  Technical Improvemen

In [119]:
import altair as alt

# Build one faceted bar chart per metric for a single window size and show
# the two charts side by side.
charts = dict()
# Window size whose tables are plotted (key into `datasets`).
# NOTE(review): the overall title below says "14 Hour Ahead" — confirm that
# is the horizon corresponding to window 2.
offset = 2
for typ in ['MAE', 'MSE']:
    # Long-form scores for the three feature sets being compared.
    dataset = convert_altair(datasets[offset][typ.lower()][['baseline', 'combined', 'sentiment']])
    dataset.columns = [x.title() for x in dataset.columns]
    cht = alt.Chart(dataset).mark_bar().encode(
        x=alt.X('Model:O', title=''),
        # `typ` is already upper-case, so it is used directly as the axis title.
        y=alt.Y('sum(Score):Q', title=typ,
                scale=alt.Scale(domain=(0, 1), clamp=True)),
        column=alt.Column('Dataset:O', title='')
    ).properties(
        title=typ,
    )
    charts[typ] = cht

# `.properties()` returns a new chart instead of modifying the receiver, so
# its result has to be kept; otherwise the overall title is silently dropped.
cht = (charts['MAE'] | charts['MSE']).properties(
    title='14 Hour Ahead Volatility Accuracy'
)
cht

In [118]:
# For every window size, take the sentiment/combined MAE improvements of the
# model at row position 2 ('gb' in the window-2 table printed earlier —
# TODO confirm the row order is the same for every window).
# The rows are collected first and the frame is built once, instead of
# growing it with pd.concat from an empty DataFrame inside the loop.
rows = [
    datasets[i]['mae'][['Sentiment Improvement', 'Combined Improvement']].iloc[2].rename(i)
    for i in range(2, 7)
]
# Each Series becomes one row; its name (the window size) becomes the index label.
improvement = pd.DataFrame(rows)
print(improvement)

   Sentiment Improvement  Combined Improvement
2               0.006027              0.062422
3               0.005730              0.065537
4               0.006171              0.072853
5               0.007728              0.072909
6               0.007530              0.081648


In [11]:
# Take the 'pred' entry of the baseline results and pass it through
# remove_duplicate_index (not defined in the visible cells; presumably from helpers).
# NOTE(review): `baseline`, `model`, `X` and `Y` are not defined in any visible
# cell, and this cell's execution count (11) precedes the cells above it —
# confirm the notebook still runs top to bottom on a fresh kernel.
baseline_prediction = baseline['pred']
baseline_prediction = remove_duplicate_index(baseline_prediction)

# Build the 'bollinger' objective from a copy of X, pass it with a copy of Y
# to model.simple_model, and keep the 'pred' entry of the result.
bollinger_X = model.create_objective(X.copy(), 'bollinger')
bollinger_prediction = model.simple_model(bollinger_X, Y.copy())['pred']



In [12]:
# Obtain the sentiment data from the model helper; it is passed to
# model.apply_sentiment_data in the following cell.
sentiment_data = model.create_twitter_datasets()

In [13]:
# Attach the sentiment data to a copy of X and drop rows with missing values.
# dropna() is chained rather than called with inplace=True, so the object
# returned by apply_sentiment_data is never mutated and re-running this cell
# always starts from the same input.
sentiment_dataset = model.apply_sentiment_data(X.copy(), sentiment_data).dropna()

print(sentiment_dataset.head())

                    Close  Adj Close  sentiment_score
2021_06_07_ABEV  0.007752   0.007752         0.000000
2021_06_07_ABNB -0.011676  -0.011676         0.191218
2021_06_07_AZO  -0.001845  -0.001845         0.190217
2021_06_07_BBY   0.009570   0.009570         0.017200
2021_06_07_BKNG -0.007415  -0.007415         0.455900
