In [1]:
import os 
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

Using TensorFlow backend.


In [2]:
# Plotting setup (plotly + cufflinks). `iplot` is not referenced in the cells
# visible here -- NOTE(review): confirm it is used further down before removing.
from plotly.offline import iplot
import cufflinks as cf
# NOTE(review): presumably switches cufflinks to plotly's offline rendering so
# no plotly account is needed -- confirm against the cufflinks documentation.
cf.go_offline()

In [3]:
import warnings
# Install a blanket 'ignore' filter: no category or message is given, so every
# warning is silenced for the rest of the session, including any raised by the
# pandas / keras / scikit-learn calls in later cells.
warnings.filterwarnings('ignore')

In [4]:
# Row labels of the score table, in display order. Every label has the form
# <source>_<feature>_<variant>; they are generated from that pattern rather
# than typed out one by one.
index = (
    # sto: vol / rea, each for both variants
    ['sto_' + kind + '_' + variant
     for kind in ('vol', 'rea')
     for variant in ('all', 'twt')]
    # sto: si1..si3, grouped by variant
    + ['sto_' + si + '_' + variant
       for variant in ('all', 'twt')
       for si in ('si1', 'si2', 'si3')]
    # stw: vol for both variants
    + ['stw_vol_' + variant for variant in ('dcl', 'brt')]
    # stw: si1..si3, grouped by variant
    + ['stw_' + si + '_' + variant
       for variant in ('dcl', 'brt')
       for si in ('si1', 'si2', 'si3')]
)

In [6]:
# Score every prediction file in data/pred: fit a logistic regression on a
# short window of lagged predictions plus the rolling features of the same
# symbol, then store the out-of-sample ROC AUC in `sc`.
#
# File names are parsed as <prefix>_<source>_<type1>_<type2>_<symbol>.<ext>.
# The row label is <source>_<type1>_<type2>, with the last two parts swapped
# when type2 starts with 'si' so that it matches the entries of `index`.
pred_dir = 'data/pred'
split_date = pd.to_datetime('2019-05-01')  # rows before this date train, the rest test
window = 3                                 # consecutive rows fed to the model per sample

# NaN (rather than 0.0) marks combinations that were never scored, so a
# missing file cannot be mistaken for an AUC of zero or drag an average down.
sc = pd.DataFrame(np.nan, index=index, columns=['btc', 'eth', 'aapl', 'spx'])

# The changes/roll files depend only on the symbol, so each pair is read once
# and reused; the frames are only sliced below, never mutated.
symbol_frames = {}

# sorted() makes the processing order deterministic (os.listdir order is arbitrary).
for file in sorted(os.listdir(pred_dir)):
    parts = file.split('.')[0].split('_')
    if len(parts) != 5:
        # Not a prediction file (e.g. '.DS_Store', '.ipynb_checkpoints');
        # the 5-way unpacking below would raise ValueError on it.
        continue
    _, source, type1, type2, symbol = parts

    # 'fbp' variants are excluded from this comparison.
    if 'fbp' in (type1, type2):
        continue

    if type2.startswith('si'):
        new = source + '_' + type2 + '_' + type1
    else:
        new = source + '_' + type1 + '_' + type2

    data = pd.read_csv(os.path.join(pred_dir, file), index_col=0, parse_dates=True)
    if symbol not in symbol_frames:
        symbol_frames[symbol] = (
            pd.read_csv('data/changes/' + symbol + '_changes.csv', index_col=0, parse_dates=True),
            pd.read_csv('data/roll/' + symbol + '_roll.csv', index_col=0, parse_dates=True),
        )
    changes, roll = symbol_frames[symbol]

    # Chronological split; each frame is masked with its own index.
    # NOTE(review): the code below assumes data, changes and roll cover the
    # same dates row for row -- only the lengths are implicitly checked.
    data_train = data[data.index < split_date]
    data_test = data[data.index >= split_date]
    changes_train = changes[changes.index < split_date]
    changes_test = changes[changes.index >= split_date]
    roll_train = roll[roll.index < split_date]
    roll_test = roll[roll.index >= split_date]

    # batch_size == len(...) makes the first batch contain every sample.
    # Sample i holds rows i..i+window-1 and is paired with target row i+window.
    tg_train = TimeseriesGenerator(data_train.values, changes_train.values, window, batch_size=len(data_train))
    x_train, y_train = tg_train[0]
    x_train = x_train.reshape(-1, x_train.shape[1] * x_train.shape[2])  # flatten each window
    y_train = y_train.reshape(-1)

    tg_test = TimeseriesGenerator(data_test.values, changes_test.values, window, batch_size=len(data_test))
    x_test, y_test = tg_test[0]
    x_test = x_test.reshape(-1, x_test.shape[1] * x_test.shape[2])
    y_test = y_test.reshape(-1)

    # Append the rolling features of the last row inside each window:
    # rows window-1 .. len-2 line up one-to-one with the len-window samples.
    x_train = np.append(x_train, roll_train.iloc[window - 1:-1].values, axis=1)
    x_test = np.append(x_test, roll_test.iloc[window - 1:-1].values, axis=1)

    # Standardise using statistics from the training split only.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = LogisticRegression()
    model.fit(x_train, y_train)
    prob = model.predict_proba(x_test)[:, 1]  # probability of the positive class
    roc_score = roc_auc_score(y_test, prob)
    sc.loc[new, symbol] = roc_score

In [7]:
# Display the out-of-sample ROC AUC table (rows: feature sets, columns: symbols).
sc

Unnamed: 0,btc,eth,aapl,spx
sto_vol_all,0.634868,0.746154,0.474026,0.459052
sto_vol_twt,0.569079,0.57641,0.538961,0.44181
sto_rea_all,0.542763,0.567692,0.481602,0.56681
sto_rea_twt,0.502193,0.618462,0.458874,0.685345
sto_si1_all,0.625548,0.557436,0.477273,0.538793
sto_si2_all,0.633772,0.522564,0.52381,0.5625
sto_si3_all,0.593202,0.522051,0.455628,0.534483
sto_si1_twt,0.589912,0.587179,0.49026,0.518319
sto_si2_twt,0.592105,0.594359,0.506494,0.516164
sto_si3_twt,0.595943,0.534872,0.420996,0.512931


In [8]:
# Row-wise mean of the absolute scores across all current columns of `sc`,
# stored as a new 'avg' column (NaN entries are skipped by mean()).
sc['avg'] = sc.abs().mean(axis=1)

In [9]:
# Display the score table again, now including the row-average 'avg' column.
sc

Unnamed: 0,btc,eth,aapl,spx,avg
sto_vol_all,0.634868,0.746154,0.474026,0.459052,0.578525
sto_vol_twt,0.569079,0.57641,0.538961,0.44181,0.531565
sto_rea_all,0.542763,0.567692,0.481602,0.56681,0.539717
sto_rea_twt,0.502193,0.618462,0.458874,0.685345,0.566218
sto_si1_all,0.625548,0.557436,0.477273,0.538793,0.549762
sto_si2_all,0.633772,0.522564,0.52381,0.5625,0.560661
sto_si3_all,0.593202,0.522051,0.455628,0.534483,0.526341
sto_si1_twt,0.589912,0.587179,0.49026,0.518319,0.546418
sto_si2_twt,0.592105,0.594359,0.506494,0.516164,0.55228
sto_si3_twt,0.595943,0.534872,0.420996,0.512931,0.516185


In [10]:
# Rank the feature sets by their average score; sort_values defaults to
# ascending order, so the weakest rows come first. Returns a sorted copy --
# `sc` itself is left unchanged.
sc.sort_values('avg')

Unnamed: 0,btc,eth,aapl,spx,avg
stw_vol_brt,0.524123,0.502051,0.480519,0.548491,0.513796
sto_si3_twt,0.595943,0.534872,0.420996,0.512931,0.516185
stw_vol_dcl,0.534539,0.515385,0.464286,0.560345,0.518639
sto_si3_all,0.593202,0.522051,0.455628,0.534483,0.526341
sto_vol_twt,0.569079,0.57641,0.538961,0.44181,0.531565
sto_rea_all,0.542763,0.567692,0.481602,0.56681,0.539717
sto_si1_twt,0.589912,0.587179,0.49026,0.518319,0.546418
stw_si2_brt,0.597039,0.557949,0.429654,0.602371,0.546753
stw_si2_dcl,0.594846,0.532308,0.433983,0.633621,0.548689
sto_si1_all,0.625548,0.557436,0.477273,0.538793,0.549762
