In [19]:
import numpy as np

from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GRU

In [20]:
import pandas as pd
import pandas_datareader.data as web

import datetime

# Download daily quotes for TICKER and the VIX close series from `start` onward.
start = datetime.datetime(2015, 1, 1)

TICKER = 'NVDA'
# NOTE(review): the "google" source may no longer be served by current
# pandas_datareader releases -- confirm before relying on a fresh run.
DAILY_DATA = web.DataReader(TICKER, "google", start)
VIX = web.DataReader("VIXCLS", "fred", start)

# Join both sources column-wise on their index; rows with any missing value
# in either source are dropped.
_DATA = pd.concat([DAILY_DATA, VIX], axis=1).dropna()
_DATA['RETURN'] = _DATA.Close.pct_change()

# Standardise the VIX level and the traded volume, one fresh scaler per column.
# NOTE(review): the scalers are fitted on the full history, i.e. before the
# train/test split performed later in the notebook, so test-period statistics
# leak into the training features.
for raw_name, scaled_name in (('VIXCLS', 'sVIXCLS'), ('Volume', 'sVolume')):
    column_2d = _DATA[raw_name].values.reshape(-1, 1)
    _DATA[scaled_name] = StandardScaler().fit_transform(column_2d)

# The first row has no previous close, so its RETURN is NaN; discard it.
_DATA = _DATA.iloc[1:]
_DATA.head()

Unnamed: 0,Open,High,Low,Close,Volume,VIXCLS,RETURN,sVIXCLS,sVolume
2015-01-05,20.13,20.19,19.7,19.79,4948799.0,19.92,-0.01689,1.058288,-0.78889
2015-01-06,19.82,19.84,19.17,19.19,4944121.0,21.12,-0.030318,1.344465,-0.78953
2015-01-07,19.33,19.5,19.08,19.14,8045186.0,19.31,-0.002606,0.912814,-0.365413
2015-01-08,19.36,19.98,19.35,19.86,7094534.0,17.01,0.037618,0.364307,-0.495429
2015-01-09,19.93,20.09,19.66,19.94,5238601.0,17.55,0.004028,0.493087,-0.749256


In [21]:
# Windowing configuration (all sizes are counted in rows of _DATA).
lookback = 30        # rows fed to the model as input
horizon = 30         # rows the model is asked to predict
test_samples = 7     # number of windows held out for testing
PREDICTORS = ['RETURN','sVIXCLS','sVolume']

# The last (lookback + horizon + test_samples) rows form the test set;
# everything before them is used for training.
holdout_rows = lookback + horizon + test_samples
TRAIN = _DATA.iloc[:-holdout_rows]
TEST = _DATA.iloc[-holdout_rows:]

In [22]:
# Slice TEST into sliding (input, target) windows.
# Each input holds `lookback` consecutive rows and each target the `horizon`
# rows right after it; both are transposed, so the resulting arrays are laid
# out as (sample, predictor, step).
window = lookback + horizon
test_starts = range(TEST.shape[0] - window)
# NOTE(review): unlike the TRAIN loop there is no "+ 1" here, so the final
# possible window is skipped; with the current split this yields exactly
# `test_samples` windows.

TEST_X = np.array([
    np.array(TEST.iloc[s:s + lookback][PREDICTORS].T)
    for s in test_starts
])

TEST_Y = np.array([
    np.array(TEST.iloc[s + lookback:s + window][PREDICTORS].T)
    for s in test_starts
])

TEST_X.shape

(7, 3, 30)

In [23]:
# Slice TRAIN into sliding (input, target) windows.
# Each input holds `lookback` consecutive rows and each target the `horizon`
# rows right after it; both are transposed, so the resulting arrays are laid
# out as (sample, predictor, step).
window = lookback + horizon
train_starts = range(TRAIN.shape[0] - window + 1)  # "+ 1" keeps the last full window

TRAIN_X = np.array([
    np.array(TRAIN.iloc[s:s + lookback][PREDICTORS].T)
    for s in train_starts
])

TRAIN_Y = np.array([
    np.array(TRAIN.iloc[s + lookback:s + window][PREDICTORS].T)
    for s in train_starts
])

TRAIN_X.shape

(481, 3, 30)

In [24]:
# Build Model
# Four stacked GRU layers, every one returning its full output sequence so the
# final layer emits one `horizon`-wide vector per input row, matching the
# (sample, predictor, horizon) layout of TRAIN_Y.
#
# NOTE(review): TRAIN_X is laid out as (sample, predictor, lookback), so
# input_shape is (n_predictors, lookback). Keras recurrent layers read that as
# (timesteps, features), meaning the recurrence runs over the predictors, not
# over the lookback days -- confirm this is intended.
model = Sequential()
model.add(GRU(lookback, input_shape=(TRAIN_X.shape[1], TRAIN_X.shape[2]), return_sequences=True, activation='relu', name='GRU_1'))
model.add(GRU(lookback + 10, return_sequences=True, activation='relu', name='GRU_2'))
model.add(GRU(lookback + 10, return_sequences=True, activation='relu', name='GRU_3'))
model.add(GRU(horizon, return_sequences=True, activation='linear', name='GRU_4'))

# This is a regression on continuous targets, so classification accuracy is
# not a meaningful metric; track mean absolute error alongside the MSE loss.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [75]:
import os

from keras.models import load_model

# Resume from a previously saved model when one exists on disk. On a fresh
# checkout there is no file yet (it is only written by the save cell further
# down), so keep the freshly built `model` instead of failing, which lets the
# notebook run top to bottom.
MODEL_PATH = '{}_MODEL.h5'.format(TICKER)
if os.path.isfile(MODEL_PATH):
    model = load_model(MODEL_PATH)

## Fit the model

In [None]:
# Train with a single batch per epoch (batch size == number of training
# windows), using the test windows as validation data; progress output is off.
model.fit(
    TRAIN_X,
    TRAIN_Y,
    epochs=35000,
    batch_size=TRAIN_X.shape[0],
    validation_data=(TEST_X, TEST_Y),
    verbose=False,
)

In [None]:
# Persist the model to an HDF5 file named '<TICKER>_MODEL.h5', replacing any
# existing file. The in-memory `model` is kept because later cells predict with it.
model.save(
    '{}_MODEL.h5'.format(TICKER),
    overwrite=True,
)

In [None]:
# In-sample fit first, then the out-of-sample forecast (same call order as before).
FITTED, TEST_PREDICTION = (
    model.predict(inputs) for inputs in (TRAIN_X, TEST_X)
)

In [None]:
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import Jitter
# Configure Bokeh to render plots inline in the notebook when show() is called.
output_notebook()

In [None]:
# Diagnostic plots. p1-p3 look at predictor 0 (RETURN, the first entry of
# PREDICTORS) at horizon step 0 for every training window; p4 looks at the
# whole horizon of the first test window.
x_range = list(range(TRAIN_X.shape[0]))

target_step0 = TRAIN_Y[:, 0, 0]
fitted_step0 = FITTED[:, 0, 0]

p1 = figure(title="Fitted vs. Actual Returns" , plot_width=900 , plot_height=300)
p1.line( x=x_range , y=target_step0.tolist() , alpha=0.5 , legend="Target")
p1.line( x=x_range , y=fitted_step0.tolist() , color='red' , alpha=0.5 , legend="Fitted")

# 1 where fitted and actual return have the same sign, 0 otherwise.
# This previously indexed horizon step 1 ([:, 0, 1]) while the two plots that
# share its x-axis use step 0; use step 0 here too so all three line up.
# Cast to int, as the accuracy cells do, so the glyph receives numeric values.
same_direction = ((fitted_step0 > 0.0) == (target_step0 > 0.0)).astype(int)

p2 = figure(title="Fitted Same Direction as Actual" , plot_width=900 , plot_height=300)
p2.circle( x=x_range , alpha=0.5 , y=same_direction.tolist() )

p3 = figure(title="Fitted Returns - Actual Returns" , plot_width=900 , plot_height=300)
p3.circle( x=x_range , y=( fitted_step0 - target_step0 ).tolist() , color='orange' , alpha=0.5)

# Predicted (x) against actual (y) RETURN over the horizon of the first test window.
p4 = figure(title="Test Predicted vs. Target" , plot_width=900 , plot_height=300)
p4.circle( x=TEST_PREDICTION[0,0,:] , y=TEST_Y[0,0,:] , color='red' , alpha=0.5)

# Share one x-range object so panning/zooming p1, p2 and p3 stays in sync.
p1.x_range = p2.x_range = p3.x_range

show(column(p1,p2,p3,p4))

In [None]:
# Directional accuracy on the training windows, one line per horizon step:
# the share of windows where fitted and actual RETURN (predictor 0) have the
# same sign.
train_sign_match = (FITTED > 0) == (TRAIN_Y > 0)
for step in range(horizon):
    matches = train_sign_match[:, 0, step].astype(int)
    print(matches.sum() / TRAIN_Y.shape[0])

In [None]:
# Directional accuracy on the test windows, one line per horizon step:
# the share of windows where predicted and actual RETURN (predictor 0) have
# the same sign.
test_sign_match = (TEST_PREDICTION > 0) == (TEST_Y > 0)
for step in range(horizon):
    matches = test_sign_match[:, 0, step].astype(int)
    print(matches.sum() / TEST_Y.shape[0])

In [None]:
# Element-wise relative error of the test forecast, (actual - predicted) / actual.
# NOTE(review): entries where TEST_Y is exactly 0 divide by zero and come out
# as inf/nan; the full array is displayed.
np.divide(TEST_Y - TEST_PREDICTION, TEST_Y)