In [2]:
from iexfinance import get_historical_data
import pandas as pd
from keras.models import Sequential
from keras.layers import Activation, Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, f1_score, confusion_matrix, r2_score, mean_squared_error, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
import plotly
import timeit
import random
random.seed(7)

In [7]:
def get_iex_data(stock_list, start, end):
    """Fetch price history for every symbol and return one DataFrame per symbol.

    For each entry of ``stock_list`` this calls
    ``get_historical_data(symbol, start, end, output_format='pandas')``, wraps
    the result in a ``pd.DataFrame``, runs ``interpolate()`` with its default
    arguments and adds a ``'ticker'`` column holding the symbol.

    Parameters
    ----------
    stock_list : iterable
        Symbols to download, processed in iteration order.
    start, end
        Passed straight through to ``get_historical_data``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per symbol, in the same order as ``stock_list``.
    """
    frames = []
    for symbol in stock_list:
        history = get_historical_data(symbol, start, end, output_format='pandas')
        prices = pd.DataFrame(history).interpolate()
        # Tag every row so the frame still identifies its symbol downstream.
        prices['ticker'] = symbol
        frames.append(prices)
    return frames

def lstm_clean_data(data):
    """Normalise each price frame and attach the next-row regression target.

    Every frame in ``data`` is replaced, in place in the list, by a version
    that:

    * has its index moved into columns and rows containing NaN dropped,
    * has the ``'date'`` column parsed with ``pd.to_datetime`` and set as
      the index,
    * gains a ``'Reg_Target'`` column equal to ``'close'`` shifted up by one
      row (so the last row's target is NaN).

    Parameters
    ----------
    data : list of pandas.DataFrame
        Frames whose index resets to a ``'date'`` column and which contain a
        ``'close'`` column.

    Returns
    -------
    list of pandas.DataFrame
        The same list object that was passed in, with its elements replaced.
    """
    for pos, frame in enumerate(data):
        cleaned = frame.reset_index().dropna()
        cleaned['date'] = pd.to_datetime(cleaned['date'])
        cleaned = cleaned.set_index('date')
        # Target for row t is the close of row t+1.
        cleaned['Reg_Target'] = cleaned['close'].shift(-1)
        data[pos] = cleaned
    return data

def add_past(etf_list, times):
    """Add lagged close-price difference columns to every frame, in place.

    For each lag ``n`` in ``times`` a column named ``'{n}day return'`` is
    written to each frame. Its value is the negated ``n``-row difference of
    ``'close'`` rounded to 3 decimals, i.e. ``close[t-n] - close[t]``. Note
    that this is a price difference, not a percentage return.

    Parameters
    ----------
    etf_list : list of pandas.DataFrame
        Frames containing a ``'close'`` column; each frame is modified.
    times : iterable of int
        Lags, in rows, to compute.

    Returns
    -------
    list of pandas.DataFrame
        The same list object that was passed in.
    """
    for frame in etf_list:
        for lag in times:
            column = '{}day return'.format(lag)
            frame[column] = -frame['close'].diff(periods=lag).round(3)
    return etf_list

def lstm_time_test_split(X, n_past, date):
    """Split one ticker's frame by date and scale it into LSTM-shaped arrays.

    Rows with ``'date' < date`` form the training set. Of the remaining rows,
    all but the last form the test set and the last row alone is the holdout
    (its ``'Reg_Target'`` is not used). Both scalers are fitted on training
    rows only and then applied to the test/holdout rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame that, after ``reset_index()``, has the columns ``'date'``,
        ``'ticker'``, ``'Reg_Target'``, ``'1day return'``, ``'5day return'``,
        ``'21day return'`` and ``'252day return'``; every other column is
        used as a model feature.
    n_past : int
        Size of the middle (timestep) axis of the reshaped feature arrays.
        The arrays are reshaped from ``(rows, features)`` to
        ``(rows, n_past, features)`` without adding data, so the reshape
        only succeeds when ``n_past == 1``.
    date
        Split boundary compared against the ``'date'`` column.

    Returns
    -------
    tuple
        ``(ticker, x_train, x_test, x_holdout, y_train, y_test, scaler,
        y_scaler)`` where ``ticker`` is the first value of the ``'ticker'``
        column, ``scaler`` is the fitted feature scaler and ``y_scaler`` the
        fitted target scaler.
    """
    X = X.reset_index()
    scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    ticker = X['ticker'].iloc[0]

    # Everything that is not a model input: identifiers, the target, and the
    # lagged-difference columns.
    non_features = ['date', 'Reg_Target', 'ticker', '1day return', '5day return', '21day return', '252day return']
    before_split = X[X['date'] < date]
    after_split = X[X['date'] >= date]

    def as_sequences(features):
        # Scale with the train-fitted scaler, then add the timestep axis.
        scaled = scaler.transform(features)
        return np.reshape(scaled, (scaled.shape[0], n_past, scaled.shape[1]))

    train_features = before_split.drop(columns=non_features)
    scaler.fit(train_features)
    later_features = after_split.drop(columns=non_features)
    x_train = as_sequences(train_features)
    # The final post-split row has no next-day target, so it is held out.
    x_test = as_sequences(later_features[:-1])

    y_train = np.array(before_split['Reg_Target']).ravel().astype('float').reshape(-1, 1)
    y_scaler.fit(y_train)
    y_train = y_scaler.transform(y_train)
    y_test = np.array(after_split['Reg_Target']).ravel().astype('float')[:-1].reshape(-1, 1)
    y_test = y_scaler.transform(y_test)

    x_holdout = as_sequences(later_features[-1:])
    return ticker, x_train, x_test, x_holdout, y_train, y_test, scaler, y_scaler

def build_step_model(x_train, y_train, epoc):
    """Build, compile and fit the two-layer LSTM regressor.

    The network is LSTM(50, returning sequences) -> Dropout(0.2) ->
    LSTM(100) -> Dropout(0.2) -> Dense(1) -> ReLU, compiled with MSE loss and
    the Adam optimizer.

    Parameters
    ----------
    x_train : array
        3-D training features; axes 1 and 2 define the LSTM ``input_shape``.
    y_train : array
        Training targets.
    epoc : int
        Number of training epochs.

    Returns
    -------
    tuple
        ``(model, history)`` - the fitted model and the object returned by
        ``model.fit``.
    """
    timesteps, n_features = x_train.shape[1], x_train.shape[2]
    stack = [
        LSTM(50, input_shape=(timesteps, n_features), return_sequences=True),
        Dropout(0.2),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(1),
        Activation('relu'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')
    # shuffle=False keeps the samples in their original (chronological) order.
    history = model.fit(
        x_train,
        y_train,
        epochs=epoc,
        batch_size=64,
        validation_split=.1,
        verbose=2,
        shuffle=False,
    )
    return model, history

def yield_preds(model, scaler, x_test, x_holdout, y_test):
    """Predict on the test and holdout inputs and undo the target scaling.

    Parameters
    ----------
    model
        Object exposing ``predict(x)``.
    scaler
        Object exposing ``inverse_transform(values)``; applied to the raw
        model outputs.
    x_test
        Inputs for the test-period predictions.
    x_holdout
        Inputs for the single forward-looking prediction.
    y_test
        Unused. Kept only so existing callers that pass it keep working; the
        previous implementation inverse-transformed it into a local that was
        never read.

    Returns
    -------
    tuple
        ``(preds, today_pred)`` - inverse-transformed predictions for
        ``x_test`` and for ``x_holdout`` respectively.
    """
    preds = scaler.inverse_transform(model.predict(x_test))
    today_pred = scaler.inverse_transform(model.predict(x_holdout))
    return preds, today_pred

def run_all_lstms(data, split, epoc):
    """Train one LSTM per ticker frame and collect its predictions.

    For each frame in ``data`` the frame is split at ``split`` (with a single
    timestep), a model is fitted for ``epoc`` epochs, and the
    inverse-scaled test predictions and holdout prediction are stored under
    the frame's ticker. Progress and total wall-clock time are printed.

    Parameters
    ----------
    data : list of pandas.DataFrame
        Per-ticker frames as accepted by ``lstm_time_test_split``.
    split
        Train/test boundary forwarded to ``lstm_time_test_split``.
    epoc : int
        Number of epochs forwarded to ``build_step_model``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(out, tomorrow)``. ``out`` has one column of test-period
        predictions per ticker, indexed by the last ``len(out)`` index
        entries of ``data[0]``. ``tomorrow`` has one column per ticker
        holding the holdout prediction. Both are empty frames when ``data``
        is empty.
    """
    out = pd.DataFrame()
    tomorrow = pd.DataFrame()
    start = timeit.default_timer()
    for i, frame in enumerate(data):
        (ticker, x_train, x_test, x_holdout,
         y_train, y_test, _feature_scaler, y_scaler) = lstm_time_test_split(frame, 1, split)
        print('Model #: {}'.format(i))
        model, _history = build_step_model(x_train, y_train, epoc)
        preds, future = yield_preds(model, y_scaler, x_test, x_holdout, y_test)
        out[ticker] = preds.flatten()
        tomorrow[ticker] = future.flatten()
    # Guard: with no input frames there is no data[0] to take an index from
    # (the unguarded lookup raised IndexError).
    if len(out) > 0:
        # The index is taken from the first frame only, so every ticker is
        # assumed to share that frame's trailing dates.
        out = out.set_index(data[0][-len(out):].index)
    stop = timeit.default_timer()
    print('Time: ', stop - start)
    return out, tomorrow
# Tickers to download and model; also feeds the plot selector further down.
etf_list = [
    'SPY', 'IVV', 'VTI', 'VOO', 'QQQ',
    'VEA', 'EFA', 'IEFA', 'VWO', 'AGG',
    'IJH', 'IEMG', 'IWM', 'IJR', 'VTV',
    'IWF', 'IWD', 'VUG', 'BND', 'LQD',
]
# NOTE(review): start/end are None - presumably the data library's default
# date range; confirm against the iexfinance documentation.
data = get_iex_data(etf_list, None, None)
# lstm_clean_data and add_past both modify the list they are given and
# return it, so `clean_full` and `data` end up naming the same list.
clean_full = lstm_clean_data(data)
data = add_past(clean_full, [1, 5, 21, 252])

In [8]:
# Wide table of closing prices: one column per ticker, named after the first
# value of that frame's 'ticker' column.
# NOTE(review): pandas aligns each assigned Series to the frame's existing
# index, so the first ticker's dates would define the rows - confirm all
# tickers share the same dates.
df = pd.DataFrame()
for i in range(len(data)):
    df[data[i]['ticker'].iloc[0]] = data[i]['close']

In [9]:
# Naive baseline: each row's forecast is the previous row's close
# (the first row therefore has no forecast).
naive = df.shift(1)

In [10]:
# Fit one model per ticker: split train/test at '09-2018', 12 epochs each.
# mapframe_preds holds the test-period predictions, future_preds the
# holdout prediction per ticker.
mapframe_preds, future_preds = run_all_lstms(data, '09-2018', 12)

Model #: 0
Train on 831 samples, validate on 93 samples
Epoch 1/12
 - 2s - loss: 0.1311 - val_loss: 0.4998
Epoch 2/12
 - 0s - loss: 0.0643 - val_loss: 0.2724
Epoch 3/12
 - 0s - loss: 0.0207 - val_loss: 0.0924
Epoch 4/12
 - 0s - loss: 0.0130 - val_loss: 0.0268
Epoch 5/12
 - 0s - loss: 0.0167 - val_loss: 0.0251
Epoch 6/12
 - 0s - loss: 0.0130 - val_loss: 0.0277
Epoch 7/12
 - 0s - loss: 0.0103 - val_loss: 0.0181
Epoch 8/12
 - 0s - loss: 0.0068 - val_loss: 0.0078
Epoch 9/12
 - 0s - loss: 0.0057 - val_loss: 0.0048
Epoch 10/12
 - 0s - loss: 0.0042 - val_loss: 0.0026
Epoch 11/12
 - 0s - loss: 0.0031 - val_loss: 7.1386e-04
Epoch 12/12
 - 0s - loss: 0.0024 - val_loss: 4.4506e-04
Model #: 1
Train on 831 samples, validate on 93 samples
Epoch 1/12
 - 2s - loss: 0.1281 - val_loss: 0.5014
Epoch 2/12
 - 0s - loss: 0.0628 - val_loss: 0.2744
Epoch 3/12
 - 0s - loss: 0.0204 - val_loss: 0.0989
Epoch 4/12
 - 0s - loss: 0.0161 - val_loss: 0.0373
Epoch 5/12
 - 0s - loss: 0.0197 - val_loss: 0.0381
Epoch 6/12

Epoch 2/12
 - 0s - loss: 0.0986 - val_loss: 0.3883
Epoch 3/12
 - 0s - loss: 0.0313 - val_loss: 0.1407
Epoch 4/12
 - 0s - loss: 0.0103 - val_loss: 0.0319
Epoch 5/12
 - 0s - loss: 0.0162 - val_loss: 0.0331
Epoch 6/12
 - 0s - loss: 0.0136 - val_loss: 0.0459
Epoch 7/12
 - 0s - loss: 0.0096 - val_loss: 0.0316
Epoch 8/12
 - 0s - loss: 0.0081 - val_loss: 0.0180
Epoch 9/12
 - 0s - loss: 0.0067 - val_loss: 0.0120
Epoch 10/12
 - 0s - loss: 0.0056 - val_loss: 0.0090
Epoch 11/12
 - 0s - loss: 0.0040 - val_loss: 0.0042
Epoch 12/12
 - 0s - loss: 0.0035 - val_loss: 0.0015
Model #: 13
Train on 831 samples, validate on 93 samples
Epoch 1/12
 - 5s - loss: 0.1198 - val_loss: 0.5225
Epoch 2/12
 - 0s - loss: 0.0563 - val_loss: 0.2816
Epoch 3/12
 - 0s - loss: 0.0143 - val_loss: 0.0900
Epoch 4/12
 - 0s - loss: 0.0117 - val_loss: 0.0339
Epoch 5/12
 - 0s - loss: 0.0149 - val_loss: 0.0445
Epoch 6/12
 - 0s - loss: 0.0119 - val_loss: 0.0467
Epoch 7/12
 - 0s - loss: 0.0091 - val_loss: 0.0319
Epoch 8/12
 - 0s - los

## Forecasting with LSTM

In [11]:
def lstm_plot(ETFs):
    """Plot true, naive and LSTM-predicted prices for one ticker with metrics.

    Reads the notebook-level frames ``df`` (true closes), ``naive`` (shifted
    closes) and ``mapframe_preds`` (LSTM predictions). RMSE, MAE and R-squared
    are computed for the naive baseline and for the LSTM over the dates in
    ``mapframe_preds.index`` and shown in the legend through an invisible
    trace.

    Parameters
    ----------
    ETFs : str
        Ticker to plot; must be a column of ``df``, ``naive`` and
        ``mapframe_preds``.

    Returns
    -------
    None
        The figure is rendered with ``iplot``.
    """
    test_dates = mapframe_preds.index
    actual = df[ETFs].loc[test_dates].values
    naive_vals = naive[ETFs].loc[test_dates].values
    lstm_vals = mapframe_preds[ETFs].values

    # The legend labels these values "RMSE", so take the square root of the
    # MSE (previously the plain MSE was reported under that label).
    nav_rmse = round(np.sqrt(mean_squared_error(actual, naive_vals)), 3)
    nav_mae = round(mean_absolute_error(actual, naive_vals), 3)
    nav_r2 = round(r2_score(actual, naive_vals), 3)
    rmse = round(np.sqrt(mean_squared_error(actual, lstm_vals)), 3)
    mae = round(mean_absolute_error(actual, lstm_vals), 3)
    r2 = round(r2_score(actual, lstm_vals), 3)

    true = go.Scatter(x=df.index, y=df[ETFs].values, mode = 'markers', name = 'True Value')
    pred = go.Scatter(x=test_dates, y=lstm_vals, mode = 'markers', name = 'Prediction')
    nav = go.Scatter(x=test_dates, y=naive_vals, mode = 'markers', name = 'Naive')
    # Invisible trace whose only purpose is to carry the metrics text into the
    # legend. Arguments follow the label order RMSE, R-Squared, MAE for both
    # groups (the naive R-Squared and MAE used to be passed in swapped order).
    fake = go.Scatter(x=['07-2018'], y=df[ETFs].values, opacity = 0, name = '<br>Naive Metrics:<br>RMSE: {}<br>R-Squared: {}<br>MAE: {}<br><br>LSTM Metrics:<br>RMSE: {}<br>R-Squared: {}<br>MAE: {}'.format(nav_rmse,nav_r2,nav_mae,rmse,r2,mae))
    trace = [true, nav, pred, fake]
    layout = dict(title = "{} Prices".format(ETFs), xaxis = dict(range = ['2018-09-01','2018-10-04']), yaxis=dict(autorange=True, showgrid=True))
    fig = dict(data=trace, layout=layout)
    iplot(fig)
# Interactive view: lstm_plot is re-run for whichever ticker from etf_list is
# selected in the widget.
interact(lstm_plot, ETFs=etf_list)

<function __main__.lstm_plot>