In [2]:
from datetime import datetime
from iexfinance.stocks import get_historical_data
import pandas as pd
from keras.models import Sequential
from keras.layers import Activation, Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, f1_score, confusion_matrix, r2_score, mean_squared_error, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
import plotly
import timeit
import random
# Seed NumPy as well as Python's `random`: nothing below draws from `random`,
# whereas the array/model code is NumPy-based.
# NOTE(review): the TensorFlow backend keeps its own RNG, which is not seeded
# here -- confirm whether bit-for-bit reproducible training is required.
random.seed(7)
np.random.seed(7)

Using TensorFlow backend.


In [3]:
# etf_list = ['SPY','IVV','VTI','VOO','QQQ','VEA','EFA','IEFA','VWO','AGG','IJH','IEMG','IWM','IJR','VTV','IWF','IWD','VUG','BND','LQD','XLF','VNQ','VIG','EEM','GLD','VB','BSV','VO','TIP','VEU','IVW','DIA','XLK','VYM','VGT','VCSH','MDY','IWB','VCIT','XLV','IWR','XLE','DVY','USMV','EWJ','VGK','PFF','SCHF','SDY','RSP','XLY','ITOT','IVE','SCHX','HYG','SHV','VBR','EMB','SHY','VV','SCHB','XLI','BIV','VT','MBB','BNDX','IWS','VXUS','FLOT','IWO','IXUS','MINT','SCZ','PYZ','MXI','IYM','IXP','RXI','VCR','RHS','VDC','PXI','PXE','IEO','RYF','IYG','KIE','FBT','PTH','IHI','ITA','VIS','ICF','REZ','RWR','PSJ','IGV','RYU','IDU','VPU']
# data = pd.read_csv('stats_100_etfs.csv')
# data
# Tickers to download and model. Only SPY is active; the longer ETF list is
# kept commented out above.
etf_list = ['SPY']

In [4]:
def get_iex_data(stock_list, start=datetime(2015,1,1), end=datetime(2019,12,31), token=None):
    """Download historical prices from IEX for each ticker in ``stock_list``.

    Parameters
    ----------
    stock_list : iterable of str
        Ticker symbols to fetch.
    start, end : datetime
        Date range forwarded to ``get_historical_data``.
    token : str, optional
        IEX API token. When omitted, the ``IEX_TOKEN`` environment variable
        is used, so the credential never has to live in the notebook.

    Returns
    -------
    list of pandas.DataFrame
        One frame per ticker, in input order, with gaps interpolated and a
        ``'ticker'`` column holding the symbol.

    Raises
    ------
    ValueError
        If no token is passed and ``IEX_TOKEN`` is not set.
    """
    import os  # local import keeps this cell self-contained

    # SECURITY: a publishable key used to be hard-coded here. Any key that was
    # committed should be treated as leaked and rotated.
    api_token = token or os.environ.get('IEX_TOKEN')
    if not api_token:
        raise ValueError(
            "No IEX API token available: pass token=... or set the IEX_TOKEN "
            "environment variable."
        )

    frames = []
    for symbol in stock_list:
        history = get_historical_data(symbol, start, end, output_format='pandas', token=api_token)
        frame = pd.DataFrame(history).interpolate()
        frame['ticker'] = symbol
        frames.append(frame)
    return frames

def lstm_clean_data(data):
    """Prepare each price frame for modelling, replacing the list entries in place.

    For every frame: move the index into a column, drop rows containing NaN,
    parse ``'date'`` as datetimes and use it as the index, then add
    ``'Reg_Target'`` -- the ``'close'`` value of the following row (NaN on the
    last row).

    Returns the same list object that was passed in.
    """
    for position, frame in enumerate(data):
        cleaned = (
            frame.reset_index()
            .dropna()
            .assign(date=lambda d: pd.to_datetime(d['date']))
            .set_index('date')
        )
        # Regression target: next row's close.
        data[position] = cleaned.assign(Reg_Target=lambda d: d['close'].shift(-1))
    return data

def add_past(etf_list, times):
    """Add lagged close-price difference columns to every frame, in place.

    For each ``n`` in ``times`` a column named ``'<n>day return'`` is added.
    Its value is the *negated* ``n``-row difference of ``'close'`` rounded to
    three decimals, i.e. ``-(close[t] - close[t-n])``; the first ``n`` rows
    are NaN.

    Returns the same list (and the same frame objects) it was given.
    """
    for frame in etf_list:
        closes = frame['close']
        for horizon in times:
            column = '{}day return'.format(horizon)
            frame[column] = -(closes.diff(periods=horizon).round(3))
    return etf_list

def _scale_for_lstm(scaler, features, n_past):
    """Scale ``features`` with a fitted scaler and reshape to (rows, n_past, n_features)."""
    scaled = scaler.transform(features)
    return np.reshape(scaled, (scaled.shape[0], n_past, scaled.shape[1]))


def lstm_time_test_split(X, n_past, date):
    """Split one ticker's frame chronologically and scale it for the LSTM.

    Rows dated before ``date`` are the training set; rows on/after ``date``
    are the test set, except the final row, which is returned separately as
    the hold-out (its ``'Reg_Target'`` is not used).

    Parameters
    ----------
    X : pandas.DataFrame
        Frame indexed by ``'date'`` containing ``'ticker'``, ``'Reg_Target'``
        and the four ``'<n>day return'`` columns (n = 1, 5, 21, 252); every
        remaining column is used as a feature.
    n_past : int
        Time-step dimension of the LSTM input. Only ``1`` is supported,
        because each row is reshaped into a single-step sequence.
    date : str or datetime-like
        Train/test boundary, compared against the ``'date'`` column.

    Returns
    -------
    tuple
        ``(ticker, x_train, x_test, x_holdout, y_train, y_test, scaler,
        y_scaler)``; both scalers are fitted on training rows only.

    Raises
    ------
    ValueError
        If ``n_past`` is not 1 (previously this surfaced as an opaque
        reshape error).
    """
    if n_past != 1:
        raise ValueError(
            "lstm_time_test_split only supports n_past=1; got {!r}".format(n_past)
        )

    X = X.reset_index()
    ticker = X['ticker'].iloc[0]

    # Columns that must not be fed to the model as features.
    non_feature_cols = ['date', 'Reg_Target', 'ticker', '1day return', '5day return', '21day return', '252day return']

    train_rows = X[X['date'] < date]
    test_rows = X[X['date'] >= date]
    train_features = train_rows.drop(columns=non_feature_cols)
    test_features = test_rows.drop(columns=non_feature_cols)

    # Fit on training rows only so no test information leaks into the scaling.
    scaler = MinMaxScaler()
    scaler.fit(train_features)
    x_train = _scale_for_lstm(scaler, train_features, n_past)
    x_test = _scale_for_lstm(scaler, test_features[:-1], n_past)
    x_holdout = _scale_for_lstm(scaler, test_features[-1:], n_past)

    # Targets get their own scaler so predictions can be inverse-transformed.
    y_scaler = MinMaxScaler()
    y_train = np.asarray(train_rows['Reg_Target'], dtype='float').reshape(-1, 1)
    y_scaler.fit(y_train)
    y_train = y_scaler.transform(y_train)
    # Drop the last test target to stay aligned with x_test.
    y_test = np.asarray(test_rows['Reg_Target'], dtype='float')[:-1].reshape(-1, 1)
    y_test = y_scaler.transform(y_test)

    return ticker, x_train, x_test, x_holdout, y_train, y_test, scaler, y_scaler

def build_step_model(x_train, y_train, epoc):
    """Build, compile and fit the two-layer LSTM regressor.

    Architecture: LSTM(50, returning sequences) -> Dropout(0.2) -> LSTM(100)
    -> Dropout(0.2) -> Dense(1) -> ReLU, trained with MSE loss and Adam.
    Fitting uses batches of 64, the last 10% of the samples for validation,
    and no shuffling.

    NOTE(review): the ReLU on the output clamps predictions to be >= 0.

    Returns ``(model, history)`` where ``history`` is the object returned by
    ``model.fit``.
    """
    input_shape = (x_train.shape[1], x_train.shape[2])
    layers = [
        LSTM(50, input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(1),
        Activation('relu'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')
    history = model.fit(
        x_train,
        y_train,
        epochs=epoc,
        batch_size=64,
        validation_split=.1,
        verbose=2,
        shuffle=False,
    )
    return model, history

def yield_preds(model, scaler, x_test, x_holdout, y_test):
    """Predict on the test and hold-out inputs and undo the target scaling.

    Parameters
    ----------
    model : object with ``predict``
        Fitted model.
    scaler : object with ``inverse_transform``
        Scaler that was fitted on the training targets.
    x_test, x_holdout : array-like
        Model inputs for the test rows and for the single hold-out row.
    y_test : array-like
        Unused; kept so existing callers do not break. (It was previously
        inverse-transformed into a local that was never read.)

    Returns
    -------
    tuple
        ``(preds, today_pred)`` -- predictions in the scaler's original units.
    """
    preds = scaler.inverse_transform(model.predict(x_test))
    today_pred = scaler.inverse_transform(model.predict(x_holdout))
    return preds, today_pred

def run_all_lstms(data, split, epoc):
    """Train one LSTM per frame in ``data`` and collect its predictions.

    Each frame is split at ``split``, a model is fitted for ``epoc`` epochs,
    and its test-set and hold-out predictions are stored under the frame's
    ticker. Test predictions are indexed by the trailing dates of
    ``data[0]``. Progress and total wall-clock time are printed.

    Returns ``(out, tomorrow)``: the test predictions and the single
    hold-out prediction per ticker, both as DataFrames.
    """
    test_predictions = pd.DataFrame()
    holdout_predictions = pd.DataFrame()
    started_at = timeit.default_timer()

    for model_number, frame in enumerate(data):
        (ticker, x_train, x_test, x_holdout,
         y_train, y_test, _feature_scaler, y_scaler) = lstm_time_test_split(frame, 1, split)
        print('Model #: {}'.format(model_number))
        model, _history = build_step_model(x_train, y_train, epoc)
        preds, future = yield_preds(model, y_scaler, x_test, x_holdout, y_test)
        test_predictions[ticker] = preds.flatten()
        holdout_predictions[ticker] = future.flatten()

    # Label predictions with the last len(predictions) dates of the first frame.
    trailing_dates = data[0][-len(test_predictions):].index
    test_predictions = test_predictions.set_index(trailing_dates)

    finished_at = timeit.default_timer()
    print('Time: ', finished_at - started_at)
    return test_predictions, holdout_predictions
# Download the price history for every ticker in etf_list.
data = get_iex_data(etf_list)
# Index by datetime and add the next-row close as 'Reg_Target'. The helper
# rewrites the list in place and returns it, so `clean_full` is `data`.
clean_full = lstm_clean_data(data)
# Add the 1/5/21/252-row lagged close-difference columns
# (presumably day/week/month/year in trading days -- confirm).
data = add_past(clean_full, [1, 5, 21, 252])

In [1]:
# # data
# comb = data[0]
# x=1
# while x != 100:
#     comb = comb.append(data[x])
#     x += 1
#     print (x)
# comb


NameError: name 'data' is not defined

In [6]:
# `comb` was only ever built in a commented-out cell, so this export raised
# NameError on a fresh kernel. Stack every per-ticker frame into one long
# table (works for any number of tickers) before writing it out.
comb = pd.concat(data)
comb.to_csv('ForeMet_100_etfs.csv', encoding='utf-8')

In [5]:
# Wide table of closing prices: one column per ticker, rows aligned to the
# index of the first frame that is inserted.
df = pd.DataFrame()
for frame in data:
    symbol = frame['ticker'].iloc[0]
    df[symbol] = frame['close']

In [8]:
# Persist the per-ticker closing prices to disk.
df.to_csv('eachetf.csv', encoding='utf-8')

In [6]:
# Naive baseline: each row's forecast is the previous row's close.
naive = df.shift(1)

In [7]:
# Display the baseline; the first row is NaN because nothing precedes it.
naive

Unnamed: 0_level_0,SPY
date,Unnamed: 1_level_1
2015-01-09,
2015-01-12,204.25
2015-01-13,202.65
2015-01-14,202.08
2015-01-15,200.86
...,...
2019-12-24,321.22
2019-12-26,321.23
2019-12-27,322.94
2019-12-30,322.86


In [9]:
# Train one LSTM per ticker for 100 epochs; rows dated on/after '11-2019'
# form the test set. `future_preds` holds the prediction for the final row.
mapframe_preds, future_preds = run_all_lstms(data, '11-2019', 100)

Model #: 0
Train on 1090 samples, validate on 122 samples
Epoch 1/100
 - 4s - loss: 0.1701 - val_loss: 0.4168
Epoch 2/100
 - 0s - loss: 0.0447 - val_loss: 0.0829
Epoch 3/100
 - 0s - loss: 0.0106 - val_loss: 0.0038
Epoch 4/100
 - 0s - loss: 0.0175 - val_loss: 0.0117
Epoch 5/100
 - 0s - loss: 0.0113 - val_loss: 0.0067
Epoch 6/100
 - 0s - loss: 0.0078 - val_loss: 0.0027
Epoch 7/100
 - 0s - loss: 0.0064 - val_loss: 0.0023
Epoch 8/100
 - 0s - loss: 0.0049 - val_loss: 9.4998e-04
Epoch 9/100
 - 0s - loss: 0.0039 - val_loss: 7.4846e-04
Epoch 10/100
 - 0s - loss: 0.0031 - val_loss: 5.6697e-04
Epoch 11/100
 - 0s - loss: 0.0029 - val_loss: 6.2229e-04
Epoch 12/100
 - 0s - loss: 0.0026 - val_loss: 6.5721e-04
Epoch 13/100
 - 0s - loss: 0.0028 - val_loss: 6.1850e-04
Epoch 14/100
 - 0s - loss: 0.0024 - val_loss: 7.5228e-04
Epoch 15/100
 - 0s - loss: 0.0024 - val_loss: 6.3289e-04
Epoch 16/100
 - 0s - loss: 0.0033 - val_loss: 0.0016
Epoch 17/100
 - 0s - loss: 0.0032 - val_loss: 7.9410e-04
Epoch 18/100
 

## Forecasting with LSTM

In [12]:
def lstm_plot(ETFs):
    """Plot true prices, naive forecasts and LSTM forecasts for one ticker.

    Reads the notebook-level frames ``df`` (closing prices), ``naive``
    (previous-row baseline) and ``mapframe_preds`` (LSTM test predictions).
    RMSE, R-squared and MAE are computed for both forecasts over the
    prediction dates and shown in the legend.

    Parameters
    ----------
    ETFs : str
        Ticker symbol; must be a column in all three frames.
    """
    def _metrics(y_true, y_pred):
        """Return (rmse, r2, mae), each rounded to three decimals."""
        # mean_squared_error returns MSE; take the root to report a real RMSE.
        rmse = round(mean_squared_error(y_true, y_pred) ** 0.5, 3)
        r2 = round(r2_score(y_true, y_pred), 3)
        mae = round(mean_absolute_error(y_true, y_pred), 3)
        return rmse, r2, mae

    pred_dates = mapframe_preds.index
    actual_vals = df.loc[pred_dates, ETFs].values
    naive_vals = naive.loc[pred_dates, ETFs].values
    lstm_vals = mapframe_preds[ETFs].values

    nav_rmse, nav_r2, nav_mae = _metrics(actual_vals, naive_vals)
    rmse, r2, mae = _metrics(actual_vals, lstm_vals)

    true = go.Scatter(x=df.index, y=df[ETFs].values, mode = 'markers', name = 'True Value')
    pred = go.Scatter(x=pred_dates, y=lstm_vals, mode = 'markers', name = 'Prediction')
    nav = go.Scatter(x=pred_dates, y=naive_vals, mode = 'markers', name = 'Naive')
    # Invisible trace whose legend entry carries the metric summary. The
    # arguments follow the label order in the template (RMSE, R-Squared, MAE).
    fake = go.Scatter(x=['10-2019'], y=df[ETFs].values, opacity = 0, name = '<br>Naive Metrics:<br>RMSE: {}<br>R-Squared: {}<br>MAE: {}<br><br>LSTM Metrics:<br>RMSE: {}<br>R-Squared: {}<br>MAE: {}'.format(nav_rmse,nav_r2,nav_mae,rmse,r2,mae))
    trace = [true, nav, pred, fake]
    # NOTE(review): the initial x-range ends in 2018, so predictions dated
    # later than that start off-screen -- confirm this is intended.
    layout = dict(title = "{} Prices".format(ETFs), xaxis = dict(range = ['2015-1-1','2018-12-31']), yaxis=dict(autorange=True, showgrid=True))
    fig = dict(data=trace, layout=layout)
    iplot(fig)
# Render the plot behind a dropdown listing the tickers in etf_list.
interact(lstm_plot, ETFs=etf_list)

interactive(children=(Dropdown(description='ETFs', options=('SPY',), value='SPY'), Output()), _dom_classes=('w…

<function __main__.lstm_plot(ETFs)>