In [9]:
import os
import pandas as pd
import datetime as dt
import numpy as np
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout
from pandas import Series
import matplotlib.pyplot as plt

In [4]:
# Make the ETF data folder the process working directory; the loading cell
# below lists and reads files relative to '.'.
# NOTE(review): the path is relative, so running this cell a second time in
# the same kernel resolves it against the already-changed working directory.
os.chdir('data/nyse_etfs/')

In [5]:
# Collect the data files in the current directory. os.listdir() returns names
# in arbitrary order and also returns sub-directories, so the listing is
# filtered to regular, non-hidden files and sorted; this keeps positions in
# nyse_data (e.g. nyse_data[0]) stable from run to run.
nyse_files = sorted(
    entry for entry in os.listdir('.')
    if os.path.isfile(entry) and not entry.startswith('.')
)

# One DataFrame per file, tagged with the label taken from its file name
nyse_data = []
for filename in nyse_files:
    df = pd.read_csv(filename, sep=',')
    # The label is the part of the file name before the first dot. Taking the
    # first piece (instead of unpacking exactly three pieces) also works for
    # names with fewer or more dots.
    label = filename.split('.', 1)[0]
    df['Label'] = label
    df['Date'] = pd.to_datetime(df['Date'])
    nyse_data.append(df)

In [23]:
def _make_windows(frame, window_len, target_col='Close'):
    """Build normalised look-back windows and their targets from a frame.

    Each window is ``window_len`` consecutive rows of ``frame`` in which every
    column is expressed as the relative change from that window's first row
    (value / first_value - 1). The target paired with a window is the relative
    change of ``target_col`` between the window's first row and the row that
    immediately follows the window.

    Parameters
    ----------
    frame : pandas.DataFrame
        Numeric columns only; must contain ``target_col``.
    window_len : int
        Number of rows per window.
    target_col : str
        Column whose relative change is the prediction target.

    Returns
    -------
    (inputs, outputs) : tuple of numpy.ndarray
        ``inputs`` has shape (n_windows, window_len, n_columns) and
        ``outputs`` has shape (n_windows,), where
        n_windows = max(len(frame) - window_len, 0).
    """
    n_windows = max(len(frame) - window_len, 0)
    if n_windows == 0:
        # Keep a 3-D shape even when the frame is too short for one window
        return np.empty((0, window_len, frame.shape[1])), np.empty(0)

    windows = []
    for start in range(n_windows):
        window = frame.iloc[start:start + window_len]
        # DataFrame / Series aligns on column names, so every column is
        # divided by its own value in the window's first row.
        windows.append((window / window.iloc[0] - 1).values)

    target = frame[target_col].values
    outputs = target[window_len:] / target[:-window_len] - 1
    return np.stack(windows), outputs


df = nyse_data[0]
window_len = 10
# Date that splits training from testing: the first of the last
# 2*window_len + 1 rows (assumes rows are in ascending date order - TODO confirm)
split_date = df['Date'].iloc[-(2 * window_len + 1):].iloc[0]

# Split the training and test set, keeping only the numeric feature columns.
# columns= is used because pandas 2.0 no longer accepts a positional axis.
non_feature_cols = ['Date', 'Label', 'OpenInt']
training_set = df[df['Date'] < split_date].drop(columns=non_feature_cols)
test_set = df[df['Date'] >= split_date].drop(columns=non_feature_cols)

# Create windows for training
LSTM_training_inputs, LSTM_training_outputs = _make_windows(training_set, window_len)

# Create windows for testing
LSTM_test_inputs, LSTM_test_outputs = _make_windows(test_set, window_len)

In [22]:
# Sanity check of the test tensor's dimensions:
# (number of windows, rows per window, feature columns per row)
LSTM_test_inputs.shape

(11, 10, 5)

In [17]:
def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.40, loss="mae", optimizer="adam"):
    """Assemble and compile a single-layer LSTM network.

    The stack is LSTM -> Dropout -> Dense -> Activation. The LSTM's input
    shape is read from axes 1 and 2 of ``inputs``; the array's contents are
    not used.

    Parameters
    ----------
    inputs : array with at least three axes; ``inputs.shape[1]`` and
        ``inputs.shape[2]`` define the per-sample input shape.
    output_size : number of units in the final Dense layer.
    neurons : number of units in the LSTM layer.
    activ_func : activation applied after the Dense layer.
    dropout : rate passed to the Dropout layer.
    loss, optimizer : forwarded to ``model.compile``.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    sample_shape = (inputs.shape[1], inputs.shape[2])
    layer_stack = [
        LSTM(neurons, input_shape=sample_shape),
        Dropout(dropout),
        Dense(units=output_size),
        Activation(activ_func),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer=optimizer, loss=loss)
    return network

In [24]:
# Build a 32-unit LSTM network with one output, sized from the training windows
model = build_model(
    LSTM_training_inputs,
    output_size=1,
    neurons=32,
)
# Train for 10 epochs with one sample per batch, reshuffling each epoch;
# verbose=2 logs a single line per epoch.
history = model.fit(
    LSTM_training_inputs,
    LSTM_training_outputs,
    epochs=10,
    batch_size=1,
    verbose=2,
    shuffle=True,
)

Epoch 1/10
 - 16s - loss: 0.0610
Epoch 2/10
 - 18s - loss: 0.0195
Epoch 3/10
 - 15s - loss: 0.0145
Epoch 4/10
 - 15s - loss: 0.0127
Epoch 5/10
 - 15s - loss: 0.0125
Epoch 6/10
 - 13s - loss: 0.0120
Epoch 7/10
 - 13s - loss: 0.0118
Epoch 8/10
 - 13s - loss: 0.0118
Epoch 9/10
 - 15s - loss: 0.0115
Epoch 10/10
 - 15s - loss: 0.0114


In [14]:
# Print the layer-by-layer summary (output shapes and parameter counts) of
# the model attached to the training history
history.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                4864      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
_________________________________________________________________
activation_1 (Activation)    (None, 1)                 0         
Total params: 4,897
Trainable params: 4,897
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Predict the relative close-price change for every held-out test window.
# NOTE(review): the original cell was an unfinished `model.pr` expression,
# which raises AttributeError when the notebook is run top to bottom;
# completing it to predict() on the test windows is the presumed intent.
model.predict(LSTM_test_inputs)