In [211]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM


%matplotlib inline

In [212]:
spx = pd.read_csv("../data/transformed_data.csv",index_col=0,parse_dates=True)

In [214]:
spx.tail()

Unnamed: 0_level_0,changeClose,changeHC,changeLC,nextchange
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-30,0.009819,0.014038,0.002296,0.001568
2019-12-07,0.001568,0.003058,-0.02275,0.00725
2019-12-14,0.00725,0.01162,-0.00632,0.016407
2019-12-21,0.016407,0.017781,0.004669,0.005819
2019-12-28,0.005819,0.008258,-0.00022,-0.002856


In [221]:
xdata = spx.iloc[:,:-1]
ydata = spx.iloc[:,-1]

In [222]:
print(xdata.shape)
print(ydata.shape)

(2608, 3)
(2608,)


In [224]:
train_size = int(len(spx)*.8)
test_size = len(spx)-train_size

#change y's if multiple outputs
xtrain,xtest = xdata.iloc[0:train_size,:], xdata.iloc[train_size:len(spx),:]
ytrain,ytest = ydata.iloc[0:train_size], ydata.iloc[train_size:len(spx)]

In [227]:
print(len(xtrain),len(xtest))
print(len(ytrain),len(ytest))

2086 522
2086 522


In [85]:
scalerX = StandardScaler()
scalerY = StandardScaler()

In [241]:
xtrain_sc = scalerX.fit_transform(xtrain.to_numpy())
xtest_sc = scalerX.transform(xtest.to_numpy())
ytrain_sc = scalerY.fit_transform(ytrain.to_numpy().reshape(-1,1))
ytest_sc = scalerY.transform(ytest.to_numpy().reshape(-1,1))

In [199]:
def lstm_data_transform(x_data, y_data, num_steps=5):
    """ Changes data to the format for LSTM training 
    for sliding window approach """
    # Prepare the list for the transformed data
    X, y = list(), list()
    # Loop of the entire data set
    for i in range(x_data.shape[0]):
        # compute a new (sliding window) index
        end_ix = i + num_steps
        # if index is larger than the size of the dataset, we stop
        if end_ix >= x_data.shape[0]:
            break
        # Get a sequence of data for x
        seq_X = x_data[i:end_ix]
        # Get only the last element of the sequency for y
        seq_y = y_data[end_ix]
        # Append the list with sequencies
        X.append(seq_X)
        y.append(seq_y)
    # Make final arrays
    x_array = np.array(X)
    y_array = np.array(y)
    return x_array, y_array

In [270]:
num_steps = 10

xtrain_transformed, ytrain_transformed = lstm_data_transform(xtrain_sc, ytrain_sc, num_steps=num_steps)
xtest_transformed, ytest_transformed = lstm_data_transform(xtest_sc, ytest_sc, num_steps=num_steps)

In [306]:
model = Sequential()
model.add(LSTM(4, input_shape=(num_steps, 3),return_sequences=False))
# model.add(Dense(3,activation='relu'))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

In [307]:
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_18 (LSTM)               (None, 4)                 128       
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 5         
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [297]:
history = model.fit(xtrain_transformed, ytrain_transformed, epochs=100, batch_size=16, verbose=2)

Epoch 1/100
130/130 - 1s - loss: 0.7316
Epoch 2/100
130/130 - 1s - loss: 0.7304
Epoch 3/100
130/130 - 1s - loss: 0.7284
Epoch 4/100
130/130 - 1s - loss: 0.7275
Epoch 5/100
130/130 - 1s - loss: 0.7270
Epoch 6/100
130/130 - 1s - loss: 0.7274
Epoch 7/100
130/130 - 1s - loss: 0.7254
Epoch 8/100
130/130 - 1s - loss: 0.7264
Epoch 9/100
130/130 - 1s - loss: 0.7256
Epoch 10/100
130/130 - 1s - loss: 0.7248
Epoch 11/100
130/130 - 1s - loss: 0.7242
Epoch 12/100
130/130 - 1s - loss: 0.7229
Epoch 13/100
130/130 - 1s - loss: 0.7211
Epoch 14/100
130/130 - 1s - loss: 0.7205
Epoch 15/100
130/130 - 1s - loss: 0.7183
Epoch 16/100
130/130 - 1s - loss: 0.7181
Epoch 17/100
130/130 - 1s - loss: 0.7160
Epoch 18/100
130/130 - 1s - loss: 0.7159
Epoch 19/100
130/130 - 1s - loss: 0.7136
Epoch 20/100
130/130 - 1s - loss: 0.7106
Epoch 21/100
130/130 - 1s - loss: 0.7083
Epoch 22/100
130/130 - 1s - loss: 0.7098
Epoch 23/100
130/130 - 1s - loss: 0.7061
Epoch 24/100
130/130 - 1s - loss: 0.7042
Epoch 25/100
130/130 - 1s

In [298]:
preds = model.predict(xtest_transformed)

In [299]:
mean_squared_error(ytest_transformed,preds)

0.897892968168665

In [300]:
preds_inversed = scalerY.inverse_transform(preds)
ys_inversed = scalerY.inverse_transform(ytest_transformed)

In [301]:
ix = preds_inversed>0
print(ys_inversed.reshape(-1,1)[ix].mean()*52)
print(ys_inversed.reshape(-1,1)[~ix].mean()*52)

0.10278523384343731
0.10771020784085339


In [294]:
print(ys_inversed.reshape(-1,1)[ix].std()*np.sqrt(52))
print(ys_inversed.reshape(-1,1)[~ix].std()*np.sqrt(52))

0.13077360800011922
0.15819831909229967


In [319]:
########## try stateful method ##########
model2 = Sequential()
model2.add(LSTM(4, batch_input_shape=(1,num_steps, 3),stateful=True))
# model.add(Dense(3,activation='relu'))
model2.add(Dense(1))
model2.compile(loss='mae', optimizer='adam')

In [320]:
model2.summary()

Model: "sequential_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_21 (LSTM)               (1, 4)                    128       
_________________________________________________________________
dense_25 (Dense)             (1, 1)                    5         
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [321]:
for i in range(100):
    model2.fit(xtrain_transformed, ytrain_transformed, epochs=1, batch_size=1, shuffle=False, verbose=2)
    model2.reset_states()

2076/2076 - 8s - loss: 0.7299
2076/2076 - 8s - loss: 0.7285
2076/2076 - 8s - loss: 0.7281
2076/2076 - 8s - loss: 0.7279
2076/2076 - 8s - loss: 0.7278
2076/2076 - 7s - loss: 0.7275
2076/2076 - 7s - loss: 0.7272
2076/2076 - 8s - loss: 0.7267
2076/2076 - 8s - loss: 0.7262
2076/2076 - 8s - loss: 0.7264
2076/2076 - 8s - loss: 0.7264
2076/2076 - 8s - loss: 0.7257
2076/2076 - 8s - loss: 0.7254
2076/2076 - 8s - loss: 0.7250
2076/2076 - 7s - loss: 0.7252
2076/2076 - 7s - loss: 0.7249
2076/2076 - 7s - loss: 0.7245
2076/2076 - 7s - loss: 0.7243
2076/2076 - 7s - loss: 0.7239
2076/2076 - 7s - loss: 0.7239
2076/2076 - 7s - loss: 0.7235
2076/2076 - 7s - loss: 0.7225
2076/2076 - 7s - loss: 0.7221
2076/2076 - 8s - loss: 0.7211
2076/2076 - 8s - loss: 0.7226
2076/2076 - 8s - loss: 0.7218
2076/2076 - 8s - loss: 0.7216
2076/2076 - 8s - loss: 0.7215
2076/2076 - 8s - loss: 0.7196
2076/2076 - 8s - loss: 0.7183
2076/2076 - 8s - loss: 0.7193
2076/2076 - 8s - loss: 0.7189
2076/2076 - 8s - loss: 0.7173
2076/2076 

In [324]:
preds2 = model2.predict(xtest_transformed,batch_size=1)

In [325]:
preds_inversed2 = scalerY.inverse_transform(preds2)
ix = preds_inversed2>0
print(ys_inversed.reshape(-1,1)[ix].mean()*52)
print(ys_inversed.reshape(-1,1)[~ix].mean()*52)

0.11771821231138269
0.04320382910502953
