In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.layers.core import Dense, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.utils import np_utils
from sqlalchemy import create_engine

# Fix the seed of NumPy's global random generator so NumPy-driven randomness
# repeats from run to run.
np.random.seed(7)
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
# Read data from MySQL.
# The SQLAlchemy connection URL can be supplied through the RESEARCH_DB_URL
# environment variable, so real credentials never have to be committed with the
# notebook. The previous hard-coded local URL is kept as the fallback so an
# existing local setup keeps working unchanged.
db_url = os.environ.get(
    'RESEARCH_DB_URL',
    'mysql+mysqlconnector://root:root@localhost/research_data',
)
conn = create_engine(db_url)
query = "select * from sptsx_data"
data_extract = pd.read_sql(query, conn)
data_extract.head()

In [None]:
# Turn into time series.
# Forward slashes resolve on Windows and POSIX alike; the previous
# back-slashed literal relied on the invalid escapes "\D" and "\T" and could
# only ever be found on Windows.
constituents = pd.read_csv('../Data/TSX_Constituents_05202019.csv')
tickers = list(constituents.Symbol)

# Symbols that already end in one of these suffixes are kept as they are;
# every other symbol gets a trailing '.' appended.
# NOTE(review): presumably this matches the format of the `tic` column — confirm.
class_suffixes = ('.UN', '.X', '.B', '.A')
symbol_list = [
    ticker if str(ticker).endswith(class_suffixes) else str(ticker) + '.'
    for ticker in tickers
]

# Keep only the constituent rows, then reshape to one row per `datadate` and
# one column per `tic`, holding the `prccd` value.
data_extract = data_extract[data_extract['tic'].isin(symbol_list)]
data_extract = data_extract.pivot(index='datadate', columns='tic', values='prccd')
data_extract.head()

In [None]:
# Fraction of the rows, counted from the start of the frame, used for training;
# the remaining rows form the test set.
train_size = 0.8

In [None]:
# Build the modelling frame from the pivoted prices: per-stock returns, a
# binary label per stock relative to the cross-sectional mean return, and
# returns standardised with training-period statistics.
dataset = data_extract.copy(deep=True)
dataset.index = pd.to_datetime(dataset.index)
# Drop every stock (column) that has at least one missing price.
dataset = dataset.dropna(axis='columns')

stock_list = dataset.columns

# Period-over-period returns; the first row has no predecessor, so drop it.
dataset = dataset.pct_change()
dataset = dataset[1:]

# Row position where the training period ends. Computed once and reused for
# both the standardisation statistics and the train/test split.
split_at = int(len(dataset) * train_size)

# Cross-sectional mean return of all stocks on each date.
dataset['mean'] = dataset.mean(axis=1)

# Iterate over the stocks only. Looping over `dataset.columns` also processed
# the helper 'mean' column, producing a constant 'mean_out' column and
# standardising 'mean' itself.
for c in stock_list:
    # Class 1 = return at or above the cross-sectional mean, class 0 = below.
    # The prediction cell stores softmax column 0 as '<stock>_dn' and column 1
    # as '<stock>_up', so the "up" outcome has to be class 1; the previous
    # (0, 1) ordering had the two classes swapped relative to those names.
    dataset[c + '_out'] = np.where(dataset[c] >= dataset['mean'], 1, 0)
    #eq 2 in the paper: standardise using training-period mean/std only
    train_returns = dataset[c][:split_at]
    dataset[c] = (dataset[c] - train_returns.mean()) / train_returns.std()

# Independent copies, so later cells can add columns to `testset` without
# writing into a slice of `dataset`.
trainset = dataset[:split_at].copy()
testset = dataset[split_at:].copy()

In [None]:
# Number of consecutive observations in each input window.
look_back = 240
# Size of the last axis of the input array built for the model.
step = 1

In [None]:
# Build the pooled training arrays. Every stock contributes one sample per
# target row: the `look_back` preceding values of its column (X) and the
# one-hot encoded '<stock>_out' label of the target row (y).
# The lists start with correctly shaped empty arrays, so the result has the
# same shape and dtype as before even when no stock contributes a sample.
X_parts = [np.empty((0, look_back, step))]
y_parts = [np.empty((0, 2))]

for stock in stock_list:
    # Column vector of shape (n_rows, 1).
    timeseries = np.asarray(trainset[stock]).reshape(-1, 1)
    n_windows = timeseries.shape[0] - look_back
    if n_windows <= 0:
        # Not enough history for a single full window; nothing to add.
        continue

    # Window k covers rows k .. k + look_back - 1 and is paired with the label
    # of row k + look_back.
    X = np.stack([timeseries[start:start + look_back] for start in range(n_windows)])

    y = np.asarray(trainset[stock + '_out'])[look_back:]
    # Pin the class count: without it a stock whose labels are all 0 would be
    # encoded with a single column and could not be concatenated.
    y = np_utils.to_categorical(y, num_classes=2)

    X_parts.append(X)
    y_parts.append(y)

# Concatenate once; np.append inside the loop re-copied the whole accumulated
# array on every iteration.
X_s = np.concatenate(X_parts, axis=0)
y_s = np.concatenate(y_parts, axis=0)

In [None]:
#Run LSTM
# One LSTM layer (25 units) -> dropout -> 2-way softmax over the one-hot
# classes in y_s.
model = Sequential()
# The input shape follows the (look_back, step) layout used to build X_s.
model.add(LSTM(25, input_shape=(look_back, step)))
model.add(Dropout(0.1))
model.add(Dense(2, activation='softmax'))
# Softmax output with one-hot targets pairs with categorical cross-entropy.
# For two classes it yields the same loss value as the previous
# "binary_crossentropy" setting.
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
with tf.device('gpu'):
    # No validation data is passed to fit(), so the callback must watch the
    # training loss; the default 'val_loss' is never computed here.
    # NOTE(review): patience=10 with epochs=10 means early stopping can never
    # trigger — lower the patience or raise the epochs if it is meant to.
    model.fit(
        X_s,
        y_s,
        epochs=10,
        batch_size=250,
        verbose=1,
        shuffle=False,
        callbacks=[EarlyStopping(monitor='loss', patience=10)],
    )

In [None]:
# Persist the trained network in two files: the architecture as JSON text and
# the weights as HDF5.
architecture_json = model.to_json()
with open("LSTM norm.json", "w") as architecture_file:
    architecture_file.write(architecture_json)

model.save_weights("LSTM norm weights.h5")

print("Saved!!!!")

In [None]:
# Score every stock over the test period. For each row that has `look_back`
# preceding rows, the model is fed those preceding values of the stock's
# column. Softmax column 0 is stored in '<stock>_dn' and column 1 in
# '<stock>_up'. Rows without a full window keep 0.0 in both columns.
predictors = stock_list

# Work on an owned frame so the new columns are not written into a slice.
testset = testset.copy()
n_rows = len(testset.index)

for c in predictors:
    dn_prob = np.zeros(n_rows)
    up_prob = np.zeros(n_rows)

    if n_rows > look_back:
        # `.values` replaces `.as_matrix()`, which pandas removed in 1.0.
        series = testset[c].values
        # Window k holds rows k .. k + look_back - 1 and scores row
        # k + look_back. The first scored row is therefore row `look_back`;
        # the previous `i > look_back` test skipped it although its window
        # was already complete.
        windows = np.stack(
            [series[start:start + look_back] for start in range(n_rows - look_back)]
        )
        # One batched predict call per stock instead of one call per row.
        with tf.device('gpu'):
            yp = model.predict(windows.reshape(-1, look_back, 1))
        dn_prob[look_back:] = yp[:, 0]
        up_prob[look_back:] = yp[:, 1]

    testset[c + '_dn'] = dn_prob
    testset[c + '_up'] = up_prob

In [None]:
# Display the test frame (the last expression of a cell is rendered as its output).
testset

In [None]:
# Write the test frame to results.csv in the current working directory.
testset.to_csv(path_or_buf="results.csv")

In [None]:
# Plot the 'ABX.' column of the pivoted price frame together with the 'ABX.'
# column of the test frame on one set of axes.
# The notebook never imports matplotlib.pyplot, so the previous `plt.plot`
# calls raised NameError on a fresh kernel. The pandas plotting accessor draws
# the same two lines without needing the `plt` name.
price = data_extract['ABX.'].copy()
# Give both series a datetime index so they share one x-axis.
price.index = pd.to_datetime(price.index)

ax = price.plot(label='ABX. (data_extract)', x_compat=True)
testset['ABX.'].plot(ax=ax, label='ABX. (testset)', x_compat=True)
ax.legend();