In [45]:
# Recurrent Neural Network

# Part 1 - Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Read the full health-sector price history from CSV; the train/test split by date happens in later cells
dataset = pd.read_csv(filepath_or_buffer="Datasets/SPX_Health_Sector_stocks_all.csv")

In [46]:
# Convert the 'Date' column to datetime values so rows can be filtered by date range.
# `infer_datetime_format` is deprecated since pandas 2.0 (a strict form of format
# inference is now the default and the flag has no effect), so it is omitted here.
dataset['Date'] = pd.to_datetime(dataset['Date'])


In [55]:
# Training rows: every record whose 'Date' is one of the calendar days
# from 2020-03-11 through 2020-12-30 (both ends included by date_range)
dataset_train = dataset.loc[
    dataset["Date"].isin(pd.date_range(start="2020-03-11", end="2020-12-30"))
]
dataset_train

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Trading Volume,Volume Weighted Average Price,Number of Transactions
0,ABT,2020-03-11,73.28,78.15,72.0100,74.74,14123240.0,74.9395,113015
1,ABT,2020-03-12,77.91,82.67,74.8300,81.65,18521541.0,77.7958,160884
2,ABT,2020-03-15,70.65,79.25,70.0000,73.66,14358978.0,75.4936,113523
3,ABT,2020-03-16,75.52,79.79,73.3400,79.49,13441141.0,77.9694,124950
4,ABT,2020-03-17,74.98,81.99,74.3900,79.26,17680823.0,78.7228,141919
...,...,...,...,...,...,...,...,...,...
29878,BNTX,2020-12-23,98.50,99.89,96.6400,96.96,1039109.0,97.8215,14211
29879,BNTX,2020-12-27,95.00,95.40,87.0700,88.11,4698872.0,89.9943,58940
29880,BNTX,2020-12-28,84.45,89.39,83.8525,88.28,3300440.0,87.1904,35970
29881,BNTX,2020-12-29,90.00,90.89,83.0268,83.94,2941083.0,85.8906,35970


In [48]:
# The model is trained on the 'Open' price only. Double brackets keep the result
# 2-D (n_rows, 1), which is the layout passed to the scaler's fit_transform below.
# The column is selected by name rather than by position (previously iloc[:, 2:3])
# so the cell does not break if the CSV column order changes, and so it matches the
# later cell that builds `dataset_total` from the 'Open' column.
training_set = dataset_train[["Open"]].values
training_set

array([[73.28],
       [77.91],
       [70.65],
       ...,
       [84.45],
       [90.  ],
       [83.38]])

In [49]:
# Feature scaling
# Min-max normalization (rather than standardization) is the usual choice for
# RNNs whose gates use sigmoid activations.
# `MinMaxScaler` is scikit-learn's min-max normalization transformer.
from sklearn.preprocessing import MinMaxScaler

# feature_range=(0, 1) rescales the training prices into the interval [0, 1].
# The fitted scaler `sc` is kept so later cells can apply and invert the same scaling.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit(training_set).transform(training_set)

In [56]:
# Build the supervised samples: each input is a window of the previous 60 scaled
# prices and the target is the scaled price that immediately follows that window.
# The window length (timesteps) controls how much history the RNN sees for each
# prediction; a poor choice can lead to overfitting or meaningless results.
# NOTE(review): the displayed training frame stacks several tickers one after
# another, so windows near a ticker boundary appear to mix prices from two
# different stocks — confirm whether windows should be built per ticker.
if len(training_set_scaled) <= 60:
    raise ValueError("training_set_scaled needs more than 60 rows to build 60-step windows")

# 'X_train': inputs, the 60 previous scaled prices for each sample
X_train = []
# 'y_train': outputs, the next scaled price for each sample
y_train = []
# Iterate up to the real length of the scaled array instead of a hard-coded row
# count: a fixed bound raises IndexError when the data is shorter and silently
# drops trailing rows when it is longer.
for i in range(60, len(training_set_scaled)):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

In [57]:
# Reshaping: append a trailing feature axis
# Keras recurrent layers expect 3-D input of shape (batch size, timesteps, input_dim):
#   - X_train.shape[0] is the number of samples (rows)
#   - X_train.shape[1] is the number of timesteps (columns)
#   - input_dim is the number of indicators per timestep; it is 1 here, and the extra
#     axis leaves room to add other indicators that may correlate with stock prices
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Display the training frame the samples were built from
display(dataset_train)

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Trading Volume,Volume Weighted Average Price,Number of Transactions
0,ABT,2020-03-11,73.28,78.15,72.0100,74.74,14123240.0,74.9395,113015
1,ABT,2020-03-12,77.91,82.67,74.8300,81.65,18521541.0,77.7958,160884
2,ABT,2020-03-15,70.65,79.25,70.0000,73.66,14358978.0,75.4936,113523
3,ABT,2020-03-16,75.52,79.79,73.3400,79.49,13441141.0,77.9694,124950
4,ABT,2020-03-17,74.98,81.99,74.3900,79.26,17680823.0,78.7228,141919
...,...,...,...,...,...,...,...,...,...
29878,BNTX,2020-12-23,98.50,99.89,96.6400,96.96,1039109.0,97.8215,14211
29879,BNTX,2020-12-27,95.00,95.40,87.0700,88.11,4698872.0,89.9943,58940
29880,BNTX,2020-12-28,84.45,89.39,83.8525,88.28,3300440.0,87.1904,35970
29881,BNTX,2020-12-29,90.00,90.89,83.0268,83.94,2941083.0,85.8906,35970


In [58]:
# Part 2 - Building the RNN
# Building a robust stacked LSTM with dropout regularization

# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [59]:
# Initialising the RNN
# The model is called `regressor` because it predicts a continuous value
# (regression) rather than a class label. Layers are appended to this
# Sequential container by the `regressor.add(...)` calls in the following cells.
regressor = Sequential()

In [60]:
# First LSTM layer followed by dropout regularisation
#   units            - number of LSTM memory cells in the layer
#   return_sequences - True because further LSTM layers are stacked on top of this one
#   input_shape      - (timesteps, features) of a single sample taken from X_train
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)
# Dropout rate 0.2: during training each unit's output is dropped with
# probability 0.2, which helps to limit overfitting
regressor.add(Dropout(rate=0.2))

In [61]:
# Second and third LSTM layers, each followed by dropout regularisation.
# No input_shape is needed here: it is inferred from the previous layer's output.
# Both layers return full sequences because another LSTM layer consumes them.
for _hidden_layer in range(2):
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(rate=0.2))

# Fourth and last LSTM layer. return_sequences is left at its default (False),
# so only the final timestep's output is passed on.
regressor.add(LSTM(units=50))
regressor.add(Dropout(rate=0.2))

In [62]:
# Output layer: a single unit because one value is predicted per sample
regressor.add(Dense(units=1))

# Compile the network
# The Keras documentation suggests 'RMSprop' for RNNs; 'adam' was chosen here after trial and error.
# Mean squared error is the loss because this is a regression problem
# (binary cross-entropy would be the choice for classification).
regressor.compile(loss="mean_squared_error", optimizer="adam")

# Train on the training windows
#   X_train - input windows (independent variables)
#   y_train - target values the predictions are compared against
regressor.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f9ae1b8d550>

In [66]:
# Part 3 - Making the predictions and visualising the results

# Getting the real stock price of 2021
# The CSV is read again here, replacing the `dataset` frame loaded earlier.
dataset = pd.read_csv("Datasets/SPX_Health_Sector_stocks_all.csv")

# `infer_datetime_format` is deprecated since pandas 2.0 (it no longer has any
# effect), so the column is parsed with the default behaviour instead.
dataset['Date'] = pd.to_datetime(dataset['Date'])
# Test rows: every record dated from 2021-01-01 through 2021-12-30
dataset_test = dataset[dataset["Date"].isin(pd.date_range("2021-01-01", "2021-12-30"))]
# Ground truth is the 'Open' price, kept 2-D (n_rows, 1). It is selected by name
# rather than by position (previously iloc[:, 2:3]) so it stays correct if the
# column order changes and matches the 'Open' selection used for `dataset_total`.
real_stock_price = dataset_test[["Open"]].values

In [67]:
# Getting the predicted stock price of 2021
# Every test row needs the 60 prices that precede it, so the 'Open' series of the
# training and test frames are stacked into one series.
# axis=0 concatenates vertically, i.e. the test rows are appended below the training rows.
dataset_total = pd.concat(
    [dataset_train.loc[:, "Open"], dataset_test.loc[:, "Open"]],
    axis=0,
)

In [72]:
# Extract the prices for the test period plus the 60 rows that precede it.
# `.iloc` makes it explicit that the slice is positional, not label-based.
inputs = dataset_total.iloc[len(dataset_total) - len(dataset_test) - 60:].values
# Reshape to a single-column 2-D array, the layout the scaler was fitted on
inputs = inputs.reshape(-1, 1)
# Apply the scaling fitted on the training data (transform only, no re-fit)
inputs = sc.transform(inputs)
# Same windowing as for training, without targets: one 60-step window per test row
X_test = []

# Iterate up to the real length of `inputs` instead of a hard-coded row count:
# a fixed bound produces truncated (ragged) windows when the data is shorter and
# skips test rows when it is longer. With 60 leading rows this yields exactly
# len(dataset_test) windows, one per row of the test set.
for i in range(60, len(inputs)):
    X_test.append(inputs[i-60:i, 0])

X_test = np.array(X_test)


In [73]:
# The network takes 3-D input, so append a trailing feature axis of size 1
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
# Predict on the test windows, then undo the min-max scaling so the
# predictions are expressed in dollars again
predicted_stock_price = sc.inverse_transform(regressor.predict(X_test))