In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
plt.style.use('ggplot')
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from keras.models import Sequential
from keras.optimizers import SGD
from sklearn.metrics import mean_squared_error
import math

# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:
# Load the AMZN price history; the 'Date' column is parsed as dates and used as the index.
dataset = pd.read_csv(
    '../Data/AMZN_2006-01-01_to_2018-01-01.csv',
    parse_dates=['Date'],
    index_col='Date',
)
# Show the last few rows.
dataset.tail()

In [None]:
# Draw every plottable column of the Amazon dataset in its own panel,
# title the figure, save a PNG copy next to the notebook and display it.
dataset.plot(figsize=(10,12), subplots=True)
overview_fig = plt.gcf()
overview_fig.suptitle('AMZN stock attributes from 2006 to 2017')
plt.savefig('AMZN_stocks.png')
plt.show()

In [None]:
# Functions for using in LSTM and the GRU algorithm 
def plot_predictions(test,predicted):
    """Draw the real and the predicted price curves on one chart and show it.

    `test` is drawn in red and `predicted` in blue; both are handed to
    `plt.plot` unchanged, so the x axis is whatever index they carry.
    """
    curves = (
        (test, 'red', 'Real AMZ Stock Price'),
        (predicted, 'blue', 'Predicted AMZ Stock Price'),
    )
    for values, colour, caption in curves:
        plt.plot(values, color=colour, label=caption)
    plt.title('AMZ Stock Price Prediction')
    plt.xlabel('Date')
    plt.ylabel('AMZ Stock Price')
    plt.legend()
    plt.show()

def return_rmse(test,predicted):
    """Print and return the root mean squared error of a prediction.

    Parameters
    ----------
    test : array-like
        Ground-truth values, passed to `mean_squared_error` as `y_true`.
    predicted : array-like
        Predicted values, passed to `mean_squared_error` as `y_pred`.

    Returns
    -------
    float
        The square root of `mean_squared_error(test, predicted)`. The same
        value is also printed, so existing cells that only call this
        function for its printout keep working.
    """
    rmse = math.sqrt(mean_squared_error(test, predicted))
    print("The root mean squared error is {}.".format(rmse))
    # Hand the value back so different models can be compared in code.
    return rmse

In [None]:
# Splitting into train and test data: everything through 2016 is used for training
# and 2017 onwards is the test period.
# The 'Close' column is selected by name rather than by position so that the split
# uses the same series that this notebook plots and later feeds to the models as
# test input (dataset["Close"]).
# The list selector ['Close'] keeps the result 2-D with a single column.
training_set = dataset.loc[:'2016', ['Close']].values
test_set = dataset.loc['2017':, ['Close']].values

In [None]:
# Shape of the training array: (number of rows through 2016, 1 column).
training_set.shape

In [None]:
# Shape of the test array: (number of rows from 2017 onwards, 1 column).
test_set.shape

In [None]:
# Fraction of all rows that fall into the test split.
len(test_set) / len(dataset)

The test set makes up about 8.3% of the whole dataset.

In [None]:
# The 'Close' attribute is the price series used here. Plot it with the
# training period (through 2016) and the test period (2017 onwards) as two curves.
close_prices = dataset["Close"]
ax = close_prices[:'2016'].plot(figsize=(16,4),legend=True)
close_prices['2017':].plot(ax=ax,figsize=(16,4),legend=True)
ax.legend(['Training set (Before 2017)','Test set (2017 and beyond)'])
ax.set_title('AMZ stock price')
plt.show()

In [None]:
# Fit a min-max scaler on the training values, then rescale them into [0, 1].
# The fitted scaler `sc` is kept so the same scaling can be applied to the test inputs.
sc = MinMaxScaler(feature_range=(0,1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

In [None]:
# Turn the scaled training series into supervised samples with 60 timesteps and 1 output:
# each input row holds the 60 scaled values preceding position `end`,
# and the matching target is the scaled value at `end` itself.
scaled_series = training_set_scaled[:, 0]
window_ends = range(60, training_set.shape[0])
X_train = np.array([scaled_series[end - 60:end] for end in window_ends])
y_train = np.array([scaled_series[end] for end in window_ends])

In [None]:
# Add a trailing axis of length 1 so X_train becomes 3-D: (samples, timesteps, 1).
# The recurrent layers below are declared with input_shape=(X_train.shape[1], 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [None]:
# Shape after the reshape: (samples, 60 timesteps, 1).
X_train.shape

# LSTM

In [None]:
# The LSTM architecture: four stacked 50-unit LSTM layers, each followed by
# Dropout(0.2) regularisation, and a single-unit dense output layer.
# The first three LSTM layers return full sequences so the next LSTM layer
# receives one vector per timestep; the fourth returns only its final output.
regressor = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1],1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

# Compile with RMSprop and a mean-squared-error loss, then fit on the training windows.
regressor.compile(loss='mean_squared_error', optimizer='rmsprop')
regressor.fit(X_train, y_train, batch_size=32, epochs=50)

In [None]:
# Prepare the model input for the test period in the same way as for training.
# The first 60 test rows each need 60 previous values, which the test rows alone
# cannot supply, so the input is cut from the whole 'Close' series:
# the 60 values before the test period followed by the test period itself.
close_through_2016 = dataset["Close"][:'2016']
close_from_2017 = dataset["Close"]['2017':]
dataset_total = pd.concat((close_through_2016, close_from_2017), axis=0)
first_input_row = len(dataset_total) - len(test_set) - 60
# Reshape to a single column and apply the scaler fitted on the training set.
test_inputs = sc.transform(dataset_total.values[first_input_row:].reshape(-1,1))

In [None]:
# Cut the scaled test inputs into 60-step windows (one window per row after the first 60),
# give them the (samples, timesteps, 1) layout, predict with the LSTM model and
# map the predictions back to price units with the fitted scaler.
lstm_windows = [test_inputs[stop - 60:stop, 0] for stop in range(60, test_inputs.shape[0])]
X_test = np.array(lstm_windows)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
predicted_stock_price = sc.inverse_transform(regressor.predict(X_test))

In [None]:
# Chart the real test-period prices against the LSTM predictions.
plot_predictions(test=test_set, predicted=predicted_stock_price)

In [None]:
# Report the RMSE of the LSTM predictions on the test period.
return_rmse(test=test_set, predicted=predicted_stock_price)

# GRU

In [None]:
# The GRU architecture: four stacked 50-unit GRU layers (tanh activation), each
# followed by Dropout(0.2) regularisation, and a single-unit dense output layer.
regressorGRU = Sequential()
# First GRU layer with Dropout regularisation.
# Only this first layer declares input_shape; every later layer takes its input
# from the layer before it, so the argument is not repeated there.
regressorGRU.add(GRU(units=50, return_sequences=True, input_shape=(X_train.shape[1],1), activation='tanh'))
regressorGRU.add(Dropout(0.2))
# Second GRU layer (returns the full sequence for the next GRU layer)
regressorGRU.add(GRU(units=50, return_sequences=True, activation='tanh'))
regressorGRU.add(Dropout(0.2))
# Third GRU layer (returns the full sequence for the next GRU layer)
regressorGRU.add(GRU(units=50, return_sequences=True, activation='tanh'))
regressorGRU.add(Dropout(0.2))
# Fourth GRU layer (returns only its final output)
regressorGRU.add(GRU(units=50, activation='tanh'))
regressorGRU.add(Dropout(0.2))
# The output layer
regressorGRU.add(Dense(units=1))
# Compiling the RNN with SGD (momentum 0.9, no Nesterov) and a mean-squared-error loss
regressorGRU.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9, nesterov=False),loss='mean_squared_error')
# Fitting to the training set
regressorGRU.fit(X_train,y_train,epochs=50,batch_size=150)

In [None]:
# Rebuild the 60-step test windows from the scaled test inputs, predict with the
# GRU model and convert the predictions back to price units with the fitted scaler.
window_stops = range(60, test_inputs.shape[0])
X_test = np.array([test_inputs[stop - 60:stop, 0] for stop in window_stops])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
GRU_predicted_stock_price = sc.inverse_transform(regressorGRU.predict(X_test))

In [None]:
# Chart the real test-period prices against the GRU predictions.
plot_predictions(test=test_set, predicted=GRU_predicted_stock_price)

In [None]:
# Report the RMSE of the GRU predictions on the test period.
return_rmse(test=test_set, predicted=GRU_predicted_stock_price)

The GRU model performs better in this case: its RMSE is lower, and the prices it predicts follow a trend similar to that of the real stock prices.