In [None]:
# -*- coding: utf-8 -*-
# Indentation: Jupyter Notebook

'''
Google Stock Prediction using LSTM
'''

__version__ = 1.0
__author__ = "Sourav Raj"
__author_email__ = "souravraj.iitbbs@gmail.com"


In [1]:
### Predicting the future stock price of Google
# An LSTM is used to capture the upward and downward trends of the price, which it does better than an ARIMA model
# Dropout regularization is applied to avoid overfitting

# Data Preprocessing

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the training data from CSV into a DataFrame
df_train = pd.read_csv('../data/RNN/Google_Stock_Price_Train.csv')

In [5]:
# Preview the first rows of the training data
df_train.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [23]:
# Keep only the column at position 1 ('Open'); the 1:2 slice returns a
# DataFrame (2-D) rather than a Series
train_set = df_train.iloc[:,1:2]
train_set.head()

Unnamed: 0,Open
0,325.25
1,331.27
2,329.83
3,328.34
4,322.04


In [25]:
# Check the column dtype of the selected data
train_set.dtypes

Open    float64
dtype: object

In [26]:
# Convert the single-column DataFrame to a NumPy array
train = train_set.values

In [27]:
# Min-max normalization: rescale the training prices into the [0, 1] range.
# The fitted scaler `sc` is kept so the same scaling can be applied to the
# test inputs and undone on the predictions later in the notebook.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(train)
train_scaled = sc.transform(train)

In [28]:
# Inspect the scaled training values
train_scaled 

array([[ 0.08581368],
       [ 0.09701243],
       [ 0.09433366],
       ..., 
       [ 0.95725128],
       [ 0.93796041],
       [ 0.93688146]])

In [29]:
# Total number of elements in the scaled array
train_scaled.size

1258

In [10]:
# Build sliding windows: every sample holds the 60 previous scaled prices
# (60 timesteps, i.e. about 3 months of data) and its target is the scaled
# price that immediately follows the window.
X_train = [train_scaled[end - 60:end, 0] for end in range(60, train_scaled.size)]
y_train = [train_scaled[end, 0] for end in range(60, train_scaled.size)]
X_train[1]

array([ 0.09701243,  0.09433366,  0.09156187,  0.07984225,  0.0643277 ,
        0.0585423 ,  0.06568569,  0.06109085,  0.06639259,  0.0614257 ,
        0.07474514,  0.02797827,  0.02379269,  0.02409033,  0.0159238 ,
        0.01078949,  0.00967334,  0.01642607,  0.02100231,  0.02280676,
        0.02273235,  0.02810849,  0.03212665,  0.0433812 ,  0.04475779,
        0.04790163,  0.0440695 ,  0.04648783,  0.04745517,  0.04873875,
        0.03936305,  0.04137213,  0.04034898,  0.04784582,  0.04325099,
        0.04356723,  0.04286033,  0.04602277,  0.05398467,  0.05738894,
        0.05714711,  0.05569611,  0.04421832,  0.04514845,  0.04605997,
        0.04412531,  0.03675869,  0.04486941,  0.05065481,  0.05214302,
        0.05612397,  0.05818885,  0.06540665,  0.06882953,  0.07243843,
        0.07993526,  0.07846566,  0.08034452,  0.08497656,  0.08627874])

In [11]:
# Convert the Python lists of samples and targets into NumPy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
    

In [12]:
# Reshape to add one more dimension: a trailing axis of size 1 for the
# single indicator, i.e. (samples, timesteps) -> (samples, timesteps, 1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [13]:
# Confirm the 3-D shape (samples, timesteps, indicators)
X_train.shape

(1198, 60, 1)

# Building RNN

In [14]:
# We will use a stacked LSTM with dropout

In [15]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

Using TensorFlow backend.


In [16]:
# Initialising the RNN as a Keras Sequential model; the layers are added
# one by one in the cells that follow
regressor = Sequential()

In [17]:
# First LSTM layer followed by dropout regularization.
#   units            - number of neurons in the LSTM layer
#   return_sequences - True, because further LSTM layers are stacked on top
#   input_shape      - (timesteps, number of indicators/predictors)
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)
regressor.add(Dropout(rate=0.2))

In [18]:
# Second LSTM layer followed by dropout regularization.
# input_shape is not required here; it is only given on the first layer.
regressor.add(
    LSTM(units=50, return_sequences=True)
)
regressor.add(Dropout(rate=0.2))

In [19]:
# Third LSTM layer followed by dropout regularization.
# input_shape is not required here; it is only given on the first layer.
regressor.add(
    LSTM(units=50, return_sequences=True)
)
regressor.add(Dropout(rate=0.2))

In [20]:
# Fourth (last) LSTM layer followed by dropout regularization.
# return_sequences is False because no further LSTM layer follows;
# input_shape is again not required.
regressor.add(
    LSTM(units=50, return_sequences=False)
)
regressor.add(Dropout(rate=0.2))

In [21]:
# Output layer: a Dense layer with a single unit, because the only value
# being predicted is the stock price
regressor.add(
    Dense(units=1)
)

# Compiling the RNN

In [None]:
# Since this is a regression problem, mean squared error is used as the loss;
# Adam is the optimizer
regressor.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Fitting the RNN to the training set: 60 epochs with mini-batches of 32 samples
# NOTE(review): no random seed is set in the visible cells, so the trained
# weights (and the predictions below) will differ between runs
regressor.fit(X_train, y_train, epochs=60, batch_size=32)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
 128/1198 [==>...........................] - ETA: 4s - loss: 0.0020

In [None]:
# Making the predictions and visualizing the result
# Load the real stock prices of 2017 (the test set)
# NOTE(review): this path ('../../data/...') does not follow the layout used
# for the training CSV ('../data/RNN/...') — confirm where the test file lives
df_test = pd.read_csv('../../data/Google_Stock_Price_Test.csv')

In [None]:
# Preview the first rows of the test data
df_test.head()

In [None]:
# Real prices as a 2-D NumPy array taken from the column at position 1
# (presumably 'Open', as in the training file — TODO confirm)
y_real= df_test.iloc[:,1:2].values

In [None]:
# Getting the predicted stock price of 2017
# Every test-day prediction needs the 60 preceding opening prices, so the
# 'Open' columns of the training and test data are concatenated first.
df_total = pd.concat((df_train['Open'], df_test['Open']), axis=0)
# Keep the test rows plus the 60 rows right before the first test row.
# The start offset has to be measured from the end of the combined series
# (len(df_total)); measuring it from len(df_train) starts the window
# len(df_test) rows too early, and the windows built afterwards would then
# predict the last training days instead of the test days.
# .iloc makes the slice explicitly positional (concat keeps the original,
# now duplicated, index labels).
inputs = df_total.iloc[len(df_total) - len(df_test) - 60:].values
inputs


In [None]:
# Turn the 1-D vector of prices into a single-column 2-D array
inputs = np.reshape(inputs, (-1, 1))
inputs

In [None]:
# Scale the inputs with the scaler that was fitted on the training data
# (transform only, no refit)
# NOTE(review): `inputs` is overwritten here, so re-running this cell on its
# own applies the scaling a second time
inputs = sc.transform(inputs)
inputs.size

In [None]:
# Number of rows in the test set (one prediction is made per row)
len(df_test)

In [None]:
# One 60-step window per test row, taken from the scaled input series
X_test = [inputs[end - 60:end, 0] for end in range(60, 60 + len(df_test))]
X_test

In [None]:
# Stack the windows into an array, append the trailing indicator axis so the
# layout is (samples, timesteps, 1) like X_train, then run the prediction
X_test = np.array(X_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_pred = regressor.predict(X_test)

In [None]:
# Map the scaled predictions back to the original price scale
# NOTE(review): `y_pred` is overwritten here, so re-running this cell on its
# own applies the inverse transform a second time
y_pred = sc.inverse_transform(y_pred)
y_pred

In [None]:
# Visualising the results: real and predicted prices drawn on the same axes
fig, ax = plt.subplots()
ax.plot(y_real, color='red', label='Real Stock price')
ax.plot(y_pred, color='blue', label='predicted stock price')
ax.set_xlabel('Time')
ax.set_ylabel('Stock price')
ax.set_title('Stock Prediction')
ax.legend()
plt.show()