### Predicting Google's Stock Price (Building an LSTM)

#### Train on 5 years of data (Jan 2012 to Dec 2016) to predict the stock price for Jan 2017

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# Load the training set from the CSV file into a DataFrame
train_data = pd.read_csv("Google_Stock_Price_Train.csv")

In [7]:
# Preview the first rows to check the columns and their values
train_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [8]:
# Take the Open column as a 2-D NumPy array (one row per day, one column).
# Selecting by label instead of by position keeps this correct even if the
# CSV's column order changes; the double brackets keep the result 2-D.
training_set = train_data[["Open"]].values

In [9]:
# Confirm that .values produced a NumPy array
type(training_set)

numpy.ndarray

### Feature Scaling

In [10]:
# For RNNs, normalization (min-max scaling) is the usual choice of feature scaling
from sklearn.preprocessing import MinMaxScaler

In [12]:
# Scaler configured to map values into the range (0, 1)
scaler = MinMaxScaler(feature_range=(0,1))

In [13]:
# Fit the scaler on the training prices and scale them in a single step
scaled_data = scaler.fit_transform(training_set)

In [15]:
# Inspect the scaled values
scaled_data

array([[0.08581368],
       [0.09701243],
       [0.09433366],
       ...,
       [0.95725128],
       [0.93796041],
       [0.93688146]])

### Creating a data structure with 60 timesteps and 1 output

In [17]:
# A poorly chosen number of timesteps can cause overfitting or nonsensical predictions.
# 60 timesteps means that, at time t, the RNN sees the previous 60 stock prices.

In [18]:
# Start with empty containers for the input windows and their targets
X_train, y_train = [], []

In [19]:
# Build sliding windows: each sample holds the previous TIMESTEPS scaled
# prices, and its target is the scaled price that immediately follows.
TIMESTEPS = 60

# The upper bound comes from the data itself rather than a hard-coded 1258,
# so the cell keeps working if the training file gains or loses rows.
# Building with comprehensions also makes the cell safe to re-run: appending
# to X_train / y_train fails once they have been converted to ndarrays.
# The results are NumPy arrays because that is what the RNN is fed.
X_train = np.array(
    [scaled_data[i - TIMESTEPS:i, 0] for i in range(TIMESTEPS, len(scaled_data))]
)
y_train = np.array(
    [scaled_data[i, 0] for i in range(TIMESTEPS, len(scaled_data))]
)

In [22]:
# Inspect the windowed inputs: one row per sample, one column per timestep
X_train

array([[0.08581368, 0.09701243, 0.09433366, ..., 0.07846566, 0.08034452,
        0.08497656],
       [0.09701243, 0.09433366, 0.09156187, ..., 0.08034452, 0.08497656,
        0.08627874],
       [0.09433366, 0.09156187, 0.07984225, ..., 0.08497656, 0.08627874,
        0.08471612],
       ...,
       [0.92106928, 0.92438053, 0.93048218, ..., 0.95475854, 0.95204256,
        0.95163331],
       [0.92438053, 0.93048218, 0.9299055 , ..., 0.95204256, 0.95163331,
        0.95725128],
       [0.93048218, 0.9299055 , 0.93113327, ..., 0.95163331, 0.95725128,
        0.93796041]])

### Reshaping

In [24]:
# Add a trailing axis for the number of predictors used per timestep.
# Resulting layout: (batch_size, timesteps, dimensions); the last axis is 1
# because only the Open column of the dataset is used.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

### Building RNN

In [25]:
#Importing keras libraries & packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [27]:
# Initialize the RNN as an empty Sequential model; layers are added to it next
regressor = Sequential()

In [28]:
# First LSTM layer, followed by Dropout regularization (to avoid overfitting).
# A relevant number of units leads to better results; 50 are used here.
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),  # (timesteps, dimensions)
    )
)
regressor.add(Dropout(rate=0.2))

In [29]:
# Second LSTM layer, followed by Dropout regularization
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
    )
)
regressor.add(Dropout(rate=0.2))

In [30]:
# Third LSTM layer, followed by Dropout regularization
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
    )
)
regressor.add(Dropout(rate=0.2))

In [None]:
# Fourth LSTM layer, followed by Dropout regularization.
# return_sequences is False here, unlike the three LSTM layers before it.
regressor.add(
    LSTM(
        units=50,
        return_sequences=False,
    )
)
# The rate is passed by keyword, matching the earlier Dropout layers
regressor.add(Dropout(rate=0.2))