**Importing the libraries**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Data Preprocessing**

Importing the Dataset

In [None]:
# Load the GOOGL historical price table from the Kaggle input dataset.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/google-data-historic-dataset/GOOGL.csv",
)

In [None]:
# Preview the first rows of the loaded frame to check the columns and value formats.
df.head()

In [None]:
# Dataset dimensions: number of rows and number of columns.
nRow = df.shape[0]
nCol = df.shape[1]
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns,
# printed under a short header line.
print('description de la base', df.describe(), sep='\n')

In [None]:
# Print the frame's structural summary (column names, non-null counts, dtypes).
df.info()

Checking for Missing Data

In [None]:
# Count NaN cells across the whole frame (all columns, all rows).
print("Total missing values:", df.isna().to_numpy().sum())

**Data visualization**

In [None]:
from matplotlib import pyplot
import matplotlib.pyplot as plt

In [None]:
# Histograms: one panel per numeric column of the frame.
# `figsize` sets the overall size of the figure.
df.hist(figsize=(17, 17))
plt.show()

In [None]:
# Scatter-plot matrix: pairwise relationships between every two variables.
pd.plotting.scatter_matrix(df, figsize=(17, 17))
plt.show()

In [None]:
# Chronological split: rows dated before 2019-01-01 are used for training,
# rows from 2019-01-01 onwards for testing.
# NOTE(review): the comparison is done on the raw 'Date' values; this assumes
# they are ISO-formatted strings (YYYY-MM-DD) so that string order equals
# chronological order — confirm.
data_train = df.loc[df['Date'] < '2019-01-01'].copy()
data_test = df.loc[df['Date'] >= '2019-01-01'].copy()
# Keep an untouched copy of the training rows; `data_train` itself is
# transformed in later cells.
data_training = data_train.copy()

In [None]:
# Remove the 'Date' and 'Adj Close' columns from the training frame so that
# only the model's input features remain.
data_train = data_train.drop(columns=['Date', 'Adj Close'])

In [None]:
# Show the (rows, columns) shape of the training and test sets, one per line.
print(data_train.shape, data_test.shape, sep='\n')

Feature scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler
#from sklearn.preprocessing import StandardScaler # used for feature scaling

# Feature scaling: bring every feature column onto a common scale so that
# columns with large magnitudes do not dominate training.
# The scaler is fitted on the training rows only and is reused later for the
# test rows.
sc = MinMaxScaler()
sc.fit(data_train)
# From here on `data_train` is the scaled NumPy array, not a DataFrame.
data_train = sc.transform(data_train)
data_train

*Splitting data_train into X_train and y_train*

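We are going to train the model on windows of 60 days at a time. The code below slides a 60-row window over the scaled training data: each sample in X_train holds 60 consecutive rows (all features), and the matching entry in y_train is the value of the first feature column on the row that immediately follows the window.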

In [None]:
# Build the supervised samples from the scaled training array:
#   X_train[k] = the 60 consecutive rows that precede row (60 + k), all features
#   y_train[k] = column 0 of row (60 + k)
X_train = np.array(
    [data_train[end - 60:end] for end in range(60, data_train.shape[0])]
)
y_train = np.array(
    [data_train[end, 0] for end in range(60, data_train.shape[0])]
)

In [None]:
# Shapes of the training inputs and targets, one per line.
print(X_train.shape, y_train.shape, sep='\n')

# Simple RNN

Importing the Keras libraries

In [None]:
# Importing the Keras libraries and packages

from keras.models import Sequential  
from keras.layers import Dense 
from keras.layers import SimpleRNN
from keras.layers import Dropout # it block to overfitting 

In [None]:
# Stacked SimpleRNN regressor: four recurrent layers, each followed by 20%
# dropout, and a single-unit dense output layer.
model1 = Sequential()

# First RNN layer. The input shape (timesteps, features) is read from X_train
# instead of hard-coding the feature count, so the model keeps matching the
# data if the set of feature columns changes.
model1.add(SimpleRNN(units=60, activation='relu', return_sequences=True,
                     input_shape=(X_train.shape[1], X_train.shape[2])))
model1.add(Dropout(0.2))

# Second RNN layer; return_sequences=True passes the full sequence on to the
# next recurrent layer.
model1.add(SimpleRNN(units=60, activation='relu', return_sequences=True))
model1.add(Dropout(0.2))

# Third RNN layer.
model1.add(SimpleRNN(units=80, activation='relu', return_sequences=True))
model1.add(Dropout(0.2))

# Fourth RNN layer: return_sequences is not set here, so only the last step's
# output is handed to the dense layer.
# NOTE(review): unlike the three layers above, no activation is passed, so the
# layer's default activation applies — confirm this is intentional.
model1.add(SimpleRNN(units=120))
model1.add(Dropout(0.2))

# Output layer: one value per sample.
model1.add(Dense(units=1))

In [None]:
# Print the layer-by-layer summary of the SimpleRNN model.
model1.summary()

In [None]:
# Configure training: mean squared error loss, minimised with the Adam optimizer.
model1.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train the SimpleRNN model on the windowed training set:
# 50 passes over the data, in mini-batches of 32 samples.
model1.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
)

Making the predictions and visualizing the results

In [None]:
# Preview the first rows of the test split before it is prepared for the model.
data_test.head()

In [None]:
# Prepend the last 60 training rows to the test rows so that the first test
# sample already has a full 60-row history window.
past_60_days = data_training.tail(60)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and produces the same row order with a fresh 0..n-1 index.
data_test = pd.concat([past_60_days, data_test], ignore_index=True)
# Drop 'Date' and 'Adj Close' so the columns match the ones the scaler and
# the models were fitted on.
data_test = data_test.drop(columns=['Date', 'Adj Close'])
data_test.head()

In [None]:
# Scale the test rows with `sc`, the scaler already fitted on the training
# rows (transform only, no re-fitting). After this line `data_test` is the
# array returned by the scaler rather than a DataFrame.
data_test = sc.transform(data_test)
data_test

In [None]:
# Build the test samples the same way as the training ones:
#   X_test[k] = the 60 consecutive scaled rows that precede row (60 + k)
#   y_test[k] = column 0 of row (60 + k)
X_test = np.array(
    [data_test[end - 60:end] for end in range(60, data_test.shape[0])]
)
y_test = np.array(
    [data_test[end, 0] for end in range(60, data_test.shape[0])]
)
X_test.shape, y_test.shape

In [None]:
# Run the trained SimpleRNN model on the test windows; predictions are still
# on the scaled value range at this point.
y_pred1 = model1.predict(x=X_test)
y_pred1.shape

In [None]:
# Display the fitted scaler's per-feature scaling factors (one entry per column).
sc.scale_

In [None]:
# Factor that converts scaled values of the target (column 0) back towards the
# original units. It is read from the fitted scaler instead of being pasted in
# as a literal, so it stays correct when the data or the split changes.
scale = 1 / sc.scale_[0]
scale

In [None]:
y_pred1 = y_pred1*scale
y_test = y_test*scale

In [None]:
# Plot the rescaled test targets against the SimpleRNN predictions.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test, color='orange', label='Real Google Stock Price')
ax.plot(y_pred1, color='c', label='Predicted Google Stock Price, Simple RNN')
ax.set_title('Google Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Google Stock Price')
ax.legend()
plt.show()

# LSTM

Importing the Keras LSTM layer

In [None]:
from tensorflow.keras.layers import LSTM

In [None]:
# Stacked LSTM regressor: four LSTM layers with ReLU activation, each followed
# by 20% dropout, and a single-unit dense output layer.
model2 = Sequential()

# First LSTM layer. The input shape (timesteps, features) is read from X_train
# instead of hard-coding the feature count, so the model keeps matching the
# data if the set of feature columns changes.
model2.add(LSTM(units=60, activation='relu', return_sequences=True,
                input_shape=(X_train.shape[1], X_train.shape[2])))
model2.add(Dropout(0.2))

# Second LSTM layer; return_sequences=True passes the full sequence on to the
# next recurrent layer.
model2.add(LSTM(units=60, activation='relu', return_sequences=True))
model2.add(Dropout(0.2))

# Third LSTM layer.
model2.add(LSTM(units=80, activation='relu', return_sequences=True))
model2.add(Dropout(0.2))

# Fourth LSTM layer: return_sequences is not set here, so only the last step's
# output is handed to the dense layer.
model2.add(LSTM(units=120, activation='relu'))
model2.add(Dropout(0.2))

# Output layer: one value per sample.
model2.add(Dense(units=1))

In [None]:
# Print the layer-by-layer summary of the LSTM model.
model2.summary()

In [None]:
# Configure training: mean squared error loss, minimised with the Adam optimizer.
model2.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train the LSTM model on the windowed training set:
# 50 passes over the data, in mini-batches of 32 samples.
model2.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
)

In [None]:
# Rebuild the test samples from the scaled test array. `y_test` was rescaled
# for the SimpleRNN plot above, so it is recreated here on the scaled range.
#   X_test[k] = the 60 consecutive scaled rows that precede row (60 + k)
#   y_test[k] = column 0 of row (60 + k)
X_test = np.array(
    [data_test[end - 60:end] for end in range(60, data_test.shape[0])]
)
y_test = np.array(
    [data_test[end, 0] for end in range(60, data_test.shape[0])]
)
X_test.shape, y_test.shape

In [None]:
# Run the trained LSTM model on the test windows; predictions are still on the
# scaled value range at this point.
y_pred2 = model2.predict(x=X_test)
y_pred2.shape

In [None]:
# Display the fitted scaler's per-feature scaling factors (one entry per column).
sc.scale_

In [None]:
# Factor that converts scaled values of the target (column 0) back towards the
# original units. It is read from the fitted scaler instead of being pasted in
# as a literal, so it stays correct when the data or the split changes.
scale = 1 / sc.scale_[0]
scale

In [None]:
# Rescale the LSTM predictions and the test targets with the same factor so
# both curves are plotted on a common axis.
# NOTE(review): MinMaxScaler transforms as X * scale_ + min_, so multiplying by
# `scale` undoes only the multiplicative part; the additive `min_` term is not
# removed, leaving both series shifted by the same constant relative to the
# original prices — confirm whether a full inverse transform is wanted.
# NOTE(review): this cell rebinds its own inputs, so running it twice applies
# the factor twice.
y_pred2 = y_pred2*scale
y_test = y_test*scale

In [None]:
# Plot the rescaled test targets against the LSTM predictions.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test, color='LimeGreen', label='Real Google Stock Price')
ax.plot(y_pred2, color='Gold', label='Predicted Google Stock Price, LSTM')
ax.set(
    title='Google Stock Price Prediction',
    xlabel='Time',
    ylabel='Google Stock Price',
)
ax.legend()
plt.show()

# Comparison between Simple RNN and LSTM

In [None]:
# Overlay both models' predictions on the rescaled test targets (dashed line)
# to compare the SimpleRNN and the LSTM on the same axis.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test, color='LimeGreen', linestyle='dashed', label='Real Google Stock Price')
ax.plot(y_pred1, color='c', label='Predicted Google Stock Price, Simple RNN')
ax.plot(y_pred2, color='Gold', label='Predicted Google Stock Price, LSTM')
ax.set(
    title='Google Stock Price Prediction',
    xlabel='Time',
    ylabel='Google Stock Price',
)
ax.legend()
plt.show()