In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# The scalers bring all columns onto the same scale.
# Preprocess the data first, then do the prediction.

In [2]:
# Load the daily price history from the CSV file.
# parse_dates converts the Date column to real datetimes while reading;
# date_parser is not the right option for that (it expects a callable and is
# deprecated since pandas 2.0), so it is not passed here.
data = pd.read_csv("google_data.csv", parse_dates=["Date"])
# Show the most recent rows as a quick sanity check of the load
data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
4025,2020-08-14,1515.660034,1521.900024,1502.880005,1507.72998,1507.72998,1354800
4026,2020-08-17,1514.670044,1525.609985,1507.969971,1517.97998,1517.97998,1378300
4027,2020-08-18,1526.180054,1562.469971,1523.709961,1558.599976,1558.599976,2027100
4028,2020-08-19,1553.310059,1573.680054,1543.949951,1547.530029,1547.530029,1660000
4029,2020-08-20,1543.449951,1585.869995,1538.290039,1581.75,1581.75,1517568


In [3]:
# The Date column is not used as a feature when predicting the target.

In [4]:
# Chronological split, training part: every row dated before 2019-01-01
# (2004 through December 2018). Rows from 2019 onward are held out for testing.
# .copy() gives an independent frame rather than a view of `data`.
data_training = data.loc[data['Date'].lt('2019-01-01')].copy()

In [5]:
# Inspect the last training rows to confirm the split ends in December 2018
data_training.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
3612,2018-12-24,973.900024,1003.539978,970.109985,976.219971,976.219971,1590300
3613,2018-12-26,989.01001,1040.0,983.0,1039.459961,1039.459961,2373300
3614,2018-12-27,1017.150024,1043.890015,997.0,1043.880005,1043.880005,2109800
3615,2018-12-28,1049.619995,1055.560059,1033.099976,1037.079956,1037.079956,1414800
3616,2018-12-31,1050.959961,1052.699951,1023.590027,1035.609985,1035.609985,1493300


In [6]:
# Chronological split, test part: every row dated 2019-01-01 or later.
# .copy() gives an independent frame rather than a view of `data`.
data_test = data.loc[data['Date'].ge('2019-01-01')].copy()
# Inspect the last test rows
data_test.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
4025,2020-08-14,1515.660034,1521.900024,1502.880005,1507.72998,1507.72998,1354800
4026,2020-08-17,1514.670044,1525.609985,1507.969971,1517.97998,1517.97998,1378300
4027,2020-08-18,1526.180054,1562.469971,1523.709961,1558.599976,1558.599976,2027100
4028,2020-08-19,1553.310059,1573.680054,1543.949951,1547.530029,1547.530029,1660000
4029,2020-08-20,1543.449951,1585.869995,1538.290039,1581.75,1581.75,1517568


In [7]:
# Keep only the numeric feature columns (Open, High, Low, Close, Volume):
# Date and Adj Close are removed from the model inputs.
training_data = data_training.drop(columns=['Date', 'Adj Close'])

In [8]:
# Preview the remaining feature columns after dropping Date and Adj Close
training_data.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,49.813286,51.835709,47.800831,49.982655,44871300
1,50.316402,54.336334,50.062355,53.95277,22942800
2,55.168217,56.528118,54.321388,54.495735,18342800
3,55.4123,55.591629,51.591621,52.239193,15319700
4,52.284027,53.798351,51.746044,52.802086,9232100


In [9]:
# Goal: predict the Open price. The model reads a window of 60 consecutive
# days and predicts the value for the 61st day.
# MinMaxScaler rescales every column to the range [0, 1] based on the
# minimum and maximum seen in the training rows.
scaler = MinMaxScaler()
# Fit on a frame derived directly from data_training rather than on
# training_data itself: this cell overwrites training_data with the scaled
# array, so fitting on that name would refit the scaler on already-scaled
# values whenever the cell is run a second time.
training_data = scaler.fit_transform(
    data_training.drop(['Date', 'Adj Close'], axis=1)
)
# (rows, features) of the scaled training array
training_data.shape


(3617, 5)

In [10]:
# Start the training set with empty containers:
# X_train collects the input windows, y_train the matching targets.
X_train, y_train = [], []
# Number of scaled training rows available for building windows
len(training_data)

3617

In [11]:
# Build the supervised samples with a sliding window over the scaled rows.
# For every row index i from 60 up to the last row:
#   X sample = the 60 consecutive rows i-60 .. i-1 (all feature columns)
#   y target = the scaled Open value (column 0) of row i, the day that follows
# The lists are rebuilt from scratch in this cell so that running it again
# never appends a second copy of the samples.
n_rows = training_data.shape[0]
X_train = [training_data[i-60 : i] for i in range(60, n_rows)]
y_train = [training_data[i, 0] for i in range(60, n_rows)]
    


In [12]:
# Number of training windows: one per row from index 60 onward
len(X_train)

3557

In [13]:
# Stack the Python lists into numpy arrays so they can be passed to the model
X_train = np.array(X_train)
y_train = np.array(y_train)

In [14]:
# Show both shapes together: a cell only displays its last expression, so
# putting them on separate lines would hide the shape of X_train.
X_train.shape, y_train.shape

(3557,)

In [15]:
#build LSTM
#first we need to import Keras layers, which we import from tensorflow

from tensorflow.keras import Sequential

# next, import the Dense, LSTM and Dropout layers from keras.layers
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [19]:
# Create the LSTM model. It is a regressor because the target (the scaled
# Open price) is a continuous value.
# Each input sample has shape (timesteps, features). Both numbers are read from
# X_train so the model follows the data instead of a hard-coded feature count.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

regressor = Sequential([
    # Four stacked LSTM layers, each followed by Dropout.
    #   units            - size of the layer's output
    #   return_sequences - True makes the layer emit its output at every
    #                      timestep, so the next LSTM layer receives a sequence
    #   input_shape      - shape of one sample, given on the first layer only
    LSTM(units=50, activation='relu', return_sequences=True,
         input_shape=(n_timesteps, n_features)),
    # Dropout(rate) drops that fraction of the layer's inputs during training
    Dropout(0.2),
    LSTM(units=60, activation='relu', return_sequences=True),
    Dropout(0.3),
    LSTM(units=80, activation='relu', return_sequences=True),
    Dropout(0.4),
    # Final LSTM layer: return_sequences is left at its default, so only the
    # last output is passed on to the Dense layer
    LSTM(units=120, activation='relu'),
    Dropout(0.5),
    # Single output unit: the predicted value
    Dense(units=1),
])

In [20]:
# Print the layer-by-layer summary of the model
# (layer type, output shape and parameter count)
regressor.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 60, 50)            11200     
_________________________________________________________________
dropout_5 (Dropout)          (None, 60, 50)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 60, 60)            26640     
_________________________________________________________________
dropout_6 (Dropout)          (None, 60, 60)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 60, 80)            45120     
_________________________________________________________________
dropout_7 (Dropout)          (None, 60, 80)            0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 120)              

In [22]:
# Configure training: minimise mean squared error with the Adam optimizer
regressor.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [23]:
# Train the model: 10 passes over the training windows, 32 samples per batch
regressor.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1edcc02b508>

In [25]:
# Prepare the test set before predicting.
# Predicting the first test day requires the 60 days that come before it,
# and those are the final 60 rows of the training frame.
past_60_days = data_training.tail(n=60)
past_60_days

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
3557,2018-10-04,1195.329956,1197.51001,1155.57605,1168.189941,1168.189941,2209500
3558,2018-10-05,1167.5,1173.5,1145.119995,1157.349976,1157.349976,1184300
3559,2018-10-08,1150.109985,1168.0,1127.364014,1148.969971,1148.969971,1932400
3560,2018-10-09,1146.150024,1154.349976,1137.572021,1138.819946,1138.819946,1308700
3561,2018-10-10,1131.079956,1132.170044,1081.130005,1081.219971,1081.219971,2675700
3562,2018-10-11,1072.939941,1106.400024,1068.27002,1079.319946,1079.319946,2949000
3563,2018-10-12,1108.0,1115.0,1086.401978,1110.079956,1110.079956,2101300
3564,2018-10-15,1108.910034,1113.446045,1089.0,1092.25,1092.25,1372400
3565,2018-10-16,1104.589966,1124.219971,1102.5,1121.280029,1121.280029,1928500
3566,2018-10-17,1126.459961,1128.98999,1102.189941,1115.689941,1115.689941,1467200


In [29]:
# Put the last 60 training days in front of the test rows so that every test
# day has a full 60-day history. pd.concat is used because DataFrame.append
# was removed in pandas 2.0. ignore_index=True renumbers the rows from 0.
# Date and Adj Close are dropped so the columns match those the scaler was
# fitted on.
df = (
    pd.concat([past_60_days, data_test], ignore_index=True)
    .drop(['Date', 'Adj Close'], axis=1)
)
df.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,1195.329956,1197.51001,1155.57605,1168.189941,2209500
1,1167.5,1173.5,1145.119995,1157.349976,1184300
2,1150.109985,1168.0,1127.364014,1148.969971,1932400
3,1146.150024,1154.349976,1137.572021,1138.819946,1308700
4,1131.079956,1132.170044,1081.130005,1081.219971,2675700


In [30]:
# Scale the test frame with the scaler that was fitted on the training data.
# The scaler is only applied here, not refitted, so values can fall outside
# [0, 1] when test prices exceed the training range (see the values above 1
# in the output).
inputs = scaler.transform(df)
inputs

array([[0.93805611, 0.93755773, 0.92220906, 0.91781776, 0.0266752 ],
       [0.91527437, 0.91792904, 0.91350452, 0.90892169, 0.01425359],
       [0.90103881, 0.91343268, 0.89872289, 0.90204445, 0.02331778],
       ...,
       [1.2088917 , 1.23592021, 1.22867595, 1.23821685, 0.02446519],
       [1.23110046, 1.24508469, 1.24552549, 1.22913204, 0.0200173 ],
       [1.22302893, 1.25505023, 1.24081368, 1.25721546, 0.01829156]])

In [33]:
# Build the test samples with the same sliding window used for training.
# For every row index i from 60 up to the last row of `inputs`:
#   X sample = the 60 consecutive rows i-60 .. i-1 (all feature columns)
#   y target = the scaled Open value (column 0) of row i
# Both lists are created in one step, so re-running the cell cannot
# accumulate duplicate samples.
n_rows = inputs.shape[0]
X_test = [inputs[i-60 : i] for i in range(60, n_rows)]
y_test = [inputs[i, 0] for i in range(60, n_rows)]


In [35]:
# Stack the test lists into numpy arrays, mirroring the training set
X_test = np.array(X_test)
y_test = np.array(y_test)
# (samples, timesteps, features)
X_test.shape


(413, 60, 5)

In [36]:
# One target value per test window
y_test.shape

(413,)

In [None]:
# TODO: run the prediction (continue from the tutorial video at 40:34)