In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Keras imports
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [6]:
# Read the training CSV into a DataFrame; the cell's last expression previews
# its first five rows.
dataset_train = pd.read_csv(filepath_or_buffer="../input/trainset.csv")
dataset_train.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2013-01-02,357.385559,361.151062,355.959839,359.288177,359.288177,5115500
1,2013-01-03,360.122742,363.600128,358.031342,359.496826,359.496826,4666500
2,2013-01-04,362.313507,368.339294,361.488861,366.600616,366.600616,5562800
3,2013-01-07,365.348755,367.301056,362.929504,365.001007,365.001007,3332900
4,2013-01-08,365.393463,365.771027,359.874359,364.280701,364.280701,3373900


In [11]:
# Select the opening price as the single model feature.
# The column is picked by name rather than by position, so the selection cannot
# silently change if the CSV column order does. The double brackets keep the
# result 2-D with shape (n_rows, 1), the layout MinMaxScaler expects.
trainset = dataset_train[['Open']].values
trainset[:5]

array([[357.385559],
       [360.122742],
       [362.313507],
       [365.348755],
       [365.393463]])

In [19]:
# The opening prices span a wide range, so rescale them into [0, 1].
# The fitted scaler stays in `sc` because later cells reuse it to transform the
# test inputs and to map predictions back to price units.
sc = MinMaxScaler(feature_range=(0, 1)).fit(trainset)
training_scaled = sc.transform(trainset)
training_scaled[:5]

array([[0.01011148],
       [0.01388614],
       [0.01690727],
       [0.02109298],
       [0.02115463]])

In [24]:
# Number of past values fed to the model for each prediction.
LOOKBACK = 60


def make_windows(series, lookback=LOOKBACK):
    """Slice a scaled series into (window, next value) training samples.

    Parameters
    ----------
    series : ndarray of shape (n_rows, 1)
        Scaled values in row order; only column 0 is read.
    lookback : int
        How many consecutive past values form one input sample.

    Returns
    -------
    (windows, targets) : tuple of ndarray
        ``windows[k]`` holds ``series[k:k + lookback, 0]`` and ``targets[k]``
        is the value right after that window, ``series[k + lookback, 0]``.

    Raises
    ------
    ValueError
        If ``lookback`` is not positive, or the series is too short to yield
        even one sample.
    """
    if lookback <= 0:
        raise ValueError("lookback must be a positive integer")
    if len(series) <= lookback:
        raise ValueError(
            "need more than %d rows to build a window, got %d" % (lookback, len(series))
        )
    ends = range(lookback, len(series))
    windows = [series[end - lookback:end, 0] for end in ends]
    targets = [series[end, 0] for end in ends]
    # convert the samples to numpy arrays
    return np.array(windows), np.array(targets)


# Use the previous LOOKBACK open prices to predict the current price.
x_train, y_train = make_windows(training_scaled)

In [25]:
print(x_train.shape)
# Keras recurrent layers take a 3-D tensor (batch size, timesteps, input_size),
# see https://keras.io/layers/recurrent/ -- so append a feature axis of length 1.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
print(x_train.shape)

(1199, 60)
(1199, 60, 1)


In [26]:
# Stacked LSTM regressor: four 50-unit LSTM layers, each followed by 20% dropout,
# ending in a single-unit dense layer that outputs one (scaled) price.
regressor = Sequential([
    # Only the first layer declares the input shape: (timesteps, features per step).
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    # Intermediate LSTMs return the full sequence so the next LSTM can consume it.
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # The last LSTM returns only its final output, which feeds the dense layer.
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

# Minimise mean squared error on the scaled prices with the Adam optimiser.
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(x_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7fda392a1da0>

In [30]:
# Load the test CSV and keep its real opening prices for comparison with the
# model's predictions.
dataset_test = pd.read_csv("../input/testset.csv")
# Select 'Open' by name (2-D, one column) instead of by position, so this line
# agrees with the name-based selection in the concat below and does not depend
# on the CSV column order.
real_stock_price = dataset_test[['Open']].values
# Stack the train and test opening prices into one series so that the windows
# for the first test rows can reach back into the end of the training data.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
dataset_total.shape

(1384,)

In [31]:
# Keep the test rows plus the 60 rows immediately before them, so that every
# test row has a full 60-value history available.
inputs = dataset_total.iloc[len(dataset_total) - len(dataset_test) - 60:].values
print(inputs.shape)
# Turn the flat vector into a single-column 2-D array, shape (n, 1).
inputs = np.reshape(inputs, (-1, 1))
print(inputs.shape)

(185,)
(185, 1)


In [32]:
# Scale the test inputs with the scaler that was fitted on the training data
# (transform only, no refit), so train and test share the same mapping.
# NOTE: `inputs` is overwritten here, so running this cell twice in a row
# without re-running the cell that builds `inputs` scales the values twice.
inputs = sc.transform(inputs)
inputs.shape

(185, 1)

In [36]:
# Build one 60-value window per test row: window k covers rows k .. k+59 of
# `inputs`, i.e. the 60 values preceding the row being predicted.
x_test = np.array([inputs[first:first + 60, 0] for first in range(len(inputs) - 60)])

# prepare test data: add the trailing feature axis -> (samples, timesteps, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
x_test.shape

(125, 60, 1)

In [37]:
# The network predicts scaled values; inverse_transform maps them back to the
# original price units using the scaler fitted on the training data.
predicted_price = sc.inverse_transform(regressor.predict(x_test))
predicted_price[:5]

array([[1074.5787],
       [1073.0597],
       [1075.2504],
       [1083.6356],
       [1095.1995],
       [1106.5748],
       [1115.886 ],
       [1119.8424],
       [1120.882 ],
       [1120.2833],
       [1124.1501],
       [1130.2653],
       [1136.6787],
       [1141.7954],
       [1145.9778],
       [1152.7118],
       [1163.2269],
       [1173.1854],
       [1180.3412],
       [1184.5549],
       [1185.282 ],
       [1184.6243],
       [1182.9583],
       [1175.2152],
       [1160.1093],
       [1134.4993],
       [1115.8634],
       [1105.6588],
       [1096.1816],
       [1091.5514],
       [1091.141 ],
       [1093.1116],
       [1099.8033],
       [1109.2787],
       [1117.5427],
       [1124.5377],
       [1130.612 ],
       [1134.1398],
       [1137.3772],
       [1142.4177],
       [1144.957 ],
       [1142.2815],
       [1128.4342],
       [1113.5308],
       [1107.777 ],
       [1109.2283],
       [1117.7649],
       [1131.6864],
       [1149.5615],
       [1166.3041],


In [None]:
# Overlay the real and predicted opening prices on one set of axes.
fig, ax = plt.subplots()
ax.plot(real_stock_price, color='red', label='Real Price')
ax.plot(predicted_price, color='blue', label='Predicted Price')
ax.set_title('Google Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Google Stock Price')
ax.legend()
plt.show()