In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [14]:
df = pd.read_csv("google_stocks/Google_Stock_Price_Train.csv", header=0, index_col='Date')

In [15]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1/3/2012,325.25,332.83,324.97,663.59,7380500
1/4/2012,331.27,333.87,329.08,666.45,5749400
1/5/2012,329.83,330.75,326.89,657.21,6590300
1/6/2012,328.34,328.77,323.68,648.24,5405900
1/9/2012,322.04,322.29,309.46,620.76,11688800


In [7]:
df.shape

(1258, 6)

In [8]:
df = df.dropna()

In [9]:
df.shape

(1258, 6)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    1258 non-null   object 
 1   Open    1258 non-null   float64
 2   High    1258 non-null   float64
 3   Low     1258 non-null   float64
 4   Close   1258 non-null   object 
 5   Volume  1258 non-null   object 
dtypes: float64(3), object(3)
memory usage: 59.1+ KB


In [16]:
# Convert the price values to floats
df['Close'] = df['Close'].str.replace(',', '').astype(float)

In [19]:
# Extract the 'Close' column as the target variable
data = df.filter(['Close']).values
dataset_size = len(data)

In [20]:
# Split the data into training and testing sets
train_size = int(dataset_size * 0.8)
train_data = data[:train_size, :]
test_data = data[train_size:, :]

In [21]:
# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

In [22]:
# Split the data into input and output sets
def create_dataset(data, look_back=1):
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back), 0])
        Y.append(data[i + look_back, 0])
    return np.array(X), np.array(Y)

look_back = 60
X_train, Y_train = create_dataset(train_data, look_back)
X_test, Y_test = create_dataset(test_data, look_back)

In [23]:
# Reshape the input data to fit the LSTM model
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [24]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

In [25]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [31]:
# Train the model
model.fit(X_train, Y_train, batch_size=32, epochs=45)

Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45

KeyboardInterrupt: 

In [29]:
testing = pd.read_csv("google_stocks/Google_Stock_Price_Test.csv")
testing .head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2017,778.81,789.63,775.8,786.14,1657300
1,1/4/2017,788.36,791.34,783.16,786.9,1073000
2,1/5/2017,786.08,794.48,785.02,794.02,1335200
3,1/6/2017,795.26,807.9,792.2,806.15,1640200
4,1/9/2017,806.4,809.97,802.83,806.65,1272400


In [32]:
# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)



In [33]:
# Calculate the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - Y_test) ** 2)))
print('Test RMSE:', rmse)

Test RMSE: 755.0346372817864
