In [1]:
# Description: This program uses an artificial recurrent neural network called Long Short Term Memory (LSTM) to predict the closing stock price of a corporation (Apple Inc.) using the past 60 day stock price

In [1]:
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequantial
from tensorflow.keras.layers import Dense, Dropout, LSTM

ModuleNotFoundError: No module named 'tensorflow'

In [5]:
# Load data
company = '005930'

start = dt.datetime(2012, 1, 1)
end = dt.datetime(2020, 1, 1)

data = web.DataReader(company, 'naver', start, end)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-02,21400,21600,21300,21600,263300
2012-01-03,21860,22100,21840,22100,339046
2012-01-04,22100,22200,21500,21600,342389
2012-01-05,21460,21580,21100,21100,346691
2012-01-06,21120,21319,20600,20800,376753
...,...,...,...,...,...
2019-12-23,56100,56400,55100,55500,9839252
2019-12-24,55600,55700,54800,55000,11868463
2019-12-26,54700,55400,54400,55400,9645034
2019-12-27,55700,56900,55500,56500,12313056


In [21]:
# Prepare data
scaler = MinMaxScaler(feature_range = (0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1,1))

print("Scaled data: ")
print(scaled_data[60-60:60, 0])
print(scaled_data[61-60:60, 0])

prediction_days = 60
x_train = []
y_train = []

for x in range(prediction_days, len(scaled_data)):
    x_train.append(scaled_data[x-prediction_days:x, 0])
    y_train.append(scaled_data[x, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

print("x_train: ")
print(x_train)
print("y_train: ")
print(y_train)

Scaled data: 
[0.03468835 0.04823848 0.03468835 0.02113821 0.01300813 0.
 0.00542005 0.00271003 0.00650407 0.01626016 0.00758808 0.01084011
 0.00758808 0.0303523  0.04823848 0.05311653 0.05257453 0.05907859
 0.05365854 0.04932249 0.03414634 0.03468835 0.02710027 0.03143631
 0.04173442 0.04119241 0.03685637 0.02493225 0.03631436 0.03468835
 0.06449864 0.06449864 0.08672087 0.08617886 0.08888889 0.09810298
 0.07804878 0.08888889 0.08401084 0.09159892 0.10298103 0.0899729
 0.08563686 0.08888889 0.08455285 0.08888889 0.11598916 0.10514905
 0.11111111 0.12682927 0.12682927 0.1203252  0.13224932 0.13604336
 0.12249322 0.13116531 0.13279133 0.1403794  0.1598916  0.15501355]
[0.04823848 0.03468835 0.02113821 0.01300813 0.         0.00542005
 0.00271003 0.00650407 0.01626016 0.00758808 0.01084011 0.00758808
 0.0303523  0.04823848 0.05311653 0.05257453 0.05907859 0.05365854
 0.04932249 0.03414634 0.03468835 0.02710027 0.03143631 0.04173442
 0.04119241 0.03685637 0.02493225 0.03631436 0.03468835 

In [22]:
# Build the Model
model = Sequential()

model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1)) # Prediction of the next closing value

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=25, batch_size=32)

NameError: name 'Sequential' is not defined

In [24]:
''' Test the Model accuray on existing data '''
# Load test data
test_start = dt.datetime(2020, 1, 1)
test_end = dt.datetime.now()

test_data = web.DataReader(company, 'naver', test_start, test_end)
actual_prices = test_data['Close'].values

total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)

model_inputs = total_dataset[len(total_dataset) - len(test_data) - prediction_days:].value
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)

# Make predictions on test data
x_test = []
for x in range(prediction_days, len(model_inputs) + 1):
    x_test.append(model_inputs[x-prediction_days:x, 0])
    
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1]))

predicted_prices = model.predict(x_test)
predicted_prices = scaler.inverse_transform(predicted_prices)

# Plot the test prediction
plt.plot(actual_prices, color="black", label=f"Actual {company} prices")
plt.plot(predicted_prices, color="green", label=f"Predicted {company} prices")
plt.title(f"{company} Share Price")
plt.xlabel('Time')
plt.ylabale(f"{company} Share Price")
plt.legend()
plt.show()

AttributeError: 'Series' object has no attribute 'value'

In [None]:
# Predict Next Day
real_data = [model_inputs[len(model_inputs) + 1 - prediction_days:len(model_inputs + 1)]]
real_data = np.array(real_data)
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))

prediction = model.predict(real_data)
prediction = scaler.inverse_transform(prediction)
print(f"Prediction: {prediction}")