<a href="https://colab.research.google.com/github/mengwangk/trading-playground/blob/master/notebook/LSTM_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM - Stock Analysis

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import io

In [None]:
# https://github.com/mwitiderrick/stockprice
# https://www.kdnuggets.com/2018/11/keras-long-short-term-memory-lstm-model-predict-stock-prices.html

In [None]:
from google.colab import files
# files.upload() opens the Colab file picker; the result maps each uploaded
# filename to its content, which later cells decode from bytes.
uploaded = files.upload()
# Report only names and sizes: printing the dict itself would dump the raw
# bytes of every uploaded CSV into the cell output.
for uploaded_name, uploaded_bytes in uploaded.items():
    print(f'{uploaded_name}: {len(uploaded_bytes)} bytes')

In [None]:
# Multipliers for the magnitude suffix that may end an abbreviated quantity.
UOM = {'K': 1000, 'M': 1000000, 'B': 1000000000}

def convert_to_numeric(val):
    """Convert an abbreviated quantity such as '1.5K' or '2M' to a float.

    Args:
        val: A string, optionally ending in a suffix listed in UOM
            (case-insensitive) and optionally containing ',' thousands
            separators; or an already-numeric value; or None.

    Returns:
        The numeric value as a float. None, an empty string, or a lone '-'
        are treated as missing and yield NaN. A NaN input is returned as NaN.

    Raises:
        ValueError: If the numeric part of a string cannot be parsed.
    """
    if val is None:
        return float('nan')
    if not isinstance(val, str):
        # Already numeric (this includes the NaN pandas uses for empty cells).
        return float(val)
    text = val.strip().replace(',', '')
    if text in ('', '-'):
        return float('nan')
    multiplier = UOM.get(text[-1].upper())
    if multiplier is None:
        # No recognised suffix: the whole string is the number.
        return float(text)
    return float(text[:-1]) * multiplier

def clean_dataset(df):
    """Normalise a raw price DataFrame in place.

    Renames 'Price' to 'Close' and 'Vol.' to 'Volume', drops 'Change %',
    parses 'Date' into datetimes, converts a non-numeric 'Volume' column with
    convert_to_numeric, sorts the rows by ascending 'Date', and renumbers the
    index from 0 in that order.

    The function can be called again on a frame it has already cleaned, so
    re-running a notebook cell does not raise.

    Args:
        df: DataFrame with at least 'Date' and 'Vol.' (or 'Volume') columns.

    Returns:
        None. `df` is modified in place.
    """
    df.rename(columns={'Price': 'Close', 'Vol.': 'Volume'}, inplace=True)
    # errors='ignore' keeps a repeat call from raising once the column is gone.
    df.drop(columns=['Change %'], errors='ignore', inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    # Only text volumes need parsing; a numeric column is left untouched.
    if not pd.api.types.is_numeric_dtype(df['Volume']):
        df['Volume'] = df['Volume'].apply(convert_to_numeric)
    df.sort_values(by=['Date'], ascending=True, inplace=True)
    # Renumber the rows so index labels follow the chronological order.
    df.reset_index(drop=True, inplace=True)

# Decode the uploaded training CSV from bytes and parse it into a DataFrame.
train_csv = io.StringIO(uploaded['GENTING_train_set.csv'].decode('utf-8'))
dataset = pd.read_csv(train_csv, parse_dates=True)
clean_dataset(dataset)

# The 1:2 slice keeps the column axis, so the result is a 2-D array holding
# only the column at position 1.
training_set = dataset.iloc[:, 1:2].values
dataset.head(10)

In [None]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
# Fit a min-max scaler on the training column, then rescale that column
# into the (0, 1) range with the fitted scaler.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)
print(training_set_scaled)

In [None]:
# Build supervised samples: each input is a run of 60 consecutive scaled
# values and its target is the value immediately after that run.
X_train, y_train = [], []
print(len(training_set_scaled))
for end in range(60, len(training_set_scaled)):
    start = end - 60
    X_train.append(training_set_scaled[start:end, 0])
    y_train.append(training_set_scaled[end, 0])
X_train = np.array(X_train)
y_train = np.array(y_train)
print(X_train.shape)
print(y_train.shape)

# Append a trailing axis of length 1: (samples, steps) -> (samples, steps, 1).
n_samples, n_steps = X_train.shape[0], X_train.shape[1]
X_train = np.reshape(X_train, (n_samples, n_steps, 1))
print(X_train.shape)
print(len(X_train))
print(len(y_train))

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping

In [None]:
# Stacked LSTM regressor: four 50-unit LSTM layers, each followed by a
# Dropout(0.2) layer, ending in a single-unit Dense output.
regressor = Sequential()

# First recurrent layer declares the input shape: (window length, 1).
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
regressor.add(Dropout(0.2))

# Two identical middle layers that pass full sequences to the next LSTM.
for middle_layer in range(2):
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))

# Last recurrent layer: return_sequences is left at its default.
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

regressor.add(Dense(units=1))

regressor.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping watches the training loss with a patience of 3 epochs.
early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)
regressor.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
    shuffle=False,
)

In [None]:
# Decode the uploaded test CSV and clean it with the same routine used for
# the training data.
test_csv = io.StringIO(uploaded['GENTING_test_set.csv'].decode('utf-8'))
dataset_test = pd.read_csv(test_csv, parse_dates=True)
clean_dataset(dataset_test)

# Column position 1 as a 2-D array; this is the series plotted as the
# actual price at the end of the notebook.
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_test.head(10)

In [None]:
# Join train and test values of the SAME column the scaler and model were fit
# on (position 1, as used for training_set) and that real_stock_price is read
# from. Selecting 'Open' by name here fed the model a different feature than
# it was trained on whenever column 1 is not 'Open'.
# NOTE(review): the 'Price' -> 'Close' rename suggests column 1 is 'Close';
# confirm against the CSV header.
dataset_total = pd.concat((dataset.iloc[:, 1], dataset_test.iloc[:, 1]), axis = 0)
dataset_total.head(10)

In [None]:
# Take the tail of the combined series: the test rows plus the 60 values
# before them, so every test row has a full 60-value history.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
print(len(dataset_test))
print(len(inputs))
print(inputs)

# The scaler works on a 2-D column, so reshape before applying the scaling
# fitted on the training data.
inputs = sc.transform(inputs.reshape(-1, 1))

# One 60-value window per test row.
X_test = np.array([
    inputs[end - 60:end, 0]
    for end in range(60, len(dataset_test) + 60)
])
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict in scaled space, then map the predictions back to price units.
predicted_stock_price = sc.inverse_transform(regressor.predict(X_test))

In [None]:
# Overlay the actual and predicted series on one set of axes.
for series, colour, caption in (
    (real_stock_price, 'black', 'Genting Stock Price'),
    (predicted_stock_price, 'green', 'Predicted Genting Stock Price'),
):
    plt.plot(series, color=colour, label=caption)

plt.title('Genting Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Genting Stock Price')
plt.legend()
plt.show()