# AI6123 Assignment 3 Code

Author: Zhang Hanyu (G2001859G)

## Environment

This notebook was tested and run on **Kaggle Notebooks** and may be incompatible with a local Jupyter environment. To run it, create a Kaggle account and upload this notebook to Kaggle.

## Dataset

The data used in this code was retrieved from Yahoo Finance according to the instructions for Assignment 3. The dataset has been uploaded to https://www.kaggle.com/crabass/aapl-stock, and you can import it directly if you are using a Kaggle Notebook.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

## Read the dataset

In [None]:
# Load the AAPL price history from the Kaggle dataset mount and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/aapl-stock/AAPL.csv')
df.head(n=5)

In [None]:
# The mid price of each row is the average of its High and Low columns
high_prices = df['High'].to_numpy()
low_prices = df['Low'].to_numpy()
mid_prices = (high_prices + low_prices) / 2.0

# Plot the full mid-price series against the row index; only every 200th
# row gets a date label so the x axis stays readable.
plt.figure(figsize=(18, 9))
plt.plot(np.arange(len(df)), mid_prices)
plt.xticks(
    ticks=np.arange(0, len(df), 200),
    labels=df['Date'].iloc[::200],
    rotation=45,
)
plt.ylabel('Mid Price', fontsize=18)
plt.xlabel('Date', fontsize=18)
plt.show()

## Training Data Preprocessing

In [None]:
# The first TRAIN_DATA_LENGTH mid prices form the training split, shaped as
# a single-feature column (n_samples, 1) as the scaler expects.
TRAIN_DATA_LENGTH = 3000
train_data = mid_prices[:TRAIN_DATA_LENGTH].reshape(-1, 1)

# Fit the min-max scaler on the training split only and map it into [0, 1];
# the same fitted scaler is reused later for the test split.
scaler = MinMaxScaler(feature_range=(0, 1))
apple_training_scaled = scaler.fit_transform(train_data)

In [None]:
# Each training sample is a window of the FEATURE_LENGTH scaled prices that
# precede a target index; the label is the scaled price at that index.
FEATURE_LENGTH = 60
features_set = np.array([
    apple_training_scaled[target - FEATURE_LENGTH:target, 0]
    for target in range(FEATURE_LENGTH, TRAIN_DATA_LENGTH)
])
labels = np.array([
    apple_training_scaled[target, 0]
    for target in range(FEATURE_LENGTH, TRAIN_DATA_LENGTH)
])

# Append a trailing feature axis: (samples, FEATURE_LENGTH) ->
# (samples, FEATURE_LENGTH, 1), matching the LSTM input_shape used below.
features_set = features_set[:, :, np.newaxis]

## Build & Train the LSTM Model

In [None]:
# Four stacked LSTM layers (200, 200, 200 and 100 units), each followed by
# 20% dropout, feeding a single-unit dense layer that outputs the predicted
# scaled price. The first three LSTMs return full sequences so the next
# LSTM layer receives one vector per time step.
model = Sequential([
    LSTM(units=200, return_sequences=True, input_shape=(features_set.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=200, return_sequences=True),
    Dropout(0.2),
    LSTM(units=200, return_sequences=True),
    Dropout(0.2),
    LSTM(units=100),
    Dropout(0.2),
    Dense(units=1),
])

# Train with Adam, minimising mean squared error on the scaled prices
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train on the windowed samples: 100 passes over the data, 32 windows per batch
model.fit(x=features_set, y=labels, batch_size=32, epochs=100)

## Testing

In [None]:
# Scale the held-out prices with the scaler that was fitted on the training
# split, so both splits share the same normalisation.
test_data = mid_prices[TRAIN_DATA_LENGTH:].reshape(-1, 1)
test_data = scaler.transform(test_data)

# Every test point must be predicted from the FEATURE_LENGTH prices that
# immediately precede it. For the first FEATURE_LENGTH test points that
# history reaches back into the training split, so prepend the tail of the
# scaled training series to the scaled test series before windowing.
# (Windowing the training tail on its own would instead produce predictions
# for the last training days, misaligned with test_data.)
history = np.concatenate(
    (apple_training_scaled[-FEATURE_LENGTH:], test_data), axis=0
)

# Window k is history[k:k + FEATURE_LENGTH], i.e. the FEATURE_LENGTH values
# right before test point k, giving exactly one window per test point.
test_features = [
    history[end - FEATURE_LENGTH:end, 0]
    for end in range(FEATURE_LENGTH, len(history))
]

# Shape the windows as (samples, FEATURE_LENGTH, 1) for the LSTM
test_features = np.array(test_features)
test_features = np.reshape(test_features, (test_features.shape[0], test_features.shape[1], 1))

# Predict in scaled space, then map predictions and ground truth back to
# the original price scale for plotting and evaluation.
predictions = model.predict(test_features)
predictions = scaler.inverse_transform(predictions)
test_data = scaler.inverse_transform(test_data)

## Testing Result Visualization

In [None]:
# Overlay the actual and predicted mid prices for the test period
plt.figure(figsize=(18, 9))
plt.plot(test_data, label='Actual Apple Stock Mid-Price', color='blue')
plt.plot(predictions, label='Predicted Apple Stock Mid-Price', color='red')
plt.legend(fontsize=12)
plt.title('Apple Stock Price Prediction', fontsize=18)

# Label every 50th test row with its date, taken from the rows of df that
# start at the beginning of the test split.
tick_positions = range(0, len(test_data), 50)
tick_dates = df['Date'].loc[TRAIN_DATA_LENGTH::50]
plt.xticks(tick_positions, tick_dates, rotation=45)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Apple Stock Mid-Price (USD)', fontsize=18)

# Save the figure to the Kaggle working directory before displaying it
plt.savefig('/kaggle/working/test_result.jpg', dpi=150, bbox_inches='tight')
plt.show()

## Evaluate

In [None]:
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

# Score the predictions against the actual test prices (both on the original
# price scale). Note: scikit-learn reports MAPE as a fraction, not a
# percentage (e.g. 0.05 means 5%).
mape = mean_absolute_percentage_error(test_data, predictions)
mse = mean_squared_error(test_data, predictions)

print(f"MAPE: {mape}")
print(f"MSE: {mse}")