In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import talib

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

from utils import get_model_inputs, seed_everything, load_stock_data
from models import MLP

In [2]:
# Raw table as read from disk; all preprocessing is delegated to load_stock_data.
full_data = pd.read_csv('ASX100.csv')

# Settings reused by the model-building and evaluation cells further down.
hidden_size = 150  # second positional argument to MLP
n_back = 20        # presumably the look-back window length -- confirm in utils
n_forward = 5      # presumably the forecast horizon -- confirm in utils
val_split = 0.2    # fraction of samples held out by train_test_split

train_data, test_data, scalers = load_stock_data(
    full_data,
    n_back,
    test_date_split="2019-01-01",
    TI=True,  # presumably enables technical-indicator features -- confirm in utils
)

all_stocks = train_data.keys()

# One get_model_inputs result per stock; element [0] is used as model input
# and element [1] as target below.
input_data = dict(
    (stock, get_model_inputs(stock, train_data, n_back, n_forward))
    for stock in all_stocks
)

# Stack the per-stock arrays along the first axis (same order as all_stocks),
# then flatten every input sample to a single row.
x = np.concatenate([parts[0] for parts in input_data.values()], axis=0)
y = np.concatenate([parts[1] for parts in input_data.values()], axis=0)
x = x.reshape(len(x), -1)

# NOTE(review): train_test_split shuffles by default. If the windows produced
# by get_model_inputs overlap in time, validation samples are not independent
# of the training samples -- confirm that this is intended.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=val_split,
    random_state=42,
)

In [3]:
# Fix the seed before the model is built (seed_everything comes from utils;
# presumably it seeds every RNG the model relies on -- confirm).
seed_everything(1)
model = MLP(n_back, hidden_size, n_forward)
model.fit(x_train, y_train)

# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, so the printed
# value is unchanged, but the conventional order keeps this line safe to adapt
# to metrics that are not symmetric.
print(mean_squared_error(y_val, model.predict(x_val)))

0.04081077355565656


In [4]:
# Score the fitted model on the held-out test data for every stock.
# NOTE(review): scalers are presumably used to undo the input scaling so the
# errors are in original units -- confirm in models.MLP.get_metrics.
metrics = model.get_metrics(
    all_stocks,
    test_data,
    scalers,
    n_back,
    n_forward,
)

In [5]:
# Column-wise average: one value per metric column (rmse, mae, mape).
# Rows are presumably one per stock -- confirm in models.MLP.get_metrics.
metrics.mean(axis=0)

rmse    1.065067
mae     0.808692
mape    0.034984
dtype: float64

In [7]:
# Create the checkpoint directory first so the save also works on a fresh
# checkout where ./model_checkpoints does not exist yet.
os.makedirs('./model_checkpoints', exist_ok=True)
model.save_model('./model_checkpoints/mlp.model')