In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from torch import nn, optim
from tqdm.notebook import tqdm_notebook

from gru import GRUModel
from optimization import Optimization
from utilities import *


In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

SUBDATASET_PATH = "data/subdataset/"
MODELS_PATH = os.path.join("models", "GRU")

HIDDEN_DIM = 64
LAYER_DIM = 3
BATCH_SIZE = 32
DROPOUT = 0.3
N_EPOCHS = 100
LEARNING_RATE = 1e-03
WEIGHT_RATE = 1e-05


In [None]:
RMSEs, MAEs, MAPEs = list(), list(), list()

for ticker in tqdm_notebook(os.listdir(SUBDATASET_PATH), desc="Train and Evaluate GRU"):
    if not os.path.exists(MODELS_PATH):
        os.makedirs(MODELS_PATH)

    model_path = os.path.join(MODELS_PATH, f"{ticker.split('.')[0]}")

    df = pd.read_csv(SUBDATASET_PATH+ticker, index_col="Date")
    df["Next 5 Days Close"] = df["Close"].shift(-5)
    df.dropna(inplace=True)

    X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(
        df, "Next 5 Days Close", 0.1)
    X_train_arr, X_val_arr, X_test_arr, y_train_arr, y_val_arr, y_test_arr, scaler = transform_data(
        X_train, X_val, X_test, y_train, y_val, y_test, scaling="minmax")
    train_loader, val_loader, test_loader = load_data_into_dataloader(
        X_train_arr, X_val_arr, X_test_arr, y_train_arr, y_val_arr, y_test_arr, batch_size=BATCH_SIZE)

    input_dim = len(X_train.columns)
    output_dim = len(y_train.columns)

    model_params = {
        "input_dim": input_dim,
        "hidden_dim": HIDDEN_DIM,
        "layer_dim": LAYER_DIM,
        "output_dim": output_dim,
        "dropout_prob": DROPOUT
    }

    model = GRUModel(**model_params)
    model = model.to(device)
    loss_fn = nn.MSELoss(reduction="mean")
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_RATE)

    opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)
    opt.train(train_loader, val_loader, batch_size=BATCH_SIZE,
              n_epochs=N_EPOCHS, n_features=input_dim, model_path=model_path)

    predictions, values = opt.evaluate(
        test_loader,
        batch_size=1,
        n_features=input_dim
    )

    result = format_predictions(predictions, values, X_test, scaler)
    RMSE, MAE, MAPE = get_evaluation_metrics(result["true"], result["pred"])
    RMSEs.append(RMSE)
    MAEs.append(MAE)
    MAPEs.append(MAPE)


In [None]:
print(f"RMSE: {np.mean(RMSEs)}\nMAE: {np.mean(MAEs)}\nMAPE: {np.mean(MAPEs)}")
