In [None]:
from utilities import *
from statsmodels.tsa.arima.model import ARIMA, ARIMAResults
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm_notebook
import torch
from torch import nn, optim
from rnn import RNNModel
from lstm import LSTMModel
from gru import GRUModel
from optimization import Optimization

from sklearn.model_selection import train_test_split
import joblib


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Where the per-ticker CSV files and the saved models live.
SUBDATASET_PATH = "data/subdataset/"
MODELS_PATH = "models"

# Architecture settings forwarded to the RNN / LSTM / GRU constructors.
HIDDEN_DIM = 64
LAYER_DIM = 3
DROPOUT = 0.3

# Data-loader batch size and optimiser settings
# (WEIGHT_RATE is passed to Adam as ``weight_decay``).
BATCH_SIZE = 32
N_EPOCHS = 100
LEARNING_RATE = 1e-3
WEIGHT_RATE = 1e-5


In [None]:
# Evaluate every saved model (RNN / LSTM / GRU checkpoints, joblib-serialised
# models and ARIMA) on the held-out tail of each ticker's data and collect
# RMSE / MAE / MAPE per ticker and per model in ``result_dict``.
#
# NOTE: ``torch.load``, ``joblib.load`` and ``ARIMAResults.load`` all unpickle
# their input, so only point MODELS_PATH at files you trust.

TARGET_COLUMN = "Next 5 Days Close"

# Maps the sub-directory name under MODELS_PATH to the network class whose
# state dict is stored there.
_NETWORK_CLASSES = {"RNN": RNNModel, "LSTM": LSTMModel, "GRU": GRUModel}


def _ticker_symbol(filename):
    """Return the part of ``filename`` before its first dot (``"AAPL.csv"`` -> ``"AAPL"``)."""
    return filename.split('.')[0]


def _load_ticker_frame(filename):
    """Read one ticker CSV (indexed by "Date") and add the target column.

    The target is the "Close" value five rows later; rows containing NaN
    (including the last five, which have no target) are dropped.
    """
    frame = pd.read_csv(os.path.join(SUBDATASET_PATH, filename), index_col="Date")
    frame[TARGET_COLUMN] = frame["Close"].shift(-5)
    return frame.dropna()


def _find_checkpoint(model_name, symbol):
    """Return the path of the ``<symbol>_*.pth`` checkpoint for ``model_name``.

    Raises:
        FileNotFoundError: if no file matches, instead of an opaque IndexError.
    """
    pattern = os.path.join(MODELS_PATH, model_name, f"{symbol}_*.pth")
    # Sort so the choice is deterministic when several checkpoints match.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"No {model_name} checkpoint found matching {pattern!r}")
    return matches[0]


# Loop-invariant listings, computed once. Hidden entries (e.g.
# ``.ipynb_checkpoints``) and entries of the wrong kind are skipped, and the
# names are sorted so the run order is reproducible.
ticker_files = sorted(
    name for name in os.listdir(SUBDATASET_PATH)
    if not name.startswith(".")
    and os.path.isfile(os.path.join(SUBDATASET_PATH, name))
)
classical_model_names = sorted(
    name for name in os.listdir(MODELS_PATH)
    if name != "ARIMA"
    and name not in _NETWORK_CLASSES
    and not name.startswith(".")
    and os.path.isdir(os.path.join(MODELS_PATH, name))
)

result_dict = {}

for ticker in tqdm_notebook(ticker_files, desc=""):

    symbol = _ticker_symbol(ticker)
    ticker_frame = _load_ticker_frame(ticker)

    # ---------------- neural networks (RNN / LSTM / GRU) ----------------
    # Work on a copy so the helpers from ``utilities`` cannot alter the frame
    # that is reused for the classical models below.
    df = ticker_frame.copy()

    X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(
        df, TARGET_COLUMN, 0.1)
    X_train_arr, X_val_arr, X_test_arr, y_train_arr, y_val_arr, y_test_arr, scaler = transform_data(
        X_train, X_val, X_test, y_train, y_val, y_test, scaling="minmax")
    train_loader, val_loader, test_loader = load_data_into_dataloader(
        X_train_arr, X_val_arr, X_test_arr, y_train_arr, y_val_arr, y_test_arr, batch_size=BATCH_SIZE)

    input_dim = len(X_train.columns)
    output_dim = len(y_train.columns)

    model_params = {
        "input_dim": input_dim,
        "hidden_dim": HIDDEN_DIM,
        "layer_dim": LAYER_DIM,
        "output_dim": output_dim,
        "dropout_prob": DROPOUT
    }

    results = {}
    for model_name, model_cls in _NETWORK_CLASSES.items():
        model = model_cls(**model_params).to(device)

        model_path = _find_checkpoint(model_name, symbol)
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        model.load_state_dict(torch.load(model_path, map_location=device))
        # Inference mode, so dropout is disabled even if
        # Optimization.evaluate does not switch modes itself.
        model.eval()

        # No training happens here; the loss and optimizer are only built
        # because they are passed to Optimization.
        loss_fn = nn.MSELoss(reduction="mean")
        optimizer = optim.Adam(
            model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_RATE)
        opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)

        predictions, values = opt.evaluate(
            test_loader,
            batch_size=1,
            n_features=input_dim
        )
        result = format_predictions(predictions, values, X_test, scaler)
        RMSE, MAE, MAPE = get_evaluation_metrics(
            result["true"], result["pred"])
        results[model_name] = {"RMSE": RMSE, "MAE": MAE, "MAPE": MAPE}

    # ---------------- joblib-serialised models ----------------
    df = ticker_frame

    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]

    # Chronological split: the last 10% of rows form the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.1, shuffle=False)

    results_2 = {}
    for model_name in classical_model_names:
        model_path = os.path.join(MODELS_PATH, model_name, f"{symbol}.joblib")
        model = joblib.load(model_path)
        y_pred = model.predict(X_test)
        RMSE, MAE, MAPE = get_evaluation_metrics(y_test, y_pred)
        results_2[model_name] = {"RMSE": RMSE, "MAE": MAE, "MAPE": MAPE}

    # ---------------- ARIMA ----------------
    y_train = y_train.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    model_path = os.path.join(MODELS_PATH, "ARIMA", f"{symbol}.pkl")
    model = ARIMAResults.load(model_path)
    # The forecast may come back as a Series whose index continues the
    # training data; converting to an array keeps the comparison with y_test
    # positional rather than index-aligned.
    y_pred = np.asarray(model.forecast(len(y_test)))
    RMSE, MAE, MAPE = get_evaluation_metrics(y_test, y_pred)
    results_2["ARIMA"] = {"RMSE": RMSE, "MAE": MAE, "MAPE": MAPE}

    result_dict[symbol] = {**results_2, **results}


In [None]:
import pickle

# Persist the per-ticker, per-model metrics so later analysis does not have
# to re-run the evaluation loop.
with open("results.pickle", mode="wb") as results_file:
    pickle.dump(
        result_dict,
        results_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )
