# AS/RS SIM OPT - Metamodel training and testing

## Import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import numpy as np
np.set_printoptions(threshold=sys.maxsize, suppress=True)
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

from MetaSimOpt.metamodels import ModelFactory, RNN_Metamodel, LSTM_Metamodel, GRU_Metamodel
from MetaSimOpt.handlers import HandlerTraining, HandlerMetamodel, HandlerHyperSearch
from MetaSimOpt.utils import compute_residual_stats, print_residuals, plot_residuals, compute_metrics

In [None]:
# Select the compute device and remember the notebook's working directory.

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = 'cpu'  (uncomment to force CPU execution)
print(f'Device --> {device}')

# Data and result paths in this notebook are built relative to this directory.
curr_dir = os.getcwd()

## Training

### Load data

In [None]:
# Load the metamodel training dataset and derive the sizes the models need.

dir_data = os.path.join(curr_dir, "1_data", "processed", "metamodel_training")

# Inputs fed to the recurrent branch of the metamodel.
path_features_rec = os.path.join(dir_data, "features_rec.npy")
features_rec = np.load(path_features_rec)

# Inputs fed to the linear branch of the metamodel.
path_features_lin = os.path.join(dir_data, "features_lin.npy")
features_lin = np.load(path_features_lin)

# Raw outputs from which the regression target is derived.
path_output = os.path.join(dir_data, "output.npy")
output_ann = np.load(path_output)

print(f'Shape features rec = {features_rec.shape}')
print(f'Shape features lin = {features_lin.shape}')

# Report the coordinates of any NaN in the raw output (assumes a 2-D array).
nan_indices = np.argwhere(np.isnan(output_ann))

if nan_indices.size > 0:
    print("The output contains NaN values at the following coordinates:")
    for index in nan_indices:
        print(f"Row: {index[0]}, Column: {index[1]}")
else:
    print("The output does not contain NaN values.")

# Regression target: per-sample mean over ALL output columns, kept as a
# column vector of shape (n_samples, 1).
# NOTE: a previous `output_ann[:,24:-1]` slice was assigned here and then
# immediately overwritten, so it never had any effect; it has been removed.
# Re-introduce the slice explicitly if only those columns should be averaged.
y_reg = np.mean(output_ann, axis=1, keepdims=True)
print(f'Shape labels = {y_reg.shape}')

data = {
    'features_rec' : features_rec,
    'features_lin' : features_lin,
    'labels' : y_reg
}

# Sizes passed to ModelFactory in the cells below.
input_size_rec = features_rec.shape[-1]
input_size_lin = features_lin.shape[-1]
output_size = y_reg.shape[-1]
# assumes features_rec is (samples, sequence, features) — TODO confirm
max_seq_length = features_rec.shape[1]

x = [features_rec, features_lin]
y = y_reg

### Hyperparameter search

In [None]:
# Recurrent-cell name -> metamodel class evaluated in the search loop.
map_metamodels = dict(
    rnn=RNN_Metamodel,
    lstm=LSTM_Metamodel,
    gru=GRU_Metamodel,
)

In [None]:
# Run a hyperparameter search for every recurrent-cell variant.

# Search settings shared by all variants.
search_method = "random"
validation_method = "kfold"
n_folds = 5

for metamodel, metamodel_class in map_metamodels.items():

    print(f"\nSEARCHING HYPERPARAMETERS FOR REC CELL {metamodel.upper()}")

    # Factory that builds models of this class with the dataset-derived sizes.
    factory = ModelFactory(
        model_class=metamodel_class,
        input_size_rec=input_size_rec,
        input_size_lin=input_size_lin,
        output_size=output_size,
        max_seq_length=max_seq_length,
    )

    tr_handler = HandlerTraining(model_class=metamodel_class, factory=factory, device=device)
    tr_handler.load_dataset(x=x, y=y, normalisation="min-max")
    tr_handler.set_loss_function(loss_function=nn.L1Loss)

    # The full search space is the union of the model and training spaces.
    search_space_model = tr_handler.model_class.get_search_space()
    search_space_training = tr_handler.get_search_space(model_class=tr_handler.model_class)
    search_space = search_space_model | search_space_training

    # Results of the search are stored in one folder per cell type.
    path_dir = os.path.join(
        curr_dir,
        "2_training_and_testing_results",
        "hyperparameters_search",
        f'{metamodel}',
    )

    searcher = HandlerHyperSearch(
        handler=tr_handler,
        search_space=search_space,
        method=search_method,
        cv_mode=validation_method,
        n_folds=n_folds,
        path_dir=path_dir,
    )
    searcher.enable_pruner(method="median", n_startup_trials=10, n_warmup_steps=50)

    print(f'Search method: {search_method}, validation method: {validation_method}')
    best_result = searcher.run(n_trials=250, clear_results=False, reset_study=False)

    print("\nBest result")
    print(f'Loss -> {best_result["val_loss"]}')
    print(f'Hyperparameters model -> {best_result["hyper_model"]}')
    print(f'Hyperparameters training -> {best_result["hyper_train"]}')

### Training and validation

In [None]:
# Recurrent-cell name -> metamodel class used by the validation loops.
map_metamodels = dict(
    rnn=RNN_Metamodel,
    lstm=LSTM_Metamodel,
    gru=GRU_Metamodel,
)

Set model hyperparameters

In [None]:
# Model hyperparameters, one independent dict per recurrent-cell type.
# NOTE(review): the "*_1" entries are 0 while the matching "hid_lin_layers_*"
# is 1 — presumably placeholders for an unused second layer; confirm.

hyper_model_rnn = {
    "hid_rec_layers": 3,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 128,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 128,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.3,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

hyper_model_lstm = {
    "hid_rec_layers": 1,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 32,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 64,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.1,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

hyper_model_gru = {
    "hid_rec_layers": 1,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 32,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 64,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.1,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

# Lookup by the same keys used in map_metamodels.
set_hyper_model = {
    'rnn': hyper_model_rnn,
    'lstm': hyper_model_lstm,
    'gru': hyper_model_gru,
}

Set training hyperparameters

In [None]:
# Training hyperparameters, one independent dict per recurrent-cell type.

hyper_training_rnn = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

hyper_training_lstm = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

hyper_training_gru = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

# Lookup by the same keys used in map_metamodels.
set_hyper_training = {
    'rnn': hyper_training_rnn,
    'lstm': hyper_training_lstm,
    'gru': hyper_training_gru,
}

Train and validate (k-fold) one model for each type of rec cell

In [None]:
# K-fold cross-validation of one metamodel per recurrent-cell type; the
# per-model results are exported to an Excel file.
n_folds = 5

for metamodel, metamodel_class in map_metamodels.items():

    print(f"\nTRAINING AND VALIDATING METAMODEL WITH REC CELL {metamodel.upper()}")

    factory = ModelFactory(
        model_class=metamodel_class,
        input_size_rec=input_size_rec,
        input_size_lin=input_size_lin,
        output_size=output_size,
        max_seq_length=max_seq_length,
    )

    tr_handler = HandlerTraining(
        model_class=metamodel_class,
        factory=factory,
        device=device,
    )

    # Use "min-max" normalisation like the hyperparameter search, the
    # single-split validation and the final training do. This cell used
    # "standard", so its scores described a different preprocessing pipeline
    # from the one the tuned hyperparameters and the saved model rely on.
    tr_handler.load_dataset(x=x, y=y, normalisation="min-max")
    tr_handler.set_model_hyperparameters(set_hyper_model[metamodel])
    tr_handler.set_training_hyperparameters(set_hyper_training[metamodel])
    tr_handler.set_loss_function(nn.L1Loss)
    tr_handler.set_optimiser(optim.Adam)

    losses, val_losses, scores, val_scores = tr_handler.train(
        validation=True,
        k_fold=True,
        parallel=True,
        print_progress=True,
        n_folds=n_folds,
    )

    # One results folder per cell type.
    path_dir = os.path.join(curr_dir, "2_training_and_testing_results", "metamodel_trained", f'{metamodel}')
    file_name = f"res_training_kfold_{metamodel}.xlsx"
    tr_handler.save_to_excel(path_dir=path_dir, file_name=file_name)

Train and validate (single split) one model for each type of rec cell

In [None]:
# Single train/validation split (20 % held out) of one metamodel per
# recurrent-cell type; results are exported to an Excel file.
for metamodel, metamodel_class in map_metamodels.items():

    print(f"\nTRAINING AND VALIDATING METAMODEL WITH REC CELL {metamodel.upper()}")

    factory = ModelFactory(
        model_class=metamodel_class,
        input_size_rec=input_size_rec,
        input_size_lin=input_size_lin,
        output_size=output_size,
        max_seq_length=max_seq_length,
    )
    tr_handler = HandlerTraining(model_class=metamodel_class, factory=factory, device=device)

    # Configure data, hyperparameters, loss and optimiser for this cell type.
    tr_handler.load_dataset(x=x, y=y, normalisation="min-max")
    tr_handler.set_model_hyperparameters(set_hyper_model[metamodel])
    tr_handler.set_training_hyperparameters(set_hyper_training[metamodel])
    tr_handler.set_loss_function(nn.L1Loss)
    tr_handler.set_optimiser(optim.Adam)

    losses, val_losses, scores, val_scores = tr_handler.train(validation=True, test_size=0.2)

    # One results folder per cell type.
    path_dir = os.path.join(
        curr_dir,
        "2_training_and_testing_results",
        "metamodel_trained",
        f'{metamodel}',
    )
    file_name = f"res_training_split_{metamodel}.xlsx"
    tr_handler.save_to_excel(path_dir=path_dir, file_name=file_name)

### Final training

Train one model for each type of rec cell on the entire dataset

In [None]:
# Recurrent-cell name -> metamodel class used by the final training loop.
map_metamodels = dict(
    rnn=RNN_Metamodel,
    lstm=LSTM_Metamodel,
    gru=GRU_Metamodel,
)

In [None]:
# Model hyperparameters for the final training, one independent dict per
# recurrent-cell type.
# NOTE(review): the "*_1" entries are 0 while the matching "hid_lin_layers_*"
# is 1 — presumably placeholders for an unused second layer; confirm.

hyper_model_rnn = {
    "hid_rec_layers": 3,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 128,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 128,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.3,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

hyper_model_lstm = {
    "hid_rec_layers": 1,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 32,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 64,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.1,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

hyper_model_gru = {
    "hid_rec_layers": 1,
    "hid_rec_size": 8,
    "rec_dropout": 0.0,
    "hid_lin_layers_1": 1,
    "hid_lin_size_1_0": 32,
    "hid_lin_size_1_1": 0,
    "hid_lin_layers_2": 1,
    "hid_lin_size_2_0": 64,
    "hid_lin_size_2_1": 0,
    "linear_dropout_1_0": 0.3,
    "linear_dropout_1_1": 0.0,
    "linear_dropout_2_0": 0.1,
    "linear_dropout_2_1": 0.0,
    "bidirectional": True,
}

# Lookup by the same keys used in map_metamodels.
set_hyper_model = {
    'rnn': hyper_model_rnn,
    'lstm': hyper_model_lstm,
    'gru': hyper_model_gru,
}

In [None]:
# Training hyperparameters for the final training, one independent dict per
# recurrent-cell type.

hyper_training_rnn = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

hyper_training_lstm = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

hyper_training_gru = {
    "epochs": 400,
    "batch_size": 32,
    "learning_rate": 5e-4,
    "w_decay": 1e-3,
    "l1_lambda": 0,
}

# Lookup by the same keys used in map_metamodels.
set_hyper_training = {
    'rnn': hyper_training_rnn,
    'lstm': hyper_training_lstm,
    'gru': hyper_training_gru,
}

In [None]:
# Final training: fit one metamodel per recurrent-cell type on the whole
# training set (no validation split) and save it to disk.
for metamodel, metamodel_class in map_metamodels.items():

    print(f"TRAINING METAMODEL WITH REC CELL {metamodel.upper()}")

    factory = ModelFactory(
        model_class=metamodel_class,
        input_size_rec=input_size_rec,
        input_size_lin=input_size_lin,
        output_size=output_size,
        max_seq_length=max_seq_length,
    )

    tr_handler = HandlerTraining(
        model_class=metamodel_class,
        factory=factory,
        device=device,
    )

    tr_handler.load_dataset(x=x, y=y, normalisation="min-max")
    tr_handler.set_model_hyperparameters(set_hyper_model[metamodel])
    tr_handler.set_training_hyperparameters(set_hyper_training[metamodel])
    # Train the shipped model with the same loss and optimiser that the
    # validation cells use (L1Loss + Adam). This cell previously used
    # MSELoss + AdamW, so the saved model was not the validated configuration;
    # Adam and AdamW also apply weight decay differently.
    # NOTE(review): revert if MSELoss/AdamW was a deliberate choice, and then
    # re-run the validation cells with that configuration.
    tr_handler.set_loss_function(nn.L1Loss)
    tr_handler.set_optimiser(optim.Adam)

    # With validation disabled only the training outputs are meaningful.
    losses, val_losses, scores, val_scores = tr_handler.train(validation=False, compute_score=False)

    # One folder per cell type; the testing cells load the model from here.
    path_dir = os.path.join(curr_dir, "2_training_and_testing_results", "metamodel_trained", f'{metamodel}')

    file_name = f"metamodel_{metamodel}.pth"
    tr_handler.save_model(path_dir=path_dir, file_name=file_name, save_data_scaler=True)

## Testing

### Load data

In [None]:
# Load the metamodel testing dataset and build the regression target.

dir_data = os.path.join(curr_dir, "1_data", "processed", "metamodel_testing")

# Inputs fed to the recurrent branch of the metamodel.
path_features_rec = os.path.join(dir_data, "features_rec.npy")
features_rec = np.load(path_features_rec)

# Inputs fed to the linear branch of the metamodel.
path_features_lin = os.path.join(dir_data, "features_lin.npy")
features_lin = np.load(path_features_lin)

# Raw outputs from which the regression target is derived.
path_output = os.path.join(dir_data, "output.npy")
output_ann = np.load(path_output)

print(f'Shape features rec = {features_rec.shape}')
print(f'Shape features lin = {features_lin.shape}')

# Report the coordinates of any NaN in the raw output (assumes a 2-D array).
nan_indices = np.argwhere(np.isnan(output_ann))

if nan_indices.size > 0:
    print("The output contains NaN values at the following coordinates:")
    for index in nan_indices:
        print(f"Row: {index[0]}, Column: {index[1]}")
else:
    print("The output does not contain NaN values.")

# Regression target: per-sample mean over ALL output columns, kept as a
# column vector of shape (n_samples, 1).
# NOTE: two lines with no effect were removed here: an `output_ann[:,24:-1]`
# slice that was immediately overwritten, and a second, duplicated np.mean
# call (the mean of a single column is that column).
y_reg = np.mean(output_ann, axis=1, keepdims=True)
print(f'Shape labels = {y_reg.shape}')

data = {
    'features_rec' : features_rec,
    'features_lin' : features_lin,
    'labels' : y_reg
}

x = [features_rec, features_lin]
y = y_reg

### Test metamodels

In [None]:
# Recurrent-cell name -> metamodel class; the keys select which saved models
# the testing loops load.
map_metamodels = dict(
    rnn=RNN_Metamodel,
    lstm=LSTM_Metamodel,
    gru=GRU_Metamodel,
)

### Single prediction

In [None]:
# Test every saved metamodel on the test set with a plain predict() call,
# print error metrics and residual statistics, and optionally export them.
save_results = False

for metamodel in map_metamodels:

    print(f"\nTESTING METAMODEL WITH REC CELL {metamodel.upper()}")

    # Load the model saved by the final-training cell for this cell type.
    mod_handler = HandlerMetamodel()
    dir_metamodel = os.path.join(curr_dir, "2_training_and_testing_results", "metamodel_trained", f'{metamodel}')
    file_metamodel = f'metamodel_{metamodel}.pth'
    mod_handler.load_model_from_file(dir_metamodel=dir_metamodel, file_metamodel=file_metamodel)
    mod_handler.load_data(data=x)
    predictions = mod_handler.predict()

    # compute_metrics is assumed to return one value per requested metric, in
    # the requested order — TODO confirm. Metrics are looked up by name so a
    # change to the list cannot silently mislabel a value.
    metrics = ["mse", "mae", "mape"]
    results_metrics = compute_metrics(targets=y, predictions=predictions, metrics=metrics)
    metric_by_name = dict(zip(metrics, results_metrics))
    print(f"MSE -> {round(metric_by_name['mse'],2)}")
    print(f"MAE -> {round(metric_by_name['mae'],2)}")
    print(f"MAPE -> {round(metric_by_name['mape'],2)} %")

    residuals, residual_stats = compute_residual_stats(predictions=predictions, targets=y)
    plot_residuals(residuals=residuals, predictions=predictions)
    print_residuals(residual_stats)

    if save_results: # save results
        # The 'mae' and 'mape' columns previously stored results_metrics[0]
        # and [1], i.e. MSE and MAE under the wrong labels. The metrics are
        # stored as scalars, which pandas repeats on every row; the former
        # one-element arrays did not match the length of the other columns.
        data = {
            'targets': y_reg.flatten().squeeze(),
            'predictions': predictions.flatten().squeeze(),
            'mae' : float(metric_by_name['mae']),
            'mape' : float(metric_by_name['mape'])
            }
        df = pd.DataFrame(data)
        dir_results = os.path.join(curr_dir, "2_training_and_testing_results", "testing_results", f'{metamodel}')
        # to_excel does not create missing folders.
        os.makedirs(dir_results, exist_ok=True)
        path_file = os.path.join(dir_results, f"results_testing_{metamodel}.xlsx")
        df.to_excel(path_file, index=False)
        print(f"Results saved at {path_file}")

### MC dropout

In [None]:
# Test every saved metamodel on the test set using predict(mc_samples=10),
# print error metrics and residual statistics, and optionally export them.
save_results = False

for metamodel in map_metamodels:

    print(f"\nTESTING METAMODEL WITH REC CELL {metamodel.upper()}")

    # Create the handler here so this cell does not depend on a `mod_handler`
    # left over from the single-prediction cell (NameError if that cell has
    # not been run).
    mod_handler = HandlerMetamodel()
    dir_metamodel = os.path.join(curr_dir, "2_training_and_testing_results", "metamodel_trained", f'{metamodel}')
    file_metamodel = f'metamodel_{metamodel}.pth'
    mod_handler.load_model_from_file(dir_metamodel=dir_metamodel, file_metamodel=file_metamodel)
    mod_handler.load_data(data=x)
    predictions = mod_handler.predict(mc_samples=10)

    # compute_metrics is assumed to return one value per requested metric, in
    # the requested order — TODO confirm. Metrics are looked up by name so a
    # change to the list cannot silently mislabel a value.
    metrics = ["mse", "mae", "mape"]
    results_metrics = compute_metrics(targets=y, predictions=predictions, metrics=metrics)
    metric_by_name = dict(zip(metrics, results_metrics))
    print(f"MSE -> {round(metric_by_name['mse'],2)}")
    print(f"MAE -> {round(metric_by_name['mae'],2)}")
    print(f"MAPE -> {round(metric_by_name['mape'],2)} %")

    residuals, residual_stats = compute_residual_stats(predictions=predictions, targets=y)
    plot_residuals(residuals=residuals, predictions=predictions)
    print_residuals(residual_stats)

    if save_results: # save results
        # The 'mae' and 'mape' columns previously stored results_metrics[0]
        # and [1], i.e. MSE and MAE under the wrong labels. The metrics are
        # stored as scalars, which pandas repeats on every row; the former
        # one-element arrays did not match the length of the other columns.
        data = {
            'targets': y_reg.flatten().squeeze(),
            'predictions': predictions.flatten().squeeze(),
            'mae' : float(metric_by_name['mae']),
            'mape' : float(metric_by_name['mape'])
            }
        df = pd.DataFrame(data)
        dir_results = os.path.join(curr_dir, "2_training_and_testing_results", "testing_results", f'{metamodel}')
        # to_excel does not create missing folders.
        os.makedirs(dir_results, exist_ok=True)
        # NOTE(review): this is the same file name the single-prediction cell
        # writes, so enabling save_results in both cells overwrites the file.
        path_file = os.path.join(dir_results, f"results_testing_{metamodel}.xlsx")
        df.to_excel(path_file, index=False)
        print(f"Results saved at {path_file}")