Setup and Project Root & Import Libraries

In [None]:
import os
import sys

# ==== Set Project Root ====
# The project root is resolved as two directory levels above the current
# working directory, so project packages can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

# Ensure the model save directory exists
model_save_path = os.path.join(project_root, 'Models', 'Weights', 'GRU')
os.makedirs(model_save_path, exist_ok=True)

import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from Models.GRU import GRUModel  # Assuming GRU model is defined in Models/GRU.py
from Training.Helper.PyTorchModular import optuna_tune_and_train, split_params  # Import Optuna function
from Training.Helper.dataPreprocessing import create_sequences, add_lagged_features, add_rolling_features, add_time_features, add_dimension, TRAIN_DATA_SPLIT
from Evaluation.Helper.evaluation_helpers import calc_metrics_arrays, make_evaluation_predictions


Load and Prepare Data & Feature Engineering

In [None]:
# ==== Load & Prepare CSV ====
train_file = os.path.join(project_root, "Data", "Train", "train1990s.csv")
# Parse the month/year observation date into 'ds' and expose the PCEPI series as the target 'y'.
df = (
    pd.read_csv(train_file)
    .assign(ds=lambda frame: pd.to_datetime(frame['observation_date'], format='%m/%Y'))
    .rename(columns={'fred_PCEPI': 'y'})
)


# ==== Feature Engineering ====

# Time-based features derived from the date column (the 'month' column used below
# is expected to come from this helper).
df = add_time_features(df, date_col='ds')

# Seasonality: sine/cosine pairs of the month at the first three harmonics of a 12-month cycle.
for k in (1, 2, 3):
    month_angle = 2 * np.pi * k * df['month'] / 12
    df[f'sin_{k}'] = np.sin(month_angle)
    df[f'cos_{k}'] = np.cos(month_angle)

# Lagged copies of the target at 1, 3 and 6 steps.
df = add_lagged_features(df, target_cols=['y'], lags=[1, 3, 6])

# Rolling-window features of the target over 3, 6 and 12 steps.
df = add_rolling_features(df, target_col='y', windows=[3, 6, 12])

# Drop every row that still contains a NaN, then renumber the index from zero.
df = df.dropna()
df = df.reset_index(drop=True)


Scaling and Sequence Preparation & DataLoaders

In [None]:
# ==== Scaling ====
# Model inputs: the target itself plus every engineered seasonal, lag and rolling column.
feature_prefixes = ('sin_', 'cos_', 'y_lag_', 'rolling_')
feature_cols = ['y'] + [name for name in df.columns if name.startswith(feature_prefixes)]

# NOTE(review): the scaler is fitted on every row, before the train/validation
# split further down, so validation rows contribute to the scaling statistics —
# confirm this is intended.
scaler = StandardScaler()
scaled = scaler.fit_transform(df[feature_cols])
scaled_df = pd.DataFrame(scaled, columns=feature_cols)

# ==== Sequence Prep ====
sequence_length = 12
n_features = len(feature_cols)
X, y = create_sequences(scaled_df[feature_cols].values, scaled_df['y'].values, sequence_length)
# Force the (samples, sequence_length, n_features) layout.
X = X.reshape((-1, sequence_length, n_features))
y = add_dimension(y)

# ==== Train/Val Split ====
# The leading TRAIN_DATA_SPLIT fraction of the sequences trains; the remainder validates.
split_idx = int(TRAIN_DATA_SPLIT * len(X))
X_train, X_val = X[:split_idx], X[split_idx:]
y_train, y_val = y[:split_idx], y[split_idx:]

# ==== DataLoaders ====
batch_size = 32
train_dataset = TensorDataset(*(torch.tensor(arr).float() for arr in (X_train, y_train)))
val_dataset = TensorDataset(*(torch.tensor(arr).float() for arr in (X_val, y_val)))

# Only the training loader shuffles; the validation loader keeps the original order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)


Set Device and Train with Optuna & Prediction Logic

In [None]:
# ==== Device & Save Path ====
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
os.makedirs(model_save_path, exist_ok=True)

# ==== Hyperparameter search with optuna_tune_and_train (PyTorchModular) ====

# Constructor arguments that stay fixed across trials; input_size follows the
# number of feature columns.
model_invariates = {'input_size': len(feature_cols), 'output_size': 1}

# Tunable arguments, written as (type, (a, b)) pairs — presumably the lower and
# upper bounds of each range; confirm against optuna_tune_and_train.
model_search_space = {
    "hidden_size": (int, (32, 512)),
    "num_layers": (int, (1, 6)),
}
optim_search_space = {"lr": (float, (1e-5, 1e-1))}

tuning_kwargs = dict(
    model_class=GRUModel,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    model_search_space=model_search_space,
    model_invariates=model_invariates,
    optim_search_space=optim_search_space,
    max_epochs=50,
    model_save_path=model_save_path,
    model_name="GRU_exog_Optuna",
    n_trials=500,
    return_study=True,
    verbose=True,
)
best_model, metadata, study = optuna_tune_and_train(**tuning_kwargs)

print("Training completed. Best hyperparameters:", study.best_params)

# Separate the winning parameters into model-constructor and optimiser groups.
best_model_params, best_optimiser_params = split_params(
    study.best_params, model_search_space, optim_search_space
)


# ==== Prediction Logic ====

def load_model(model_save_path, model_invariates, best_model_params, device,
               model_name="GRU_exog_Optuna"):
    """Rebuild the tuned GRU and load its best saved weights for inference.

    Args:
        model_save_path: Directory containing the ``<model_name>_best.pth`` file.
        model_invariates: Fixed keyword arguments for the ``GRUModel`` constructor.
        best_model_params: Tuned keyword arguments for the ``GRUModel`` constructor.
        device: Torch device the model is moved to and the weights are mapped onto.
        model_name: Checkpoint file prefix. The default keeps the previously
            hard-coded ``GRU_exog_Optuna_best.pth`` file name.

    Returns:
        The ``GRUModel`` instance with weights loaded, switched to eval mode.
    """
    best_model_path = os.path.join(model_save_path, f"{model_name}_best.pth")
    best_model = GRUModel(**model_invariates, **best_model_params).to(device)
    # map_location lets a checkpoint that was written on a GPU be restored on a
    # CPU-only machine (and vice versa) instead of failing during deserialisation.
    state_dict = torch.load(best_model_path, map_location=device)
    best_model.load_state_dict(state_dict)
    best_model.eval()
    return best_model

def make_predictions(model, X_val, y_val, scaler, feature_cols, device):
    """Predict on ``X_val`` and return targets and predictions on the original scale.

    Args:
        model: Callable taking a float tensor on ``device`` and returning one
            prediction per sample.
        X_val: Array-like of model inputs.
        y_val: Array-like of scaled target values, one per sample.
        scaler: Object whose ``inverse_transform`` accepts an array with
            ``len(feature_cols)`` columns; the target must be column 0.
        feature_cols: Feature names; only their count is used, to pad the
            target column up to the width the scaler expects.
        device: Torch device on which the forward pass is run.

    Returns:
        Tuple ``(y_val_original, y_pred_original)`` of 1-D NumPy arrays.
    """
    with torch.no_grad():
        inputs = torch.tensor(X_val).float().to(device)
        # reshape(-1) rather than squeeze(): squeeze() turns a single-sample
        # batch into a 0-d array, which has no len() and broke the padding below.
        y_pred = model(inputs).reshape(-1).cpu().numpy()

    n_padding = len(feature_cols) - 1

    def _to_original_scale(values):
        # Pad the target column with zeros so the array has the width the scaler
        # expects, then keep only the (inverse-transformed) target column.
        column = np.asarray(values).reshape(-1, 1)
        padded = np.hstack([column, np.zeros((len(column), n_padding))])
        return scaler.inverse_transform(padded)[:, 0]

    y_val_original = _to_original_scale(y_val)
    y_pred_original = _to_original_scale(y_pred)

    return y_val_original, y_pred_original

In [None]:
from Training.Helper.PyTorchModular import loss_curve

# Graph the per-epoch training and validation loss recorded for the final
# training run of the best model.
train_losses = metadata['trainLoss']
valid_losses = metadata['validLoss']
loss_curve(train_losses, valid_losses, 'GRU Best Model Final Training vs. Validation Loss')

RMSE and Saving Predictions & Main Execution Function

In [None]:
def trim_and_save_predictions(predictions_path, n_trim=48):
    """Keep only the last ``n_trim`` saved predictions, overwriting the file in place.

    Args:
        predictions_path: Path of the ``.npy`` file to load and overwrite.
        n_trim: Number of trailing values to keep. If the file holds fewer
            values, all of them are kept.

    Raises:
        ValueError: If ``n_trim`` is not positive.
    """
    if n_trim <= 0:
        # A slice of [-0:] would silently keep the whole array instead of nothing.
        raise ValueError(f"n_trim must be a positive integer, got {n_trim}")

    preds = np.load(predictions_path)
    preds_trimmed = preds[-n_trim:]
    np.save(predictions_path, preds_trimmed)
    # Report the file that was actually rewritten rather than a hard-coded name.
    print(f"Trimmed {os.path.basename(predictions_path)} to {len(preds_trimmed)} values and saved.")

def main(model_save_path, model_invariates, best_model_params, val_loader, scaler, device, project_root):
    """Evaluate the best saved GRU on the validation loader and save its predictions.

    Loads the saved model, produces validation predictions, prints and displays
    the metrics table, then writes the predictions to
    ``<project_root>/Predictions/GRU.npy`` keeping only the last 48 values.

    Args:
        model_save_path: Directory holding the saved best-model weights.
        model_invariates: Fixed keyword arguments for the model constructor.
        best_model_params: Tuned keyword arguments for the model constructor.
        val_loader: Validation data loader passed to ``make_evaluation_predictions``.
        scaler: Fitted scaler passed to ``make_evaluation_predictions`` as ``y_scaler``.
        device: Torch device used for loading the model and predicting.
        project_root: Project root directory; predictions go in its ``Predictions`` folder.
    """
    # Load the trained model
    model = load_model(model_save_path, model_invariates, best_model_params, device)

    # Make predictions and inverse transform
    y_pred_original, y_val_original = make_evaluation_predictions(
        model, val_loader, device=device, y_scaler=scaler, y_scaler_features=['y']
    )

    # Calculate metrics (change from default using 'metrics' keyword in calc_metrics_kwargs)
    metrics = calc_metrics_arrays(y_val_original,  y_pred_original, model_names=['GRU_exog'])
    print('Metrics on validation predictions:')
    # `display` is not imported in this file; it is presumably the notebook
    # (IPython) built-in — confirm before running outside a notebook.
    display(metrics)

    # Create the output folder first: np.save raises if the directory is missing.
    predictions_dir = os.path.join(project_root, "Predictions")
    os.makedirs(predictions_dir, exist_ok=True)

    # Save predictions to GRU.npy
    # NOTE(review): the metrics are labelled 'GRU_exog' but the file is 'GRU.npy' —
    # confirm this file name is the intended one for this model variant.
    pred_save_path = os.path.join(predictions_dir, "GRU.npy")
    np.save(pred_save_path, y_pred_original)

    # Keep only the last 48 predictions in the saved file
    trim_and_save_predictions(pred_save_path, n_trim=48)

# Relies on names defined in earlier cells: model_save_path, model_invariates,
# best_model_params, val_loader, scaler, device and project_root.
main(
    model_save_path=model_save_path,
    model_invariates=model_invariates,
    best_model_params=best_model_params,
    val_loader=val_loader,
    scaler=scaler,
    device=device,
    project_root=project_root,
)

In [None]:
# In case this is found useful
#from Training.Helper.weightFileCleaner import cleanWeightFiles

#cleanWeightFiles('GRU_exog_Optuna', earlyStopped=True, dirPath=model_save_path, verbose=True)