In [None]:
pip install torch optuna pandas numpy scikit-learn

Setup: Project Root, Paths & Library Imports

In [None]:
import sys, os

# Set paths
# The project root is taken to be two directories above the current working
# directory — NOTE(review): this only holds when the notebook is launched from
# its own folder; confirm if it is ever run from elsewhere.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))

# Make the project packages importable. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

# Directory used for the RNN weight files; created if it does not exist yet.
model_save_path = os.path.join(project_root, "Models", "Weights", "RNN")
os.makedirs(model_save_path, exist_ok=True)

import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

from Models.RNN import RNNModel
from Training.Helper.PyTorchModular import optuna_tune_and_train
from Training.Helper.dataPreprocessing import create_sequences, add_lagged_features, add_rolling_features, add_time_features
from Training.Helper.weightFileCleaner import cleanWeightFiles

Load and Prepare Data & Feature Engineering

In [None]:
# Read the training CSV, parse the month/year observation dates into `ds`,
# and expose the PCEPI series under the generic target name `y`.
train_csv_path = os.path.join(project_root, "Data", "Train", "train1990s.csv")
df = pd.read_csv(train_csv_path)
df["ds"] = pd.to_datetime(df["observation_date"], format="%m/%Y")
df = df.rename(columns={"fred_PCEPI": "y"})
df["month"] = df["ds"].dt.month

# Seasonal features: sine/cosine pairs of the calendar month for harmonics 1-3.
# Columns are added in the order sin_k, cos_k for each k.
for k in (1, 2, 3):
    harmonic_angle = 2 * np.pi * k * df["month"] / 12
    df[f"sin_{k}"] = np.sin(harmonic_angle)
    df[f"cos_{k}"] = np.cos(harmonic_angle)

# Lag, rolling and time-based features come from the project helpers, applied in
# sequence; rows containing any NaN afterwards are dropped and the index is rebuilt.
df = (
    df
    .pipe(add_lagged_features, target_cols=["y"], lags=[1, 3, 6])
    .pipe(add_rolling_features, target_col="y", windows=[3, 6, 12])
    .pipe(add_time_features, date_col="ds")
    .dropna()
    .reset_index(drop=True)
)


Feature Scaling, Sequence Preparation & Train/Validation Split

In [None]:
# Feature selection: the target `y` first (index 0), followed by every column whose
# name starts with one of the seasonal / lag / rolling prefixes.
feature_cols = ['y'] + [col for col in df.columns if col.startswith(("sin_", "cos_", "y_lag_", "rolling_"))]

sequence_length = 12
train_fraction = 0.8

# Scale features.
# The scaler is fitted on the leading (training) share of the rows only and then
# applied to the whole frame. Fitting on every row would let the validation
# period influence the mean/std used for standardisation (data leakage).
# NOTE(review): this assumes create_sequences preserves chronological order, so
# the first 80% of rows fall inside the training windows — confirm against the helper.
n_fit_rows = int(train_fraction * len(df))
scaler = StandardScaler()
scaler.fit(df[feature_cols].iloc[:n_fit_rows])
scaled = scaler.transform(df[feature_cols])
scaled_df = pd.DataFrame(scaled, columns=feature_cols)

# Create sequences of `sequence_length` steps; X is forced to
# (samples, sequence_length, n_features) regardless of the helper's output layout.
X, y = create_sequences(scaled_df[feature_cols].values, scaled_df["y"].values, sequence_length)
X = X.reshape((-1, sequence_length, len(feature_cols)))

# Train/val split: positional, first 80% of the sequences for training, rest for validation.
split_idx = int(train_fraction * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]


DataLoaders, Device Setup, Optuna Training & Validation Predictions

In [None]:
# Reproducibility: seed NumPy and PyTorch before the shuffled DataLoader and any
# model initialisation draw from the global random generators.
# NOTE(review): the Optuna sampler lives inside optuna_tune_and_train and is not
# visible here; it may need its own seed for fully repeatable trials.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# DataLoaders: arrays are converted to float32 tensors; only the training loader shuffles.
train_dataset = TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train).float())
val_dataset = TensorDataset(torch.tensor(X_val).float(), torch.tensor(y_val).float())
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Device and clean weight files
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cleanWeightFiles("RNN", dirPath=model_save_path, earlyStopped=False, verbose=True)

# Optuna tuning and training
# All settings for the project helper are gathered in one mapping so they can be
# read (and tweaked) in a single place before the call.
tuning_kwargs = dict(
    model_class=RNNModel,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    input_size=len(feature_cols),  # one input per selected feature column
    max_epochs=50,
    model_save_path=model_save_path,
    model_name="RNN_exog_Optuna",
    n_trials=30,
    verbose=True,
)

# The helper hands back the selected model together with its hyperparameters.
best_model, best_params = optuna_tune_and_train(**tuning_kwargs)

print("Training done. Best hyperparameters:", best_params)

# Model evaluation loop: switch the model to eval mode and collect its outputs
# and the matching targets over every validation batch, with gradients disabled.
best_model.eval()
predictions = []
actuals = []
with torch.no_grad():
    for batch_inputs, batch_targets in val_loader:
        # Inputs go to the training device; outputs come back to the CPU as a flat array
        batch_outputs = best_model(batch_inputs.to(device))
        predictions.extend(batch_outputs.cpu().numpy().reshape(-1))
        actuals.extend(batch_targets.numpy().reshape(-1))

# Inverse transform helper
def inverse_transform_single_column(values, scaler, target_index=0):
    """Convert scaled values of a single feature column back to original units.

    The scaler's ``inverse_transform`` expects a matrix with every fitted column,
    so the values are placed into an otherwise all-zero matrix at ``target_index``,
    the whole matrix is inverse-transformed, and only that column is returned.

    Args:
        values: Scaled values for one column; any array-like, flattened to 1-D.
        scaler: Fitted scaler exposing ``inverse_transform``. Its
            ``n_features_in_`` attribute gives the matrix width when present;
            otherwise the module-level ``feature_cols`` list is used.
        target_index: Column position of ``values`` within the scaler's features.

    Returns:
        1-D ``numpy.ndarray`` holding the values in original units.
    """
    column = np.asarray(values, dtype=float).reshape(-1)

    # Prefer the width recorded on the scaler itself so the helper does not
    # depend on a notebook global staying in sync with the fitted scaler.
    n_features = getattr(scaler, "n_features_in_", None)
    if n_features is None:
        n_features = len(feature_cols)

    dummy = np.zeros((column.shape[0], n_features))
    dummy[:, target_index] = column
    return scaler.inverse_transform(dummy)[:, target_index]

# Inverse transform: bring the predictions and the true targets back to the
# original scale of the target column (index 0 by default in the helper).
y_pred_inv, y_true_inv = (
    inverse_transform_single_column(np.array(scaled_series), scaler)
    for scaled_series in (predictions, actuals)
)


Saving Predictions & Evaluation Metrics (RMSE, MAE)

In [None]:
# Save predictions
pred_dir = os.path.join(project_root, "Predictions")
os.makedirs(pred_dir, exist_ok=True)

# Only the last 48 predictions are persisted. A full-length save to this same
# path used to run first and was overwritten straight away by the trimmed one,
# so that redundant write is gone; the file left on disk is unchanged.
pred_path = os.path.join(pred_dir, "RNN.npy")
np.save(pred_path, y_pred_inv[-48:])
print("Trimmed and saved RNN.npy")


# Final metrics on the inverse-transformed validation values:
# root-mean-squared error and mean absolute error.
mean_sq_error = mean_squared_error(y_true_inv, y_pred_inv)
rmse = np.sqrt(mean_sq_error)
absolute_errors = np.abs(y_true_inv - y_pred_inv)
mae = np.mean(absolute_errors)
print(f"Evaluation Metrics:\nRMSE: {rmse:.4f} | MAE: {mae:.4f}")
