In [None]:
#allows imports from other folders in project
import os
import sys
# Resolve the parent of the current working directory and register it on the
# import path once, so packages that live one level up can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# NBEATSx MODEL

## Load Data and Create Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import os
from Models.NBEATSx import NBeatsx
from dataPreprocessing import load_data, prepare_dataloader
from PyTorchModular import train_model, loss_curve
from EarlyStopping import EarlyStopping
from Evaluation.evaluation_helpers import evaluate_model

# Reproducibility: seed NumPy and PyTorch before any data handling or model
# construction so that anything drawing from these generators repeats between
# "Restart & Run All" executions.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load data with exogenous variables (Fourier features are switched off in config)
train_file = "../Data/Train/train1990s.csv"
sequence_length = 48
config = {
    "use_fft": False,  # Fourier Features Disabled
    "use_exog": True   # Exogenous Variables Enabled
}

# load_data returns train/valid/test splits (inputs, exogenous inputs, targets),
# the date axis, and the three fitted scalers, in this order.
(
    X_train, X_exog_train, y_train,
    X_valid, X_exog_valid, y_valid,
    X_test, X_exog_test, y_test,
    dates, X_scaler, exog_scaler, y_scaler,
) = load_data(train_file, sequence_length=sequence_length, config=config)

# Prefer the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize Model
# exog_size falls back to 0 when load_data produced no exogenous inputs.
exog_size = X_exog_train.shape[1] if X_exog_train is not None else 0
model = NBeatsx(input_size=X_train.shape[1], exog_size=exog_size).to(device)


## Train

In [None]:
# Prepare DataLoaders (one per split, all sharing the same batch size)
batch_size = 128
dataloader_train = prepare_dataloader(X_train, y_train, X_exog_train, batch_size)
dataloader_valid = prepare_dataloader(X_valid, y_valid, X_exog_valid, batch_size)
dataloader_test = prepare_dataloader(X_test, y_test, X_exog_test, batch_size)

# Training Setup
loss_fn = nn.SmoothL1Loss()  # Huber Loss (SmoothL1Loss) for Robust Training
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=5e-4)
# Halve the learning rate after 5 epochs without improvement of the monitored
# metric. The `verbose` argument is intentionally not passed: it has been
# deprecated since PyTorch 2.2 (later releases drop it); use
# `scheduler.get_last_lr()` to inspect the current learning rate instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
savepath = '../Models/Weights/'
# Make sure the checkpoint directory exists before training tries to write to it.
os.makedirs(savepath, exist_ok=True)

# Train the model.
# NOTE(review): early stopping is presumably handled inside train_model — confirm.
trainingMetadata = train_model(
    model=model,
    maxEpochs=120,
    modelSavePath=savepath,
    modelName='nbeatsx',
    dataLoaderTrain=dataloader_train,
    dataLoaderValid=dataloader_valid,
    lossFn=loss_fn,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler
)

## Visualise Output

In [None]:
# Look up the checkpoint path recorded by training and fail fast if it is unusable
best_model_path = trainingMetadata.get("best_model_path")
if not best_model_path or not os.path.exists(best_model_path):
    raise FileNotFoundError(f"Best model file not found: {best_model_path}")

print(f"Loading best model from: {best_model_path}")
# NOTE(review): evaluate_model receives y_scaler, so predictions are presumably
# already back on the original scale — confirm against its implementation.
predictions = evaluate_model(model, best_model_path, X_test, X_exog_test, device, y_scaler)

# Undo the target scaling so the actual values are on the original scale
y_test_original = y_scaler.inverse_transform(y_test)

# Overlay actual and predicted series on the trailing part of the date axis
fig_pred, ax_pred = plt.subplots(figsize=(16, 8))
ax_pred.plot(
    dates[-len(y_test_original):],
    y_test_original,
    label="Actual PCEPI",
    color="blue",
    linewidth=2,
)
ax_pred.plot(
    dates[-len(predictions):],
    predictions,
    label="Predicted PCEPI (N-BEATSx)",
    linestyle="dashed",
    color="orange",
    linewidth=2,
)
ax_pred.set_xlabel("Date")
ax_pred.set_ylabel("PCEPI")
ax_pred.set_title("Final Optimized N-BEATSx PCE Prediction")
ax_pred.legend()
ax_pred.grid()
plt.show()

## Evaluation

In [None]:
# Compute Evaluation Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Coerce both series to (n_samples, n_outputs) before doing element-wise maths.
# Without this, a (N, 1) array minus a (N,) array silently broadcasts to (N, N)
# and the hand-written MAPE below would average over every actual/predicted
# pair instead of matching rows.
y_true_2d = np.asarray(y_test_original).reshape(len(y_test_original), -1)
y_pred_2d = np.asarray(predictions).reshape(len(predictions), -1)

mae = mean_absolute_error(y_true_2d, y_pred_2d)
rmse = np.sqrt(mean_squared_error(y_true_2d, y_pred_2d))
# MAPE divides by the actual values, so it is undefined wherever they are zero.
mape = np.mean(np.abs((y_true_2d - y_pred_2d) / y_true_2d)) * 100

print(f"Mean Absolute Error (MAE): {mae:.5f}")
print(f"Root Mean Square Error (RMSE): {rmse:.5f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

In [None]:
# Compute Residual Errors (actual minus predicted).
# Both operands are first coerced to (n_samples, n_outputs) so that a (N, 1)
# array and a (N,) array are subtracted row by row rather than being
# broadcast into an (N, N) matrix.
residuals = (
    np.asarray(y_test_original).reshape(len(y_test_original), -1)
    - np.asarray(predictions).reshape(len(predictions), -1)
)

# Visualize Residual Errors Over Time
plt.figure(figsize=(16, 8))
plt.plot(dates[-len(residuals):], residuals, label="Residual Errors", color="red", linewidth=2)
plt.axhline(y=0, color='black', linestyle='dashed')  # Reference line at 0
plt.xlabel("Date")
plt.ylabel("Residual Error")
plt.title("Residual Errors Over Time")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Distribution of the residuals, with a dashed marker at zero error
fig_hist, ax_hist = plt.subplots(figsize=(10, 6))
ax_hist.hist(residuals, bins=30, edgecolor='black', color='red', alpha=0.7)
ax_hist.axvline(x=0, color='black', linestyle='dashed')  # Reference line at 0
ax_hist.set_xlabel("Residual Error")
ax_hist.set_ylabel("Frequency")
ax_hist.set_title("Histogram of Residuals")
ax_hist.grid()
plt.show()
