In [None]:
# Put the parent of the current working directory on sys.path so that
# packages living next to this notebook's folder can be imported below.
import os
import sys

module_path = os.path.abspath(os.pardir)
on_search_path = module_path in sys.path
if not on_search_path:
    # Append (rather than prepend) so already-installed packages keep priority.
    sys.path.append(module_path)

# N-BEATS Model

## Load Data and Create Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from Models.NBEATS import NBeats
from Helper.dataPreprocessing import load_data, prepare_dataloader, TRAIN_DATA_PATH_1990S
from Helper.PyTorchModular import train_model, loss_curve

# Load Data and Create Model

# Seed the NumPy and PyTorch global RNGs before any data loading or weight
# initialisation so that Restart Kernel -> Run All repeats the same run.
# torch.manual_seed covers CPU and CUDA generators; some CUDA kernels can
# still be non-deterministic, so treat GPU results as "close", not bit-exact.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Options forwarded to load_data.
config = {
    "use_fft": False,  # No FFT needed
    "use_exog": False  # No exogenous variables in N-BEATS
}
X_train, y_train, X_valid, y_valid, X_test, y_test, dates, y_scaler = load_data(TRAIN_DATA_PATH_1990S, config=config)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The model's input size is the second dimension of the training inputs.
model = NBeats(input_size=X_train.shape[1]).to(device)


## Train

In [None]:
# Prepare DataLoaders (one per split)
# NOTE(review): the residual plot further down pairs test predictions with
# `dates`; confirm prepare_dataloader keeps the test split in its original
# order (i.e. does not shuffle it).
dataloader_train = prepare_dataloader(X_train, y_train)
dataloader_valid = prepare_dataloader(X_valid, y_valid)
dataloader_test = prepare_dataloader(X_test, y_test)

# Training Setup
loss_fn = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.005, weight_decay=5e-7)
# Multiply the learning rate by 0.5 once the value handed to scheduler.step()
# has stopped decreasing for more than `patience` epochs (which value that is
# gets decided inside train_model - presumably the validation loss).
# The `verbose` keyword is intentionally not passed: it is deprecated since
# PyTorch 2.2, where it triggers a warning. The current learning rate can be
# inspected with scheduler.get_last_lr() instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
# Directory handed to train_model as the place to save model weights.
savepath = os.path.join(module_path, 'Models', 'Weights', 'NBEATS')

# Train Model
# The returned metadata is read with .get("best_model_path") in the next cell.
trainingMetadata = train_model(
    model,
    maxEpochs=120,
    modelSavePath=savepath,
    modelName='nbeats',
    dataLoaderTrain=dataloader_train,
    dataLoaderValid=dataloader_valid,
    lossFn=loss_fn,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler
)

## Visualise Output and Make Evaluation Predictions

In [None]:
from Evaluation.Helper.evaluation_helpers import evaluate_model, make_evaluation_predictions

# Look up where training stored the best checkpoint (None if the key is absent).
best_model_path = trainingMetadata.get("best_model_path")

# Fail fast when there is no usable checkpoint on disk; everything below
# needs the file to exist.
checkpoint_available = bool(best_model_path) and os.path.exists(best_model_path)
if not checkpoint_available:
    raise FileNotFoundError(f"Best model file not found: {best_model_path}")

print(f"Loading best model from: {best_model_path}")
# Both helpers receive the checkpoint path through `savepath`.
df_comparison, rmse = evaluate_model(
    model,
    dataloader_test,
    y_scaler,
    dates,
    device,
    savepath=best_model_path,
    verbose=True,
)
predictions, y_test_original = make_evaluation_predictions(
    model,
    dataloader_test,
    savepath=best_model_path,
    y_scaler=y_scaler,
)

## Evaluation

In [None]:
# Compute Evaluation Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics comparing the test targets with the model's predictions.
mae = mean_absolute_error(y_test_original, predictions)
mse = mean_squared_error(y_test_original, predictions)
# Rebinds `rmse`, replacing the value returned by evaluate_model earlier.
rmse = np.sqrt(mse)

print(
    f"Mean Absolute Error (MAE): {mae:.5f}",
    f"Root Mean Square Error (RMSE): {rmse:.5f}",
    sep="\n",
)

In [None]:
# Residual = actual value minus predicted value.
# NOTE(review): assumes y_test_original and predictions have the same shape;
# a (n,) vs (n, 1) mismatch would silently broadcast - confirm.
residuals = y_test_original - predictions

# Line plot of the residuals against the trailing len(residuals) dates.
time_fig, time_ax = plt.subplots(figsize=(16, 8))
time_ax.plot(
    dates[-len(residuals):],
    residuals,
    label="Residual Errors",
    color="red",
    linewidth=2,
)
time_ax.axhline(y=0, color='black', linestyle='dashed')  # zero-error reference
time_ax.set_xlabel("Date")
time_ax.set_ylabel("Residual Error")
time_ax.set_title("Residual Errors Over Time")
time_ax.legend()
time_ax.grid()
plt.show()

In [None]:
# Distribution of the residuals, with a dashed marker at zero error.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(residuals, bins=30, edgecolor='black', color='red', alpha=0.7)
hist_ax.axvline(x=0, color='black', linestyle='dashed')  # zero-error reference
hist_ax.set(
    xlabel="Residual Error",
    ylabel="Frequency",
    title="Histogram of Residuals",
)
hist_ax.grid()
plt.show()