In [12]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import sys
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Set root and paths
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
sys.path.append(ROOT_PATH)

from Training.Helper.dataPreprocessing import (
    add_time_features, add_lagged_features, add_rolling_features,
    sklearn_fit_transform, prepare_dataloader, rank_features_ccf,
    TRAIN_DATA_PATH_1990S
)
from Models.LSTM import LSTM

def create_direct_delta_sequences(X, y, seq_len=36, horizon=12):
    """Build sliding windows and multi-step delta targets for direct forecasting.

    For every valid start index ``i`` the input window is ``X[i:i + seq_len]``
    and the target is ``y[i + seq_len : i + seq_len + horizon] - y[i + seq_len - 1]``,
    i.e. the change of each of the next ``horizon`` values relative to the
    value of ``y`` on the last row of the window.

    Args:
        X: Array-like whose first axis is the time axis.
        y: Array-like aligned with ``X`` along the first axis.
        seq_len: Number of rows in each input window.
        horizon: Number of future steps in each target.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays with one entry per window.
        When the series is too short to form a single window, both arrays are
        empty with shapes ``(0, seq_len, ...)`` and ``(0, horizon, ...)``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Window i consumes rows i .. i + seq_len + horizon - 1, so the last valid
    # start index is len(X) - seq_len - horizon (inclusive). The previous
    # range() stopped one short and dropped the most recent sample.
    n_samples = len(X) - seq_len - horizon + 1
    if n_samples <= 0:
        return (
            np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype),
            np.empty((0, horizon) + y.shape[1:], dtype=y.dtype),
        )

    X_seq, y_seq = [], []
    for start in range(n_samples):
        end = start + seq_len
        X_seq.append(X[start:end])
        # Delta of each future value against the last value inside the window.
        y_seq.append(y[end:end + horizon] - y[end - 1])
    return np.array(X_seq), np.array(y_seq)

# === CONFIG ===
# Tunable constants for the notebook, kept in one place.
SEQ_LEN = 36              # rows per input window (seq_len of the sequence builder)
BATCH_SIZE = 16           # batch size handed to prepare_dataloader
EPOCHS = 100              # upper bound on training epochs
PATIENCE = 10             # NOTE(review): not referenced in the cells visible here
LR = 0.001                # NOTE(review): not referenced in the cells visible here
TOP_K_FEATURES = 30       # how many top CCF-ranked features are kept
HORIZONS = [1, 3, 6, 12]  # forecast horizons; one model is trained per entry

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [32]:
# === LOAD & FEATURE ENGINEERING ===
# Read the training CSV and expose the PCEPI series under the name "y_original".
df = pd.read_csv(TRAIN_DATA_PATH_1990S).rename(columns={"fred_PCEPI": "y_original"})
df["ds"] = pd.to_datetime(df["observation_date"], format="%m/%Y")

# Calendar features derived from the parsed date column.
df = add_time_features(df, "ds")

# Fourier terms of the month number for harmonics 1..4.
for k in range(1, 5):
    month_angle = 2 * np.pi * k * df["month"] / 12
    df[f"sin_{k}"] = np.sin(month_angle)
    df[f"cos_{k}"] = np.cos(month_angle)

# Relative change of the target and the first difference of that change.
relative_change = df["y_original"].pct_change()
df["pct_change"] = relative_change
df["momentum"] = relative_change.diff()

# Lagged and rolling-window views of the target.
df = add_lagged_features(df, ["y_original"], lags=[1, 6, 12])
df = add_rolling_features(df, "y_original", windows=[3, 6, 12])

# Discard every row that still holds a NaN in any column.
df = df.dropna()

# === CCF SELECTION ===
# Rank the numeric columns against the target and keep the first TOP_K_FEATURES
# ranked names that are actual columns of df.
df_numeric = df.select_dtypes(include=[np.number]).copy()
ccf_ranked = rank_features_ccf(df_numeric, targetCol="y_original")
top_ranked = list(ccf_ranked[:TOP_K_FEATURES])
selected_features = [name for name in top_ranked if name in df.columns]
features = df[selected_features]

# The target is modelled as log(1 + y).
target_log = np.log1p(df["y_original"])

# === SCALE ===
# Features and target each get their own StandardScaler; the fitted scalers are
# kept so predictions can be mapped back later.
features_scaled_list, x_scaler = sklearn_fit_transform(features, StandardScaler())
X_scaled = features_scaled_list[0].values

target_scaled_list, y_scaler = sklearn_fit_transform(target_log.to_frame(), StandardScaler())
y_scaled = target_scaled_list[0].values.flatten()

# === LOOP OVER HORIZONS ===
# Each pass overwrites the previous one, so once the loop finishes only the
# arrays and loaders of the last horizon in HORIZONS remain bound.
for HORIZON in HORIZONS:

    # === SEQUENCES ===
    X_seq, y_seq = create_direct_delta_sequences(
        X_scaled, y_scaled, seq_len=SEQ_LEN, horizon=HORIZON
    )
    X_seq = X_seq.reshape(len(X_seq), SEQ_LEN, -1)
    y_seq = y_seq.reshape(len(y_seq), HORIZON)

    # === SPLIT ===
    # Chronological 80/20 split: the earliest windows train, the latest validate.
    val_split = int(len(X_seq) * 0.8)
    X_train, y_train = X_seq[:val_split], y_seq[:val_split]
    X_val, y_val = X_seq[val_split:], y_seq[val_split:]

    train_loader = prepare_dataloader(X_train, y_train, batch_size=BATCH_SIZE)
    val_loader = prepare_dataloader(X_val, y_val, batch_size=BATCH_SIZE, shuffle=False)


2025-04-24 21:54:53,999 - INFO - Added time features: year, month, quarter. DataFrame shape: (408, 363)
2025-04-24 21:54:54,004 - INFO - Added lagged features with lags [1, 6, 12] to target columns ['y_original']. DataFrame shape: (408, 376)
2025-04-24 21:54:54,006 - INFO - Added rolling mean and standard deviation features to target column y_original. DataFrame shape: (408, 382)


In [38]:
import optuna
from Training.Helper.PyTorchModular import optuna_tune_and_train_pytorch

# HORIZONS, SEQ_LEN, BATCH_SIZE, EPOCHS and DEVICE come from the config cell;
# they are intentionally not redefined here so there is one source of truth.
FORECAST_STEPS = 12  # number of roll-forward steps saved for every horizon

for HORIZON in HORIZONS:
    print(f"\n=== Tuning and Training LSTM for horizon {HORIZON} ===")

    # Build sequences and loaders for this horizon first, so that nothing below
    # depends on arrays left over from an earlier cell or loop iteration.
    X_seq, y_seq = create_direct_delta_sequences(X_scaled, y_scaled, SEQ_LEN, HORIZON)
    X_seq = X_seq.reshape(X_seq.shape[0], SEQ_LEN, -1)
    y_seq = y_seq.reshape(y_seq.shape[0], HORIZON)

    # Chronological 80/20 split into training and validation windows.
    val_split = int(len(X_seq) * 0.8)
    X_train, X_val = X_seq[:val_split], X_seq[val_split:]
    y_train, y_val = y_seq[:val_split], y_seq[val_split:]

    train_loader = prepare_dataloader(X_train, y_train, batch_size=BATCH_SIZE)
    val_loader = prepare_dataloader(X_val, y_val, shuffle=False, batch_size=BATCH_SIZE)

    # Hyperparameters explored by Optuna (rebuilt per horizon, as before).
    model_search_space = {
        "hidden_size": (int, 32, 128),
        "num_layers": (int, 1, 3),
        "dropout": (float, 0.0, 0.5),
    }

    optim_search_space = {
        "lr": (float, 1e-4, 1e-2, {"log": True})
    }

    # Fixed constructor arguments, taken from the sequences built just above.
    model_invariates = {
        "input_size": X_seq.shape[2],
        "output_size": HORIZON
    }

    model, metadata = optuna_tune_and_train_pytorch(
        model_class=LSTM,
        train_loader=train_loader,
        val_loader=val_loader,
        device=DEVICE,
        model_search_space=model_search_space,
        model_invariates=model_invariates,
        optim_search_space=optim_search_space,
        max_epochs=EPOCHS,
        model_save_path=os.path.join("..", "..", "Models", f"Horizon{HORIZON}"),
        model_name=f"LSTM_horizon_{HORIZON}",
        n_trials=20,
        n_epochs_per_trial=5,
        verbose=True
    )

    # === FINAL FORECAST ===
    model.eval()
    y_pred_final = []
    with torch.no_grad():
        # Seed with the most recent SEQ_LEN rows so the window ends on the same
        # row as base_val. Training pairs each window with y[i + seq_len - 1];
        # X_seq[-1] ends at least HORIZON rows before the last observation and
        # would therefore be misaligned with y_scaled[-1].
        x_input = X_scaled[-SEQ_LEN:].copy()
        base_val = y_scaled[-1]

        for _ in range(FORECAST_STEPS):
            x_tensor = torch.tensor(x_input[np.newaxis], dtype=torch.float32).to(DEVICE)
            pred_delta = model(x_tensor).cpu().numpy().flatten()

            # NOTE(review): this takes the delta of the furthest predicted step
            # and applies it as the next single step. For HORIZON > 1 confirm
            # this is intended rather than pred_delta[0].
            step_index = min(HORIZON, len(pred_delta)) - 1
            next_scaled = base_val + pred_delta[step_index]
            y_pred_final.append(next_scaled)

            # Slide the window by one row: after the roll, row -2 is the former
            # last row and row -1 is overwritten with a synthetic new row.
            # NOTE(review): the new row copies the previous features and writes
            # the scaled target prediction into the last feature column; this
            # presumes that column is the target in the same scale. TODO confirm.
            x_input = np.roll(x_input, -1, axis=0)
            x_input[-1] = np.concatenate([x_input[-2][:-1], [next_scaled]])
            base_val = next_scaled

    # Undo the standardisation, then the log1p applied to the target.
    y_pred_rescaled = y_scaler.inverse_transform(np.array(y_pred_final).reshape(-1, 1)).flatten()
    y_pred_final = np.expm1(y_pred_rescaled)

    # === SAVE ===
    save_path = os.path.join("..", "..", "Predictions", f"Horizon{HORIZON}", f"LSTM_horizon_{HORIZON}.npy")
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    np.save(save_path, y_pred_final)
    print(f"Saved tuned and trained LSTM with residuals to: {save_path}")

[I 2025-04-24 22:21:43,355] A new study created in memory with name: LSTM_horizon_1_hyperparameter_optimisation



=== Tuning and Training LSTM for horizon 1 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:21:44,898] Trial 0 finished with value: 0.00016229724294842326 and parameters: {'hidden_size': 43, 'num_layers': 3, 'dropout': 0.30870062689410327, 'lr': 0.0008356084131511738}. Best is trial 0 with value: 0.00016229724294842326.
[I 2025-04-24 22:21:45,492] Trial 1 finished with value: 0.003494447407623132 and parameters: {'hidden_size': 55, 'num_layers': 1, 'dropout': 0.36039773032852307, 'lr': 0.0003057463399686791}. Best is trial 0 with value: 0.00016229724294842326.
[I 2025-04-24 22:21:46,028] Trial 2 pruned. 
[I 2025-04-24 22:21:49,033] Trial 3 finished with value: 0.0007150119377507104 and parameters: {'hidden_size': 120, 'num_layers': 3, 'dropout': 0.2887207958491334, 'lr': 0.006068824980721918}. Best is trial 0 with value: 0.00016229724294842326.
[I 2025-04-24 22:21:51,239] Trial 4 pruned. 
[I 2025-04-24 22:21:54,217] Trial 5 finished with value: 0.0002224437533489739 and parameters: {'hidden_size': 114, 'num_layers': 3, 'dropout': 0.3684899246352909, 'lr': 0.0

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.053930, Val Loss: 0.001719
Best model saved at ../../Models/Horizon1/LSTM_horizon_1_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.013493, Val Loss: 0.000902
Best model saved at ../../Models/Horizon1/LSTM_horizon_1_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.006401, Val Loss: 0.000185
Best model saved at ../../Models/Horizon1/LSTM_horizon_1_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.002788, Val Loss: 0.000795
Epoch 5/100 - Train Loss: 0.001511, Val Loss: 0.000262
Epoch 6/100 - Train Loss: 0.001185, Val Loss: 0.000311
Epoch 7/100 - Train Loss: 0.000914, Val Loss: 0.000233
Epoch 8/100 - Train Loss: 0.000722, Val Loss: 0.000210
Epoch 9/100 - Train Loss: 0.000621, Val Loss: 0.000264
Epoch 10/100 - Train Loss: 0.000426, Val Loss: 0.000210
Epoch 11/100 - Train Loss: 0.000361, Val Loss: 0.000206
Epoch 12/100 - Train Loss: 0.000333, Val Loss: 0.000208
Epoch 13/100 - Train Loss: 0.000369, Val Loss: 0.000190
Epoch 14/100 -

[I 2025-04-24 22:22:25,780] A new study created in memory with name: LSTM_horizon_3_hyperparameter_optimisation


Epoch 18/100 - Train Loss: 0.000192, Val Loss: 0.000195
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 18. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon1/LSTM_horizon_1.npy

=== Tuning and Training LSTM for horizon 3 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:22:26,661] Trial 0 finished with value: 0.001821543413421346 and parameters: {'hidden_size': 84, 'num_layers': 1, 'dropout': 0.24343150986384277, 'lr': 0.001496215348432405}. Best is trial 0 with value: 0.001821543413421346.
[I 2025-04-24 22:22:28,885] Trial 1 finished with value: 0.0014072859355817651 and parameters: {'hidden_size': 62, 'num_layers': 3, 'dropout': 0.3255401625485849, 'lr': 0.00023486406831604032}. Best is trial 1 with value: 0.0014072859355817651.
[I 2025-04-24 22:22:29,682] Trial 2 finished with value: 0.005928562617757254 and parameters: {'hidden_size': 88, 'num_layers': 1, 'dropout': 0.31938486786063663, 'lr': 0.00012657377182822332}. Best is trial 1 with value: 0.0014072859355817651.
[I 2025-04-24 22:22:33,486] Trial 3 finished with value: 0.0024353433141691815 and parameters: {'hidden_size': 101, 'num_layers': 3, 'dropout': 0.4343702899030695, 'lr': 0.00043099453141838946}. Best is trial 1 with value: 0.0014072859355817651.
[I 2025-04-24 22:22:34

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.032145, Val Loss: 0.003770
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.002170, Val Loss: 0.002140
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.000945, Val Loss: 0.001469
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.000559, Val Loss: 0.000980
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_4.pth (Epoch 4)
Epoch 5/100 - Train Loss: 0.000457, Val Loss: 0.000845
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_5.pth (Epoch 5)
Epoch 6/100 - Train Loss: 0.000390, Val Loss: 0.000746
Best model saved at ../../Models/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_6.pth (Epoch 6)
Epoch 7/100 - Train Loss: 0.000376, Val Loss: 0.000750
Epoch 8/100 - Train Loss: 0.000370, Val Loss: 0.000729
Best model saved at ../../

[I 2025-04-24 22:22:47,142] A new study created in memory with name: LSTM_horizon_6_hyperparameter_optimisation


Epoch 19/100 - Train Loss: 0.000336, Val Loss: 0.000780
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 19. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon3/LSTM_horizon_3.npy

=== Tuning and Training LSTM for horizon 6 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:22:48,574] Trial 0 finished with value: 0.0037525150085418995 and parameters: {'hidden_size': 101, 'num_layers': 1, 'dropout': 0.46205596114130365, 'lr': 0.0015564324099740894}. Best is trial 0 with value: 0.0037525150085418995.
[I 2025-04-24 22:22:49,901] Trial 1 finished with value: 0.006633510511420982 and parameters: {'hidden_size': 52, 'num_layers': 2, 'dropout': 0.38057407625569967, 'lr': 0.0001632028860581231}. Best is trial 0 with value: 0.0037525150085418995.
[I 2025-04-24 22:22:51,439] Trial 2 finished with value: 0.003236895036870535 and parameters: {'hidden_size': 88, 'num_layers': 2, 'dropout': 0.45354412153020107, 'lr': 0.002261600285796373}. Best is trial 2 with value: 0.003236895036870535.
[I 2025-04-24 22:22:52,726] Trial 3 finished with value: 0.003713541173927066 and parameters: {'hidden_size': 97, 'num_layers': 1, 'dropout': 0.11671336161291895, 'lr': 0.0008343211736006362}. Best is trial 2 with value: 0.003236895036870535.
[I 2025-04-24 22:22:53,30

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.030837, Val Loss: 0.002088
Best model saved at ../../Models/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.000991, Val Loss: 0.002781
Epoch 3/100 - Train Loss: 0.000912, Val Loss: 0.002426
Epoch 4/100 - Train Loss: 0.000807, Val Loss: 0.002656
Epoch 5/100 - Train Loss: 0.000805, Val Loss: 0.001964
Best model saved at ../../Models/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_5.pth (Epoch 5)
Epoch 6/100 - Train Loss: 0.000652, Val Loss: 0.002095
Epoch 7/100 - Train Loss: 0.000775, Val Loss: 0.002666
Epoch 8/100 - Train Loss: 0.000611, Val Loss: 0.003045
Epoch 9/100 - Train Loss: 0.000590, Val Loss: 0.003902
Epoch 10/100 - Train Loss: 0.000624, Val Loss: 0.002727
Epoch 11/100 - Train Loss: 0.000562, Val Loss: 0.002238
Epoch 12/100 - Train Loss: 0.000535, Val Loss: 0.002550
Epoch 13/100 - Train Loss: 0.000501, Val Loss: 0.005071
Epoch 14/100 - Train Loss: 0.000549, Val Loss: 0.002772
Epoch 15/100 - Train Loss: 0.000433, Val Loss: 

[I 2025-04-24 22:23:31,119] A new study created in memory with name: LSTM_horizon_12_hyperparameter_optimisation


Epoch 16/100 - Train Loss: 0.000605, Val Loss: 0.003365
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 16. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon6/LSTM_horizon_6.npy

=== Tuning and Training LSTM for horizon 12 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:23:32,569] Trial 0 finished with value: 0.01535551351095949 and parameters: {'hidden_size': 67, 'num_layers': 2, 'dropout': 0.19715174359731152, 'lr': 0.00016234042012073786}. Best is trial 0 with value: 0.01535551351095949.
[I 2025-04-24 22:23:33,312] Trial 1 finished with value: 0.006965970191439348 and parameters: {'hidden_size': 36, 'num_layers': 1, 'dropout': 0.31325831379064634, 'lr': 0.007965843231532373}. Best is trial 1 with value: 0.006965970191439348.
[I 2025-04-24 22:23:35,742] Trial 2 finished with value: 0.006258247892505356 and parameters: {'hidden_size': 93, 'num_layers': 2, 'dropout': 0.10959170748786556, 'lr': 0.0036352055447556693}. Best is trial 2 with value: 0.006258247892505356.
[I 2025-04-24 22:23:36,677] Trial 3 finished with value: 0.011891262871878487 and parameters: {'hidden_size': 99, 'num_layers': 1, 'dropout': 0.3025992548619762, 'lr': 0.00042953958846669556}. Best is trial 2 with value: 0.006258247892505356.
[I 2025-04-24 22:23:39,212] Tr

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.013127, Val Loss: 0.010110
Best model saved at ../../Models/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.002098, Val Loss: 0.009207
Best model saved at ../../Models/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.001550, Val Loss: 0.008851
Best model saved at ../../Models/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.001350, Val Loss: 0.008653
Best model saved at ../../Models/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_4.pth (Epoch 4)
Epoch 5/100 - Train Loss: 0.001208, Val Loss: 0.007401
Best model saved at ../../Models/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_5.pth (Epoch 5)
Epoch 6/100 - Train Loss: 0.001180, Val Loss: 0.008737
Epoch 7/100 - Train Loss: 0.001141, Val Loss: 0.009991
Epoch 8/100 - Train Loss: 0.001035, Val Loss: 0.007409
Epoch 9/100 - Train Loss: 0.001086, Val Loss: 0.007518
Epoch 10/100 - Train Loss: 0.000846, Val Loss: 0.0