In [39]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import sys
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Set root and paths
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
sys.path.append(ROOT_PATH)

from Training.Helper.dataPreprocessing import (
    add_time_features, add_lagged_features, add_rolling_features,
    sklearn_fit_transform, prepare_dataloader, rank_features_ccf,
    TRAIN_DATA_PATH_1990S
)
from Models.LSTM import LSTM

def create_direct_delta_sequences(X, y, seq_len=36, horizon=12):
    """Build sliding-window inputs with direct multi-step *delta* targets.

    For every window start ``i`` the input is ``X[i : i + seq_len]`` and the
    target is the next ``horizon`` values of ``y`` expressed as differences
    from ``y[i + seq_len - 1]`` (the last target value inside the window).

    Args:
        X: Array-like indexed by time along axis 0.
        y: Array-like of target values aligned row-for-row with ``X``.
        seq_len: Number of consecutive rows in each input window (>= 1).
        horizon: Number of future steps predicted from each window (>= 1).

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays with ``n_windows`` entries
        along axis 0, where ``n_windows = n - seq_len - horizon + 1`` and
        ``n`` is the shorter of ``len(X)`` and ``len(y)``. When the input is
        too short for a single window, correctly shaped empty arrays are
        returned (``(0, seq_len, ...)`` and ``(0, horizon, ...)``).

    Raises:
        ValueError: If ``seq_len`` or ``horizon`` is smaller than 1.
    """
    if seq_len < 1 or horizon < 1:
        raise ValueError("seq_len and horizon must both be >= 1")

    # Coerce to arrays so indexing is positional (a pandas Series would
    # otherwise be indexed by label) and so ``future - base`` is vectorised.
    X = np.asarray(X)
    y = np.asarray(y)

    # Use the shorter length so a misaligned pair can never yield a ragged
    # (short) target slice at the tail.
    n_steps = min(len(X), len(y))

    # The last valid start satisfies i + seq_len + horizon == n_steps, so the
    # count must include that index; the previous bound silently dropped the
    # window whose targets reach the final observation.
    n_windows = n_steps - seq_len - horizon + 1
    if n_windows <= 0:
        empty_X = np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0, horizon) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    X_seq, y_seq = [], []
    for i in range(n_windows):
        window_end = i + seq_len
        X_seq.append(X[i:window_end])
        base = y[window_end - 1]
        future = y[window_end:window_end + horizon]
        y_seq.append(future - base)
    return np.array(X_seq), np.array(y_seq)

# === CONFIG ===
# Everything a reader might want to tune lives here.
SEED = 42                   # seeds the global NumPy and PyTorch RNGs below
SEQ_LEN = 36                # rows per input window passed to create_direct_delta_sequences
BATCH_SIZE = 16             # batch size handed to prepare_dataloader
EPOCHS = 100                # upper bound on training epochs (passed as max_epochs)
PATIENCE = 10               # NOTE(review): not referenced by the cells visible here - confirm it is still needed
LR = 1e-3                   # NOTE(review): not referenced by the cells visible here (lr is tuned by Optuna) - confirm
TOP_K_FEATURES = 30         # number of top CCF-ranked columns kept as model inputs
HORIZONS = [1, 3, 6, 12]    # forecast horizons; one model is trained per entry
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global RNGs so repeated runs start from the same random state.
# NOTE(review): helpers that create their own generators (e.g. an Optuna
# sampler inside the tuning helper) are not covered by these calls.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [32]:
# === LOAD & FEATURE ENGINEERING ===
# Read the training table, parse the "observation_date" column (month/year
# strings) into a datetime column "ds", and expose the target series
# "fred_PCEPI" under the name "y_original".
df = pd.read_csv(TRAIN_DATA_PATH_1990S)
df["ds"] = pd.to_datetime(df["observation_date"], format="%m/%Y")
df = df.rename(columns={"fred_PCEPI": "y_original"})

# add_time_features supplies the "month" column used below (its log line
# reports adding year, month and quarter).
df = add_time_features(df, "ds")
# Sine/cosine encodings of the month for harmonics k = 1..4 of a 12-month cycle.
for k in [1, 2, 3, 4]:
    df[f"sin_{k}"] = np.sin(2 * np.pi * k * df["month"] / 12)
    df[f"cos_{k}"] = np.cos(2 * np.pi * k * df["month"] / 12)
# Period-over-period relative change of the target, and the change in that change.
df["pct_change"] = df["y_original"].pct_change()
df["momentum"] = df["pct_change"].diff()
# Lagged copies (1, 6, 12) and rolling statistics (windows 3, 6, 12) of the target.
df = add_lagged_features(df, ["y_original"], lags=[1, 6, 12])
df = add_rolling_features(df, "y_original", windows=[3, 6, 12])
# Drops every row that has a NaN in ANY column, in place. The leading rows made
# incomplete by pct_change/diff go here; lag and rolling rows presumably do too.
# NOTE(review): in-place mutation makes this cell non-idempotent on re-run.
df.dropna(inplace=True)

# === CCF SELECTION ===
# Rank the numeric columns against the target and keep the first
# TOP_K_FEATURES entries of the ranking that are still columns of df.
# NOTE(review): presumably a cross-correlation ranking; whether the target
# column itself can appear in the ranking depends on rank_features_ccf - confirm.
df_numeric = df.select_dtypes(include=[np.number]).copy()
ccf_ranked = rank_features_ccf(df_numeric, targetCol="y_original")
selected_features = [col for col in list(ccf_ranked[:TOP_K_FEATURES]) if col in df.columns]
features = df[selected_features]
# The model target is log(1 + y_original); forecasts are mapped back with expm1.
target_log = np.log1p(df["y_original"])

# === SCALE ===
# sklearn_fit_transform returns (list of transformed frames, scaler); only the
# first frame of each list is used.
# NOTE(review): both scalers are handed the full series before the train/val
# split below, so validation rows appear to influence the scaling statistics
# (look-ahead leakage) - confirm against sklearn_fit_transform's behaviour.
features_scaled_list, x_scaler = sklearn_fit_transform(features, StandardScaler())
target_scaled_list, y_scaler = sklearn_fit_transform(target_log.to_frame(), StandardScaler())

X_scaled = features_scaled_list[0].values
y_scaled = target_scaled_list[0].values.flatten()

# === LOOP OVER HORIZONS ===
# NOTE(review): every iteration overwrites X_seq, y_seq, the splits and both
# loaders, so after the loop only the values for the last entry of HORIZONS
# remain in the notebook namespace.
for HORIZON in HORIZONS:

    # === SEQUENCES ===
    # Windows of SEQ_LEN rows paired with HORIZON-step delta targets; the
    # reshapes pin the layouts to (samples, SEQ_LEN, features) and
    # (samples, HORIZON).
    X_seq, y_seq = create_direct_delta_sequences(X_scaled, y_scaled, SEQ_LEN, HORIZON)
    X_seq = X_seq.reshape(X_seq.shape[0], SEQ_LEN, -1)
    y_seq = y_seq.reshape(y_seq.shape[0], HORIZON)

    # === SPLIT ===
    # Chronological split: the first 80% of windows train, the remaining 20% validate.
    val_split = int(len(X_seq) * 0.8)
    X_train, X_val = X_seq[:val_split], X_seq[val_split:]
    y_train, y_val = y_seq[:val_split], y_seq[val_split:]

    # Validation batches keep their order (shuffle=False); the training loader
    # uses prepare_dataloader's default shuffle setting.
    train_loader = prepare_dataloader(X_train, y_train, batch_size=BATCH_SIZE)
    val_loader = prepare_dataloader(X_val, y_val, shuffle=False, batch_size=BATCH_SIZE)


2025-04-24 21:54:53,999 - INFO - Added time features: year, month, quarter. DataFrame shape: (408, 363)
2025-04-24 21:54:54,004 - INFO - Added lagged features with lags [1, 6, 12] to target columns ['y_original']. DataFrame shape: (408, 376)
2025-04-24 21:54:54,006 - INFO - Added rolling mean and standard deviation features to target column y_original. DataFrame shape: (408, 382)


In [42]:
from Training.Helper.PyTorchModular import optuna_tune_and_train_pytorch

# Length of every saved forecast, independent of HORIZON.
FORECAST_STEPS = 12
# Optuna budget: number of trials and training epochs per trial.
N_TRIALS = 20
N_EPOCHS_PER_TRIAL = 5

# HORIZONS comes from the config cell; it is deliberately not redefined here so
# there is a single source of truth.
for HORIZON in HORIZONS:
    print(f"\n=== Tuning and Training LSTM for horizon {HORIZON} ===")

    # === SEQUENCES, SPLIT, LOADERS ===
    # Built first so everything below (including the model's input size) is
    # derived from this iteration's data rather than from whatever a previous
    # cell or iteration left in the namespace.
    X_seq, y_seq = create_direct_delta_sequences(X_scaled, y_scaled, SEQ_LEN, HORIZON)
    X_seq = X_seq.reshape(X_seq.shape[0], SEQ_LEN, -1)
    y_seq = y_seq.reshape(y_seq.shape[0], HORIZON)

    # Chronological 80/20 split: earliest windows train, latest validate.
    val_split = int(len(X_seq) * 0.8)
    X_train, X_val = X_seq[:val_split], X_seq[val_split:]
    y_train, y_val = y_seq[:val_split], y_seq[val_split:]

    train_loader = prepare_dataloader(X_train, y_train, batch_size=BATCH_SIZE)
    val_loader = prepare_dataloader(X_val, y_val, shuffle=False, batch_size=BATCH_SIZE)

    # === SEARCH SPACE ===
    # Fresh dicts per horizon, as in the original, in case the helper mutates them.
    model_search_space = {
        "hidden_size": (int, 32, 128),
        "num_layers": (int, 1, 3),
        "dropout": (float, 0.0, 0.5),
    }

    optim_search_space = {
        "lr": (float, 1e-4, 1e-2, {"log": True})
    }

    # The model emits one delta per future step, hence output_size == HORIZON.
    model_invariates = {
        "input_size": X_seq.shape[2],
        "output_size": HORIZON
    }

    model, metadata = optuna_tune_and_train_pytorch(
        model_class=LSTM,
        train_loader=train_loader,
        val_loader=val_loader,
        device=DEVICE,
        model_search_space=model_search_space,
        model_invariates=model_invariates,
        optim_search_space=optim_search_space,
        max_epochs=EPOCHS,
        model_save_path=os.path.join("..", "..", "Models","Weights", "LSTM", f"Horizon{HORIZON}"),
        model_name=f"LSTM_horizon_{HORIZON}",
        n_trials=N_TRIALS,
        n_epochs_per_trial=N_EPOCHS_PER_TRIAL,
        verbose=True
    )

    # === FINAL FORECAST ===
    # The network was trained to map a window ending at row t to the deltas
    # y[t+1..t+HORIZON] - y[t]. The forecast therefore has to
    #   (1) start from the most recent SEQ_LEN rows, so the window really ends
    #       at the row that supplies base_val (X_seq[-1] ends at least HORIZON
    #       rows earlier), and
    #   (2) consume the predicted deltas in order, advancing time by exactly as
    #       many steps as were consumed (adding the HORIZON-step delta while
    #       moving a single step inflates the time scale for HORIZON > 1).
    model.eval()
    forecast_scaled = []
    window = X_scaled[-SEQ_LEN:].copy()
    base_val = y_scaled[-1]

    with torch.no_grad():
        while len(forecast_scaled) < FORECAST_STEPS:
            x_tensor = torch.tensor(window[np.newaxis], dtype=torch.float32).to(DEVICE)
            pred_delta = model(x_tensor).cpu().numpy().flatten().astype(np.float64)
            if pred_delta.size == 0:
                raise RuntimeError("Model returned no outputs; cannot extend the forecast.")

            # Use every direct step the model offers, capped by what is still missing.
            n_take = min(pred_delta.size, FORECAST_STEPS - len(forecast_scaled))
            block = base_val + pred_delta[:n_take]
            forecast_scaled.extend(block.tolist())

            if len(forecast_scaled) >= FORECAST_STEPS:
                break

            # Slide the window forward one row per consumed step.
            # NOTE(review): as in the original, each new row copies the latest
            # feature row and writes the y-scaled forecast into the LAST
            # feature column. That is only meaningful if that column is the
            # target on the same scale; the columns come from the CCF ranking,
            # so confirm or replace this feedback.
            for value in block:
                new_row = np.concatenate([window[-1][:-1], [value]])
                window = np.vstack([window[1:], new_row])
            base_val = block[-1]

    # Undo the standardisation, then the log1p applied to the target.
    y_pred_rescaled = y_scaler.inverse_transform(np.array(forecast_scaled).reshape(-1, 1)).flatten()
    y_pred_final = np.expm1(y_pred_rescaled)

    # === SAVE ===
    save_path = os.path.join("..", "..", "Predictions", f"Horizon{HORIZON}", f"LSTM_horizon_{HORIZON}.npy")
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    np.save(save_path, y_pred_final)
    print(f"Saved tuned LSTM forecast to: {save_path}")

[I 2025-04-24 22:37:55,881] A new study created in memory with name: LSTM_horizon_1_hyperparameter_optimisation



=== Tuning and Training LSTM for horizon 1 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:37:58,873] Trial 0 finished with value: 0.0014949571664652063 and parameters: {'hidden_size': 94, 'num_layers': 3, 'dropout': 0.022034817585603106, 'lr': 0.00022427552398717043}. Best is trial 0 with value: 0.0014949571664652063.
[I 2025-04-24 22:37:59,902] Trial 1 finished with value: 0.000813305704569858 and parameters: {'hidden_size': 115, 'num_layers': 1, 'dropout': 0.49046961576313775, 'lr': 0.0010656205110117247}. Best is trial 1 with value: 0.000813305704569858.
[I 2025-04-24 22:38:02,211] Trial 2 finished with value: 0.0002952970260392047 and parameters: {'hidden_size': 127, 'num_layers': 2, 'dropout': 0.20719841914505382, 'lr': 0.006737422240871544}. Best is trial 2 with value: 0.0002952970260392047.
[I 2025-04-24 22:38:03,693] Trial 3 finished with value: 0.003413836368256145 and parameters: {'hidden_size': 36, 'num_layers': 3, 'dropout': 0.3763559868865426, 'lr': 0.00034176785621033666}. Best is trial 2 with value: 0.0002952970260392047.
[I 2025-04-24 22:38:

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.066616, Val Loss: 0.000148
Best model saved at ../../Models/Weights/LSTM/Horizon1/LSTM_horizon_1_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.002089, Val Loss: 0.001812
Epoch 3/100 - Train Loss: 0.000529, Val Loss: 0.000283
Epoch 4/100 - Train Loss: 0.000264, Val Loss: 0.000252
Epoch 5/100 - Train Loss: 0.000240, Val Loss: 0.000477
Epoch 6/100 - Train Loss: 0.000209, Val Loss: 0.000293
Epoch 7/100 - Train Loss: 0.000167, Val Loss: 0.000376
Epoch 8/100 - Train Loss: 0.000161, Val Loss: 0.000281
Epoch 9/100 - Train Loss: 0.000160, Val Loss: 0.000198
Epoch 10/100 - Train Loss: 0.000158, Val Loss: 0.000209
Epoch 11/100 - Train Loss: 0.000142, Val Loss: 0.000303
Epoch 12/100 - Train Loss: 0.000146, Val Loss: 0.000263
Epoch 13/100 - Train Loss: 0.000151, Val Loss: 0.000261
Epoch 14/100 - Train Loss: 0.000152, Val Loss: 0.000311
Epoch 15/100 - Train Loss: 0.000133, Val Loss: 0.000316


[I 2025-04-24 22:38:22,865] A new study created in memory with name: LSTM_horizon_3_hyperparameter_optimisation


Epoch 16/100 - Train Loss: 0.000123, Val Loss: 0.000251
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 16. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon1/LSTM_horizon_1.npy

=== Tuning and Training LSTM for horizon 3 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:38:26,706] Trial 0 finished with value: 0.0016614171391766933 and parameters: {'hidden_size': 101, 'num_layers': 3, 'dropout': 0.33565578290229253, 'lr': 0.0003168278162653966}. Best is trial 0 with value: 0.0016614171391766933.
[I 2025-04-24 22:38:28,072] Trial 1 finished with value: 0.0011313498253002763 and parameters: {'hidden_size': 88, 'num_layers': 2, 'dropout': 0.3649210218822819, 'lr': 0.0036595440479575817}. Best is trial 1 with value: 0.0011313498253002763.
[I 2025-04-24 22:38:28,936] Trial 2 finished with value: 0.005334043487285574 and parameters: {'hidden_size': 81, 'num_layers': 1, 'dropout': 0.13153548388896552, 'lr': 0.0003929392701245735}. Best is trial 1 with value: 0.0011313498253002763.
[I 2025-04-24 22:38:30,576] Trial 3 finished with value: 0.0007728916284072006 and parameters: {'hidden_size': 41, 'num_layers': 3, 'dropout': 0.1812967978308065, 'lr': 0.00011798837509171583}. Best is trial 3 with value: 0.0007728916284072006.
[I 2025-04-24 22:38:3

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.075450, Val Loss: 0.008479
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.003478, Val Loss: 0.002372
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.000740, Val Loss: 0.001222
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.000473, Val Loss: 0.000974
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_4.pth (Epoch 4)
Epoch 5/100 - Train Loss: 0.000391, Val Loss: 0.000897
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_5.pth (Epoch 5)
Epoch 6/100 - Train Loss: 0.000364, Val Loss: 0.000860
Best model saved at ../../Models/Weights/LSTM/Horizon3/LSTM_horizon_3_BEST_STOPPED_AT_6.pth (Epoch 6)
Epoch 7/100 - Train Loss: 0.000356, Val Loss: 0.000789
Bes

[I 2025-04-24 22:38:47,593] A new study created in memory with name: LSTM_horizon_6_hyperparameter_optimisation


Epoch 18/100 - Train Loss: 0.000323, Val Loss: 0.000830
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 18. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon3/LSTM_horizon_3.npy

=== Tuning and Training LSTM for horizon 6 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:38:48,998] Trial 0 finished with value: 0.002885661784782481 and parameters: {'hidden_size': 112, 'num_layers': 1, 'dropout': 0.3079622361033817, 'lr': 0.00290515073920013}. Best is trial 0 with value: 0.002885661784782481.
[I 2025-04-24 22:38:49,819] Trial 1 finished with value: 0.002237703147816511 and parameters: {'hidden_size': 82, 'num_layers': 1, 'dropout': 0.2798202818296304, 'lr': 0.009019854618858195}. Best is trial 1 with value: 0.002237703147816511.
[I 2025-04-24 22:38:54,101] Trial 2 finished with value: 0.0042892839057817 and parameters: {'hidden_size': 106, 'num_layers': 3, 'dropout': 0.43467859963981564, 'lr': 0.00029994744015681964}. Best is trial 1 with value: 0.002237703147816511.
[I 2025-04-24 22:38:56,483] Trial 3 finished with value: 0.0023751853273289514 and parameters: {'hidden_size': 107, 'num_layers': 2, 'dropout': 0.4434370036215599, 'lr': 0.0032895681427602243}. Best is trial 1 with value: 0.002237703147816511.
[I 2025-04-24 22:38:58,721] Tri

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.031564, Val Loss: 0.006142
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.002724, Val Loss: 0.004087
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.001407, Val Loss: 0.003297
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.001133, Val Loss: 0.002756
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_4.pth (Epoch 4)
Epoch 5/100 - Train Loss: 0.000904, Val Loss: 0.002394
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_5.pth (Epoch 5)
Epoch 6/100 - Train Loss: 0.000798, Val Loss: 0.002356
Best model saved at ../../Models/Weights/LSTM/Horizon6/LSTM_horizon_6_BEST_STOPPED_AT_6.pth (Epoch 6)
Epoch 7/100 - Train Loss: 0.000800, Val Loss: 0.002160
Bes

[I 2025-04-24 22:39:28,853] A new study created in memory with name: LSTM_horizon_12_hyperparameter_optimisation


Epoch 30/100 - Train Loss: 0.000602, Val Loss: 0.002024
Early stopping. counter: 15
Best weights restored.
Early stopping at epoch 30. Best model restored.
Model training complete and saved!
Saved tuned and trained LSTM with residuals to: ../../Predictions/Horizon6/LSTM_horizon_6.npy

=== Tuning and Training LSTM for horizon 12 ===
Running Optuna hyperparameter tuning...


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-04-24 22:39:29,603] Trial 0 finished with value: 0.008287428226321936 and parameters: {'hidden_size': 52, 'num_layers': 1, 'dropout': 0.01226943226085675, 'lr': 0.0035654472802951255}. Best is trial 0 with value: 0.008287428226321936.
[I 2025-04-24 22:39:31,319] Trial 1 finished with value: 0.0089647318502622 and parameters: {'hidden_size': 89, 'num_layers': 2, 'dropout': 0.17307106062017213, 'lr': 0.0003193096308946454}. Best is trial 0 with value: 0.008287428226321936.
[I 2025-04-24 22:39:35,135] Trial 2 finished with value: 0.008313497395387717 and parameters: {'hidden_size': 124, 'num_layers': 3, 'dropout': 0.4386668724999063, 'lr': 0.00014171220023638422}. Best is trial 0 with value: 0.008287428226321936.
[I 2025-04-24 22:39:35,891] Trial 3 finished with value: 0.014094747602939606 and parameters: {'hidden_size': 63, 'num_layers': 1, 'dropout': 0.21653350629417462, 'lr': 0.00022475828292846403}. Best is trial 0 with value: 0.008287428226321936.
[I 2025-04-24 22:39:37,949] 

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100 - Train Loss: 0.031648, Val Loss: 0.012215
Best model saved at ../../Models/Weights/LSTM/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_1.pth (Epoch 1)
Epoch 2/100 - Train Loss: 0.002848, Val Loss: 0.007810
Best model saved at ../../Models/Weights/LSTM/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_2.pth (Epoch 2)
Epoch 3/100 - Train Loss: 0.001511, Val Loss: 0.006742
Best model saved at ../../Models/Weights/LSTM/Horizon12/LSTM_horizon_12_BEST_STOPPED_AT_3.pth (Epoch 3)
Epoch 4/100 - Train Loss: 0.001439, Val Loss: 0.006941
Epoch 5/100 - Train Loss: 0.001389, Val Loss: 0.006887
Epoch 6/100 - Train Loss: 0.001371, Val Loss: 0.006945
Epoch 7/100 - Train Loss: 0.001365, Val Loss: 0.006902
Epoch 8/100 - Train Loss: 0.001373, Val Loss: 0.006857
Epoch 9/100 - Train Loss: 0.001367, Val Loss: 0.007303
Epoch 10/100 - Train Loss: 0.001294, Val Loss: 0.007348
Epoch 11/100 - Train Loss: 0.001273, Val Loss: 0.007137
Epoch 12/100 - Train Loss: 0.001284, Val Loss: 0.007381
Epoch 13/100 - Train Loss