In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch
import optuna

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, Baseline
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.data.encoders import NaNLabelEncoder
from torch.utils.data import DataLoader

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [55]:
df = pd.read_csv("/Users/yehana2002/Projects/DSGP/datasets/processed/final_merged_dataset_ready.csv")
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").reset_index(drop=True)
df["series_id"] = "sri_lanka"
df.rename(columns={"Petrol_Price": "petrol_price"}, inplace=True)
df["time_idx"] = np.arange(len(df))

df.fillna(method="ffill", inplace=True)
df.fillna(method="bfill", inplace=True)

In [56]:
max_encoder_length = 36
max_prediction_length = 12
training_cutoff = df["time_idx"].max() - max_prediction_length

time_varying_known_reals = [
    "time_idx", "Date"
] + [col for col in df.columns if col not in ["petrol_price", "diesel_price", "series_id", "time_idx"] and df[col].dtype in [np.float64, np.int64]]

training = TimeSeriesDataSet(
    df[df.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="petrol_price",
    group_ids=["series_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["series_id"],
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=["petrol_price"],
    target_normalizer=GroupNormalizer(groups=["series_id"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

In [57]:
validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

#### Optuna testing for Petrol model

In [58]:
def objective(trial):
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    hidden_size = trial.suggest_int("hidden_size", 8, 64)
    attention_head_size = trial.suggest_int("attention_head_size", 1, 4)
    dropout = trial.suggest_float("dropout", 0.0, 0.3)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)

    train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    model = TemporalFusionTransformer.from_dataset(
        training,
        hidden_size=hidden_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        learning_rate=learning_rate,
        loss=SMAPE(),
        log_interval=0,
        reduce_on_plateau_patience=4,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        logger=False,
        enable_checkpointing=False,
        callbacks=[EarlyStopping(monitor="val_loss", patience=5)],
    )

    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

    val_actuals = torch.cat([y[0] for x, y in iter(val_loader)])
    val_predictions = model.predict(val_loader)

    smape_score = SMAPE()(val_predictions, val_actuals).item()
    return smape_score

In [59]:
study = optuna.create_study(direction="minimize", study_name="TFT_Petrol_Tuning")
study.optimize(objective, n_trials=30)

print("Best SMAPE:", study.best_value)
print("Best Params:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

[32m[I 2025-04-05 13:30:49,447][0m A new study created in memory with name: TFT_Petrol_Tuning[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.0 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 164 K 
6  | decoder_variable_s

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  5.78it/s, loss=0.54, train_loss_step=0.540, val_loss=0.993, train_loss_epoch=0.540] 

[32m[I 2025-04-05 13:30:53,153][0m Trial 0 finished with value: 0.992513120174408 and parameters: {'batch_size': 32, 'hidden_size': 60, 'attention_head_size': 3, 'dropout': 0.06831979376511693, 'learning_rate': 0.0003010774807632081}. Best is trial 0 with value: 0.992513120174408.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.05it/s, loss=0.39, train_loss_step=0.338, val_loss=0.197, train_loss_epoch=0.338] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.03it/s, loss=0.39, train_loss_step=0.338, val_loss=0.197, train_loss_epoch=0.338]

[32m[I 2025-04-05 13:31:00,066][0m Trial 1 finished with value: 0.19725435972213745 and parameters: {'batch_size': 32, 'hidden_size': 61, 'attention_head_size': 3, 'dropout': 0.15767956900492577, 'learning_rate': 0.001522494827357329}. Best is trial 1 with value: 0.19725435972213745.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 15: 100%|██████████| 7/7 [00:00<00:00,  7.11it/s, loss=0.0874, train_loss_step=0.0872, val_loss=0.101, train_loss_epoch=0.083]  

[32m[I 2025-04-05 13:31:15,544][0m Trial 2 finished with value: 0.10094732791185379 and parameters: {'batch_size': 8, 'hidden_size': 60, 'attention_head_size': 2, 'dropout': 0.08543085990762918, 'learning_rate': 0.001743171631944831}. Best is trial 2 with value: 0.10094732791185379.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  7.29it/s, loss=0.318, train_loss_step=0.311, val_loss=0.663, train_loss_epoch=0.311]

[32m[I 2025-04-05 13:31:17,347][0m Trial 3 finished with value: 0.6634525656700134 and parameters: {'batch_size': 32, 'hidden_size': 36, 'attention_head_size': 4, 'dropout': 0.06923545567392277, 'learning_rate': 0.00045432455087031435}. Best is trial 2 with value: 0.10094732791185379.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 2.7 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 81.3 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 80.2 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 3.3 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 3.3 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.92it/s, loss=0.457, train_loss_step=0.576, val_loss=1.140, train_loss_epoch=0.464]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.91it/s, loss=0.457, train_loss_step=0.576, val_loss=1.140, train_loss_epoch=0.464]

[32m[I 2025-04-05 13:31:27,685][0m Trial 4 finished with value: 1.1403995752334595 and parameters: {'batch_size': 16, 'hidden_size': 28, 'attention_head_size': 4, 'dropout': 0.16866939562964037, 'learning_rate': 0.0003537470110461809}. Best is trial 2 with value: 0.10094732791185379.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 122 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 120 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 8.0 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 8.0 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.95it/s, loss=0.198, train_loss_step=0.185, val_loss=0.307, train_loss_epoch=0.194]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.94it/s, loss=0.198, train_loss_step=0.185, val_loss=0.307, train_loss_epoch=0.194]


[32m[I 2025-04-05 13:31:45,581][0m Trial 5 finished with value: 0.30723097920417786 and parameters: {'batch_size': 8, 'hidden_size': 44, 'attention_head_size': 3, 'dropout': 0.08238156756428876, 'learning_rate': 0.0001667021204175059}. Best is trial 2 with value: 0.10094732791185379.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.84it/s, loss=0.432, train_loss_step=0.449, val_loss=0.095, train_loss_epoch=0.396] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.83it/s, loss=0.432, train_loss_step=0.449, val_loss=0.095, train_loss_epoch=0.396]


[32m[I 2025-04-05 13:32:03,112][0m Trial 6 finished with value: 0.09503764659166336 and parameters: {'batch_size': 8, 'hidden_size': 33, 'attention_head_size': 4, 'dropout': 0.28069702592619544, 'learning_rate': 0.0004770915875383292}. Best is trial 6 with value: 0.09503764659166336.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  7.41it/s, loss=0.438, train_loss_step=0.311, val_loss=0.765, train_loss_epoch=0.311]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  7.38it/s, loss=0.438, train_loss_step=0.311, val_loss=0.765, train_loss_epoch=0.311]

[32m[I 2025-04-05 13:32:08,750][0m Trial 7 finished with value: 0.7651171088218689 and parameters: {'batch_size': 32, 'hidden_size': 36, 'attention_head_size': 3, 'dropout': 0.06671419905573453, 'learning_rate': 0.0035985108957185393}. Best is trial 6 with value: 0.09503764659166336.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.4 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 142 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 141 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 11.1 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 11.1 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.64it/s, loss=0.682, train_loss_step=0.515, val_loss=0.754, train_loss_epoch=0.515]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.61it/s, loss=0.682, train_loss_step=0.515, val_loss=0.754, train_loss_epoch=0.515]

[32m[I 2025-04-05 13:32:14,942][0m Trial 8 finished with value: 0.7540280222892761 and parameters: {'batch_size': 32, 'hidden_size': 52, 'attention_head_size': 1, 'dropout': 0.2850560250427807, 'learning_rate': 0.0025041356329041555}. Best is trial 6 with value: 0.09503764659166336.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.28it/s, loss=0.289, train_loss_step=0.189, val_loss=0.074, train_loss_epoch=0.189] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.26it/s, loss=0.289, train_loss_step=0.189, val_loss=0.074, train_loss_epoch=0.189]

[32m[I 2025-04-05 13:32:21,128][0m Trial 9 finished with value: 0.07404226064682007 and parameters: {'batch_size': 32, 'hidden_size': 46, 'attention_head_size': 1, 'dropout': 0.016984507858361652, 'learning_rate': 0.0051786282162447584}. Best is trial 9 with value: 0.07404226064682007.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.5 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 41.4 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 40.8 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 648   
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 648   
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(




  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  8.75it/s, loss=0.127, train_loss_step=0.104, val_loss=0.143, train_loss_epoch=0.106] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  8.73it/s, loss=0.127, train_loss_step=0.104, val_loss=0.143, train_loss_epoch=0.106]


[32m[I 2025-04-05 13:32:30,586][0m Trial 10 finished with value: 0.1430806964635849 and parameters: {'batch_size': 16, 'hidden_size': 12, 'attention_head_size': 1, 'dropout': 0.01414472549602333, 'learning_rate': 0.008814242273767326}. Best is trial 9 with value: 0.07404226064682007.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.60it/s, loss=0.308, train_loss_step=0.281, val_loss=0.507, train_loss_epoch=0.297]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.59it/s, loss=0.308, train_loss_step=0.281, val_loss=0.507, train_loss_epoch=0.297]


[32m[I 2025-04-05 13:32:47,165][0m Trial 11 finished with value: 0.507379949092865 and parameters: {'batch_size': 8, 'hidden_size': 24, 'attention_head_size': 2, 'dropout': 0.29072251124779946, 'learning_rate': 0.0006623188960949435}. Best is trial 9 with value: 0.07404226064682007.[0m
  rank_zero_warn(
  rank_zero_warn(


                                                                   

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.0 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 127 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 125 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 13: 100%|██████████| 7/7 [00:00<00:00,  7.62it/s, loss=0.119, train_loss_step=0.163, val_loss=0.0737, train_loss_epoch=0.124] 


[32m[I 2025-04-05 13:33:02,221][0m Trial 12 finished with value: 0.0737147256731987 and parameters: {'batch_size': 8, 'hidden_size': 46, 'attention_head_size': 1, 'dropout': 0.2198350495765554, 'learning_rate': 0.009752857730131799}. Best is trial 12 with value: 0.0737147256731987.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 8: 100%|██████████| 7/7 [00:00<00:00,  7.64it/s, loss=0.137, train_loss_step=0.166, val_loss=0.234, train_loss_epoch=0.131] 


[32m[I 2025-04-05 13:33:10,713][0m Trial 13 finished with value: 0.23411524295806885 and parameters: {'batch_size': 8, 'hidden_size': 46, 'attention_head_size': 1, 'dropout': 0.21813604241417742, 'learning_rate': 0.009797561824920968}. Best is trial 12 with value: 0.0737147256731987.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 14: 100%|██████████| 4/4 [00:00<00:00,  6.45it/s, loss=0.203, train_loss_step=0.165, val_loss=0.230, train_loss_epoch=0.177] 

[32m[I 2025-04-05 13:33:20,188][0m Trial 14 finished with value: 0.22979922592639923 and parameters: {'batch_size': 16, 'hidden_size': 47, 'attention_head_size': 2, 'dropout': 0.21923949949961447, 'learning_rate': 0.0048909210952282865}. Best is trial 12 with value: 0.0737147256731987.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.35it/s, loss=0.29, train_loss_step=0.210, val_loss=0.216, train_loss_epoch=0.210] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.33it/s, loss=0.29, train_loss_step=0.210, val_loss=0.216, train_loss_epoch=0.210]

[32m[I 2025-04-05 13:33:26,884][0m Trial 15 finished with value: 0.21592526137828827 and parameters: {'batch_size': 32, 'hidden_size': 52, 'attention_head_size': 1, 'dropout': 0.2135503191539309, 'learning_rate': 0.0055509521041481585}. Best is trial 12 with value: 0.0737147256731987.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 10: 100%|██████████| 7/7 [00:00<00:00,  7.56it/s, loss=0.138, train_loss_step=0.0847, val_loss=0.0714, train_loss_epoch=0.124]

[32m[I 2025-04-05 13:33:37,248][0m Trial 16 finished with value: 0.07138101011514664 and parameters: {'batch_size': 8, 'hidden_size': 43, 'attention_head_size': 2, 'dropout': 0.018800840854248935, 'learning_rate': 0.006352980994277746}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.90it/s, loss=0.233, train_loss_step=0.281, val_loss=0.0735, train_loss_epoch=0.223]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.89it/s, loss=0.233, train_loss_step=0.281, val_loss=0.0735, train_loss_epoch=0.223]


[32m[I 2025-04-05 13:33:53,313][0m Trial 17 finished with value: 0.0734911635518074 and parameters: {'batch_size': 8, 'hidden_size': 17, 'attention_head_size': 2, 'dropout': 0.1190569807410693, 'learning_rate': 0.0010546027260870802}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  9.50it/s, loss=0.321, train_loss_step=0.228, val_loss=0.464, train_loss_epoch=0.308]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  9.49it/s, loss=0.321, train_loss_step=0.228, val_loss=0.464, train_loss_epoch=0.308]


[32m[I 2025-04-05 13:34:08,520][0m Trial 18 finished with value: 0.46408772468566895 and parameters: {'batch_size': 8, 'hidden_size': 10, 'attention_head_size': 2, 'dropout': 0.12751255154083868, 'learning_rate': 0.0007924172710805882}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.58it/s, loss=0.145, train_loss_step=0.109, val_loss=0.0767, train_loss_epoch=0.130] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.57it/s, loss=0.145, train_loss_step=0.109, val_loss=0.0767, train_loss_epoch=0.130]


[32m[I 2025-04-05 13:34:25,055][0m Trial 19 finished with value: 0.07668229192495346 and parameters: {'batch_size': 8, 'hidden_size': 21, 'attention_head_size': 2, 'dropout': 0.0022176991611039254, 'learning_rate': 0.0011999503901538677}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.88it/s, loss=0.4, train_loss_step=0.362, val_loss=0.718, train_loss_epoch=0.429]  

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  8.87it/s, loss=0.4, train_loss_step=0.362, val_loss=0.718, train_loss_epoch=0.429]


[32m[I 2025-04-05 13:34:41,082][0m Trial 20 finished with value: 0.7175341248512268 and parameters: {'batch_size': 8, 'hidden_size': 17, 'attention_head_size': 2, 'dropout': 0.11715270283636857, 'learning_rate': 0.00012328514372646942}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 13: 100%|██████████| 7/7 [00:00<00:00,  7.75it/s, loss=0.126, train_loss_step=0.117, val_loss=0.131, train_loss_epoch=0.116] 


[32m[I 2025-04-05 13:34:54,163][0m Trial 21 finished with value: 0.13126392662525177 and parameters: {'batch_size': 8, 'hidden_size': 39, 'attention_head_size': 1, 'dropout': 0.18938002691986822, 'learning_rate': 0.0027592345545139002}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 9: 100%|██████████| 7/7 [00:00<00:00,  8.01it/s, loss=0.112, train_loss_step=0.0876, val_loss=0.155, train_loss_epoch=0.100]


[32m[I 2025-04-05 13:35:03,125][0m Trial 22 finished with value: 0.15505506098270416 and parameters: {'batch_size': 8, 'hidden_size': 30, 'attention_head_size': 2, 'dropout': 0.040137047359038225, 'learning_rate': 0.007336423451686361}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 13: 100%|██████████| 7/7 [00:00<00:00,  7.75it/s, loss=0.125, train_loss_step=0.119, val_loss=0.0837, train_loss_epoch=0.122]


[32m[I 2025-04-05 13:35:15,950][0m Trial 23 finished with value: 0.08365754038095474 and parameters: {'batch_size': 8, 'hidden_size': 41, 'attention_head_size': 2, 'dropout': 0.25115652458877524, 'learning_rate': 0.0033374075460964815}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 11: 100%|██████████| 7/7 [00:00<00:00,  7.50it/s, loss=0.136, train_loss_step=0.102, val_loss=0.107, train_loss_epoch=0.127]

[32m[I 2025-04-05 13:35:27,404][0m Trial 24 finished with value: 0.10663852840662003 and parameters: {'batch_size': 8, 'hidden_size': 56, 'attention_head_size': 1, 'dropout': 0.12563484908238676, 'learning_rate': 0.0018898203885517433}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 12: 100%|██████████| 7/7 [00:00<00:00,  7.57it/s, loss=0.168, train_loss_step=0.122, val_loss=0.157, train_loss_epoch=0.146]

[32m[I 2025-04-05 13:35:39,680][0m Trial 25 finished with value: 0.156851664185524 and parameters: {'batch_size': 8, 'hidden_size': 50, 'attention_head_size': 3, 'dropout': 0.2441760617810736, 'learning_rate': 0.004397831851162384}. Best is trial 16 with value: 0.07138101011514664.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 



                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00,  7.01it/s, loss=0.539, train_loss_step=0.556, val_loss=0.321, train_loss_epoch=0.508]

[32m[I 2025-04-05 13:35:43,882][0m Trial 26 finished with value: 0.32067248225212097 and parameters: {'batch_size': 16, 'hidden_size': 42, 'attention_head_size': 2, 'dropout': 0.18326463875345583, 'learning_rate': 0.00020847134265345097}. Best is trial 16 with value: 0.07138101011514664.[0m





  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 51.3 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 50.6 K
7  | static_context_variable_selection  

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 13: 100%|██████████| 7/7 [00:00<00:00,  9.13it/s, loss=0.11, train_loss_step=0.0972, val_loss=0.0567, train_loss_epoch=0.104] 


[32m[I 2025-04-05 13:35:54,973][0m Trial 27 finished with value: 0.056701671332120895 and parameters: {'batch_size': 8, 'hidden_size': 16, 'attention_head_size': 1, 'dropout': 0.10501453459070806, 'learning_rate': 0.007120219150765482}. Best is trial 27 with value: 0.056701671332120895.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 9: 100%|██████████| 7/7 [00:00<00:00,  8.40it/s, loss=0.169, train_loss_step=0.133, val_loss=0.119, train_loss_epoch=0.144] 


[32m[I 2025-04-05 13:36:03,209][0m Trial 28 finished with value: 0.1191493347287178 and parameters: {'batch_size': 8, 'hidden_size': 15, 'attention_head_size': 2, 'dropout': 0.04387737307713756, 'learning_rate': 0.006517627670110531}. Best is trial 27 with value: 0.056701671332120895.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 5: 100%|██████████| 7/7 [00:00<00:00, 12.89it/s, loss=0.666, train_loss_step=0.577, val_loss=0.436, train_loss_epoch=0.661]

[32m[I 2025-04-05 13:36:06,586][0m Trial 29 finished with value: 0.4356025755405426 and parameters: {'batch_size': 8, 'hidden_size': 8, 'attention_head_size': 3, 'dropout': 0.09243320597303416, 'learning_rate': 0.0011426498419219344}. Best is trial 27 with value: 0.056701671332120895.[0m



Best SMAPE: 0.056701671332120895
Best Params:
  batch_size: 8
  hidden_size: 16
  attention_head_size: 1
  dropout: 0.10501453459070806
  learning_rate: 0.007120219150765482


#### Retraining with the best parameters 

In [86]:
train_loader = training.to_dataloader(train=True, batch_size=8, num_workers=0)
val_loader = validation.to_dataloader(train=False, batch_size=8, num_workers=0)

# Define TFT with Optuna-optimized parameters
tft = TemporalFusionTransformer.from_dataset(
    training,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.10501453459070806,
    learning_rate=0.007120219150765482,
    loss=SMAPE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Lightning Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning import Trainer

early_stop = EarlyStopping(monitor="val_loss", patience=10, verbose=True, mode="min")
lr_logger = LearningRateMonitor()

trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    callbacks=[early_stop, lr_logger],
    gradient_clip_val=0.1,
)

# Train the model
trainer.fit(tft, train_dataloaders=train_loader, val_dataloaders=val_loader)

  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.2 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 51.3 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 50.6 K
7  | static_context_variable_selection  

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  4.90it/s, loss=0.443, v_num=31, train_loss_step=0.302, val_loss=1.340]

Metric val_loss improved. New best score: 1.341


Epoch 1: 100%|██████████| 7/7 [00:01<00:00,  4.55it/s, loss=0.448, v_num=31, train_loss_step=0.190, val_loss=1.210, train_loss_epoch=0.443]

Metric val_loss improved by 0.134 >= min_delta = 0.0. New best score: 1.207


Epoch 2: 100%|██████████| 7/7 [00:01<00:00,  4.38it/s, loss=0.426, v_num=31, train_loss_step=0.438, val_loss=1.000, train_loss_epoch=0.453]

Metric val_loss improved by 0.205 >= min_delta = 0.0. New best score: 1.002


Epoch 3: 100%|██████████| 7/7 [00:01<00:00,  4.84it/s, loss=0.386, v_num=31, train_loss_step=0.323, val_loss=0.784, train_loss_epoch=0.383]

Metric val_loss improved by 0.219 >= min_delta = 0.0. New best score: 0.784


Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  4.75it/s, loss=0.351, v_num=31, train_loss_step=0.332, val_loss=0.634, train_loss_epoch=0.358]

Metric val_loss improved by 0.150 >= min_delta = 0.0. New best score: 0.634


Epoch 5: 100%|██████████| 7/7 [00:01<00:00,  4.66it/s, loss=0.323, v_num=31, train_loss_step=0.218, val_loss=0.441, train_loss_epoch=0.318]

Metric val_loss improved by 0.193 >= min_delta = 0.0. New best score: 0.441


Epoch 6: 100%|██████████| 7/7 [00:01<00:00,  4.77it/s, loss=0.283, v_num=31, train_loss_step=0.220, val_loss=0.284, train_loss_epoch=0.265]

Metric val_loss improved by 0.157 >= min_delta = 0.0. New best score: 0.284


Epoch 7: 100%|██████████| 7/7 [00:01<00:00,  4.78it/s, loss=0.249, v_num=31, train_loss_step=0.195, val_loss=0.171, train_loss_epoch=0.239]

Metric val_loss improved by 0.113 >= min_delta = 0.0. New best score: 0.171


Epoch 8: 100%|██████████| 7/7 [00:01<00:00,  4.68it/s, loss=0.214, v_num=31, train_loss_step=0.172, val_loss=0.0967, train_loss_epoch=0.213]

Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 0.097


Epoch 9: 100%|██████████| 7/7 [00:01<00:00,  4.82it/s, loss=0.193, v_num=31, train_loss_step=0.163, val_loss=0.0753, train_loss_epoch=0.182]

Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 0.075


Epoch 10: 100%|██████████| 7/7 [00:01<00:00,  4.03it/s, loss=0.17, v_num=31, train_loss_step=0.137, val_loss=0.075, train_loss_epoch=0.170]  

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.075


Epoch 11: 100%|██████████| 7/7 [00:01<00:00,  4.66it/s, loss=0.155, v_num=31, train_loss_step=0.134, val_loss=0.0748, train_loss_epoch=0.149]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.075


Epoch 12: 100%|██████████| 7/7 [00:01<00:00,  4.82it/s, loss=0.141, v_num=31, train_loss_step=0.111, val_loss=0.0746, train_loss_epoch=0.141]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.075


Epoch 15: 100%|██████████| 7/7 [00:01<00:00,  4.84it/s, loss=0.112, v_num=31, train_loss_step=0.0944, val_loss=0.0745, train_loss_epoch=0.109]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.074


Epoch 16: 100%|██████████| 7/7 [00:01<00:00,  4.79it/s, loss=0.105, v_num=31, train_loss_step=0.0901, val_loss=0.0743, train_loss_epoch=0.107]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.074


Epoch 17: 100%|██████████| 7/7 [00:01<00:00,  4.80it/s, loss=0.0991, v_num=31, train_loss_step=0.0813, val_loss=0.0742, train_loss_epoch=0.095]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.074


Epoch 27: 100%|██████████| 7/7 [00:01<00:00,  4.85it/s, loss=0.0635, v_num=31, train_loss_step=0.0584, val_loss=0.0857, train_loss_epoch=0.0636]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.074. Signaling Trainer to stop.


Epoch 27: 100%|██████████| 7/7 [00:02<00:00,  3.40it/s, loss=0.0635, v_num=31, train_loss_step=0.0584, val_loss=0.0857, train_loss_epoch=0.0613]


In [87]:
best_model = TemporalFusionTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
actuals = torch.cat([y[0] for x, y in iter(val_loader)])
predictions = best_model.predict(val_loader)

y_true = actuals.detach().cpu().numpy()
y_pred = predictions.detach().cpu().numpy()

print(f"\n🔍 Final Evaluation Metrics (Optuna-tuned TFT):")
print(f"SMAPE: {SMAPE()(predictions, actuals).item():.2f}")
print(f"MAE: {mean_absolute_error(y_true, y_pred):.2f}")
print(f"RMSE: {mean_squared_error(y_true, y_pred, squared=False):.2f}")
print(f"R² Score: {r2_score(y_true, y_pred):.4f}")


🔍 Final Evaluation Metrics (Optuna-tuned TFT):
SMAPE: 0.09
MAE: 34.39
RMSE: 34.39
R² Score: nan


  rank_zero_warn(
  rank_zero_warn(


### Training TFT for petrol price prediction

In [103]:
df = pd.read_csv("/Users/yehana2002/Projects/DSGP/datasets/processed/final_merged_dataset_ready.csv")

In [104]:
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").reset_index(drop=True)
df["series_id"] = "sri_lanka"
df.rename(columns={"Petrol_Price": "petrol_price"}, inplace=True)
df["time_idx"] = np.arange(len(df))

df.fillna(method="ffill", inplace=True)
df.fillna(method="bfill", inplace=True)

#### Feature engineering

In [105]:
df["crude_roc"] = df["Crude_Oil_Price"].pct_change().fillna(0)
df["usd_lkr_roc"] = df["USD_LKR"].pct_change().fillna(0)

# 3-month lag
df["petrol_lag3"] = df["petrol_price"].shift(3).fillna(method="bfill")
df["crude_lag3"] = df["Crude_Oil_Price"].shift(3).fillna(method="bfill")

# Interaction terms
df["crude_usd_interaction"] = df["Crude_Oil_Price"] * df["USD_LKR"]
df["inflation_x_policy"] = df["annual_cpi_inflation_rate"] * df["policy_rate"]

#### TFT configs

In [106]:
max_encoder_length = 36
max_prediction_length = 12
training_cutoff = df["time_idx"].max() - max_prediction_length

time_varying_known_reals = [
    "time_idx", "Date"
] + [col for col in df.columns if col not in [
    "petrol_price", "diesel_price", "series_id", "time_idx"
] and df[col].dtype in [np.float64, np.int64]]

#### Time series dataset

In [107]:
training_cutoff = df["time_idx"].max() - max_prediction_length

tft_training = TimeSeriesDataSet(
    df[df.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="petrol_price",
    group_ids=["series_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["series_id"],
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=["petrol_price"],
    target_normalizer=GroupNormalizer(groups=["series_id"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

tft_validation = TimeSeriesDataSet.from_dataset(tft_training, df, predict=True, stop_randomization=True)

#### Data Loaders

In [108]:
batch_size = 8
train_dataloader = tft_training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = tft_validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

#### Model

In [109]:
tft = TemporalFusionTransformer.from_dataset(
    tft_training,
    learning_rate=0.007120219150765482,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.10501453459070806,
    loss=SMAPE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

  rank_zero_warn(
  rank_zero_warn(


#### Trainer

In [110]:
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, verbose=True, mode="min")
lr_logger = LearningRateMonitor()

trainer = Trainer(
    max_epochs=100,
    accelerator="cpu",
    callbacks=[early_stop_callback, lr_logger],
    gradient_clip_val=0.1,
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [111]:
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 55.4 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 54.8 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

Sanity Checking DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.68it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  4.51it/s, loss=0.518, v_num=33, train_loss_step=0.445, val_loss=2.000]

Metric val_loss improved. New best score: 2.000


Epoch 3: 100%|██████████| 7/7 [00:01<00:00,  3.87it/s, loss=0.46, v_num=33, train_loss_step=0.372, val_loss=1.740, train_loss_epoch=0.480] 

Metric val_loss improved by 0.257 >= min_delta = 0.0. New best score: 1.743


Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  4.65it/s, loss=0.393, v_num=33, train_loss_step=0.319, val_loss=1.180, train_loss_epoch=0.355]

Metric val_loss improved by 0.563 >= min_delta = 0.0. New best score: 1.180


Epoch 5: 100%|██████████| 7/7 [00:01<00:00,  4.52it/s, loss=0.309, v_num=33, train_loss_step=0.272, val_loss=0.789, train_loss_epoch=0.305]

Metric val_loss improved by 0.391 >= min_delta = 0.0. New best score: 0.789


Epoch 6: 100%|██████████| 7/7 [00:01<00:00,  4.33it/s, loss=0.263, v_num=33, train_loss_step=0.169, val_loss=0.503, train_loss_epoch=0.252]

Metric val_loss improved by 0.286 >= min_delta = 0.0. New best score: 0.503


Epoch 7: 100%|██████████| 7/7 [00:01<00:00,  4.53it/s, loss=0.223, v_num=33, train_loss_step=0.163, val_loss=0.281, train_loss_epoch=0.215]

Metric val_loss improved by 0.222 >= min_delta = 0.0. New best score: 0.281


Epoch 8: 100%|██████████| 7/7 [00:01<00:00,  4.59it/s, loss=0.195, v_num=33, train_loss_step=0.188, val_loss=0.167, train_loss_epoch=0.188]

Metric val_loss improved by 0.114 >= min_delta = 0.0. New best score: 0.167


Epoch 9: 100%|██████████| 7/7 [00:01<00:00,  4.61it/s, loss=0.168, v_num=33, train_loss_step=0.124, val_loss=0.103, train_loss_epoch=0.165]

Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.103


Epoch 10: 100%|██████████| 7/7 [00:01<00:00,  4.61it/s, loss=0.151, v_num=33, train_loss_step=0.131, val_loss=0.0623, train_loss_epoch=0.149]

Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.062


Epoch 11: 100%|██████████| 7/7 [00:01<00:00,  4.62it/s, loss=0.137, v_num=33, train_loss_step=0.101, val_loss=0.060, train_loss_epoch=0.134] 

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.060


Epoch 21: 100%|██████████| 7/7 [00:01<00:00,  4.31it/s, loss=0.0785, v_num=33, train_loss_step=0.0758, val_loss=0.0645, train_loss_epoch=0.0796]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.060. Signaling Trainer to stop.


Epoch 21: 100%|██████████| 7/7 [00:02<00:00,  3.01it/s, loss=0.0785, v_num=33, train_loss_step=0.0758, val_loss=0.0645, train_loss_epoch=0.075] 


In [112]:
best_model = TemporalFusionTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = best_model.predict(val_dataloader)

  rank_zero_warn(
  rank_zero_warn(


In [113]:
y_true = actuals.detach().cpu().numpy()
y_pred = predictions.detach().cpu().numpy()

In [114]:
smape = SMAPE()(predictions, actuals).item()
mae = mean_absolute_error(y_true, y_pred)
rmse = mean_squared_error(y_true, y_pred, squared=False)
r2 = r2_score(y_true, y_pred)

print(f"\n🔍 Evaluation Metrics for Petrol Price:")
print(f"SMAPE: {smape:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")


🔍 Evaluation Metrics for Petrol Price:
SMAPE: 0.06
MAE: 26.04
RMSE: 26.04
R² Score: nan




In [119]:
residuals = actuals - predictions  # shape: (num_batches, prediction_length)
residuals_np = residuals.detach().cpu().numpy().flatten()

# Extracting dates from x object used in prediction
# Each sample in x has 'decoder_time_idx' which maps to actual prediction dates
decoded_time = x["decoder_time_idx"].detach().cpu().numpy().flatten()

# Match with dates from the original dataframe
date_lookup = df.set_index("time_idx")["Date"]
dates = pd.to_datetime([date_lookup[int(t)] for t in decoded_time])

# Now build residual DataFrame
res_df = pd.DataFrame({
    "date": dates,
    "residuals": residuals_np
})

In [120]:
res_df.to_csv("tft_petrol_residuals.csv", index=False)

### Training TFT for diesel price prediction

In [131]:
agri_index_df = pd.read_csv("/Users/yehana2002/Projects/DSGP/datasets/raw/diesel_other_factros.csv")
agri_index_df = agri_index_df[["Date", "sl_agri_production_index"]]
agri_index_df["Date"] = pd.to_datetime(agri_index_df["Date"])
agri_index_df["Year"] = agri_index_df["Date"].dt.year
agri_index_df = agri_index_df[["Year", "sl_agri_production_index"]]

In [132]:
final_df = pd.read_csv("/Users/yehana2002/Projects/DSGP/datasets/processed/final_merged_dataset_ready.csv")
final_df["Date"] = pd.to_datetime(final_df["Date"])
final_df["Year"] = final_df["Date"].dt.year

In [133]:
final_df = final_df.merge(agri_index_df, on="Year", how="left")

In [134]:
final_df["sl_agri_production_index"].fillna(method="ffill", inplace=True)

In [136]:
final_df[["Date", "Diesel_Price", "Year", "sl_agri_production_index"]].head()

Unnamed: 0,Date,Diesel_Price,Year,sl_agri_production_index
0,1992-08-21,14.5,1992,52.99
1,1992-12-21,14.5,1992,52.99
2,1993-06-07,14.5,1993,61.51
3,1993-06-11,14.5,1993,61.51
4,1993-07-12,14.6,1993,61.51


In [137]:
df = df.sort_values("Date").reset_index(drop=True)
df.rename(columns={"Diesel_Price": "diesel_price"}, inplace=True)
df["series_id"] = "sri_lanka"
df["time_idx"] = np.arange(len(df))

In [138]:
max_encoder_length = 36
max_prediction_length = 12
training_cutoff = df["time_idx"].max() - max_prediction_length

time_varying_known_reals = [
    "time_idx", "Date"
] + [col for col in df.columns if col not in ["diesel_price", "petrol_price", "series_id", "time_idx"] and df[col].dtype in [np.float64, np.int64]]

In [139]:
training = TimeSeriesDataSet(
    df[df.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="diesel_price",
    group_ids=["series_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["series_id"],
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=["diesel_price"],
    target_normalizer=GroupNormalizer(groups=["series_id"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

#### Optuna testing for diesel model

In [140]:
def objective(trial):
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    hidden_size = trial.suggest_int("hidden_size", 8, 64)
    attention_head_size = trial.suggest_int("attention_head_size", 1, 4)
    dropout = trial.suggest_float("dropout", 0.0, 0.3)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)

    train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    model = TemporalFusionTransformer.from_dataset(
        training,
        hidden_size=hidden_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        learning_rate=learning_rate,
        loss=SMAPE(),
        log_interval=0,
        reduce_on_plateau_patience=4,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        logger=False,
        enable_checkpointing=False,
        callbacks=[EarlyStopping(monitor="val_loss", patience=5)],
    )

    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

    val_actuals = torch.cat([y[0] for x, y in iter(val_loader)])
    val_predictions = model.predict(val_loader)

    smape_score = SMAPE()(val_predictions, val_actuals).item()
    return smape_score

In [141]:
study = optuna.create_study(direction="minimize", study_name="TFT_Diesel_Tuning")
study.optimize(objective, n_trials=30)

# Print results
print("✅ Best SMAPE:", study.best_value)
print("🎯 Best Params:")
for k, v in study.best_params.items():
    print(f"  {k}: {v}")

[32m[I 2025-04-05 16:56:04,565][0m A new study created in memory with name: TFT_Diesel_Tuning[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.2 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 32.3 K
6  | decoder_variable_s

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 11.91it/s, loss=0.451, train_loss_step=0.406, val_loss=1.100, train_loss_epoch=0.406]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 11.84it/s, loss=0.451, train_loss_step=0.406, val_loss=1.100, train_loss_epoch=0.406]


[32m[I 2025-04-05 16:56:08,406][0m Trial 0 finished with value: 1.0984548330307007 and parameters: {'batch_size': 32, 'hidden_size': 8, 'attention_head_size': 4, 'dropout': 0.13922589728412424, 'learning_rate': 0.001274811529330767}. Best is trial 0 with value: 1.0984548330307007.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 16: 100%|██████████| 4/4 [00:00<00:00,  5.53it/s, loss=0.195, train_loss_step=0.167, val_loss=0.176, train_loss_epoch=0.171] 

[32m[I 2025-04-05 16:56:20,569][0m Trial 1 finished with value: 0.17587685585021973 and parameters: {'batch_size': 16, 'hidden_size': 62, 'attention_head_size': 2, 'dropout': 0.05069085243340647, 'learning_rate': 0.004701295505814713}. Best is trial 1 with value: 0.17587685585021973.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 85.1 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 84.0 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 3.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 3.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  7.26it/s, loss=0.542, train_loss_step=0.425, val_loss=0.544, train_loss_epoch=0.425]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  7.23it/s, loss=0.542, train_loss_step=0.425, val_loss=0.544, train_loss_epoch=0.425]

[32m[I 2025-04-05 16:56:26,332][0m Trial 2 finished with value: 0.5440833568572998 and parameters: {'batch_size': 32, 'hidden_size': 27, 'attention_head_size': 2, 'dropout': 0.0005682560601292574, 'learning_rate': 0.0030959541833530282}. Best is trial 1 with value: 0.17587685585021973.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.9 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 134 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 132 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 8.4 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 8.4 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.28it/s, loss=0.389, train_loss_step=0.355, val_loss=0.622, train_loss_epoch=0.336]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.27it/s, loss=0.389, train_loss_step=0.355, val_loss=0.622, train_loss_epoch=0.336]

[32m[I 2025-04-05 16:56:39,135][0m Trial 3 finished with value: 0.6217017769813538 and parameters: {'batch_size': 16, 'hidden_size': 45, 'attention_head_size': 1, 'dropout': 0.08667813933926133, 'learning_rate': 0.0001388958795440624}. Best is trial 1 with value: 0.17587685585021973.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.2 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 185 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 183 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.62it/s, loss=0.0986, train_loss_step=0.0815, val_loss=0.0835, train_loss_epoch=0.0767]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.61it/s, loss=0.0986, train_loss_step=0.0815, val_loss=0.0835, train_loss_epoch=0.0767]

[32m[I 2025-04-05 16:56:53,499][0m Trial 4 finished with value: 0.08354290574789047 and parameters: {'batch_size': 16, 'hidden_size': 63, 'attention_head_size': 3, 'dropout': 5.71315030161168e-05, 'learning_rate': 0.0013366258010220143}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.4 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 115 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 113 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 6.0 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 6.0 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.54it/s, loss=0.396, train_loss_step=0.354, val_loss=0.402, train_loss_epoch=0.389]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.53it/s, loss=0.396, train_loss_step=0.354, val_loss=0.402, train_loss_epoch=0.389]

[32m[I 2025-04-05 16:57:12,306][0m Trial 5 finished with value: 0.40248024463653564 and parameters: {'batch_size': 8, 'hidden_size': 38, 'attention_head_size': 1, 'dropout': 0.19012744622237504, 'learning_rate': 0.0006248849720219954}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 168 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 166 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 13.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 13.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.94it/s, loss=0.575, train_loss_step=0.474, val_loss=0.415, train_loss_epoch=0.474]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.92it/s, loss=0.575, train_loss_step=0.474, val_loss=0.415, train_loss_epoch=0.474]

[32m[I 2025-04-05 16:57:19,397][0m Trial 6 finished with value: 0.41468545794487 and parameters: {'batch_size': 32, 'hidden_size': 57, 'attention_head_size': 1, 'dropout': 0.03768522190546938, 'learning_rate': 0.00016014575553082125}. Best is trial 4 with value: 0.08354290574789047.[0m





  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.4 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 154 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 152 K 
7  | static_context_variable_selection  

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.12it/s, loss=0.283, train_loss_step=0.277, val_loss=1.270, train_loss_epoch=0.269]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.11it/s, loss=0.283, train_loss_step=0.277, val_loss=1.270, train_loss_epoch=0.269]

[32m[I 2025-04-05 16:57:40,716][0m Trial 7 finished with value: 1.2692643404006958 and parameters: {'batch_size': 8, 'hidden_size': 52, 'attention_head_size': 4, 'dropout': 0.1312595433756385, 'learning_rate': 0.00011204180922231109}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.0 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 137 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 135 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 8.7 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 8.7 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 10: 100%|██████████| 7/7 [00:00<00:00,  7.11it/s, loss=0.63, train_loss_step=0.580, val_loss=0.476, train_loss_epoch=0.602] 

[32m[I 2025-04-05 16:57:51,797][0m Trial 8 finished with value: 0.47556909918785095 and parameters: {'batch_size': 8, 'hidden_size': 46, 'attention_head_size': 3, 'dropout': 0.2662527706205085, 'learning_rate': 0.00014445576899354835}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.5 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 157 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 155 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 11.6 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 11.6 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.70it/s, loss=0.359, train_loss_step=0.206, val_loss=0.138, train_loss_epoch=0.206] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.68it/s, loss=0.359, train_loss_step=0.206, val_loss=0.138, train_loss_epoch=0.206]

[32m[I 2025-04-05 16:57:58,653][0m Trial 9 finished with value: 0.13784198462963104 and parameters: {'batch_size': 32, 'hidden_size': 53, 'attention_head_size': 3, 'dropout': 0.21157128374499476, 'learning_rate': 0.007570671575891093}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 2.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 74.3 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 73.3 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.3 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 2.3 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.16it/s, loss=0.563, train_loss_step=0.591, val_loss=0.518, train_loss_epoch=0.538]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.15it/s, loss=0.563, train_loss_step=0.591, val_loss=0.518, train_loss_epoch=0.538]

[32m[I 2025-04-05 16:58:10,312][0m Trial 10 finished with value: 0.5179170966148376 and parameters: {'batch_size': 16, 'hidden_size': 23, 'attention_head_size': 3, 'dropout': 0.28172925321432674, 'learning_rate': 0.0006073412924502034}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 188 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 186 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.86it/s, loss=0.472, train_loss_step=0.237, val_loss=0.0897, train_loss_epoch=0.237]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.84it/s, loss=0.472, train_loss_step=0.237, val_loss=0.0897, train_loss_epoch=0.237]

[32m[I 2025-04-05 16:58:17,453][0m Trial 11 finished with value: 0.08968407660722733 and parameters: {'batch_size': 32, 'hidden_size': 64, 'attention_head_size': 3, 'dropout': 0.20858772419939392, 'learning_rate': 0.0091742835249257}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 188 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 186 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.73it/s, loss=0.183, train_loss_step=0.177, val_loss=0.0953, train_loss_epoch=0.155]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.72it/s, loss=0.183, train_loss_step=0.177, val_loss=0.0953, train_loss_epoch=0.155]

[32m[I 2025-04-05 16:58:31,699][0m Trial 12 finished with value: 0.0952734425663948 and parameters: {'batch_size': 16, 'hidden_size': 64, 'attention_head_size': 3, 'dropout': 0.2166873276327294, 'learning_rate': 0.0021695796156208113}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.2 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 185 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 183 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  5.53it/s, loss=0.595, train_loss_step=0.507, val_loss=0.311, train_loss_epoch=0.507]

[32m[I 2025-04-05 16:58:35,543][0m Trial 13 finished with value: 0.3109791576862335 and parameters: {'batch_size': 32, 'hidden_size': 63, 'attention_head_size': 4, 'dropout': 0.17225631819968423, 'learning_rate': 0.008723519492691676}. Best is trial 4 with value: 0.08354290574789047.[0m





  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.9 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 134 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 132 K 
7  | static_context_variable_selection  

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.15it/s, loss=0.353, train_loss_step=0.307, val_loss=0.267, train_loss_epoch=0.310]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.14it/s, loss=0.353, train_loss_step=0.307, val_loss=0.267, train_loss_epoch=0.310]

[32m[I 2025-04-05 16:58:49,356][0m Trial 14 finished with value: 0.26667460799217224 and parameters: {'batch_size': 16, 'hidden_size': 45, 'attention_head_size': 2, 'dropout': 0.24194640851903765, 'learning_rate': 0.0014152497223972594}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.1 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 104 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 102 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.8 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 4.8 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.70it/s, loss=0.512, train_loss_step=0.404, val_loss=0.679, train_loss_epoch=0.478]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  6.69it/s, loss=0.512, train_loss_step=0.404, val_loss=0.679, train_loss_epoch=0.478]

[32m[I 2025-04-05 16:59:01,561][0m Trial 15 finished with value: 0.678747832775116 and parameters: {'batch_size': 16, 'hidden_size': 34, 'attention_head_size': 3, 'dropout': 0.103191942878087, 'learning_rate': 0.00038377036896848}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.6 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 159 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 157 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 12.0 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 12.0 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.96it/s, loss=0.482, train_loss_step=0.228, val_loss=0.151, train_loss_epoch=0.228]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.94it/s, loss=0.482, train_loss_step=0.228, val_loss=0.151, train_loss_epoch=0.228]

[32m[I 2025-04-05 16:59:08,737][0m Trial 16 finished with value: 0.15063147246837616 and parameters: {'batch_size': 32, 'hidden_size': 54, 'attention_head_size': 2, 'dropout': 0.006193514895418838, 'learning_rate': 0.0040699599013324095}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 36.8 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 36.4 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 378   
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 378   
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  8.10it/s, loss=0.682, train_loss_step=0.568, val_loss=0.782, train_loss_epoch=0.646]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  8.08it/s, loss=0.682, train_loss_step=0.568, val_loss=0.782, train_loss_epoch=0.646]

[32m[I 2025-04-05 16:59:19,145][0m Trial 17 finished with value: 0.7820293307304382 and parameters: {'batch_size': 16, 'hidden_size': 9, 'attention_head_size': 4, 'dropout': 0.1660525270106718, 'learning_rate': 0.00029830333745294014}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.9 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 174 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 171 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 14.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 14.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 27.99it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.61it/s, loss=0.391, train_loss_step=0.267, val_loss=0.444, train_loss_epoch=0.267]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  5.60it/s, loss=0.391, train_loss_step=0.267, val_loss=0.444, train_loss_epoch=0.267]

[32m[I 2025-04-05 16:59:26,433][0m Trial 18 finished with value: 0.443815678358078 and parameters: {'batch_size': 32, 'hidden_size': 59, 'attention_head_size': 3, 'dropout': 0.08568620738723899, 'learning_rate': 0.002051828067165024}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.4 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 115 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 113 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 6.0 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 6.0 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 15: 100%|██████████| 7/7 [00:00<00:00,  7.39it/s, loss=0.279, train_loss_step=0.243, val_loss=0.158, train_loss_epoch=0.261] 

[32m[I 2025-04-05 16:59:41,956][0m Trial 19 finished with value: 0.15750490128993988 and parameters: {'batch_size': 8, 'hidden_size': 38, 'attention_head_size': 2, 'dropout': 0.24927212107122426, 'learning_rate': 0.0008349311204576643}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 148 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 146 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 10.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 10.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.08it/s, loss=0.536, train_loss_step=0.410, val_loss=0.378, train_loss_epoch=0.410]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  6.06it/s, loss=0.536, train_loss_step=0.410, val_loss=0.378, train_loss_epoch=0.410]

[32m[I 2025-04-05 16:59:48,819][0m Trial 20 finished with value: 0.37797415256500244 and parameters: {'batch_size': 32, 'hidden_size': 50, 'attention_head_size': 4, 'dropout': 0.29602105572002796, 'learning_rate': 0.006539563220478549}. Best is trial 4 with value: 0.08354290574789047.[0m





  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 188 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 186 K 
7  | static_context_variable_selection  

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 16: 100%|██████████| 4/4 [00:00<00:00,  5.70it/s, loss=0.214, train_loss_step=0.159, val_loss=0.199, train_loss_epoch=0.175]

[32m[I 2025-04-05 17:00:01,346][0m Trial 21 finished with value: 0.19850240647792816 and parameters: {'batch_size': 16, 'hidden_size': 64, 'attention_head_size': 3, 'dropout': 0.2064456962988528, 'learning_rate': 0.001895743014866036}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 171 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 169 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 13.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 13.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 15: 100%|██████████| 4/4 [00:00<00:00,  5.67it/s, loss=0.213, train_loss_step=0.146, val_loss=0.144, train_loss_epoch=0.170]

[32m[I 2025-04-05 17:00:12,857][0m Trial 22 finished with value: 0.14441730082035065 and parameters: {'batch_size': 16, 'hidden_size': 58, 'attention_head_size': 3, 'dropout': 0.23042737154524665, 'learning_rate': 0.0026113741465503626}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 188 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 186 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 14: 100%|██████████| 4/4 [00:00<00:00,  5.63it/s, loss=0.171, train_loss_step=0.144, val_loss=0.181, train_loss_epoch=0.140] 

[32m[I 2025-04-05 17:00:23,568][0m Trial 23 finished with value: 0.1814696192741394 and parameters: {'batch_size': 16, 'hidden_size': 64, 'attention_head_size': 3, 'dropout': 0.21487186015970328, 'learning_rate': 0.005247886456768439}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 171 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 169 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 13.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 13.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.68it/s, loss=0.265, train_loss_step=0.256, val_loss=0.214, train_loss_epoch=0.236]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  5.67it/s, loss=0.265, train_loss_step=0.256, val_loss=0.214, train_loss_epoch=0.236]

[32m[I 2025-04-05 17:00:38,195][0m Trial 24 finished with value: 0.21383516490459442 and parameters: {'batch_size': 16, 'hidden_size': 58, 'attention_head_size': 3, 'dropout': 0.1867655966276005, 'learning_rate': 0.0009017791560890321}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 55.4 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 54.8 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.75it/s, loss=0.278, train_loss_step=0.238, val_loss=0.196, train_loss_epoch=0.226]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 4/4 [00:00<00:00,  7.73it/s, loss=0.278, train_loss_step=0.238, val_loss=0.196, train_loss_epoch=0.226]

[32m[I 2025-04-05 17:00:48,684][0m Trial 25 finished with value: 0.19580619037151337 and parameters: {'batch_size': 16, 'hidden_size': 16, 'attention_head_size': 2, 'dropout': 0.11521299183327335, 'learning_rate': 0.003371586671173494}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.2 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 145 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 144 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 9.9 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 9.9 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00,  5.99it/s, loss=0.279, train_loss_step=0.215, val_loss=0.254, train_loss_epoch=0.209] 

[32m[I 2025-04-05 17:00:55,563][0m Trial 26 finished with value: 0.2538076341152191 and parameters: {'batch_size': 16, 'hidden_size': 49, 'attention_head_size': 3, 'dropout': 0.15267554149583246, 'learning_rate': 0.00990113752180465}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.9 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 174 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 171 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 14.3 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 14.3 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 15: 100%|██████████| 7/7 [00:01<00:00,  6.45it/s, loss=0.155, train_loss_step=0.0936, val_loss=0.0877, train_loss_epoch=0.143]

[32m[I 2025-04-05 17:01:13,087][0m Trial 27 finished with value: 0.0877448320388794 and parameters: {'batch_size': 8, 'hidden_size': 59, 'attention_head_size': 4, 'dropout': 0.2553123927912326, 'learning_rate': 0.0017127537818602252}. Best is trial 4 with value: 0.08354290574789047.[0m





  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 4.7 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 165 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 163 K 
7  | static_context_variable_selection  

                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.04it/s, loss=0.166, train_loss_step=0.136, val_loss=0.121, train_loss_epoch=0.152]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.03it/s, loss=0.166, train_loss_step=0.136, val_loss=0.121, train_loss_epoch=0.152]

[32m[I 2025-04-05 17:01:33,578][0m Trial 28 finished with value: 0.1207784041762352 and parameters: {'batch_size': 8, 'hidden_size': 56, 'attention_head_size': 4, 'dropout': 0.26984035923091754, 'learning_rate': 0.0013684442453263679}. Best is trial 4 with value: 0.08354290574789047.[0m
  rank_zero_warn(
  rank_zero_warn(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 123 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 122 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 7.0 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 7.0 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.30it/s, loss=0.327, train_loss_step=0.228, val_loss=0.101, train_loss_epoch=0.299]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00,  7.29it/s, loss=0.327, train_loss_step=0.228, val_loss=0.101, train_loss_epoch=0.299]

[32m[I 2025-04-05 17:01:53,095][0m Trial 29 finished with value: 0.1008903905749321 and parameters: {'batch_size': 8, 'hidden_size': 41, 'attention_head_size': 4, 'dropout': 0.2545447585067546, 'learning_rate': 0.0003433546261363879}. Best is trial 4 with value: 0.08354290574789047.[0m



✅ Best SMAPE: 0.08354290574789047
🎯 Best Params:
  batch_size: 16
  hidden_size: 63
  attention_head_size: 3
  dropout: 5.71315030161168e-05
  learning_rate: 0.0013366258010220143


In [153]:
train_loader = training.to_dataloader(train=True, batch_size=16, num_workers=0)
val_loader = validation.to_dataloader(train=False, batch_size=16, num_workers=0)

tft_diesel = TemporalFusionTransformer.from_dataset(
    training,
    hidden_size=63,
    attention_head_size=3,
    dropout=0.01,
    learning_rate=0.0013366258010220143,
    loss=SMAPE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)
early_stop = EarlyStopping(monitor="val_loss", patience=10, verbose=True, mode="min")
lr_logger = LearningRateMonitor()

  rank_zero_warn(
  rank_zero_warn(


In [154]:
trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    callbacks=[early_stop, lr_logger],
    gradient_clip_val=0.1,
)

trainer.fit(tft_diesel, train_dataloaders=train_loader, val_dataloaders=val_loader)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 1.3 K 
4  | static_variable_selection          | VariableSelectionNetwork        | 5.2 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 185 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 183 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 

Sanity Checking DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.74it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 4/4 [00:01<00:00,  2.74it/s, loss=0.482, v_num=41, train_loss_step=0.473, val_loss=1.410]

Metric val_loss improved. New best score: 1.411


Epoch 1: 100%|██████████| 4/4 [00:01<00:00,  2.77it/s, loss=0.483, v_num=41, train_loss_step=0.402, val_loss=1.400, train_loss_epoch=0.482]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.405


Epoch 2: 100%|██████████| 4/4 [00:01<00:00,  2.86it/s, loss=0.475, v_num=41, train_loss_step=0.488, val_loss=1.320, train_loss_epoch=0.485]

Metric val_loss improved by 0.082 >= min_delta = 0.0. New best score: 1.323


Epoch 3: 100%|██████████| 4/4 [00:01<00:00,  2.91it/s, loss=0.464, v_num=41, train_loss_step=0.542, val_loss=1.320, train_loss_epoch=0.459]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.322


Epoch 4: 100%|██████████| 4/4 [00:01<00:00,  2.56it/s, loss=0.456, v_num=41, train_loss_step=0.402, val_loss=1.240, train_loss_epoch=0.430]

Metric val_loss improved by 0.082 >= min_delta = 0.0. New best score: 1.239


Epoch 5: 100%|██████████| 4/4 [00:01<00:00,  2.92it/s, loss=0.45, v_num=41, train_loss_step=0.357, val_loss=1.230, train_loss_epoch=0.427] 

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 1.234


Epoch 6: 100%|██████████| 4/4 [00:01<00:00,  2.50it/s, loss=0.438, v_num=41, train_loss_step=0.401, val_loss=1.130, train_loss_epoch=0.418]

Metric val_loss improved by 0.105 >= min_delta = 0.0. New best score: 1.130


Epoch 7: 100%|██████████| 4/4 [00:01<00:00,  2.94it/s, loss=0.413, v_num=41, train_loss_step=0.466, val_loss=1.110, train_loss_epoch=0.388]

Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 1.112


Epoch 8: 100%|██████████| 4/4 [00:01<00:00,  2.81it/s, loss=0.397, v_num=41, train_loss_step=0.441, val_loss=0.958, train_loss_epoch=0.376]

Metric val_loss improved by 0.153 >= min_delta = 0.0. New best score: 0.958


Epoch 9: 100%|██████████| 4/4 [00:01<00:00,  2.91it/s, loss=0.386, v_num=41, train_loss_step=0.300, val_loss=0.955, train_loss_epoch=0.332]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.955


Epoch 10: 100%|██████████| 4/4 [00:01<00:00,  2.93it/s, loss=0.366, v_num=41, train_loss_step=0.280, val_loss=0.804, train_loss_epoch=0.320]

Metric val_loss improved by 0.150 >= min_delta = 0.0. New best score: 0.804


Epoch 11: 100%|██████████| 4/4 [00:01<00:00,  2.37it/s, loss=0.34, v_num=41, train_loss_step=0.362, val_loss=0.801, train_loss_epoch=0.316] 

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.801


Epoch 12: 100%|██████████| 4/4 [00:01<00:00,  2.95it/s, loss=0.326, v_num=41, train_loss_step=0.283, val_loss=0.670, train_loss_epoch=0.291]

Metric val_loss improved by 0.131 >= min_delta = 0.0. New best score: 0.670


Epoch 13: 100%|██████████| 4/4 [00:01<00:00,  2.92it/s, loss=0.306, v_num=41, train_loss_step=0.225, val_loss=0.666, train_loss_epoch=0.281]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.666


Epoch 14: 100%|██████████| 4/4 [00:01<00:00,  2.28it/s, loss=0.292, v_num=41, train_loss_step=0.291, val_loss=0.552, train_loss_epoch=0.263]

Metric val_loss improved by 0.114 >= min_delta = 0.0. New best score: 0.552


Epoch 16: 100%|██████████| 4/4 [00:01<00:00,  2.94it/s, loss=0.267, v_num=41, train_loss_step=0.179, val_loss=0.463, train_loss_epoch=0.238]

Metric val_loss improved by 0.089 >= min_delta = 0.0. New best score: 0.463


Epoch 17: 100%|██████████| 4/4 [00:01<00:00,  2.92it/s, loss=0.259, v_num=41, train_loss_step=0.187, val_loss=0.459, train_loss_epoch=0.245]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.459


Epoch 18: 100%|██████████| 4/4 [00:01<00:00,  2.94it/s, loss=0.247, v_num=41, train_loss_step=0.248, val_loss=0.368, train_loss_epoch=0.225]

Metric val_loss improved by 0.092 >= min_delta = 0.0. New best score: 0.368


Epoch 20: 100%|██████████| 4/4 [00:01<00:00,  2.95it/s, loss=0.225, v_num=41, train_loss_step=0.215, val_loss=0.286, train_loss_epoch=0.194]

Metric val_loss improved by 0.082 >= min_delta = 0.0. New best score: 0.286


Epoch 21: 100%|██████████| 4/4 [00:01<00:00,  2.85it/s, loss=0.217, v_num=41, train_loss_step=0.183, val_loss=0.278, train_loss_epoch=0.200]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.278


Epoch 22: 100%|██████████| 4/4 [00:01<00:00,  2.05it/s, loss=0.204, v_num=41, train_loss_step=0.136, val_loss=0.203, train_loss_epoch=0.188]

Metric val_loss improved by 0.075 >= min_delta = 0.0. New best score: 0.203


Epoch 24: 100%|██████████| 4/4 [00:01<00:00,  2.88it/s, loss=0.19, v_num=41, train_loss_step=0.154, val_loss=0.156, train_loss_epoch=0.165] 

Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 0.156


Epoch 26: 100%|██████████| 4/4 [00:01<00:00,  2.93it/s, loss=0.177, v_num=41, train_loss_step=0.148, val_loss=0.130, train_loss_epoch=0.166]

Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.130


Epoch 28: 100%|██████████| 4/4 [00:01<00:00,  2.87it/s, loss=0.164, v_num=41, train_loss_step=0.147, val_loss=0.107, train_loss_epoch=0.144]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.107


Epoch 30: 100%|██████████| 4/4 [00:01<00:00,  2.93it/s, loss=0.151, v_num=41, train_loss_step=0.174, val_loss=0.103, train_loss_epoch=0.136]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.103


Epoch 31: 100%|██████████| 4/4 [00:01<00:00,  2.92it/s, loss=0.146, v_num=41, train_loss_step=0.150, val_loss=0.103, train_loss_epoch=0.138]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.103


Epoch 32: 100%|██████████| 4/4 [00:01<00:00,  2.94it/s, loss=0.14, v_num=41, train_loss_step=0.139, val_loss=0.103, train_loss_epoch=0.135] 

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.103


Epoch 42: 100%|██████████| 4/4 [00:01<00:00,  2.93it/s, loss=0.105, v_num=41, train_loss_step=0.104, val_loss=0.133, train_loss_epoch=0.0962] 

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.103. Signaling Trainer to stop.


Epoch 42: 100%|██████████| 4/4 [00:02<00:00,  1.97it/s, loss=0.105, v_num=41, train_loss_step=0.104, val_loss=0.133, train_loss_epoch=0.102] 


In [155]:
actuals = torch.cat([y[0] for x, y in iter(val_loader)])
predictions = tft_diesel.predict(val_loader)

y_true = actuals.detach().cpu().numpy()
y_pred = predictions.detach().cpu().numpy()

print(f"\n🔍 Final Evaluation Metrics for Diesel (Optuna-Tuned):")
print(f"SMAPE: {SMAPE()(predictions, actuals).item():.2f}")
print(f"MAE: {mean_absolute_error(y_true, y_pred):.2f}")
print(f"RMSE: {mean_squared_error(y_true, y_pred, squared=False):.2f}")


🔍 Final Evaluation Metrics for Diesel (Optuna-Tuned):
SMAPE: 0.13
MAE: 50.57
RMSE: 50.57


In [156]:
raw_predictions, x = tft_diesel.predict(val_loader, mode="raw", return_x=True)

# Computing residuals
residuals = actuals - predictions
residuals_np = residuals.detach().cpu().numpy().flatten()

# Mapping decoder time_idx to real dates
decoded_time = x["decoder_time_idx"].detach().cpu().numpy().flatten()
date_lookup = df.set_index("time_idx")["Date"]
dates = pd.to_datetime([date_lookup[int(t)] for t in decoded_time])

# Building and saving residual DataFrame
res_df = pd.DataFrame({
    "date": dates,
    "residuals": residuals_np
})

res_df.to_csv("tft_diesel_residuals.csv", index=False)
print("Diesel residuals saved to 'tft_diesel_residuals.csv'")

Diesel residuals saved to 'tft_diesel_residuals.csv'
