In [34]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, Baseline
from pytorch_forecasting.metrics import QuantileLoss, SMAPE, MAE, RMSE

In [3]:
seed_everything(42)

Global seed set to 42


42

In [4]:
df = pd.read_csv("/Users/yehana2002/Projects/DSGP/datasets/processed/final_merged_dataset_ready.csv")

In [6]:
print(df.columns)

Index(['Date', 'Petrol_Price', 'Diesel_Price', 'Crude_Oil_Price', 'Year',
       'Month', 'Quarter', 'DayOfYear', 'Petrol_Lag1', 'Diesel_Lag1',
       'Crude_Lag1', 'Crude_MA7', 'Petrol_MA7', 'Diesel_MA7',
       'central_bank_total_assets ', 'currency_in_circulation',
       'gov_deposits_with_cb ', 'srr_commercial_bank_deposits',
       'reserve_money', 'narrow_money_m1', 'broad_money_m2', 'broad_money_m2b',
       'quasi_money_public', 'net_foreign_assets', 'credit_to_government_net',
       ' credit_to_public_corporations', 'credit_to_private_sector',
       'total_domestic_credit', 'broad_money_m4',
       'net_credit_to_gov_central_bank', 'net_credit_to_gov_commercial_total',
       'net_credit_to_gov_commercial_dbu', 'net_credit_to_gov_commercial_obu',
       'credit_to_sobes_dbu', 'credit_to_sobes_obu', 'credit_to_private_dbu',
       'credit_to_private_obu', 'm2b_other_items_net',
       'nfa_monetary_authorities', 'nfa_commercial_banks_total',
       'nfa_commercial_banks_dbu

In [7]:
# Normalize column names to lowercase and strip whitespace
df.columns = df.columns.str.lower().str.strip()

In [17]:
# Filling remaining NaNs in usd_lkr
df["usd_lkr"] = df["usd_lkr"].fillna(method="ffill").fillna(method="bfill")

In [18]:
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date")

In [19]:
df["time_idx"] = (df["date"] - df["date"].min()).dt.days

In [20]:
max_encoder_length = 30
max_prediction_length = 6

In [21]:
df["fuel_type"] = "petrol"

In [None]:
training = TimeSeriesDataSet(
    df,
    time_idx="time_idx",
    target="petrol_price",
    group_ids=["fuel_type"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["fuel_type"],
    time_varying_known_reals=["time_idx", "usd_lkr"] + list(df.columns[df.columns.str.contains("month|inflation|gdp|cpi|interest|reserve", case=False)]),
    time_varying_unknown_reals=["petrol_price"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True
)

In [26]:
train_df = df[df["date"] < "2023-01-01"]
val_df = df[df["date"] >= "2023-01-01"]

train_dataset = TimeSeriesDataSet.from_dataset(training, train_df)
val_dataset = TimeSeriesDataSet.from_dataset(training, val_df)

train_dataloader = train_dataset.to_dataloader(train=True, batch_size=64, num_workers=0)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=64, num_workers=0)

In [None]:
tft_model = TemporalFusionTransformer.from_dataset(
    train_dataset,
    learning_rate=0.005,
    hidden_size=64,
    attention_head_size=4,
    dropout=0.2,
    loss=QuantileLoss(),
    log_interval=-1,
    reduce_on_plateau_patience=4,
)

  rank_zero_warn(
  rank_zero_warn(


In [50]:
logger = TensorBoardLogger("lightning_logs", name="tft_model")
early_stop_callback = EarlyStopping(monitor="val_loss", patience=5, verbose=True, mode="min")
lr_monitor = LearningRateMonitor(logging_interval="epoch")

In [51]:
trainer = Trainer(
    max_epochs=100,
    accelerator="cpu",
    devices=1,
    callbacks=[early_stop_callback, lr_monitor],
    logger=logger,
    gradient_clip_val=0.1,
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [52]:
trainer.fit(tft_model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 208   
4  | static_variable_selection          | VariableSelectionNetwork        | 5.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 18.2 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 16.2 K
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 16.8

                                                                           

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:  50%|█████     | 1/2 [00:00<00:00, 17.53it/s, loss=0, v_num=4, train_loss_step=0.000]







Epoch 0: 100%|██████████| 2/2 [00:00<00:00, 25.49it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000]

Metric val_loss improved. New best score: 5.000


Epoch 1:  50%|█████     | 1/2 [00:00<00:00, 16.45it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]







Epoch 2:   0%|          | 0/2 [00:00<?, ?it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]        



Epoch 3:   0%|          | 0/2 [00:00<?, ?it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]        



Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 35.54it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]



Epoch 4:  50%|█████     | 1/2 [00:00<00:00, 31.10it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]



Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 34.56it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]



Epoch 5:  50%|█████     | 1/2 [00:00<00:00, 30.50it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]



Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 37.38it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]

Monitored metric val_loss did not improve in the last 5 records. Best score: 5.000. Signaling Trainer to stop.


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 23.43it/s, loss=0, v_num=4, train_loss_step=0.000, val_loss=5.000, train_loss_epoch=0.000]


In [53]:
raw_predictions, x = tft_model.predict(val_dataloader, mode="raw", return_x=True)



In [54]:
y_pred = raw_predictions["prediction"].detach().cpu()
y_true = x["decoder_target"].detach().cpu()

In [55]:
if y_pred.shape[-1] > 1:
    y_pred = y_pred[..., 0]

In [56]:
print("\n Evaluation Metrics on Validation Set")
print(f"SMAPE: {SMAPE()(y_pred, y_true):.4f}")
print(f"MAE:   {MAE()(y_pred, y_true):.4f}")
print(f"RMSE:  {RMSE()(y_pred, y_true):.4f}")


 Evaluation Metrics on Validation Set
SMAPE: 0.0233
MAE:   10.0000
RMSE:  14.1421
