In [1]:
from pathlib import Path

import lightning.pytorch as pl
import matplotlib.pyplot as plt
import pandas as pd
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_forecasting import (
    TemporalFusionTransformer,
    TimeSeriesDataSet,
    QuantileLoss,
)
from torch.utils.tensorboard.writer import SummaryWriter

from thesis.dataloading import load_eld
from thesis.metrics import METRICS

In [4]:
# TODO: Add quantile loss everywhere
# TODO: Add more covariates (like in the original paper implementation)

In [5]:
# Seed through Lightning so that repeated runs start from the same RNG state,
# and fix the directory that the trainer later uses as its root directory.
pl.seed_everything(seed=42)
ROOT_DIR = Path("output") / "eld" / "tft"

Seed set to 42


In [6]:
# data loading
# NOTE(review): load_eld is assumed to return a wide frame (one column per
# series) whose index is named "date", plus the sampling frequency — confirm
# against thesis.dataloading.
eld_wide, freq = load_eld("./datasets/LD2011_2014.txt")

# The first reset_index turns the index into a column, the second one adds a
# running integer column ("index") that becomes the integer time index.
indexed = eld_wide.reset_index().reset_index().rename(columns={"index": "time_idx"})

# Wide -> long: one row per (time_idx, date, series) holding a single "value".
long_values = (
    indexed.set_index(["time_idx", "date"])
    .rename_axis("series", axis="columns")
    .stack()
    .rename("value")  # type: ignore
)

# Calendar features and the series id are stored as categoricals; the numeric
# calendar values are converted to strings before becoming categories.
data = long_values.reset_index().assign(
    weekday=lambda frame: frame["date"].dt.weekday.astype("string").astype("category"),
    hour=lambda frame: frame["date"].dt.hour.astype("string").astype("category"),
    series=lambda frame: frame["series"].astype("category"),
)

In [7]:
# slicing configuration
# Forecast one week ahead; the horizon has to be a whole number of sampling steps.
horizon = pd.Timedelta(7, "day")
assert horizon % freq == pd.Timedelta(0)

output_length = horizon // freq  # number of steps in one forecast horizon
input_length = 3 * output_length  # encoder window: three horizons of history

# Validation data ends one horizon before the end of the data; training data
# ends another four horizons before that.
last_time_idx = data["time_idx"].max()
validation_cutoff = last_time_idx - output_length
training_cutoff = validation_cutoff - 4 * output_length

# Sanity check on a single series: cutting by integer time index must select
# exactly the same rows as cutting by timestamp.
reference_series = data[data["series"] == "MT_001"]
cut_by_time_idx = reference_series[reference_series["time_idx"] <= validation_cutoff]
cut_by_date = reference_series[reference_series["date"] <= data["date"].max() - horizon]
assert cut_by_time_idx.equals(cut_by_date)

print(
    f"{input_length = }\n{output_length = }\n{validation_cutoff = }\n{training_cutoff = }\n{data['time_idx'].max() = }"
)

input_length = 252
output_length = 84
validation_cutoff = 2820
training_cutoff = 2484
data['time_idx'].max() = 2904


In [14]:
# datasets and dataloaders
# Training windows only use observations up to `training_cutoff`.
train_frame = data[data["time_idx"] <= training_cutoff]
train = TimeSeriesDataSet(
    train_frame,
    time_idx="time_idx",
    group_ids=["series"],
    target="value",
    max_encoder_length=input_length,
    max_prediction_length=output_length,
    static_categoricals=["series"],
    time_varying_known_categoricals=["hour", "weekday"],
    time_varying_unknown_reals=["value"],
)

# Validation reuses the training dataset's configuration on data up to
# `validation_cutoff` and only predicts steps after the training cutoff.
validation_frame = data[data["time_idx"] <= validation_cutoff]
val = TimeSeriesDataSet.from_dataset(
    train,
    validation_frame,
    min_prediction_idx=training_cutoff + 1,
)

# Test set is built from the full data in prediction mode.
# Disabled alternative kept from the original cell:
#     min_prediction_idx=validation_cutoff + 1,
test = TimeSeriesDataSet.from_dataset(train, data, predict=True)

print(f"{len(train) = }\n{len(val) = }\n{len(test) = }")

batch_size = 64
loader_options = {"batch_size": batch_size}  # shared by all three loaders
train_dataloader = train.to_dataloader(train=True, num_workers=2, **loader_options)
val_dataloader = val.to_dataloader(train=False, num_workers=2, **loader_options)
test_dataloader = test.to_dataloader(train=False, num_workers=0, **loader_options)



len(train) = 107500
len(val) = 12650
len(test) = 50


In [15]:
# model
# The network emits one output per quantile, so the output size is derived
# from the quantile list instead of being repeated as a literal.
quantiles = [0.1, 0.5, 0.9]
model = TemporalFusionTransformer.from_dataset(
    train,
    # architecture
    hidden_size=160,
    hidden_continuous_size=160,
    lstm_layers=1,
    attention_head_size=4,
    dropout=0.1,
    # probabilistic output
    loss=QuantileLoss(quantiles),
    output_size=len(quantiles),
    # optimisation and logging
    optimizer="Ranger",
    learning_rate=1e-3,
    log_interval=100,
)

/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [16]:
# trainer
# Early stopping and checkpointing both track the same validation metric.
monitored_metric = "val_loss"
early_stop_callback = EarlyStopping(
    monitor=monitored_metric,
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)
checkpoint_callback = ModelCheckpoint(
    monitor=monitored_metric,
    mode="min",
    verbose=False,
)
trainer = pl.Trainer(
    default_root_dir=ROOT_DIR,
    max_epochs=100,
    gradient_clip_val=0.01,
    gradient_clip_algorithm="norm",
    callbacks=[early_stop_callback, checkpoint_callback],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [17]:
# fit
# Train with the training/validation loaders, then evaluate the best
# checkpoint on the test loader. The result is bound to `_` so the cell does
# not echo the returned metrics a second time.
trainer.fit(model, train_dataloader, val_dataloader)
_ = trainer.test(dataloaders=test_dataloader, ckpt_path="best")

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 807   
3  | prescalers                         | ModuleDict                      | 32    
4  | static_variable_selection          | VariableSelectionNetwork        | 96    
5  | encoder_variable_selection         | VariableSelectionNetwork        | 2.2 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 308   
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 4.3 K 
9  | static_context_initial_cell_lstm 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Restoring states from the checkpoint path at output/eld/tft/lightning_logs/version_1/checkpoints/epoch=14-step=3135.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at output/eld/tft/lightning_logs/version_1/checkpoints/epoch=14-step=3135.ckpt
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE             9.981331825256348
        test_MAPE           0.1862286627292633
        test_RMSE           20.802223205566406
       test_SMAPE           0.15230879187583923
        test_loss            6.705451011657715
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


## Further Logging

In [18]:
# load best
# `checkpoint_callback` is the only ModelCheckpoint registered with the
# trainer, so it is the object that recorded the best checkpoint path.
best_model_path = checkpoint_callback.best_model_path
best_model = TemporalFusionTransformer.load_from_checkpoint(checkpoint_path=best_model_path)

/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [52]:
# predict
# Request the raw network output together with the inputs, the targets and
# the index frame; the later cells read them as out.output, out.x, out.y and
# out.index.
prediction_options = dict(mode="raw", return_x=True, return_y=True, return_index=True)
out = best_model.predict(test, **prediction_options)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [21]:
# Per-series point-forecast metrics for every metric function in METRICS.
#
# The raw output holds one value per quantile in its last dimension. Averaging
# that dimension (mean of the 0.1/0.5/0.9 quantiles) is not the model's point
# forecast; `to_prediction` applies the loss' own reduction instead, which for
# QuantileLoss is the 0.5 quantile (median).
point_forecast = best_model.to_prediction(out.output)

performance = {
    metric_fn.__name__: {
        # rows of out.index, out.y[0] and the forecast are aligned by position
        name: metric_fn(y_true, y_pred).item()
        for name, y_true, y_pred in zip(
            out.index["series"],
            out.y[0],
            point_forecast,
        )
    }
    for metric_fn in METRICS
}

# TODO: Find a way to log quantile losses.

# One row per series, one column per metric, stored next to the run's logs.
pd.DataFrame(performance).to_csv(Path(trainer.log_dir, "performance.csv"))  # type: ignore

In [22]:
# Tensorboard
# Reuse the experiment object of the trainer's logger so that the figures
# added in the following cells go through the same logger as the training run.
# NOTE(review): the annotation assumes the trainer's logger is the TensorBoard
# one, whose experiment is a SummaryWriter — confirm if the logger changes.
summary_writer: SummaryWriter = trainer.logger.experiment  # type: ignore

In [48]:
# plot preds
# One prediction figure per entry of the prediction index, tagged with the
# series name so each series gets its own TensorBoard entry.
series_names = out.index["series"]
for sample_idx, series_name in series_names.items():
    figure = best_model.plot_prediction(out.x, out.output, idx=sample_idx)
    summary_writer.add_figure(f"prediction/{series_name}", figure)

In [55]:
# Average prediction vs. average actual per input variable.
#
# `out` was produced in raw mode, so `out.output.prediction` carries a trailing
# quantile dimension. The comparison is meant for point predictions, so the
# raw output is first reduced with the model's own `to_prediction` (the median
# quantile under QuantileLoss) instead of passing the quantile tensor as is.
point_forecast = best_model.to_prediction(out.output)
predictions_vs_actuals = best_model.calculate_prediction_actual_by_variable(out.x, point_forecast)
figs = best_model.plot_prediction_actual_by_variable(predictions_vs_actuals)

# One figure per variable, logged under a common TensorBoard tag prefix.
for name, fig in figs.items():  # type: ignore
    summary_writer.add_figure(f"prediction_actual_by_variable/{name}", fig)

In [51]:
# Not needed because it is done automatically in trainer.test
# interpretation = model.interpret_output(out.output, reduction="sum")
# figs = model.plot_interpretation(interpretation)

# for name, fig in figs.items():  # type: ignore
#     summary_writer.add_figure(f"interpretation/{name}", fig)