In [1]:
from pathlib import Path

import lightning.pytorch as pl
import matplotlib.pyplot as plt
import pandas as pd
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_forecasting import (
    DeepAR,
    MultivariateNormalDistributionLoss,
    TimeSeriesDataSet,
)
from torch.utils.tensorboard.writer import SummaryWriter

from thesis.dataloading import load_eld
from thesis.metrics import METRICS

In [2]:
# Seed the random number generators through Lightning and choose the directory
# that is later handed to the Trainer as its `default_root_dir`.
SEED = 42
pl.seed_everything(SEED)
ROOT_DIR = Path("output") / "eld" / "deepar"

Seed set to 42


In [3]:
# data loading
# `load_eld` returns the raw frame together with its sampling frequency.
# NOTE(review): the reshaping below assumes the frame is indexed by a `date`
# index with one column per series -- confirm against `thesis.dataloading`.
eld_wide, freq = load_eld("./datasets/LD2011_2014.txt")

# Wide -> long: the second `reset_index` materialises a running integer column
# (renamed to `time_idx`); stacking then yields one row per (time_idx, date, series).
eld_indexed = (
    eld_wide.reset_index()
    .reset_index()
    .rename(columns={"index": "time_idx"})
    .set_index(["time_idx", "date"])
)
eld_long = (
    eld_indexed.rename_axis("series", axis="columns")
    .stack()
    .rename("value")  # type: ignore
    .reset_index()
)

# Calendar features and the series id are stored as categoricals.
data = eld_long.assign(
    weekday=lambda df: df["date"].dt.weekday.astype("string").astype("category"),
    hour=lambda df: df["date"].dt.hour.astype("string").astype("category"),
    series=lambda df: df["series"].astype("category"),
)

In [4]:
# slicing configuration
# Forecast horizon of one week; it must be a whole number of sampling steps.
horizon = pd.Timedelta(days=7)
assert horizon % freq == pd.Timedelta(0)

# Window lengths in time steps: the encoder sees three horizons of history.
output_length = horizon // freq
input_length = 3 * output_length

# The last horizon is held out; validation ends right before it and training
# ends four horizons before the validation cutoff.
last_time_idx = data["time_idx"].max()
validation_cutoff = last_time_idx - output_length
training_cutoff = validation_cutoff - 4 * output_length

# Sanity check on one series: cutting by `time_idx` must select the same rows
# as cutting by calendar date.
mt_001 = data[data["series"] == "MT_001"]
assert mt_001[mt_001["time_idx"] <= validation_cutoff].equals(
    mt_001[mt_001["date"] <= data["date"].max() - horizon]
)

print(
    f"{input_length = }\n"
    f"{output_length = }\n"
    f"{validation_cutoff = }\n"
    f"{training_cutoff = }\n"
    f"{data['time_idx'].max() = }"
)

input_length = 252
output_length = 84
validation_cutoff = 2820
training_cutoff = 2484
data['time_idx'].max() = 2904


In [5]:
# datasets and dataloaders
# Training set: everything up to `training_cutoff`. It also defines the
# dataset configuration that `val` and `test` reuse through `from_dataset`.
train_frame = data[data["time_idx"] <= training_cutoff]
train = TimeSeriesDataSet(
    train_frame,
    time_idx="time_idx",
    group_ids=["series"],
    target="value",
    static_categoricals=["series"],
    time_varying_known_categoricals=["hour", "weekday"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=input_length,
    max_prediction_length=output_length,
)

# Validation set: data up to `validation_cutoff`, with predictions starting
# only after the training cutoff.
val_frame = data[data["time_idx"] <= validation_cutoff]
val = TimeSeriesDataSet.from_dataset(
    train,
    val_frame,
    min_prediction_idx=training_cutoff + 1,
)

# Test set: built from the full frame with `predict=True`.
# NOTE(review): presumably this keeps only the final window of each series,
# which would make an explicit `min_prediction_idx=validation_cutoff + 1`
# unnecessary -- confirm against the pytorch-forecasting docs.
test = TimeSeriesDataSet.from_dataset(train, data, predict=True)


print(f"{len(train) = }\n{len(val) = }\n{len(test) = }")

# All three loaders share the batch size and the "synchronized" batch sampler;
# only the train flag and the worker count differ.
batch_size = 1024
loader_kwargs = dict(batch_size=batch_size, batch_sampler="synchronized")
train_dataloader = train.to_dataloader(train=True, num_workers=2, **loader_kwargs)
val_dataloader = val.to_dataloader(train=False, num_workers=2, **loader_kwargs)
test_dataloader = test.to_dataloader(train=False, num_workers=0, **loader_kwargs)

  y = torch.as_tensor(y)


len(train) = 107500
len(val) = 12650
len(test) = 50




In [6]:
# model
# DeepAR hyperparameters, collected in one place so they are easy to tune.
deepar_hparams = dict(
    hidden_size=30,
    rnn_layers=2,
    learning_rate=1e-2,
    optimizer="Adam",
    log_interval=10,
    log_val_interval=50,
)

# The network is configured from the training dataset and trained with a
# rank-30 multivariate normal distribution loss.
model = DeepAR.from_dataset(
    train,
    loss=MultivariateNormalDistributionLoss(rank=30),
    **deepar_hparams,
)

/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [7]:
# trainer
# Early stopping and checkpointing both track the same quantity: the minimum
# of `val_loss`.
val_loss_monitor = dict(monitor="val_loss", mode="min", verbose=False)
early_stop_callback = EarlyStopping(min_delta=1e-4, patience=10, **val_loss_monitor)
checkpoint_callback = ModelCheckpoint(**val_loss_monitor)

# Gradients are clipped by norm at 1.0; `ROOT_DIR` is the trainer's default
# root directory.
trainer = pl.Trainer(
    default_root_dir=ROOT_DIR,
    max_epochs=100,
    gradient_clip_val=1.0,
    gradient_clip_algorithm="norm",
    callbacks=[early_stop_callback, checkpoint_callback],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [8]:
# fit
# Train with the train/validation loaders, then evaluate the best checkpoint
# on the test loader. The returned metrics are discarded (`_`).
trainer.fit(model, train_dataloader, val_dataloader)
_ = trainer.test(dataloaders=test_dataloader, ckpt_path="best")

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                   | Type                               | Params
------------------------------------------------------------------------------
0 | loss                   | MultivariateNormalDistributionLoss | 0     
1 | logging_metrics        | ModuleList                         | 0     
2 | embeddings             | MultiEmbedding                     | 807   
3 | rnn                    | LSTM                               | 14.4 K
4 | distribution_projector | Linear                             | 992   
-----------------------------------------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
Restoring states from the checkpoint path at output/eld/deepar/lightning_logs/version_1/checkpoints/epoch=0-step=2150.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at output/eld/deepar/lightning_logs/version_1/checkpoints/epoch=0-step=2150.ckpt
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE             7.104928970336914
        test_MAPE           0.10535421967506409
        test_MASE           0.6273716688156128
        test_RMSE           16.135351181030273
       test_SMAPE           0.09655757248401642
        test_loss           154.58518981933594
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


## Further Logging

In [9]:
# load best
# Ask the trainer's checkpoint callback for the path of the best checkpoint
# and restore a DeepAR model from it.
ckpt_callback = trainer.checkpoint_callback
best_model_path = ckpt_callback.best_model_path  # type: ignore
best_model = DeepAR.load_from_checkpoint(checkpoint_path=best_model_path)

In [10]:
# predict
# Raw predictions on the test loader (100 samples), returned together with the
# network inputs, the targets and the index frame for later per-series reporting.
predict_kwargs = dict(
    mode="raw",
    n_samples=100,
    return_index=True,
    return_x=True,
    return_y=True,
)
out = best_model.predict(test_dataloader, **predict_kwargs)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [11]:
# Correlation matrix of the average prediction random variable (84 predictions)
# Step 1: predict with `n_samples=None` and feed the "prediction" entry of the
# raw output to the loss, which maps it to a distribution object.
# NOTE(review): presumably `n_samples=None` yields distribution parameters
# rather than samples -- confirm against the DeepAR documentation.
dist_params = best_model.predict(test_dataloader, mode=("raw", "prediction"), n_samples=None)  # type: ignore
predictive_dist = best_model.loss.map_x_to_distribution(dist_params)

# Step 2: average the base distribution's covariance matrices over the leading
# dimension and move the result to the CPU.
cov = predictive_dist.base_dist.covariance_matrix.mean(0).cpu()  # type: ignore

# Step 3: normalise to correlations, corr_ij = cov_ij / sqrt(var_i * var_j).
variances = cov.diag()
corr = cov / variances.outer(variances).sqrt()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [12]:
# Per-series evaluation: each metric in METRICS is applied to every series'
# target and to the mean of its predictions over the last dimension.
series_names = out.index["series"]
targets = out.y[0]
point_forecasts = out.output.prediction.mean(-1)

# performance[metric name][series name] -> scalar score
performance = {}
for metric_fn in METRICS:
    per_series = {}
    for name, y_true, y_pred in zip(series_names, targets, point_forecasts):
        per_series[name] = metric_fn(y_true, y_pred).item()
    performance[metric_fn.__name__] = per_series

# Store the table next to the trainer's logs.
performance_path = Path(trainer.log_dir, "performance.csv")  # type: ignore
pd.DataFrame(performance).to_csv(performance_path)

In [13]:
# Tensorboard
# Reuse the experiment object of the trainer's logger, so the figures added
# below go through the same writer as the training run.
run_logger = trainer.logger
summary_writer: SummaryWriter = run_logger.experiment  # type: ignore

In [14]:
# plot preds
# One figure per series, logged under the tag `prediction/<series name>`.
# The label of each entry in the index frame is passed as the sample position `idx`.
series_column = out.index["series"]
for sample_idx, series_name in series_column.items():
    prediction_fig = best_model.plot_prediction(out.x, out.output, idx=sample_idx)
    summary_writer.add_figure(f"prediction/{series_name}", prediction_fig)

In [15]:
# Correlation matrix
# Heatmap of `corr` on a fixed, symmetric [-1, 1] colour scale, so zero
# correlation sits at the midpoint of the diverging "bwr" colormap.
# The explicit fig/ax interface is used and the figure is titled and labelled
# so that it stands on its own in TensorBoard.
fig, ax = plt.subplots()
heatmap = ax.imshow(corr, cmap="bwr", vmin=-1, vmax=1)
fig.colorbar(heatmap, ax=ax, label="correlation")
ax.set_title("Correlation matrix")
summary_writer.add_figure("correlation", fig)

In [16]:
# Correlations histogram
# Distribution of the entries of `corr`, drawn with the explicit fig/ax
# interface and labelled so that the figure stands on its own in TensorBoard.
fig, ax = plt.subplots()
# Only entries strictly below 1 are kept.
# NOTE(review): presumably this is meant to drop the unit diagonal
# (self-correlations); it would also drop any off-diagonal entry equal to 1.
ax.hist(corr[corr < 1], edgecolor="black")
ax.set(title="Correlations histogram", xlabel="correlation", ylabel="count")
summary_writer.add_figure("correlation_histogram", fig)