In [1]:
from pathlib import Path

import lightning.pytorch as pl
import pandas as pd
import torch.nn as nn
from thesis.metrics import METRICS
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_forecasting import Baseline, NBeats, TimeSeriesDataSet
from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE
from torch.utils.tensorboard.writer import SummaryWriter

from thesis.dataloading import load_eld

In [2]:
# Seed the global RNGs through Lightning so reruns are comparable, and keep every
# artefact of this experiment (logs, checkpoints) under one directory.
SEED = 42
pl.seed_everything(SEED)
ROOT_DIR = Path("output") / "eld" / "nbeats"

Seed set to 42


In [3]:
# data loading: reshape the loaded frame into one row per (time_idx, date, series)
wide, freq = load_eld("./datasets/LD2011_2014.txt")

# Two resets on purpose: after the first one the frame has a fresh positional index,
# which the second reset materialises as the "index" column -> renamed to "time_idx".
# NOTE(review): presumably the first reset moves a "date" index into a column - confirm
# against load_eld.
indexed = wide.reset_index().reset_index().rename(columns={"index": "time_idx"})
indexed = indexed.set_index(["time_idx", "date"]).rename_axis("series", axis="columns")

# Stack the series columns into rows; the stacked readings end up in "value".
data = indexed.stack().rename("value").reset_index()  # type: ignore

In [4]:
# slicing configuration
# Forecast horizon in wall-clock time; everything else is derived from it in steps.
horizon = pd.Timedelta(7, "day")
last_time_idx = data["time_idx"].max()

output_length = horizon // freq  # steps per forecast horizon
input_length = 3 * output_length  # the encoder sees three horizons of history
validation_cutoff = last_time_idx - output_length  # final horizon is held out for testing
training_cutoff = validation_cutoff - 4 * output_length  # four horizons reserved for validation

# Sanity check on one series: cutting by time_idx selects exactly the same rows
# as cutting by calendar date.
is_mt001 = data["series"] == "MT_001"
cut_by_index = data[is_mt001 & (data["time_idx"] <= validation_cutoff)]
cut_by_date = data[is_mt001 & (data["date"] <= data["date"].max() - horizon)]
assert cut_by_index.equals(cut_by_date)

print(
    f"{input_length = }\n"
    f"{output_length = }\n"
    f"{validation_cutoff = }\n"
    f"{training_cutoff = }\n"
    f"{data['time_idx'].max() = }"
)

input_length = 252
output_length = 84
validation_cutoff = 2820
training_cutoff = 2484
data['time_idx'].max() = 2904


In [5]:
# datasets and dataloaders
# Training windows only see observations up to the training cutoff.
train_frame = data[data["time_idx"] <= training_cutoff]
train = TimeSeriesDataSet(
    train_frame,
    time_idx="time_idx",
    group_ids=["series"],
    target="value",
    # only unknown variable is "value" - and N-Beats can also not take any additional variables
    time_varying_unknown_reals=["value"],
    max_encoder_length=input_length,
    max_prediction_length=output_length,
)

# Validation reuses the training dataset's configuration; forecasts have to start
# after the training cutoff and the data stops at the validation cutoff.
val_frame = data[data["time_idx"] <= validation_cutoff]
val = TimeSeriesDataSet.from_dataset(train, val_frame, min_prediction_idx=training_cutoff + 1)

# Test runs on the full data; predict=True yields a single window per series
# (hence len(test) equals the number of series), placed at the end of the time index.
test = TimeSeriesDataSet.from_dataset(train, data, predict=True)

print(f"{len(train) = }\n{len(val) = }\n{len(test) = }")

batch_size = 1024
train_dataloader = train.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
val_dataloader = val.to_dataloader(train=False, batch_size=batch_size, num_workers=2)
# the test set is tiny, so it is loaded in the main process
test_dataloader = test.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

  y = torch.as_tensor(y)


len(train) = 107500
len(val) = 12650
len(test) = 50


In [6]:
# Baseline
# Reference scores from pytorch-forecasting's Baseline model, reported with the
# same metric set on the validation and the test windows.
baseline_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])
baseline = Baseline(logging_metrics=baseline_metrics)

# nothing to log or checkpoint for the baseline - it is only evaluated
baseline_trainer = pl.Trainer(logger=False, enable_checkpointing=False)

for title, loader in (
    ("Validation Baseline", val_dataloader),
    ("Test Baseline", test_dataloader),
):
    print(title)
    baseline_trainer.test(baseline, loader)

/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Validation Baseline


You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE             26.64547348022461
        test_MAPE           0.37555065751075745
        test_MASE           1.9665417671203613
        test_RMSE            47.16768264770508
       test_SMAPE           0.29618605971336365
        test_loss           0.29618605971336365
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test Baseline


/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE             18.84429359436035
        test_MAPE           0.25164616107940674
        test_MASE           1.4602044820785522
        test_RMSE            42.6780891418457
       test_SMAPE           0.2258290946483612
        test_loss           0.2258290946483612
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [7]:
# model
# Hyperparameters that differ from the library defaults; everything else
# is derived from the training dataset by from_dataset.
# NOTE(review): the two-element lists presumably hold one entry per N-BEATS stack - confirm.
nbeats_hparams = {
    "widths": [128, 512],
    "expansion_coefficient_lengths": [3, 12],
    "learning_rate": 5e-4,
    "log_interval": 50,
}
model = NBeats.from_dataset(train, **nbeats_hparams)

  coefficients = torch.tensor([backcast_linspace**i for i in range(thetas_dim)], dtype=torch.float32)


In [8]:
# trainer
# Both callbacks watch the same quantity: the validation loss, lower is better.
monitor_val_loss = {"monitor": "val_loss", "mode": "min", "verbose": True}

# stop after 20 checks without an improvement of at least 1e-4
early_stop_callback = EarlyStopping(min_delta=1e-4, patience=20, **monitor_val_loss)
# keep the checkpoint with the lowest validation loss
checkpoint_callback = ModelCheckpoint(**monitor_val_loss)

trainer = pl.Trainer(
    default_root_dir=ROOT_DIR,
    max_epochs=100,
    callbacks=[early_stop_callback, checkpoint_callback],
    # clip the gradient norm to 1.0
    gradient_clip_algorithm="norm",
    gradient_clip_val=1.0,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [9]:
# fit
# Train on the training windows and validate on the validation windows; early
# stopping and checkpointing are handled by the callbacks attached to `trainer`.
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)
# ckpt_path="best" restores the best checkpoint before scoring the test windows,
# so the reported metrics belong to the best validation epoch, not the last one.
# The returned list is bound to `_` only to keep it out of the cell output.
_ = trainer.test(ckpt_path="best", dataloaders=test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type       | Params
-----------------------------------------------
0 | loss            | MASE       | 0     
1 | logging_metrics | ModuleList | 0     
2 | net_blocks      | ModuleList | 2.3 M 
-----------------------------------------------
2.3 M     Trainable params
0         Non-trainable params
2.3 M     Total params
9.164     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 1.218
Epoch 0, global step 104: 'val_loss' reached 1.21789 (best 1.21789), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=0-step=104.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.018 >= min_delta = 0.0001. New best score: 1.200
Epoch 1, global step 208: 'val_loss' reached 1.20020 (best 1.20020), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=1-step=208.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0001. New best score: 1.200
Epoch 2, global step 312: 'val_loss' reached 1.20006 (best 1.20006), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=2-step=312.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.0001. New best score: 1.197
Epoch 3, global step 416: 'val_loss' reached 1.19704 (best 1.19704), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=3-step=416.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 520: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.0001. New best score: 1.191
Epoch 5, global step 624: 'val_loss' reached 1.19141 (best 1.19141), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=5-step=624.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.0001. New best score: 1.182
Epoch 6, global step 728: 'val_loss' reached 1.18180 (best 1.18180), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=6-step=728.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7, global step 832: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 8, global step 936: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 9, global step 1040: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 10, global step 1144: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 11, global step 1248: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 12, global step 1352: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 13, global step 1456: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 14, global step 1560: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 15, global step 1664: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 16, global step 1768: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 17, global step 1872: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0001. New best score: 1.180
Epoch 18, global step 1976: 'val_loss' reached 1.18035 (best 1.18035), saving model to 'output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=18-step=1976.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 19, global step 2080: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 20, global step 2184: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 21, global step 2288: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 22, global step 2392: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 23, global step 2496: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 24, global step 2600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 25, global step 2704: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 26, global step 2808: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 27, global step 2912: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 28, global step 3016: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 29, global step 3120: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 30, global step 3224: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 31, global step 3328: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 32, global step 3432: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 33, global step 3536: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 34, global step 3640: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 35, global step 3744: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 36, global step 3848: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 37, global step 3952: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 20 records. Best score: 1.180. Signaling Trainer to stop.
Epoch 38, global step 4056: 'val_loss' was not in top 1
Restoring states from the checkpoint path at output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=18-step=1976.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at output/eld/nbeats/lightning_logs/version_1/checkpoints/epoch=18-step=1976.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE            12.233638763427734
        test_MAPE           0.23147329688072205
        test_MASE            1.195258378982544
        test_RMSE           27.086397171020508
       test_SMAPE           0.1692611575126648
        test_loss            1.195258378982544
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [10]:
# load best
# Path of the best checkpoint recorded by the trainer's checkpoint callback
# (the ModelCheckpoint monitoring "val_loss" that was passed to pl.Trainer).
best_model_path = trainer.checkpoint_callback.best_model_path  # type: ignore
# Rebuild a standalone NBeats instance from that checkpoint for prediction and plotting.
best_model = NBeats.load_from_checkpoint(best_model_path)

In [11]:
# predict
# mode="raw" keeps the full network output: later cells pass `out.output` to the
# plot_* helpers and read `out.output.prediction`. Inputs, targets and the index
# are returned as well because the logging cells use `out.x`, `out.y` and `out.index`.
out = best_model.predict(
    test_dataloader, mode="raw", return_x=True, return_y=True, return_index=True
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/konstantinos/projects/thesis/code/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


## Further Logging

In [12]:
# Per-series scores on the test window: one column per metric, one row per series.
series_names = out.index["series"]
targets = out.y[0]
predictions = out.output.prediction

performance = {}
for metric_fn in METRICS:
    scores_by_series = {}
    for name, y_true, y_pred in zip(series_names, targets, predictions):
        scores_by_series[name] = metric_fn(y_true, y_pred).item()
    performance[metric_fn.__name__] = scores_by_series

# stored next to the run's logs so the table stays with the matching checkpoint
pd.DataFrame(performance).to_csv(Path(trainer.log_dir, "performance.csv"))  # type: ignore

In [13]:
# Tensorboard
# Reuse the experiment handle of the trainer's logger so that the extra figures
# are written into the same run as the training curves.
# NOTE(review): assumes the trainer's default logger is a TensorBoard logger whose
# `.experiment` is a SummaryWriter - confirm (hence the type: ignore).
summary_writer: SummaryWriter = trainer.logger.experiment  # type: ignore

In [14]:
# plot preds
# One prediction plot per test series, logged under "prediction/<series name>".
# NOTE(review): `i` is the row label of `out.index` and is used as the sample
# position in the prediction output - assumes a default 0..n-1 index, confirm.
for i, name in out.index["series"].items():
    fig = best_model.plot_prediction(
        out.x,
        out.output,
        idx=i,
    )
    # add_figure closes the figure after rendering it (close=True by default),
    # so the loop does not accumulate open matplotlib figures.
    summary_writer.add_figure(f"prediction/{name}", fig)

# The writer queues events and writes them asynchronously; flush explicitly so all
# figures are on disk (and visible in TensorBoard) as soon as this cell finishes,
# even if the kernel is stopped right afterwards.
summary_writer.flush()

In [15]:
# plot interpretation
# One interpretation plot per test series, logged under "interpretation/<series name>".
# NOTE(review): `i` is the row label of `out.index` and is used as the sample
# position in the prediction output - assumes a default 0..n-1 index, confirm.
for i, name in out.index["series"].items():
    fig = best_model.plot_interpretation(out.x, out.output, idx=i)
    # add_figure closes the figure after rendering it (close=True by default).
    summary_writer.add_figure(f"interpretation/{name}", fig)

# The writer queues events and writes them asynchronously; flush explicitly so all
# figures are on disk (and visible in TensorBoard) as soon as this cell finishes,
# even if the kernel is stopped right afterwards.
summary_writer.flush()