# TSLib for v2 - Example notebook for full pipeline

## Basic imports for getting started

This notebook is a basic vignette showing how to use the `tslib` data module with the `TimeXer` model in v2 of PyTorch Forecasting. The v2 API is experimental and unstable.

Feedback and suggestions on this pipeline are welcome on PR [#1836](https://github.com/sktime/pytorch-forecasting/pull/1836).

In [None]:
from typing import Any, Optional, Union

import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler, StandardScaler
import torch
from torch.optim import Optimizer
from torch.utils.data import Dataset

from pytorch_forecasting.data._tslib_data_module import TslibDataModule
from pytorch_forecasting.data.encoders import (
    EncoderNormalizer,
    NaNLabelEncoder,
    TorchNormalizer,
)
from pytorch_forecasting.data.timeseries import TimeSeries
from pytorch_forecasting.models.timexer._timexer_v2 import TimeXer

## Construct a time series dataset

In [3]:
def build_series_records(num_series: int, seq_length: int, seed: int = 42) -> list:
    """Generate long-format records for a panel of noisy sine-wave series.

    Each series is ``sin(step / 5)`` plus Gaussian noise (std 0.1). For every
    time index ``t`` in ``[0, seq_length - 2]`` one record is emitted whose
    ``x`` is the value at ``t`` and whose ``y`` is the value at ``t + 1``.

    Parameters
    ----------
    num_series : int
        Number of independent series to generate.
    seq_length : int
        Number of raw samples per series; yields ``seq_length - 1`` records
        per series (none if ``seq_length <= 1``).
    seed : int, default=42
        Seed for the local random generator, so repeated calls (and a
        Restart & Run All) produce the same data.

    Returns
    -------
    list of dict
        One dict per (series, time index) with keys ``series_id``,
        ``time_idx``, ``x``, ``y``, ``category``, ``future_known_feature``,
        ``static_feature`` and ``static_feature_cat``.
    """
    # A local generator keeps the notebook reproducible without touching
    # NumPy's global random state.
    rng = np.random.default_rng(seed)
    records = []
    for series_id in range(num_series):
        steps = np.arange(seq_length)
        signal = np.sin(steps / 5.0) + rng.normal(scale=0.1, size=seq_length)
        static_value = rng.random()  # one constant value per series
        # Stop one step early because the target is the *next* sample.
        for t in range(seq_length - 1):
            records.append(
                {
                    "series_id": series_id,
                    "time_idx": t,
                    "x": signal[t],
                    "y": signal[t + 1],
                    "category": series_id % 5,
                    "future_known_feature": np.cos(t / 10),
                    "static_feature": static_value,
                    "static_feature_cat": series_id % 3,
                }
            )
    return records


num_series = 100
seq_length = 50
data_list = build_series_records(num_series, seq_length)
data_df = pd.DataFrame(data_list)
data_df.head()

Unnamed: 0,series_id,time_idx,x,y,category,future_known_feature,static_feature,static_feature_cat
0,0,0,-0.053474,0.079365,0,1.0,0.07624,0
1,0,1,0.079365,0.475101,0,0.995004,0.07624,0
2,0,2,0.475101,0.553274,0,0.980067,0.07624,0
3,0,3,0.553274,0.59346,0,0.955336,0.07624,0
4,0,4,0.59346,0.999893,0,0.921061,0.07624,0


In [4]:
# Wrap the raw frame in the v2 `TimeSeries` container. The names passed below
# refer to columns of `data_df`, grouped by role (time index, target, group id,
# numeric / categorical features, known / unknown / static features).
dataset = TimeSeries(
    data=data_df,
    time="time_idx",
    target="y",
    group=["series_id"],
    # Fixed typo: this previously read "future_know_feature", which is not a
    # column of `data_df` and disagreed with the `known` list below.
    num=["x", "future_known_feature", "static_feature"],
    cat=["category", "static_feature_cat"],
    known=["future_known_feature"],
    unknown=["x", "category"],
    static=["static_feature", "static_feature_cat"],
)

  warn(


## Initialise the `TslibDataModule` using the dataset

In [5]:
# Per-column label encoders for the two categorical features.
categorical_encoders = {
    "category": NaNLabelEncoder(add_nan=True),
    "static_feature_cat": NaNLabelEncoder(add_nan=True),
}

# Per-column scalers for the three continuous features.
feature_scalers = {
    name: StandardScaler()
    for name in ("x", "future_known_feature", "static_feature")
}

# Build the data module on top of `dataset`: a 30-step context window,
# a 1-step prediction horizon and batches of 32 samples.
data_module = TslibDataModule(
    time_series_dataset=dataset,
    context_length=30,
    prediction_length=1,
    add_relative_time_idx=True,
    target_normalizer=TorchNormalizer(),
    categorical_encoders=categorical_encoders,
    scalers=feature_scalers,
    batch_size=32,
)



In [6]:
# Check the Python type of the metadata object exposed by the data module.
type(data_module.metadata)

dict

In [7]:
# Display the metadata itself; this same object is later handed to the models
# through their `metadata=` argument.
data_module.metadata

{'feature_names': {'categorical': ['category', 'static_feature_cat'],
  'continuous': ['x', 'future_known_feature', 'static_feature'],
  'static': ['static_feature', 'static_feature_cat'],
  'known': ['future_known_feature'],
  'unknown': ['x', 'category', 'static_feature', 'static_feature_cat'],
  'target': ['y'],
  'all': ['x',
   'category',
   'future_known_feature',
   'static_feature',
   'static_feature_cat'],
  'static_categorical': ['static_feature_cat'],
  'static_continuous': ['static_feature']},
 'feature_indices': {'categorical': [1, 4],
  'continuous': [0, 2, 3],
  'static': [],
  'known': [2],
  'unknown': [0, 1, 3, 4],
  'target': [0]},
 'n_features': {'categorical': 2,
  'continuous': 3,
  'static': 2,
  'known': 1,
  'unknown': 4,
  'target': 1,
  'all': 5,
  'static_categorical': 1,
  'static_continuous': 1},
 'context_length': 30,
 'prediction_length': 1,
 'freq': 'h',
 'features': 'MS'}

In [8]:
import torch.nn as nn

from pytorch_forecasting.metrics import MAE, SMAPE, QuantileLoss

## Initialise the model

We shall try out two versions of this model, one using `nn.MSELoss()` and one with `QuantileLoss()`.

In [9]:
# Configuration for the first TimeXer variant: a point forecast trained with
# torch's `nn.MSELoss`, with MAE and SMAPE logged as additional metrics.
model1_kwargs = dict(
    loss=nn.MSELoss(),
    hidden_size=64,
    nhead=4,
    e_layers=2,
    d_ff=256,
    dropout=0.1,
    patch_length=4,
    logging_metrics=[MAE(), SMAPE()],
    optimizer="adam",
    optimizer_params={"lr": 1e-3},
    lr_scheduler="reduce_lr_on_plateau",
    lr_scheduler_params={"mode": "min", "factor": 0.5, "patience": 5},
    # The data module's metadata is passed straight through to the model.
    metadata=data_module.metadata,
)
model1 = TimeXer(**model1_kwargs)

  warn(
  warn(
  warn.warn(
  warn.warn(


In [10]:
# Second TimeXer variant: same architecture and optimiser settings as model1,
# but trained with a quantile loss over the 0.1, 0.5 and 0.9 quantiles.
quantile_loss = QuantileLoss(quantiles=[0.1, 0.5, 0.9])
plateau_scheduler_params = {
    "mode": "min",
    "factor": 0.5,
    "patience": 5,
}

model2 = TimeXer(
    loss=quantile_loss,
    hidden_size=64,
    nhead=4,
    e_layers=2,
    d_ff=256,
    dropout=0.1,
    patch_length=4,
    logging_metrics=[MAE(), SMAPE()],
    optimizer="adam",
    optimizer_params={"lr": 1e-3},
    lr_scheduler="reduce_lr_on_plateau",
    lr_scheduler_params=plateau_scheduler_params,
    metadata=data_module.metadata,
)

In [11]:
from lightning.pytorch import Trainer

# One Trainer per model, so each run keeps its own loop state.
# trainer1 drives model1 (MSE loss) for 5 epochs.
trainer1 = Trainer(
    max_epochs=5,
    accelerator="auto",
    devices=1,
    enable_progress_bar=True,
    enable_model_summary=True,
)

# trainer2 drives model2 (quantile loss) for 4 epochs.
trainer2 = Trainer(
    max_epochs=4,
    # Was hard-coded to "gpu", which raises on machines without a GPU;
    # "auto" matches trainer1 and still selects the GPU when one is available.
    accelerator="auto",
    devices=1,
    enable_progress_bar=True,
    enable_model_summary=True,
)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


## Fit the trainer on the model and feed data using the data module

In [12]:
# Train model1 (the MSE-loss variant) with trainer1, feeding it from data_module.
trainer1.fit(model1, data_module)

You are using a CUDA device ('NVIDIA GeForce RTX 3050 6GB Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type                   | Params | Mode 
----------------------------------------------------------------
0 | loss         | MSELoss                | 0      | train
1 | en_embedding | EnEmbedding            | 320    | train
2 | ex_embedding | DataEmbedding_inverted | 2.0 K  | train
3 | encoder      | Encoder                | 133 K  | train
4 | head         | FlattenHead            | 513    | train
----------------------------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.546     Tot

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (42) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


Now let us train the model using `QuantileLoss`.

In [13]:
# Train model2 (the QuantileLoss variant) with trainer2 on the same data module.
trainer2.fit(model2, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type                   | Params | Mode 
----------------------------------------------------------------
0 | loss         | QuantileLoss           | 0      | train
1 | en_embedding | EnEmbedding            | 320    | train
2 | ex_embedding | DataEmbedding_inverted | 2.0 K  | train
3 | encoder      | Encoder                | 133 K  | train
4 | head         | FlattenHead            | 1.5 K  | train
----------------------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
57        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (42) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.


## Test the model

In [14]:
# Run trainer1's test loop on model1 and keep the return value in `test_metrics`.
test_metrics = trainer1.test(model1, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE            0.47346481680870056
       test_SMAPE           1.0982568264007568
        test_loss           0.01038370467722416
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [15]:
# Put model1 into evaluation mode before the manual forward pass below
# (the bare expression also displays the module structure as cell output).
model1.eval()

TimeXer(
  (loss): MSELoss()
  (en_embedding): EnEmbedding(
    (value_embedding): Linear(in_features=4, out_features=64, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (ex_embedding): DataEmbedding_inverted(
    (value_embedding): Linear(in_features=30, out_features=64, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): Encoder(
    (layers): ModuleList(
      (0-1): 2 x EncoderLayer(
        (self_attention): AttentionLayer(
          (inner_attention): FullAttention(
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (query_projection): Linear(in_features=64, out_features=64, bias=True)
          (key_projection): Linear(in_features=64, out_features=64, bias=True)
          (value_projection): Linear(in_features=64, out_features=64, bias=True)
          (out_projection): Linear(in_features=64, out_features=64, bias=True)
        )
        (cross_attention): AttentionLayer(
 

In [16]:
# Take the first batch from the test dataloader and run model1 on its inputs
# with gradient tracking disabled.
with torch.no_grad():
    test_loader = data_module.test_dataloader()
    test_batch = next(iter(test_loader))
    x_test, y_test = test_batch
    y_pred = model1(x_test)

# The model output is indexed by key; "prediction" holds the forecast tensor.
print("Prediction:", y_pred["prediction"])

Prediction: tensor([[[-0.0154]],

        [[ 0.1871]],

        [[ 0.3392]],

        [[ 0.4948]],

        [[ 0.6630]],

        [[ 0.7778]],

        [[ 0.8391]],

        [[ 0.9001]],

        [[ 0.9442]],

        [[ 0.9302]],

        [[ 0.8536]],

        [[ 0.7712]],

        [[ 0.6658]],

        [[ 0.5068]],

        [[ 0.3144]],

        [[ 0.1697]],

        [[-0.0298]],

        [[-0.1914]],

        [[-0.3421]],

        [[-0.0522]],

        [[ 0.1340]],

        [[ 0.3186]],

        [[ 0.4486]],

        [[ 0.5877]],

        [[ 0.7270]],

        [[ 0.8507]],

        [[ 0.9346]],

        [[ 0.9891]],

        [[ 0.9665]],

        [[ 0.9295]],

        [[ 0.8394]],

        [[ 0.6876]]])


In [17]:
# Inspect the shape of the prediction tensor produced by model1.
y_pred["prediction"].shape

torch.Size([32, 1, 1])

Let us do the same for `QuantileLoss` predictions.

In [18]:
# Run trainer2's test loop on model2.
# NOTE: this rebinds `test_metrics`, replacing the value stored from model1's test run.
test_metrics = trainer2.test(model2, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\prana\Desktop\code\pytorch-forecasting\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_MAE            14.938094139099121
       test_SMAPE           32.958351135253906
        test_loss            7.362493991851807
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [20]:
# Put model2 into evaluation mode before the manual forward pass below
# (the bare expression also displays the module structure as cell output).
model2.eval()

TimeXer(
  (loss): QuantileLoss(quantiles=[0.1, 0.5, 0.9])
  (en_embedding): EnEmbedding(
    (value_embedding): Linear(in_features=4, out_features=64, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (ex_embedding): DataEmbedding_inverted(
    (value_embedding): Linear(in_features=30, out_features=64, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): Encoder(
    (layers): ModuleList(
      (0-1): 2 x EncoderLayer(
        (self_attention): AttentionLayer(
          (inner_attention): FullAttention(
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (query_projection): Linear(in_features=64, out_features=64, bias=True)
          (key_projection): Linear(in_features=64, out_features=64, bias=True)
          (value_projection): Linear(in_features=64, out_features=64, bias=True)
          (out_projection): Linear(in_features=64, out_features=64, bias=True)
        )
        (cross

In [21]:
# Same manual forward pass as for model1, this time through the
# quantile-loss model; gradient tracking stays disabled throughout.
with torch.no_grad():
    test_loader = data_module.test_dataloader()
    test_batch = next(iter(test_loader))
    x_test, y_test = test_batch
    y_pred = model2(x_test)

# Rebinds `y_pred`; "prediction" now holds model2's output tensor.
print("Prediction:", y_pred["prediction"])

Prediction: tensor([[[[-0.0882, -0.0966,  0.2563]]],


        [[[ 0.0720,  0.0759,  0.3883]]],


        [[[ 0.1974,  0.2169,  0.5460]]],


        [[[ 0.3700,  0.3511,  0.6714]]],


        [[[ 0.5103,  0.4835,  0.8189]]],


        [[[ 0.6425,  0.6295,  0.9646]]],


        [[[ 0.7363,  0.7258,  1.0382]]],


        [[[ 0.8186,  0.8055,  1.1244]]],


        [[[ 0.8566,  0.8481,  1.1788]]],


        [[[ 0.8571,  0.8442,  1.1794]]],


        [[[ 0.8222,  0.7861,  1.1303]]],


        [[[ 0.7280,  0.6996,  1.0412]]],


        [[[ 0.6072,  0.5680,  0.9375]]],


        [[[ 0.5022,  0.4539,  0.7923]]],


        [[[ 0.3323,  0.3054,  0.6894]]],


        [[[ 0.1943,  0.1492,  0.5411]]],


        [[[ 0.0051, -0.0246,  0.3653]]],


        [[[-0.1639, -0.1800,  0.1996]]],


        [[[-0.3275, -0.3510,  0.0374]]],


        [[[-0.1448, -0.1228,  0.2111]]],


        [[[ 0.0111,  0.0275,  0.3488]]],


        [[[ 0.1809,  0.1731,  0.4954]]],


        [[[ 0.3314,  0.3016,  0.6251]]],



In [22]:
# Inspect the shape of model2's prediction tensor; model2 was configured with
# three quantiles (0.1, 0.5, 0.9).
y_pred["prediction"].shape

torch.Size([32, 1, 1, 3])