In [22]:
import warnings

warnings.filterwarnings("ignore")

import torch
from torch import nn


class FullyConnectedModule(nn.Module):
    def __init__(
        self, input_size: int, output_size: int, hidden_size: int, n_hidden_layers: int
    ):
        super().__init__()

        # input layer
        module_list = [nn.Linear(input_size, hidden_size), nn.ReLU()]
        # hidden layers
        for _ in range(n_hidden_layers):
            module_list.extend([nn.Linear(hidden_size, hidden_size), nn.ReLU()])
        # output layer
        module_list.append(nn.Linear(hidden_size, output_size))

        self.sequential = nn.Sequential(*module_list)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x of shape: batch_size x n_timesteps_in
        # output of shape batch_size x n_timesteps_out
        return self.sequential(x)


# test that network works as intended
network = FullyConnectedModule(
    input_size=5, output_size=2, hidden_size=10, n_hidden_layers=2
)
x = torch.rand(20, 5)
network(x).shape

torch.Size([20, 2])

In [23]:


from typing import Dict

from pytorch_forecasting.models import BaseModel


class FullyConnectedModel(BaseModel):
    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_size: int,
        n_hidden_layers: int,
        **kwargs,
    ):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters()
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(**kwargs)
        self.network = FullyConnectedModule(
            input_size=self.hparams.input_size,
            output_size=self.hparams.output_size,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
        )

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # x is a batch generated based on the TimeSeriesDataset
        network_input = x["encoder_cont"].squeeze(-1)
        prediction = self.network(network_input)

        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])

        # We need to return a dictionary that at least contains the prediction
        # The parameter can be directly forwarded from the input.
        # The conversion to a named tuple can be directly achieved with the `to_network_output` function.
        return self.to_network_output(prediction=prediction)



In [28]:
import numpy as np
import pandas as pd

test_data = pd.DataFrame(
    dict(
        value=np.random.rand(30) - 0.5,
        group=np.repeat(np.arange(3), 10),
        time_idx=np.tile(np.arange(10), 3),
    )
)
test_data

Unnamed: 0,value,group,time_idx
0,-0.338069,0,0
1,-0.406051,0,1
2,0.181241,0,2
3,-0.420523,0,3
4,-0.21454,0,4
5,-0.49931,0,5
6,-0.093068,0,6
7,0.226169,0,7
8,0.363002,0,8
9,-0.164583,0,9


In [29]:
from pytorch_forecasting import TimeSeriesDataSet

# create the dataset from the pandas dataframe
dataset = TimeSeriesDataSet(
    test_data,
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    time_varying_unknown_reals=["value"],
)

In [30]:
dataset.get_parameters()

{'time_idx': 'time_idx',
 'target': 'value',
 'group_ids': ['group'],
 'weight': None,
 'max_encoder_length': 5,
 'min_encoder_length': 5,
 'min_prediction_idx': 0,
 'min_prediction_length': 2,
 'max_prediction_length': 2,
 'static_categoricals': None,
 'static_reals': None,
 'time_varying_known_categoricals': None,
 'time_varying_known_reals': None,
 'time_varying_unknown_categoricals': None,
 'time_varying_unknown_reals': ['value'],
 'variable_groups': None,
 'constant_fill_strategy': None,
 'allow_missing_timesteps': False,
 'lags': None,
 'add_relative_time_idx': False,
 'add_target_scales': False,
 'add_encoder_length': False,
 'target_normalizer': GroupNormalizer(
 	method='standard',
 	groups=None,
 	center=True,
 	scale_by_group=False,
 	transformation=None,
 	method_kwargs={}
 ),
 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True),
  'group': NaNLabelEncoder(add_nan=False, warn=True)},
 'scalers': {},
 'randomize_length': None,
 'predict_mo

In [31]:
# convert the dataset to a dataloader
dataloader = dataset.to_dataloader(batch_size=4)

# and load the first batch
x, y = next(iter(dataloader))
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")



x = {'encoder_cat': tensor([], size=(4, 5, 0), dtype=torch.int64), 'encoder_cont': tensor([[[-1.3391],
         [-0.5887],
         [-1.6261],
         [-0.1462],
         [ 1.0168]],

        [[-1.3801],
         [-0.4705],
         [-1.0244],
         [-0.5923],
         [-0.0192]],

        [[-0.0243],
         [ 1.0012],
         [-0.5013],
         [ 0.7556],
         [ 0.9409]],

        [[-0.5013],
         [ 0.7556],
         [ 0.9409],
         [ 1.2719],
         [ 0.5904]]]), 'encoder_target': tensor([[-0.4205, -0.2145, -0.4993, -0.0931,  0.2262],
        [-0.4318, -0.1821, -0.3342, -0.2155, -0.0582],
        [-0.0596,  0.2219, -0.1906,  0.1545,  0.2053],
        [-0.1906,  0.1545,  0.2053,  0.2962,  0.1091]]), 'encoder_lengths': tensor([5, 5, 5, 5]), 'decoder_cat': tensor([], size=(4, 2, 0), dtype=torch.int64), 'decoder_cont': tensor([[[ 1.5153],
         [-0.4067]],

        [[-1.5492],
         [ 0.9239]],

        [[ 1.2719],
         [ 0.5904]],

        [[ 1.0590],
   

In [33]:
model = FullyConnectedModel.from_dataset(
    dataset, input_size=5, output_size=2, hidden_size=10, n_hidden_layers=2
)
x, y = next(iter(dataloader))
model(x)



Output(prediction=tensor([[-0.1340, -0.0104],
        [-0.1223, -0.0217],
        [-0.1550,  0.0159],
        [-0.1186, -0.0216]], grad_fn=<AddBackward0>))

In [35]:
dataset.x_to_index(x)

Unnamed: 0,time_idx,group
0,8,2
1,6,1
2,6,0
3,5,2


In [36]:
class FullyConnectedModel(BaseModel):
    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_size: int,
        n_hidden_layers: int,
        **kwargs,
    ):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters()
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(**kwargs)
        self.network = FullyConnectedModule(
            input_size=self.hparams.input_size,
            output_size=self.hparams.output_size,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
        )

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # x is a batch generated based on the TimeSeriesDataset
        network_input = x["encoder_cont"].squeeze(-1)
        prediction = self.network(network_input).unsqueeze(-1)

        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])

        # We need to return a dictionary that at least contains the prediction.
        # The parameter can be directly forwarded from the input.
        # The conversion to a named tuple can be directly achieved with the `to_network_output` function.
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        new_kwargs = {
            "output_size": dataset.max_prediction_length,
            "input_size": dataset.max_encoder_length,
        }
        new_kwargs.update(
            kwargs
        )  # use to pass real hyperparameters and override defaults set by dataset
        # example for dataset validation
        assert (
            dataset.max_prediction_length == dataset.min_prediction_length
        ), "Decoder only supports a fixed length"
        assert (
            dataset.min_encoder_length == dataset.max_encoder_length
        ), "Encoder only supports a fixed length"
        assert (
            len(dataset._time_varying_known_categoricals) == 0
            and len(dataset._time_varying_known_reals) == 0
            and len(dataset._time_varying_unknown_categoricals) == 0
            and len(dataset._static_categoricals) == 0
            and len(dataset._static_reals) == 0
            and len(dataset._time_varying_unknown_reals) == 1
            and dataset._time_varying_unknown_reals[0] == dataset.target
        ), "Only covariate should be the target in 'time_varying_unknown_reals'"

        return super().from_dataset(dataset, **new_kwargs)

In [37]:
from lightning.pytorch.utilities.model_summary import ModelSummary

model = FullyConnectedModel.from_dataset(dataset, hidden_size=10, n_hidden_layers=2)
print(ModelSummary(model, max_depth=-1))
model.hparams

   | Name                 | Type                 | Params | Mode 
-----------------------------------------------------------------------
0  | loss                 | SMAPE                | 0      | train
1  | logging_metrics      | ModuleList           | 0      | train
2  | network              | FullyConnectedModule | 302    | train
3  | network.sequential   | Sequential           | 302    | train
4  | network.sequential.0 | Linear               | 60     | train
5  | network.sequential.1 | ReLU                 | 0      | train
6  | network.sequential.2 | Linear               | 110    | train
7  | network.sequential.3 | ReLU                 | 0      | train
8  | network.sequential.4 | Linear               | 110    | train
9  | network.sequential.5 | ReLU                 | 0      | train
10 | network.sequential.6 | Linear               | 22     | train
-----------------------------------------------------------------------
302       Trainable params
0         Non-trainable params
302   

"dataset_parameters":          {'time_idx': 'time_idx', 'target': 'value', 'group_ids': ['group'], 'weight': None, 'max_encoder_length': 5, 'min_encoder_length': 5, 'min_prediction_idx': 0, 'min_prediction_length': 2, 'max_prediction_length': 2, 'static_categoricals': None, 'static_reals': None, 'time_varying_known_categoricals': None, 'time_varying_known_reals': None, 'time_varying_unknown_categoricals': None, 'time_varying_unknown_reals': ['value'], 'variable_groups': None, 'constant_fill_strategy': None, 'allow_missing_timesteps': False, 'lags': None, 'add_relative_time_idx': False, 'add_target_scales': False, 'add_encoder_length': False, 'target_normalizer': GroupNormalizer(
	method='standard',
	groups=None,
	center=True,
	scale_by_group=False,
	transformation=None,
	method_kwargs={}
), 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True), 'group': NaNLabelEncoder(add_nan=False, warn=True)}, 'scalers': {}, 'randomize_length': None, 'predict_mode':

In [38]:
model.hparams



"dataset_parameters":          {'time_idx': 'time_idx', 'target': 'value', 'group_ids': ['group'], 'weight': None, 'max_encoder_length': 5, 'min_encoder_length': 5, 'min_prediction_idx': 0, 'min_prediction_length': 2, 'max_prediction_length': 2, 'static_categoricals': None, 'static_reals': None, 'time_varying_known_categoricals': None, 'time_varying_known_reals': None, 'time_varying_unknown_categoricals': None, 'time_varying_unknown_reals': ['value'], 'variable_groups': None, 'constant_fill_strategy': None, 'allow_missing_timesteps': False, 'lags': None, 'add_relative_time_idx': False, 'add_target_scales': False, 'add_encoder_length': False, 'target_normalizer': GroupNormalizer(
	method='standard',
	groups=None,
	center=True,
	scale_by_group=False,
	transformation=None,
	method_kwargs={}
), 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True), 'group': NaNLabelEncoder(add_nan=False, warn=True)}, 'scalers': {}, 'randomize_length': None, 'predict_mode':

In [40]:
classification_test_data = pd.DataFrame(
    dict(
        target=np.random.choice(
            ["A", "B", "C"], size=30
        ),  # CHANGING values to predict to a categorical
        value=np.random.rand(
            30
        ),  # INPUT values - see next section on covariates how to use categorical inputs
        group=np.repeat(np.arange(3), 10),
        time_idx=np.tile(np.arange(10), 3),
    )
)
classification_test_data



Unnamed: 0,target,value,group,time_idx
0,C,0.070268,0,0
1,C,0.245169,0,1
2,A,0.469164,0,2
3,B,0.730962,0,3
4,C,0.337209,0,4
5,B,0.84903,0,5
6,B,0.590698,0,6
7,B,0.505766,0,7
8,B,0.538745,0,8
9,C,0.508861,0,9


In [41]:
from pytorch_forecasting.data.encoders import NaNLabelEncoder

# create the dataset from the pandas dataframe
classification_dataset = TimeSeriesDataSet(
    classification_test_data,
    group_ids=["group"],
    target="target",  # SWITCHING to categorical target
    time_idx="time_idx",
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    time_varying_unknown_reals=["value"],
    target_normalizer=NaNLabelEncoder(),  # Use the NaNLabelEncoder to encode categorical target
)

x, y = next(iter(classification_dataset.to_dataloader(batch_size=4)))
y[0]  # target values are encoded categories



tensor([[0, 0],
        [2, 1],
        [1, 1],
        [0, 2]])

In [42]:
from pytorch_forecasting.metrics import CrossEntropy


class FullyConnectedClassificationModel(BaseModel):
    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_size: int,
        n_hidden_layers: int,
        n_classes: int,
        loss=CrossEntropy(),
        **kwargs,
    ):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters()
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(loss=loss, **kwargs)
        self.network = FullyConnectedModule(
            input_size=self.hparams.input_size,
            output_size=self.hparams.output_size * self.hparams.n_classes,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
        )

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # x is a batch generated based on the TimeSeriesDataset
        batch_size = x["encoder_cont"].size(0)
        network_input = x["encoder_cont"].squeeze(-1)
        prediction = self.network(network_input)
        # RESHAPE output to batch_size x n_decoder_timesteps x n_classes
        prediction = prediction.unsqueeze(-1).view(
            batch_size, -1, self.hparams.n_classes
        )

        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])

        # We need to return a named tuple that at least contains the prediction.
        # The parameter can be directly forwarded from the input.
        # The conversion to a named tuple can be directly achieved with the `to_network_output` function.
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        assert isinstance(
            dataset.target_normalizer, NaNLabelEncoder
        ), "target normalizer has to encode categories"
        new_kwargs = {
            "n_classes": len(
                dataset.target_normalizer.classes_
            ),  # ADD number of classes as encoded by the target normalizer
            "output_size": dataset.max_prediction_length,
            "input_size": dataset.max_encoder_length,
        }
        new_kwargs.update(
            kwargs
        )  # use to pass real hyperparameters and override defaults set by dataset
        # example for dataset validation
        assert (
            dataset.max_prediction_length == dataset.min_prediction_length
        ), "Decoder only supports a fixed length"
        assert (
            dataset.min_encoder_length == dataset.max_encoder_length
        ), "Encoder only supports a fixed length"
        assert (
            len(dataset._time_varying_known_categoricals) == 0
            and len(dataset._time_varying_known_reals) == 0
            and len(dataset._time_varying_unknown_categoricals) == 0
            and len(dataset._static_categoricals) == 0
            and len(dataset._static_reals) == 0
            and len(dataset._time_varying_unknown_reals) == 1
        ), "Only covariate should be in 'time_varying_unknown_reals'"

        return super().from_dataset(dataset, **new_kwargs)


model = FullyConnectedClassificationModel.from_dataset(
    classification_dataset, hidden_size=10, n_hidden_layers=2
)
print(ModelSummary(model, max_depth=-1))
model.hparams

   | Name                 | Type                 | Params | Mode 
-----------------------------------------------------------------------
0  | loss                 | CrossEntropy         | 0      | train
1  | logging_metrics      | ModuleList           | 0      | train
2  | network              | FullyConnectedModule | 346    | train
3  | network.sequential   | Sequential           | 346    | train
4  | network.sequential.0 | Linear               | 60     | train
5  | network.sequential.1 | ReLU                 | 0      | train
6  | network.sequential.2 | Linear               | 110    | train
7  | network.sequential.3 | ReLU                 | 0      | train
8  | network.sequential.4 | Linear               | 110    | train
9  | network.sequential.5 | ReLU                 | 0      | train
10 | network.sequential.6 | Linear               | 66     | train
-----------------------------------------------------------------------
346       Trainable params
0         Non-trainable params
346   

"dataset_parameters":          {'time_idx': 'time_idx', 'target': 'target', 'group_ids': ['group'], 'weight': None, 'max_encoder_length': 5, 'min_encoder_length': 5, 'min_prediction_idx': 0, 'min_prediction_length': 2, 'max_prediction_length': 2, 'static_categoricals': None, 'static_reals': None, 'time_varying_known_categoricals': None, 'time_varying_known_reals': None, 'time_varying_unknown_categoricals': None, 'time_varying_unknown_reals': ['value'], 'variable_groups': None, 'constant_fill_strategy': None, 'allow_missing_timesteps': False, 'lags': None, 'add_relative_time_idx': False, 'add_target_scales': False, 'add_encoder_length': False, 'target_normalizer': NaNLabelEncoder(add_nan=False, warn=True), 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True)}, 'scalers': {'value': StandardScaler()}, 'randomize_length': None, 'predict_mode': False}
"hidden_size":                 10
"input_size":                  5
"learning_rate":               0.001
"l

In [48]:
# passing x through model
model(x)["prediction"].shape
print(x["prediction"])



RuntimeError: mat1 and mat2 shapes cannot be multiplied (20x2 and 5x10)

In [None]:
# PREDICT 2 TARGETS

multi_target_test_data = pd.DataFrame(
    dict(
        target1=np.random.rand(30),
        target2=np.random.rand(30),
        group=np.repeat(np.arange(3), 10),
        time_idx=np.tile(np.arange(10), 3),
    )
)
multi_target_test_data



Unnamed: 0,target1,target2,group,time_idx
0,0.749697,0.46848,0,0
1,0.382384,0.191151,0,1
2,0.746308,0.37818,0,2
3,0.274687,0.839516,0,3
4,0.64417,0.434613,0,4
5,0.886505,0.329179,0,5
6,0.428987,0.00253,0,6
7,0.508131,0.466195,0,7
8,0.880189,0.13368,0,8
9,0.987291,0.059997,0,9


In [46]:
from pytorch_forecasting.data.encoders import (
    EncoderNormalizer,
    MultiNormalizer,
    TorchNormalizer,
)

# create the dataset from the pandas dataframe
multi_target_dataset = TimeSeriesDataSet(
    multi_target_test_data,
    group_ids=["group"],
    target=["target1", "target2"],  # USING two targets
    time_idx="time_idx",
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    time_varying_unknown_reals=["target1", "target2"],
    target_normalizer=MultiNormalizer(
        [EncoderNormalizer(), TorchNormalizer()]
    ),  # Use the NaNLabelEncoder to encode categorical target
)

x, y = next(iter(multi_target_dataset.to_dataloader(batch_size=4)))
y[0]  # target values are a list of targets

[tensor([[0.8865, 0.4290],
         [0.6423, 0.8096],
         [0.9244, 0.2557],
         [0.2371, 0.8288]]),
 tensor([[0.3292, 0.0025],
         [0.5787, 0.6834],
         [0.8391, 0.9472],
         [0.4756, 0.9696]])]