In [1]:
import warnings

warnings.filterwarnings("ignore")

from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import pandas as pd

from tsururu.dataset import Pipeline, TSDataset
from tsururu.model_training.trainer import DLTrainer
from tsururu.model_training.validator import KFoldCrossValidator
from tsururu.models.torch_based.dlinear import DLinear_NN
from tsururu.strategies import DirectStrategy, RecursiveStrategy
from tsururu.transformers import (
    LagTransformer,
    SequentialTransformer,
    TargetGenerator,
    UnionTransformer,
)

In [2]:
def get_results(
    cv: int,
    regime: str,
    y_true: Optional[List[np.ndarray]] = None,
    y_pred: Optional[List[np.ndarray]] = None,
    ids: Optional[List[Union[float, str]]] = None,
) -> pd.DataFrame:
    def _get_fold_value(
        value: Optional[Union[float, np.ndarray]], idx: int
    ) -> List[Optional[Union[float, np.ndarray]]]:
        if value is None:
            return [None]
        if isinstance(value[idx], float):
            return value[idx]
        if isinstance(value[idx], np.ndarray):
            return value[idx].reshape(-1)
        raise TypeError(f"Unexpected value type. Value: {value}")

    df_res_dict = {}

    for idx_fold in range(cv):
        # Fill df_res_dict
        for name, value in [("y_true", y_true), ("y_pred", y_pred)]:
            df_res_dict[f"{name}_{idx_fold+1}"] = _get_fold_value(value, idx_fold)
        if regime != "local":
            df_res_dict[f"id_{idx_fold+1}"] = _get_fold_value(ids, idx_fold)

    # Save datasets to specified directory
    df_res = pd.DataFrame(df_res_dict)
    return df_res

## Initialize TSDataset, Pipeline, Model, Validator, Strategy

The initialization of the main components is exactly the same as when using ML models. The only difference is that `DLTrainer` allows you to pass many more parameters compared to `MLTrainer`.

### TSDataset

In [3]:
df_path = Path("../datasets/global/simulated_data_to_check.csv")

dataset_params = {
    "target": {
        "columns": ["value"],
        "type": "continuous",
    },
    "date": {
        "columns": ["date"],
        "type": "datetime",
    },
    "id": {
        "columns": ["id"],
        "type": "categorical",
    }
}

In [4]:
dataset = TSDataset(
    data=pd.read_csv(df_path),
    columns_params=dataset_params,
    print_freq_period_info=True,
)

freq: Day; period: 1


### Pipeline

In [5]:
lag = LagTransformer(lags=7)
target_generator = TargetGenerator()

union_1 = UnionTransformer(transformers_list=[lag, target_generator])
seq_1 = SequentialTransformer(transformers_list=[union_1], input_features=["value"])
union = UnionTransformer(transformers_list=[seq_1])

pipeline = Pipeline(union, multivariate=True)

### Trainer

Currently, the available architectures are `DLinear` and `PatchTST`. However, adding your own architecture is quite simple if you follow the logic of the base model class.

In [6]:
# Configure the model parameters
model = DLinear_NN  # DLinear_NN or PatchTST
model_params = {"moving_avg": 7, "individual": False, "enc_in": None}

# Configure the validation parameters
validation = KFoldCrossValidator
validation_params = {
    "n_splits": 2,
}

trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    "save_to_dir": False,
}

trainer = DLTrainer(
    model, 
    model_params, 
    validation, 
    validation_params, 
    **trainer_params
)

### Strategy

In [7]:
horizon = 7
model_horizon = 7
history = 7

In [8]:
strategy = RecursiveStrategy(
    pipeline=pipeline,
    trainer=trainer,
    horizon=horizon,
    history=history,
)

In [9]:
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497


Epoch 1/10, cost time: 2.32s
train loss: 394.8396
Validation, Loss: 152.7835, Metric: -152.7835
val loss: 152.7835, val metric: -152.7835
Epoch 2/10, cost time: 2.05s
train loss: 63.7407
Validation, Loss: 45.0496, Metric: -45.0496
val loss: 45.0496, val metric: -45.0496
Epoch 3/10, cost time: 2.10s
train loss: 16.8944
Validation, Loss: 7.9484, Metric: -7.9484
val loss: 7.9484, val metric: -7.9484
Epoch 4/10, cost time: 2.14s
train loss: 6.3505
Validation, Loss: 4.6323, Metric: -4.6323
val loss: 4.6323, val metric: -4.6323
Epoch 5/10, cost time: 2.09s
train loss: 4.5366
Validation, Loss: 3.9856, Metric: -3.9856
val loss: 3.9856, val metric: -3.9856
Epoch 6/10, cost time: 2.06s
train loss: 4.0089
Validation, Loss: 3.9551, Metric: -3.9551
val loss: 3.9551, val metric: -3.9551
Removing worst model snapshot: from epoch 0
Epoch 7/10, cost time: 2.10s
train loss: 3.8999
Validation, Loss: 3.9088, Metric: -3.9088
val loss: 3.9088, val metric: -3.9088
Removing worst model snapshot: from epoch 1


(77.95777916908264,
 <tsururu.strategies.recursive.RecursiveStrategy at 0x7fa3e5905d50>)

In [10]:
forecast_time, current_pred = strategy.predict(dataset)

freq: Day; period: 1
length of test dataset: 1
length of test dataset: 1


length of test dataset: 1
length of test dataset: 1
length of test dataset: 1
length of test dataset: 1
length of test dataset: 1


In [11]:
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1996.918457
1,0,2022-09-28,1997.491211
2,0,2022-09-29,1997.960571
3,0,2022-09-30,1998.343872
4,0,2022-10-01,1998.670898
...,...,...,...
65,9,2022-09-29,11004.71582
66,9,2022-09-30,11006.132812
67,9,2022-10-01,11007.585938
68,9,2022-10-02,11009.080078


Saving and loading checkpoints is an essential practice in training DL models. 

Let's explore how to save checkpoints to disk, what structure the saved files have, and how to restore the model from a checkpoint for either fine-tuning or inference.

## Save and load checkpoints

Let’s consider working with checkpoints using the Direct strategy as an example.

In [12]:
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    # Let's enable save_to_dir (by the way, default value is True)
    "save_to_dir": True,
    "checkpoint_path": "checkpoints/",
    # Save checkpoints for 3 best model
    "save_k_best": 3,
    # Average checkpoints for the final model
    "average_snapshots": True,
}

trainer = DLTrainer(
    model, 
    model_params, 
    validation, 
    validation_params, 
    **trainer_params
)

strategy = DirectStrategy(
    pipeline=pipeline,
    trainer=trainer,
    horizon=horizon,
    history=history,
)

### Save checkpoint

In [13]:
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497


Epoch 1/10, cost time: 2.05s
train loss: 403.3343
Validation, Loss: 167.4490, Metric: -167.4490
val loss: 167.4490, val metric: -167.4490
Last epoch model saved to checkpoints/trainer_0/fold_0/model_0.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_0/opt_0.pth
Best model snapshot saved to checkpoints/trainer_0/fold_0/model_0.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 2/10, cost time: 2.05s
train loss: 64.2173
Validation, Loss: 47.8190, Metric: -47.8190
val loss: 47.8190, val metric: -47.8190
Last epoch model saved to checkpoints/trainer_0/fold_0/model_1.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_0/opt_1.pth
Best model snapshot saved to checkpoints/trainer_0/fold_0/model_1.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 3/10, cost time: 2.07s
train loss: 16.2793
Validation, Loss: 8.0289, Metric: -8.0289
val loss: 8.0289, val metric: -8.0289
Last epoch model saved

(540.7743926048279,
 <tsururu.strategies.direct.DirectStrategy at 0x7fa3e59047c0>)

### Load checkpoint for finetune

Once we have the saved checkpoints, we can continue training by passing the pretrained path and another checkpoint path to the trainer’s parameters. All other parameters remain the same.

In [14]:
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    # Let's enable save_to_dir (by the way, default value is True)
    "save_to_dir": True,
    "pretrained_path": "checkpoints/",
    "checkpoint_path": "checkpoints_finetuned/",
    # Save checkpoints for 3 best model
    "save_k_best": 3,
    # Average checkpoints for the final model
    "average_snapshots": True,
}

trainer = DLTrainer(
    model, 
    model_params, 
    validation, 
    validation_params, 
    **trainer_params
)

strategy = DirectStrategy(
    pipeline=pipeline,
    trainer=trainer,
    horizon=horizon,
    history=history,
)

In [15]:
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497


Epoch 1/10, cost time: 2.10s
train loss: 2.5178
Validation, Loss: 2.5265, Metric: -2.5265
val loss: 2.5265, val metric: -2.5265
Last epoch model saved to checkpoints_finetuned/trainer_0/fold_0/model_0.pth
Last epoch optimizer saved to checkpoints_finetuned/trainer_0/fold_0/opt_0.pth
Best model snapshot saved to checkpoints_finetuned/trainer_0/fold_0/model_0.pth
Checkpoint manager saved to checkpoints_finetuned/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 2/10, cost time: 2.10s
train loss: 2.5056
Validation, Loss: 2.5124, Metric: -2.5124
val loss: 2.5124, val metric: -2.5124
Last epoch model saved to checkpoints_finetuned/trainer_0/fold_0/model_1.pth
Last epoch optimizer saved to checkpoints_finetuned/trainer_0/fold_0/opt_1.pth
Best model snapshot saved to checkpoints_finetuned/trainer_0/fold_0/model_1.pth
Checkpoint manager saved to checkpoints_finetuned/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 3/10, cost time: 2.09s
train loss: 2.4927
Validation, Loss: 2.5028, Metric: -2.5

(542.532794713974,
 <tsururu.strategies.direct.DirectStrategy at 0x7fa3e5906560>)

### Load checkpoint for inference

In [20]:
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "n_epochs": 1,
    "pretrained_path": "checkpoints_finetuned/",
    # Average checkpoints for the final model
    "average_snapshots": True,
}

trainer = DLTrainer(
    model, 
    model_params, 
    validation, 
    validation_params, 
    **trainer_params
)

strategy = DirectStrategy(
    pipeline=pipeline,
    trainer=trainer,
    horizon=horizon,
    history=history,
)

In [21]:
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497


Epoch 1/1, cost time: 2.15s
train loss: 2.3693
Validation, Loss: 2.3725, Metric: -2.3725
val loss: 2.3725, val metric: -2.3725
Last epoch model saved to checkpoints/trainer_0/fold_0/model_0.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_0/opt_0.pth
Best model snapshot saved to checkpoints/trainer_0/fold_0/model_0.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_0/es_checkpoint_manager.pth
Training finished.
Fold 0. Score: -2.3724923133850098
length of train dataset: 497
length of val dataset: 496
Epoch 1/1, cost time: 2.09s
train loss: 3.4084
Validation, Loss: 3.3751, Metric: -3.3751
val loss: 3.3751, val metric: -3.3751
Last epoch model saved to checkpoints/trainer_0/fold_1/model_0.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_1/opt_0.pth
Best model snapshot saved to checkpoints/trainer_0/fold_1/model_0.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_1/es_checkpoint_manager.pth
Training finished.
Fold 1. Score: -3.37510347366333
Mean s

(54.834699869155884,
 <tsururu.strategies.direct.DirectStrategy at 0x7fa3d861fc40>)

In [18]:
forecast_time, current_pred = strategy.predict(dataset)

freq: Day; period: 1
length of test dataset: 1


length of test dataset: 1
length of test dataset: 1
length of test dataset: 1
length of test dataset: 1
length of test dataset: 1
length of test dataset: 1


In [19]:
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1997.147827
1,0,2022-09-28,1997.578491
2,0,2022-09-29,1997.824463
3,0,2022-09-30,1998.122559
4,0,2022-10-01,1998.426758
...,...,...,...
65,9,2022-09-29,11004.701172
66,9,2022-09-30,11006.19043
67,9,2022-10-01,11007.738281
68,9,2022-10-02,11009.234375
