In [1]:
import warnings

warnings.filterwarnings("ignore")

from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import pandas as pd

from tsururu.dataset import Pipeline, TSDataset
from tsururu.model_training.trainer import DLTrainer
from tsururu.model_training.validator import KFoldCrossValidator
from tsururu.models.torch_based.dlinear import DLinear_NN
from tsururu.strategies import DirectStrategy, RecursiveStrategy
from tsururu.transformers import (
    LagTransformer,
    SequentialTransformer,
    TargetGenerator,
    UnionTransformer,
)

In [2]:
def get_results(
    cv: int,
    regime: str,
    y_true: Optional[List[np.ndarray]] = None,
    y_pred: Optional[List[np.ndarray]] = None,
    ids: Optional[List[Union[float, str]]] = None,
) -> pd.DataFrame:
    """Collect per-fold targets, predictions and ids into one wide DataFrame.

    For every fold ``k`` in ``1..cv`` the frame gets the columns ``y_true_k``
    and ``y_pred_k`` and, unless ``regime == "local"``, also ``id_k``.

    Args:
        cv: number of folds to collect; each provided list is indexed
            ``0..cv-1``.
        regime: when equal to ``"local"`` the ``id_k`` columns are omitted.
        y_true: per-fold true values (arrays are flattened to 1-D; a scalar is
            repeated on every row), or ``None``.
        y_pred: per-fold predictions, same conventions as ``y_true``.
        ids: per-fold identifiers, same conventions as ``y_true``.

    Returns:
        A DataFrame whose columns follow the order
        ``y_true_1, y_pred_1, [id_1], y_true_2, ...``. Inputs passed as
        ``None`` yield columns filled with ``None``.

    Raises:
        TypeError: if a fold entry is neither a NumPy array nor a scalar.
        IndexError: if a provided list has fewer than ``cv`` entries.
        ValueError: raised by pandas if array columns differ in length.
    """

    def _get_fold_value(
        value: Optional[List[Union[float, str, np.ndarray]]], idx: int
    ) -> Optional[Union[float, str, np.ndarray]]:
        """Return fold ``idx`` of ``value``: arrays flattened, scalars as is."""
        if value is None:
            # Missing input: the column is filled with None later on.
            return None
        fold_value = value[idx]
        if isinstance(fold_value, np.ndarray):
            return fold_value.reshape(-1)
        # np.isscalar accepts Python and NumPy numbers as well as strings, so
        # str / int ids allowed by the signature no longer raise.
        if np.isscalar(fold_value):
            return fold_value
        raise TypeError(
            f"Unexpected value type. Value: {fold_value!r} "
            f"(type: {type(fold_value).__name__})"
        )

    df_res_dict = {}
    for idx_fold in range(cv):
        fold_no = idx_fold + 1
        for name, value in [("y_true", y_true), ("y_pred", y_pred)]:
            df_res_dict[f"{name}_{fold_no}"] = _get_fold_value(value, idx_fold)
        if regime != "local":
            df_res_dict[f"id_{fold_no}"] = _get_fold_value(ids, idx_fold)

    # Scalars and None are broadcast explicitly to the longest array column
    # (one row when there are no arrays). Leaving this to pandas fails when
    # every column is a scalar, and a length-1 [None] placeholder fails as soon
    # as the arrays have more than one row.
    array_lengths = [
        len(column)
        for column in df_res_dict.values()
        if isinstance(column, np.ndarray)
    ]
    n_rows = max(array_lengths, default=1)
    aligned = {
        name: column if isinstance(column, np.ndarray) else [column] * n_rows
        for name, column in df_res_dict.items()
    }

    # Assemble the collected columns into a single frame.
    return pd.DataFrame(aligned)

## Initialize TSDataset, Pipeline, Model, Validator, Strategy

The initialization of the main components is exactly the same as when using ML models. The only difference is that `DLTrainer` allows you to pass many more parameters compared to `MLTrainer`.

### TSDataset

In [3]:
# CSV file that the next cell reads into a TSDataset.
df_path = Path("datasets/global/simulated_data_to_check.csv")

# Column roles passed to TSDataset as `columns_params`: each role maps to
# {"columns": [<column name>], "type": <declared type>}.
# NOTE(review): the "continious" spelling is kept verbatim — presumably it is
# the literal TSDataset expects; confirm before "correcting" it.
dataset_params = {
    role: {"columns": [column], "type": kind}
    for role, column, kind in (
        ("target", "value", "continious"),
        ("date", "date", "datetime"),
        ("id", "id", "categorical"),
    )
}

In [4]:
# Load the raw CSV, then wrap it in a TSDataset using the column roles defined
# above. With print_freq_period_info=True the constructor prints the
# "freq: ...; period: ..." line shown in the output.
raw_frame = pd.read_csv(df_path)
dataset = TSDataset(
    data=raw_frame, columns_params=dataset_params, print_freq_period_info=True
)

freq: Day; period: 1


### Pipeline

In [5]:
# Lag features; `lags=7` presumably means 7 lagged steps, matching the
# `history = 7` used for the strategies — confirm against LagTransformer docs.
lag = LagTransformer(lags=7)
# Target builder, created with its default settings.
target_generator = TargetGenerator()

# Nesting, innermost first: a union of the lag and target transformers, wrapped
# in a sequential transformer applied with input_features=["value"], wrapped in
# the top-level union that is handed to Pipeline.
union_1 = UnionTransformer(transformers_list=[lag, target_generator])
seq_1 = SequentialTransformer(transformers_list=[union_1], input_features=["value"])
union = UnionTransformer(transformers_list=[seq_1])

# NOTE(review): the effect of multivariate=True is not visible from this
# notebook — confirm against the Pipeline documentation.
pipeline = Pipeline(union, multivariate=True)

### Trainer

Currently, the available architectures are `DLinear` and `PatchTST`. However, adding your own architecture is quite simple if you follow the logic of the base model class.

In [6]:
# Model: the network class itself (not an instance) and its keyword arguments.
model = DLinear_NN  # DLinear_NN or PatchTST
model_params = {"moving_avg": 7, "individual": False, "enc_in": None}

# Validation: the validator class and its settings (2 splits).
validation = KFoldCrossValidator
validation_params = {"n_splits": 2}

# Trainer options; checkpoints are not written to disk in this first run.
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    "save_to_dir": False,
}

trainer = DLTrainer(
    model, model_params, validation, validation_params, **trainer_params
)

### Strategy

In [7]:
# Window sizes shared by the strategies built in the following cells.
history = 7  # passed as `history=` to the strategies
horizon = 7  # passed as `horizon=` to the strategies
# NOTE(review): `model_horizon` is not passed to any strategy in the cells
# visible here — confirm whether it should be passed or can be dropped.
model_horizon = 7

In [8]:
# Recursive forecasting strategy assembled from the pipeline, trainer and
# window sizes defined in the previous cells.
strategy = RecursiveStrategy(
    horizon=horizon, history=history, trainer=trainer, pipeline=pipeline
)

In [9]:
# Train the recursive strategy. The call is the cell's last expression, so its
# return value is displayed: a (float, strategy) tuple, where the float is
# presumably the elapsed fit time in seconds — confirm.
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497
Epoch 1/10, cost time: 0.69s
train loss: 414.3740
Validation, Loss: 194.2519, Metric: -194.2519
val loss: 194.2519
Epoch 2/10, cost time: 0.66s
train loss: 66.8766
Validation, Loss: 54.1316, Metric: -54.1316
val loss: 54.1316
Epoch 3/10, cost time: 0.58s
train loss: 17.5994
Validation, Loss: 11.6868, Metric: -11.6868
val loss: 11.6868
Epoch 4/10, cost time: 0.57s
train loss: 5.7403
Validation, Loss: 2.8006, Metric: -2.8006
val loss: 2.8006
Epoch 5/10, cost time: 0.95s
train loss: 3.3647
Validation, Loss: 3.2068, Metric: -3.2068
val loss: 3.2068
Early stopping counter: 1
Epoch 6/10, cost time: 0.66s
train loss: 2.8860
Validation, Loss: 2.8542, Metric: -2.8542
val loss: 2.8542
Removing worst model snapshot: from epoch 0
Early stopping counter: 2
Epoch 7/10, cost time: 0.71s
train loss: 2.7676
Validation, Loss: 2.7613, Metric: -2.7613
val loss: 2.7613
Removing worst model snapshot: from epoch 1
Epoch 8/10, cost time: 0.74s
train loss

(25.945719003677368,
 <tsururu.strategies.recursive.RecursiveStrategy at 0x308757280>)

In [10]:
# Forecast with the fitted strategy; `predict` returns two values, unpacked
# here as `forecast_time` and `current_pred`. The "Loss: nan, Metric: nan"
# lines in the log are printed during prediction.
forecast_time, current_pred = strategy.predict(dataset)

length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan


In [11]:
# Display the forecast frame (columns: id, date, value).
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1997.017334
1,0,2022-09-28,1997.603149
2,0,2022-09-29,1998.090576
3,0,2022-09-30,1998.494995
4,0,2022-10-01,1998.843994
...,...,...,...
65,9,2022-09-29,11004.65918
66,9,2022-09-30,11006.064453
67,9,2022-10-01,11007.509766
68,9,2022-10-02,11008.994141


Saving and loading checkpoints is an essential practice in training DL models. 

Let's explore how to save checkpoints to disk, what structure the saved files have, and how to restore the model from a checkpoint for either fine-tuning or inference.

## Save and load checkpoints

Let’s consider working with checkpoints using the Direct strategy as an example.

In [12]:
# Trainer options for a run that writes checkpoints to disk.
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    "save_to_dir": True,  # enable saving (True is also the default value)
    "checkpoint_path": "checkpoints/",
    "save_k_best": 3,  # save checkpoints for the 3 best models
    "average_snapshots": True,  # average checkpoints for the final model
}

trainer = DLTrainer(
    model, model_params, validation, validation_params, **trainer_params
)

# Direct strategy built on the same pipeline and window sizes as before.
strategy = DirectStrategy(
    horizon=horizon, history=history, trainer=trainer, pipeline=pipeline
)

### Save checkpoint

In [13]:
# Train the Direct strategy with saving enabled; the log reports the model,
# optimizer and checkpoint-manager files written under
# checkpoints/trainer_0/fold_0/.
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497
Epoch 1/10, cost time: 0.64s
train loss: 408.1764
Validation, Loss: 177.2360, Metric: -177.2360
val loss: 177.2360
Last epoch model saved to checkpoints/trainer_0/fold_0/model_0.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_0/opt_0.pth
Best model snapshot saved to checkpoints/trainer_0/fold_0/model_0.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 2/10, cost time: 0.69s
train loss: 65.3214
Validation, Loss: 51.6638, Metric: -51.6638
val loss: 51.6638
Last epoch model saved to checkpoints/trainer_0/fold_0/model_1.pth
Last epoch optimizer saved to checkpoints/trainer_0/fold_0/opt_1.pth
Best model snapshot saved to checkpoints/trainer_0/fold_0/model_1.pth
Checkpoint manager saved to checkpoints/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 3/10, cost time: 0.65s
train loss: 16.8678
Validation, Loss: 9.6748, Metric: -9.6748
val loss: 9.6748
Last epoch model saved to checkp

(179.15967321395874, <tsururu.strategies.direct.DirectStrategy at 0x31abfff70>)

### Load checkpoint for finetune

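Once we have the saved checkpoints, we can continue training from them: pass `pretrained_path` (the directory to load the saved checkpoints from) and a new `checkpoint_path` (the directory where the fine-tuned checkpoints will be written) in the trainer's parameters. All other parameters remain the same.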

In [14]:
# Trainer options for fine-tuning: load from the earlier checkpoints and write
# the new ones to a separate directory.
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "best_by_metric": True,
    "save_to_dir": True,  # enable saving (True is also the default value)
    "pretrained_path": "checkpoints/",
    "checkpoint_path": "checkpoints_finetuned/",
    "save_k_best": 3,  # save checkpoints for the 3 best models
    "average_snapshots": True,  # average checkpoints for the final model
}

trainer = DLTrainer(
    model, model_params, validation, validation_params, **trainer_params
)

# Direct strategy built on the same pipeline and window sizes as before.
strategy = DirectStrategy(
    horizon=horizon, history=history, trainer=trainer, pipeline=pipeline
)

In [15]:
# Continue training from the saved checkpoints. In the log the first-epoch
# train loss is 2.5679, versus 408.1764 when training from scratch, and new
# files are written under checkpoints_finetuned/.
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497
Epoch 1/10, cost time: 0.60s
train loss: 2.5679
Validation, Loss: 2.5761, Metric: -2.5761
val loss: 2.5761
Last epoch model saved to checkpoints_finetuned/trainer_0/fold_0/model_0.pth
Last epoch optimizer saved to checkpoints_finetuned/trainer_0/fold_0/opt_0.pth
Best model snapshot saved to checkpoints_finetuned/trainer_0/fold_0/model_0.pth
Checkpoint manager saved to checkpoints_finetuned/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 2/10, cost time: 0.59s
train loss: 2.5570
Validation, Loss: 2.5764, Metric: -2.5764
val loss: 2.5764
Last epoch model saved to checkpoints_finetuned/trainer_0/fold_0/model_1.pth
Last epoch optimizer saved to checkpoints_finetuned/trainer_0/fold_0/opt_1.pth
Best model snapshot saved to checkpoints_finetuned/trainer_0/fold_0/model_1.pth
Early stopping counter: 1
Checkpoint manager saved to checkpoints_finetuned/trainer_0/fold_0/es_checkpoint_manager.pth
Epoch 3/10, cost time: 0.74s
train loss: 2.546

(185.71449208259583, <tsururu.strategies.direct.DirectStrategy at 0x31abfed10>)

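### Load checkpoint for inference

To reuse a checkpoint for inference only, pass its location as `pretrained_path` and set `n_epochs` to 0, so that `fit` restores the model without running any training epochs.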

In [16]:
# Trainer options for inference only: zero training epochs, weights taken from
# the fine-tuned checkpoints.
trainer_params = {
    "device": "cpu",
    "num_workers": 0,
    "n_epochs": 0,
    "pretrained_path": "checkpoints_finetuned/",
    "average_snapshots": True,  # average checkpoints for the final model
}

trainer = DLTrainer(
    model, model_params, validation, validation_params, **trainer_params
)

# Direct strategy built on the same pipeline and window sizes as before.
strategy = DirectStrategy(
    horizon=horizon, history=history, trainer=trainer, pipeline=pipeline
)

In [17]:
# `fit` is still called before `predict`, but with n_epochs=0 the log shows
# "Training finished." for every fold without any epoch lines, and the fold
# scores print as nan.
strategy.fit(dataset)

length of train dataset: 496
length of val dataset: 497
Training finished.
Fold 0. Score: nan
length of train dataset: 497
length of val dataset: 496
Training finished.
Fold 1. Score: nan
Mean score: nan
Std: nan
length of train dataset: 496
length of val dataset: 496
Training finished.
Fold 0. Score: nan
length of train dataset: 496
length of val dataset: 496
Training finished.
Fold 1. Score: nan
Mean score: nan
Std: nan
length of train dataset: 495
length of val dataset: 496
Training finished.
Fold 0. Score: nan
length of train dataset: 496
length of val dataset: 495
Training finished.
Fold 1. Score: nan
Mean score: nan
Std: nan
length of train dataset: 495
length of val dataset: 495
Training finished.
Fold 0. Score: nan
length of train dataset: 495
length of val dataset: 495
Training finished.
Fold 1. Score: nan
Mean score: nan
Std: nan
length of train dataset: 494
length of val dataset: 495
Training finished.
Fold 0. Score: nan
length of train dataset: 495
length of val dataset: 49

(0.10583090782165527,
 <tsururu.strategies.direct.DirectStrategy at 0x31e9833a0>)

In [18]:
# Forecast with the strategy restored from the fine-tuned checkpoints;
# `predict` returns two values, unpacked as `forecast_time` and `current_pred`.
forecast_time, current_pred = strategy.predict(dataset)

length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan
length of test dataset: 1
Validation, Loss: nan, Metric: nan
Validation, Loss: nan, Metric: nan


In [19]:
# Display the forecast frame produced from the restored checkpoints
# (columns: id, date, value).
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1997.192505
1,0,2022-09-28,1997.647217
2,0,2022-09-29,1997.732788
3,0,2022-09-30,1998.238525
4,0,2022-10-01,1998.347656
...,...,...,...
65,9,2022-09-29,11004.697266
66,9,2022-09-30,11005.898438
67,9,2022-10-01,11007.662109
68,9,2022-10-02,11009.277344
