In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

from tsururu.strategies import RecursiveStrategy, DirectStrategy, MIMOStrategy, FlatWideMIMOStrategy
from tsururu.transformers import (
    StandardScalerTransformer, LagTransformer, DateSeasonsGenerator,
    UnionTransformer, SequentialTransformer, TargetGenerator, DifferenceNormalizer, LastKnownNormalizer
)
from tsururu.models import CatBoostRegressor_CV
from tsururu.dataset import TSDataset, IndexSlicer
from tsururu.dataset import Pipeline

In [2]:
# NOTE(review): IndexSlicer is already imported in the first cell (together with
# TSDataset), so this cell is redundant on a fresh Restart & Run All.
from tsururu.dataset import IndexSlicer

In [3]:
# Location of the input CSV, relative to the notebook's working directory.
df_path = "datasets/global/simulated_data_to_check.csv"

# Windowing constants; they are passed positionally to the strategy constructor
# in a later cell, in this order.
HORIZON, HISTORY, STEP = 3, 7, 1

# Column roles handed to TSDataset: every role maps to its source column(s) and
# a type tag. The "continious" spelling is reproduced verbatim because it is a
# runtime value — presumably the tag the library expects; verify before changing.
dataset_params = {
    role: {"columns": [column], "type": kind}
    for role, column, kind in (
        ("target", "value", "continious"),
        ("date", "date", "datetime"),
        ("id", "id", "categorical"),
    )
}

# Configure the model parameters
model_params = dict(
    loss_function="MultiRMSE",
    early_stopping_rounds=100,
    verbose=500,
)

# Configure the validation parameters
validation_params = dict(type="KFold", n_splits=2)

In [4]:
# Load the raw CSV first, then wrap it together with the column-role mapping
# from the configuration cell.
raw_frame = pd.read_csv(df_path)
dataset = TSDataset(data=raw_frame, columns_params=dataset_params)

freq: Day; period: 1


In [5]:
# Build the estimator from the validation and model parameter dicts defined in
# the configuration cell (validation settings first, model settings second).
model = CatBoostRegressor_CV(validation_params, model_params)

In [6]:
# NOTE(review): index_slicer is not referenced by any later cell shown here —
# confirm it is still needed.
index_slicer = IndexSlicer()

__Руками создаем пайплайн__

In [7]:
# --- Building blocks later applied to the "value" column ---
standard_scaler = StandardScalerTransformer(transform_features=True, transform_target=True)
lag = LagTransformer(lags=3)
target_generator = TargetGenerator()

# NOTE(review): the two normalizers below are constructed but not referenced by
# any later cell shown here — presumably alternatives to standard_scaler; confirm.
difference_normalizer = DifferenceNormalizer(
    regime="delta", transform_features=True, transform_target=True
)
lkn = LastKnownNormalizer(
    regime="delta", transform_features=True, transform_target=True
)

# --- Building blocks later applied to the "date" column ---
date_seasons = DateSeasonsGenerator(seasonalities=["doy", "m", "wd"], from_target_date=True)
date_lag = LagTransformer(lags=3)

# --- Building block later applied to the "id" column ---
id_lag = LagTransformer(lags=1)

In [8]:
# "value" branch: standard_scaler runs first, followed by the union of the lag
# transformer and the target generator.
union_1 = UnionTransformer(transformers_list=[lag, target_generator])
seq_1 = SequentialTransformer(
    transformers_list=[standard_scaler, union_1],
    input_features=["value"],
)

# "date" branch: seasonal date features, followed by their lag transformer.
seq_2 = SequentialTransformer(
    transformers_list=[date_seasons, date_lag],
    input_features=["date"],
)

# "id" branch: a single lag transformer.
seq_3 = SequentialTransformer(
    transformers_list=[id_lag],
    input_features=["id"],
)

# Top-level transformer that combines the three per-column branches.
union = UnionTransformer(transformers_list=[seq_1, seq_2, seq_3])

In [9]:
# Wrap the combined transformer in a Pipeline with the multivariate flag enabled.
pipeline = Pipeline(union, multivariate=True)

In [10]:
# Assemble the forecasting strategy from the windowing constants, the estimator
# and the feature pipeline built above.
# NOTE(review): RecursiveStrategy, DirectStrategy and MIMOStrategy are imported
# but not used in the cells shown here.
strategy = FlatWideMIMOStrategy(HORIZON, HISTORY, STEP, model, pipeline)

In [11]:
# Train the strategy on the dataset. fit() returns a pair: the first element is
# kept as fit_time, the second is deliberately discarded.
fit_time, _ = strategy.fit(dataset)

0:	learn: 3.0392306	test: 3.0470113	best: 3.0470113 (0)	total: 158ms	remaining: 2m 37s
500:	learn: 0.0198984	test: 0.0227448	best: 0.0227448 (500)	total: 8.78s	remaining: 8.74s
999:	learn: 0.0147129	test: 0.0185442	best: 0.0185436 (998)	total: 17.2s	remaining: 0us

bestTest = 0.01854356543
bestIteration = 998

Shrink model to first 999 iterations.
Fold 0:
MultiRMSE: 0.018543565425972918
0:	learn: 3.0464362	test: 3.0393966	best: 3.0393966 (0)	total: 17.8ms	remaining: 17.7s
500:	learn: 0.0205435	test: 0.0221623	best: 0.0221623 (500)	total: 8.55s	remaining: 8.52s
999:	learn: 0.0149471	test: 0.0180082	best: 0.0180082 (999)	total: 17.1s	remaining: 0us

bestTest = 0.01800821735
bestIteration = 999

Fold 1:
MultiRMSE: 0.018008217352635712
Mean MultiRMSE: 0.0183
Std: 0.0003


In [12]:
# Produce forecasts with the fitted strategy. predict() returns a pair: the
# first element is kept as forecast_time, the second as the forecast frame.
forecast_time, current_pred = strategy.predict(dataset)

freq: Day; period: 1


In [13]:
# Display the forecast frame returned by strategy.predict.
# NOTE(review): in the saved output, each id that is fully shown has the same
# value on all three forecast dates — confirm this is the expected behavior.
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1991.474161
1,0,2022-09-28,1991.474161
2,0,2022-09-29,1991.474161
3,1,2022-09-27,2991.474161
4,1,2022-09-28,2991.474161
5,1,2022-09-29,2991.474161
6,2,2022-09-27,3991.474161
7,2,2022-09-28,3991.474161
8,2,2022-09-29,3991.474161
9,3,2022-09-27,4991.474161
