In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

from tsururu.strategies import RecursiveStrategy, DirectStrategy, MIMOStrategy, FlatWideMIMOStrategy
from tsururu.transformers import (
    StandardScalerTransformer, LagTransformer, DateSeasonsGenerator,
    UnionTransformer, SequentialTransformer, TargetGenerator, DifferenceNormalizer, LastKnownNormalizer
)
from tsururu.models import CatBoostRegressor_CV
from tsururu.dataset import TSDataset, IndexSlicer
from tsururu.dataset import Pipeline

In [2]:
# Relative path of the CSV file that the dataset is loaded from.
df_path = "datasets/global/simulated_data_to_check.csv"

# Constants handed to the forecasting strategy constructor.
HORIZON, HISTORY, STEP = 3, 7, 1

# Column roles: which columns hold the target, the timestamp and the series id.
# The "continious" spelling is a runtime value and is kept exactly as written.
dataset_params = dict(
    target=dict(columns=["value"], type="continious"),
    date=dict(columns=["date"], type="datetime"),
    id=dict(columns=["id"], type="categorical"),
)

# Settings passed to the model wrapper together with the validation settings.
model_params = dict(
    loss_function="MultiRMSE",
    early_stopping_rounds=100,
    verbose=500,
)

# Validation scheme: KFold with three splits.
validation_params = dict(type="KFold", n_splits=3)

In [3]:
# Read the CSV at df_path into a DataFrame and wrap it in a TSDataset;
# dataset_params tells the dataset which columns are the target, date and id.
dataset = TSDataset(
    data=pd.read_csv(df_path),
    columns_params=dataset_params,
)

freq: Day; period: 1


In [4]:
# Build the CatBoostRegressor_CV model from the validation and model parameter
# dicts defined in the configuration cell.
model = CatBoostRegressor_CV(validation_params, model_params)

In [5]:
# Create an IndexSlicer instance.
# NOTE(review): index_slicer is not referenced by any cell visible here — confirm it is still needed.
index_slicer = IndexSlicer()

__Руками создаем пайплайн__

In [6]:
# Scaling / normalising transformers, each configured to touch both the
# features and the target.
standard_scaler = StandardScalerTransformer(transform_features=True, transform_target=True)

# NOTE(review): difference_normalizer and lkn are not wired into the pipeline
# cell that follows — presumably kept as alternatives to standard_scaler; confirm.
difference_normalizer = DifferenceNormalizer(regime="delta", transform_features=True, transform_target=True)
lkn = LastKnownNormalizer(regime="delta", transform_features=True, transform_target=True)

# Separate lag transformer instances for the value, date and id branches.
lag = LagTransformer(lags=3)
date_lag = LagTransformer(lags=3)
id_lag = LagTransformer(lags=1)

# Target generator, combined with the value lags in the pipeline cell.
target_generator = TargetGenerator()

# Calendar features derived from the date column.
# NOTE(review): "doy", "m", "wd" presumably mean day-of-year, month and weekday — confirm.
date_seasons = DateSeasonsGenerator(seasonalities=["doy", "m", "wd"], from_target_date=True)

In [7]:
# Value branch, inner step: lags and the target generator side by side.
union_1 = UnionTransformer(
    transformers_list=[lag, target_generator],
)

# One sequential chain per input column.
# "value": standard scaling first, then the lag/target union.
seq_1 = SequentialTransformer(
    transformers_list=[standard_scaler, union_1],
    input_features=["value"],
)
# "date": seasonal features first, then lags.
seq_2 = SequentialTransformer(
    transformers_list=[date_seasons, date_lag],
    input_features=["date"],
)
# "id": lags only.
seq_3 = SequentialTransformer(
    transformers_list=[id_lag],
    input_features=["id"],
)

# Top-level transformer: the union of the three per-column chains.
union = UnionTransformer(
    transformers_list=[seq_1, seq_2, seq_3],
)

In [8]:
# Wrap the top-level union transformer in a Pipeline, with multivariate mode switched off.
pipeline = Pipeline(union, multivariate=False)

In [9]:
# Assemble the FlatWideMIMO strategy from the HORIZON / HISTORY / STEP constants,
# the model and the feature pipeline.
strategy = FlatWideMIMOStrategy(HORIZON, HISTORY, STEP, model, pipeline)

In [10]:
# Fit the strategy on the dataset. The call returns two values: the first is
# kept as fit_time, the second is discarded.
# NOTE(review): `_` is also IPython's "last output" variable; binding it here may
# shadow that convenience for the rest of the session — confirm this is acceptable.
fit_time, _ = strategy.fit(dataset)

0:	learn: 0.9602876	test: 0.9632420	best: 0.9632420 (0)	total: 135ms	remaining: 2m 15s
500:	learn: 0.0047635	test: 0.0048658	best: 0.0048658 (500)	total: 994ms	remaining: 990ms
999:	learn: 0.0031012	test: 0.0032143	best: 0.0032143 (999)	total: 1.83s	remaining: 0us

bestTest = 0.003214289746
bestIteration = 999

Fold 0:
MultiRMSE: 0.0032142897457271427
0:	learn: 0.9607328	test: 0.9621985	best: 0.9621985 (0)	total: 2.27ms	remaining: 2.27s
500:	learn: 0.0047387	test: 0.0047191	best: 0.0047191 (500)	total: 821ms	remaining: 818ms
999:	learn: 0.0031225	test: 0.0031351	best: 0.0031351 (999)	total: 1.64s	remaining: 0us

bestTest = 0.003135082972
bestIteration = 999

Fold 1:
MultiRMSE: 0.00313508297163902
0:	learn: 0.9626580	test: 0.9584699	best: 0.9584699 (0)	total: 1.94ms	remaining: 1.94s
500:	learn: 0.0047214	test: 0.0047839	best: 0.0047839 (500)	total: 824ms	remaining: 820ms
999:	learn: 0.0030820	test: 0.0031831	best: 0.0031831 (999)	total: 1.61s	remaining: 0us

bestTest = 0.00318310384
bes

In [11]:
# Run prediction with the fitted strategy on the same dataset object; the call
# returns two values, kept as forecast_time and current_pred.
forecast_time, current_pred = strategy.predict(dataset)

freq: Day; period: 1


In [12]:
# Display the predictions; the rendered table has id, date and value columns.
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,1996.277633
1,0,2022-09-28,1995.581345
2,0,2022-09-29,1996.097516
3,1,2022-09-27,2996.449115
4,1,2022-09-28,2995.752998
5,1,2022-09-29,2996.276784
6,2,2022-09-27,3996.438894
7,2,2022-09-28,3995.739842
8,2,2022-09-29,3996.262592
9,3,2022-09-27,4996.514138
