In [13]:
import warnings
# Install a blanket "ignore" filter so no Python warnings are shown from here on
# (presumably to keep the cell outputs uncluttered). NOTE(review): this also hides
# deprecation warnings from the libraries used below.
warnings.filterwarnings("ignore")

import pandas as pd

from tsururu.strategies import RecursiveStrategy, DirectStrategy, MIMOStrategy, FlatWideMIMOStrategy, DirRecStrategy
from tsururu.transformers import (
    StandardScalerTransformer, LagTransformer, DateSeasonsGenerator,
    UnionTransformer, SequentialTransformer, TargetGenerator, DifferenceNormalizer, LastKnownNormalizer
)
from tsururu.models import CatBoostRegressor_CV
from tsururu.dataset import TSDataset, IndexSlicer
from tsururu.dataset import Pipeline

In [14]:
# Input CSV, given as a relative path.
df_path = "datasets/global/simulated_data_to_check.csv"

# Window settings; passed positionally to the strategy constructor in a later cell.
HORIZON, HISTORY, STEP = 3, 7, 1

# Column roles handed to TSDataset as `columns_params`.
# NOTE(review): "continious" is reproduced exactly as written — it is a runtime
# value read by the library, so confirm the expected spelling before changing it.
dataset_params = {
    "target": {"columns": ["value"], "type": "continious"},
    "date": {"columns": ["date"], "type": "datetime"},
    "id": {"columns": ["id"], "type": "categorical"},
}

# Parameters forwarded to the model wrapper (second constructor argument).
model_params = {"loss_function": "MultiRMSE", "early_stopping_rounds": 100, "verbose": 500}

# Validation scheme forwarded to the model wrapper (first constructor argument).
validation_params = {"type": "KFold", "n_splits": 3}

In [15]:
# Read the CSV at `df_path` and wrap the frame in a TSDataset, using
# `dataset_params` to describe the target, date and id columns.
dataset = TSDataset(data=pd.read_csv(df_path), columns_params=dataset_params)

freq: Day; period: 1


In [16]:
# Instantiate CatBoostRegressor_CV from the two config dicts: validation settings
# first, CatBoost parameters second (both passed positionally).
model = CatBoostRegressor_CV(validation_params, model_params)

In [17]:
# Create an IndexSlicer with default arguments.
# NOTE(review): not referenced by any of the visible cells — confirm it is used
# further down, otherwise this cell can be dropped.
index_slicer = IndexSlicer()

__Руками создаем пайплайн__

In [18]:
# Building blocks for the pipeline. Each transformer is only instantiated here;
# the following cell decides which ones are actually wired together.

# Scalers / normalizers, all configured to act on both features and target.
# NOTE(review): `standard_scaler` and `difference_normalizer` are not used by the
# composition in the next cell — presumably kept as alternatives to `lkn`.
standard_scaler = StandardScalerTransformer(transform_features=True, transform_target=True)
difference_normalizer = DifferenceNormalizer(
    regime="delta", transform_features=True, transform_target=True
)
lkn = LastKnownNormalizer(
    regime="delta", transform_features=True, transform_target=True
)

# Two separate lag transformers: `lag` is applied in the "value" branch and
# `date_lag` in the "date" branch of the pipeline.
lag = LagTransformer(lags=3)
date_lag = LagTransformer(lags=2)

target_generator = TargetGenerator()

# Calendar features; the codes presumably mean day-of-year, month and weekday
# — TODO confirm against the library documentation.
date_seasons = DateSeasonsGenerator(seasonalities=["doy", "m", "wd"], from_target_date=True)

In [19]:
# "value" branch: a union of the lag transformer and the target generator,
# followed in sequence by the last-known normalizer.
union_1 = UnionTransformer(transformers_list=[lag, target_generator])
seq_1 = SequentialTransformer(
    transformers_list=[union_1, lkn],
    input_features=["value"],
)

# "date" branch: seasonal date features followed in sequence by their own lag transformer.
seq_2 = SequentialTransformer(
    transformers_list=[date_seasons, date_lag],
    input_features=["date"],
)

# Top-level transformer: the union of both branches.
union = UnionTransformer(transformers_list=[seq_1, seq_2])

In [20]:
# Wrap the top-level union transformer in a Pipeline object for the strategy.
pipeline = Pipeline(union)

In [21]:
# Build the MIMO strategy from the window constants (HORIZON, HISTORY, STEP, in
# that positional order) together with the model and pipeline created above.
strategy = MIMOStrategy(HORIZON, HISTORY, STEP, model, pipeline)

In [22]:
# Fit the strategy on the dataset. The call returns a pair: the first item is
# kept as `fit_time`, the second is discarded.
# NOTE(review): this rebinds `_`, which IPython also uses for the last cell
# output; a named throwaway variable would avoid the clash.
fit_time, _ = strategy.fit(dataset)

0:	learn: 1.9169574	test: 1.9213508	best: 1.9213508 (0)	total: 1.99ms	remaining: 1.99s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 1.38540353
bestIteration = 244

Shrink model to first 245 iterations.
Fold 0:
MultiRMSE: 1.3854035297493579
0:	learn: 1.9478725	test: 1.8579387	best: 1.8579387 (0)	total: 1.7ms	remaining: 1.7s
500:	learn: 1.2300783	test: 1.2814549	best: 1.2813110 (490)	total: 1.08s	remaining: 1.08s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 1.28098747
bestIteration = 539

Shrink model to first 540 iterations.
Fold 1:
MultiRMSE: 1.2809874696268464
0:	learn: 1.8899345	test: 1.9745784	best: 1.9745784 (0)	total: 1.75ms	remaining: 1.75s
500:	learn: 1.2287907	test: 1.4148157	best: 1.4146086 (491)	total: 1.09s	remaining: 1.08s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 1.413182478
bestIteration = 559

Shrink model to first 560 iterations.
Fold 2:
MultiRMSE: 1.4131824781422277
Mean MultiRMSE: 1.3599
Std: 0.0569


In [23]:
# Produce forecasts with the fitted strategy for the same dataset object; the
# call returns a pair unpacked into `forecast_time` and `current_pred`.
forecast_time, current_pred = strategy.predict(dataset)

freq: Day; period: 1


In [24]:
# Show the forecast table (the rendered output has `id`, `date` and `value` columns).
current_pred

Unnamed: 0,id,date,value
0,0,2018-01-01,-0.545811
1,0,2018-01-02,-0.391379
2,0,2018-01-03,-0.502318
3,1,2018-01-01,-2.198915
4,1,2018-01-02,-1.789257
5,1,2018-01-03,-1.777337
6,2,2018-01-01,-1.57482
7,2,2018-01-02,-1.383325
8,2,2018-01-03,-1.458117
9,3,2018-01-01,-3.249785
