In [38]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
c:\Users\Joaquín Amat\Documents\GitHub\skforecast


In [39]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor

from skforecast.datasets import fetch_dataset
from skforecast.preprocessing import RollingFeatures
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.model_selection import grid_search_forecaster_multiseries
from skforecast.model_selection import bayesian_search_forecaster_multiseries

In [40]:
# Data download
# ==============================================================================
# Load the "items_sales" dataset. According to the description printed by the
# loader, it contains simulated sales series for 3 items, shape (1097, 3).
data = fetch_dataset(name="items_sales")
# Preview the first rows to check the date index and the item columns.
data.head()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,8.253175,21.047727,19.429739
2012-01-02,22.777826,26.578125,28.009863
2012-01-03,27.549099,31.751042,32.078922
2012-01-04,25.895533,24.567708,27.252276
2012-01-05,21.379238,18.191667,20.357737


In [41]:
# Split data into train-test
# ==============================================================================
# Everything up to `end_train` goes to the training partition and everything
# after it to the test partition. The cut-off is placed at 23:59 so that, with
# the daily timestamps shown in the data preview, no row falls in both slices.
end_train = '2012-07-15 23:59:00'
data_train = data.loc[:end_train].copy()
data_test  = data.loc[end_train:].copy()

# Report the date range and the number of observations of each partition.
print(
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   (n={len(data_train)})\n"
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   (n={len(data_test)})"
)

Train dates : 2012-01-01 00:00:00 --- 2012-07-15 00:00:00   (n=197)
Test dates  : 2012-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=900)


In [42]:
# Create and train ForecasterRecursiveMultiSeries
# ==============================================================================
# One LightGBM regressor (fixed random_state, silenced logging) is configured
# with 24 lags plus rolling means over windows of 24 and 48 observations, and
# 'ordinal' encoding. Every other option is passed explicitly as None / False
# (presumably the library defaults — TODO confirm) to make the configuration
# visible in one place.
forecaster = ForecasterRecursiveMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, verbose=-1),
                 lags               = 24,
                 window_features    = RollingFeatures(stats=['mean', 'mean'], window_sizes=[24, 48]),
                 encoding           = 'ordinal',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

# Fit on the training partition only; the last expression displays the fitted
# forecaster's representation.
forecaster.fit(series=data_train)
forecaster

In [43]:
# Sum of the in-sample residuals stored under each key of the fitted forecaster
{
    level: np.sum(level_residuals)
    for level, level_residuals in forecaster.in_sample_residuals_.items()
}

{'item_1': -0.013414809009656636,
 'item_2': 2.825167102096384,
 'item_3': -2.811744892477858,
 '_unknown_level': 7.400608868834979e-06}

In [44]:
# Number of in-sample residuals stored under each key of the fitted forecaster
{
    level: len(level_residuals)
    for level, level_residuals in forecaster.in_sample_residuals_.items()
}

{'item_1': 149, 'item_2': 149, 'item_3': 149, '_unknown_level': 447}

In [45]:
# Sum of the in-sample residuals per key.
# NOTE(review): same check as an earlier cell, with identical recorded output —
# presumably a re-run to confirm the residuals did not change; consider removing.
{
    level: np.sum(level_residuals)
    for level, level_residuals in forecaster.in_sample_residuals_.items()
}


{'item_1': -0.013414809009656636,
 'item_2': 2.825167102096384,
 'item_3': -2.811744892477858,
 '_unknown_level': 7.400608868834979e-06}

In [46]:
# Number of in-sample residuals per key.
# NOTE(review): same check as an earlier cell, with identical recorded output —
# presumably a re-run to confirm the residuals did not change; consider removing.
{
    level: len(level_residuals)
    for level, level_residuals in forecaster.in_sample_residuals_.items()
}

{'item_1': 149, 'item_2': 149, 'item_3': 149, '_unknown_level': 447}

In [47]:
# Inspect the out-of-sample residuals grouped by bin.
# NOTE(review): no output was recorded for this cell, so the attribute was
# presumably still None when it was executed — TODO confirm.
forecaster.out_sample_residuals_by_bin_

In [50]:
# Inspect the stored out-of-sample residuals (a dict with one array per series).
# NOTE(review): the recorded execution count of this cell (50) is later than
# that of the `set_out_sample_residuals` cell placed after it (49), so on a
# fresh top-to-bottom run this cell executes before the residuals are set.
# The all-zero arrays in the recorded output come from passing the same data as
# both y_true and y_pred in that call.
forecaster.out_sample_residuals_

{'item_1': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'item_2': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.

In [49]:
# Store out-of-sample residuals from per-series dicts of true and predicted values.
# NOTE(review): y_true and y_pred are both built from the training data, which
# is why the recorded residuals are all zero — presumably a placeholder to
# exercise the API. Pass real out-of-sample predictions as y_pred before relying
# on these residuals for prediction intervals.
forecaster.set_out_sample_residuals(
    y_true=data_train.to_dict(orient="series"),
    y_pred=data_train.to_dict(orient="series")
)

TODO: work in progress — resume here ("aquí").


In [10]:
# Predictions and prediction intervals
# ==============================================================================
# Forecast horizon shared by the prediction cells.
steps = 24

# Point forecasts for item_1 over the next `steps` steps; show the first 3 rows.
predictions_item_1 = forecaster.predict(levels='item_1', steps=steps)
display(predictions_item_1.iloc[:3])

Unnamed: 0,level,pred
2012-07-16,item_1,23.61103
2012-07-17,item_1,24.752059
2012-07-18,item_1,25.45848


In [11]:
# Interval predictions for item_1 and item_2
# Requests intervals for two of the series over `steps` steps with n_boot=50
# (presumably the number of bootstrap iterations — confirm in the library docs).
# NOTE(review): the recorded run of this cell failed with
#   TypeError: ForecasterRecursiveMultiSeries._create_predict_inputs() got an
#   unexpected keyword argument 'use_binned_residuals'
# which is raised inside the library, not by the arguments passed here —
# presumably the development copy on sys.path was mid-refactor; re-run once the
# library code is consistent.
predictions_intervals = forecaster.predict_interval(
    steps  = steps,
    levels = ['item_1', 'item_2'],
    n_boot = 50
)
predictions_intervals

TypeError: ForecasterRecursiveMultiSeries._create_predict_inputs() got an unexpected keyword argument 'use_binned_residuals'

In [10]:
# Number of in-sample residuals stored under each key, in dict order.
# NOTE(review): the recorded counts differ from the per-key lengths shown by the
# earlier cells, so this output presumably comes from a different fit — re-run.
list(map(len, forecaster.in_sample_residuals_.values()))

[879, 879, 879, 1000]

In [None]:
# Inspect the in-sample residuals grouped by bin.
# NOTE(review): the recorded run raised AttributeError because the forecaster
# object had no `in_sample_residuals_by_bin_` attribute — presumably it is not
# implemented yet in the library version on sys.path; TODO confirm.
forecaster.in_sample_residuals_by_bin_

AttributeError: 'ForecasterRecursiveMultiSeries' object has no attribute 'in_sample_residuals_by_bin_'