In [3]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

c:\Users\jaesc2\GitHub\skforecast


In [4]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor
from scipy.stats import norm

from skforecast.datasets import fetch_dataset
from skforecast.preprocessing import series_long_to_dict
from skforecast.preprocessing import exog_long_to_dict
from skforecast.preprocessing import RollingFeatures
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.model_selection import grid_search_forecaster_multiseries
from skforecast.model_selection import bayesian_search_forecaster_multiseries

In [5]:
# Load time series of multiple lengths and exogenous variables
# ==============================================================================
series = pd.read_csv(
    'https://raw.githubusercontent.com/skforecast/skforecast-datasets/main/data/demo_multi_series.csv'
)
exog = pd.read_csv(
    'https://raw.githubusercontent.com/skforecast/skforecast-datasets/main/data/demo_multi_series_exog.csv'
)

series['timestamp'] = pd.to_datetime(series['timestamp'])
exog['timestamp'] = pd.to_datetime(exog['timestamp'])

display(series.head())
print("")
display(exog.head())

Unnamed: 0,series_id,timestamp,value
0,id_1000,2016-01-01,1012.500694
1,id_1000,2016-01-02,1158.500099
2,id_1000,2016-01-03,983.000099
3,id_1000,2016-01-04,1675.750496
4,id_1000,2016-01-05,1586.250694





Unnamed: 0,series_id,timestamp,sin_day_of_week,cos_day_of_week,air_temperature,wind_speed
0,id_1000,2016-01-01,-0.433884,-0.900969,6.416639,4.040115
1,id_1000,2016-01-02,-0.974928,-0.222521,6.366474,4.530395
2,id_1000,2016-01-03,-0.781831,0.62349,6.555272,3.273064
3,id_1000,2016-01-04,0.0,1.0,6.704778,4.865404
4,id_1000,2016-01-05,0.781831,0.62349,2.392998,5.228913


In [4]:
# Transform series and exog to dictionaries
# ==============================================================================
series_dict = series_long_to_dict(
    data      = series,
    series_id = 'series_id',
    index     = 'timestamp',
    values    = 'value',
    freq      = 'D'
)

exog_dict = exog_long_to_dict(
    data      = exog,
    series_id = 'series_id',
    index     = 'timestamp',
    freq      = 'D'
)



In [5]:
# Drop some exogenous variables for series 'id_1000' and 'id_1003'
# ==============================================================================
exog_dict['id_1000'] = exog_dict['id_1000'].drop(columns=['air_temperature', 'wind_speed'])
exog_dict['id_1003'] = exog_dict['id_1003'].drop(columns=['cos_day_of_week'])

In [6]:
# Partition data in train and test
# ==============================================================================
end_train = '2016-07-31 23:59:00'

series_dict_train = {k: v.loc[: end_train,] for k, v in series_dict.items()}
exog_dict_train   = {k: v.loc[: end_train,] for k, v in exog_dict.items()}
series_dict_test  = {k: v.loc[end_train:,] for k, v in series_dict.items()}
exog_dict_test    = {k: v.loc[end_train:,] for k, v in exog_dict.items()}

In [7]:
# Fit forecaster
# ==============================================================================
regressor = LGBMRegressor(random_state=123, verbose=-1, max_depth=5)
forecaster = ForecasterRecursiveMultiSeries(
                 regressor          = regressor, 
                 lags               = 14, 
                 window_features    = RollingFeatures(stats=['mean', 'mean'], window_sizes=[7, 14]),
                 encoding           = "ordinal", 
                 dropna_from_series = False
             )

forecaster.fit(series=series_dict_train, exog=exog_dict_train, suppress_warnings=True)
forecaster

In [8]:
preds = forecaster.predict(steps=10, exog=exog_dict_test)
preds



Unnamed: 0,level,pred
2016-08-01,id_1000,1453.312971
2016-08-01,id_1001,2849.347882
2016-08-01,id_1003,2706.851726
2016-08-01,id_1004,7496.555367
2016-08-02,id_1000,1440.763196
2016-08-02,id_1001,2947.579536
2016-08-02,id_1003,2310.075968
2016-08-02,id_1004,8685.42599
2016-08-03,id_1000,1410.151437
2016-08-03,id_1001,2875.847691


In [9]:
preds = forecaster.predict_bootstrapping(
    steps=10, exog=exog_dict_test, n_boot=3, suppress_warnings=True
)
preds.head(3)

Unnamed: 0,level,pred_boot_0,pred_boot_1,pred_boot_2
2016-08-01,id_1000,1173.586189,1484.67557,1418.862097
2016-08-01,id_1001,2738.4065,3184.698632,2119.125183
2016-08-01,id_1003,2901.17202,2577.806548,2465.634521


In [10]:
preds.index

DatetimeIndex(['2016-08-01', '2016-08-01', '2016-08-01', '2016-08-01',
               '2016-08-02', '2016-08-02', '2016-08-02', '2016-08-02',
               '2016-08-03', '2016-08-03', '2016-08-03', '2016-08-03',
               '2016-08-04', '2016-08-04', '2016-08-04', '2016-08-04',
               '2016-08-05', '2016-08-05', '2016-08-05', '2016-08-05',
               '2016-08-06', '2016-08-06', '2016-08-06', '2016-08-06',
               '2016-08-07', '2016-08-07', '2016-08-07', '2016-08-07',
               '2016-08-08', '2016-08-08', '2016-08-08', '2016-08-08',
               '2016-08-09', '2016-08-09', '2016-08-09', '2016-08-09',
               '2016-08-10', '2016-08-10', '2016-08-10', '2016-08-10'],
              dtype='datetime64[ns]', freq=None)

In [78]:
forecaster = ForecasterRecursiveMultiSeries(
    regressor=LGBMRegressor(
        n_estimators=2, random_state=123, verbose=-1, max_depth=2
    ),
    lags=14,
    encoding='ordinal',
    dropna_from_series=False,
    transformer_series=StandardScaler(),
    transformer_exog=StandardScaler(),
)
forecaster.fit(
    series=series_dict_train, exog=exog_dict_train, suppress_warnings=True
)
predictions = forecaster.predict_interval(
    steps=5, exog=exog_dict_test, suppress_warnings=True, n_boot=10, interval=[5, 95]
)
predictions

Unnamed: 0,level,pred,lower_bound,upper_bound
2016-08-01,id_1000,1433.297592,1145.66183,1832.803467
2016-08-01,id_1001,2063.854011,1275.450099,2872.839822
2016-08-01,id_1003,2166.434783,1913.930958,2538.823808
2016-08-01,id_1004,7257.175987,5198.95,8597.788342
2016-08-02,id_1000,1461.209555,761.257523,1717.132505
2016-08-02,id_1001,2088.432356,904.569576,2869.878345
2016-08-02,id_1003,2075.324745,1491.383819,2448.884434
2016-08-02,id_1004,7486.368167,5585.929861,9175.776314
2016-08-03,id_1000,1461.209555,1374.688792,1774.645023
2016-08-03,id_1001,2088.432356,905.469576,2843.495775


In [None]:
preds = forecaster.predict_interval(
    steps=3, exog=exog_dict_test, n_boot=5, suppress_warnings=True
)
preds

Unnamed: 0,level,pred,lower_bound,upper_bound
2016-08-01,id_1000,1453.312971,1209.130225,1564.005374
2016-08-01,id_1001,2849.347882,2755.277127,3279.002987
2016-08-01,id_1003,2706.851726,2553.588371,2854.562116
2016-08-01,id_1004,7496.555367,7099.337865,7640.095357
2016-08-02,id_1000,1440.763196,1356.484185,1534.631945
2016-08-02,id_1001,2947.579536,2452.491092,3404.677476
2016-08-02,id_1003,2310.075968,1916.419934,2370.788136
2016-08-02,id_1004,8685.42599,8424.687306,8882.647361
2016-08-03,id_1000,1410.151437,1401.003123,1446.808327
2016-08-03,id_1001,2875.847691,2417.308783,3307.598144


In [None]:
preds = forecaster.predict_quantiles(
    steps=3, exog=exog_dict_test, n_boot=5, suppress_warnings=True
)
preds

Unnamed: 0,level,q_0.05,q_0.5,q_0.95
2016-08-01,id_1000,1209.130225,1418.862097,1564.005374
2016-08-01,id_1001,2755.277127,2941.044595,3279.002987
2016-08-01,id_1003,2553.588371,2721.819816,2854.562116
2016-08-01,id_1004,7099.337865,7562.004274,7640.095357
2016-08-02,id_1000,1356.484185,1426.963184,1534.631945
2016-08-02,id_1001,2452.491092,3203.828306,3404.677476
2016-08-02,id_1003,1916.419934,2175.260402,2370.788136
2016-08-02,id_1004,8424.687306,8728.791739,8882.647361
2016-08-03,id_1000,1401.003123,1421.263357,1446.808327
2016-08-03,id_1001,2417.308783,2795.301289,3307.598144


In [None]:
preds = forecaster.predict_dist(
    steps=3, exog=exog_dict_test, n_boot=5, suppress_warnings=True,
    distribution=norm
)
preds

Unnamed: 0,level,loc,scale
2016-08-01,id_1000,1402.45361,137.78698
2016-08-01,id_1001,3008.027546,204.323833
2016-08-01,id_1003,2711.079834,117.808039
2016-08-01,id_1004,7412.883313,233.442427
2016-08-02,id_1000,1442.229888,70.417452
2016-08-02,id_1001,3009.582727,383.114991
2016-08-02,id_1003,2140.553473,176.807352
2016-08-02,id_1004,8682.156795,179.282271
2016-08-03,id_1000,1423.810787,17.714483
2016-08-03,id_1001,2853.8734,355.271136


In [None]:

preds.iloc[:, 1:].apply(
    lambda x: norm.fit(x), axis=1, result_type='expand'
)

Unnamed: 0,0,1
2016-08-01,770.120295,632.333315
2016-08-01,1606.175689,1401.851856
2016-08-01,1414.443937,1296.635897
2016-08-01,3823.16287,3589.720443
2016-08-02,756.32367,685.906218
2016-08-02,1696.348859,1313.233868
2016-08-02,1158.680412,981.87306
2016-08-02,4430.719533,4251.437262
2016-08-03,720.762635,703.048152
2016-08-03,1604.572268,1249.301132


In [63]:
interval = np.array([5, 95]) / 100

preds[['lower_bound', 'upper_bound']] = (
    preds.iloc[:, 1:].quantile(q=interval, axis=1).transpose()
)
preds = preds[['level', 'lower_bound', 'upper_bound']]
preds

Unnamed: 0,level,lower_bound,upper_bound
2016-08-01,id_1000,1209.130225,1564.005374
2016-08-01,id_1001,2755.277127,3279.002987
2016-08-01,id_1003,2553.588371,2854.562116
2016-08-01,id_1004,7099.337865,7640.095357
2016-08-02,id_1000,1356.484185,1534.631945
2016-08-02,id_1001,2452.491092,3404.677476
2016-08-02,id_1003,1916.419934,2370.788136
2016-08-02,id_1004,8424.687306,8882.647361
2016-08-03,id_1000,1401.003123,1446.808327
2016-08-03,id_1001,2417.308783,3307.598144


In [26]:
predictions_array = preds.to_numpy()
prediction_index = preds.index
levels = preds.columns

n_steps, n_levels = predictions_array.shape

df_long = pd.DataFrame({
    #'step':       np.repeat(prediction_index, n_levels),
    'level':      np.tile(levels, n_steps),
    'pred' : predictions_array.ravel()  # o .flatten()
},
index=np.repeat(prediction_index, n_levels))

df_long.head()

Unnamed: 0,level,pred
2016-08-01,id_1000,1453.312971
2016-08-01,id_1000_lower_bound,1140.542393
2016-08-01,id_1000_upper_bound,1529.475428
2016-08-01,id_1001,2849.347882
2016-08-01,id_1001_lower_bound,2172.050285


In [116]:
# Backtesting
# ==============================================================================
forecaster = ForecasterRecursiveMultiSeries(
                 regressor          = regressor, 
                 lags               = 14, 
                 window_features    = RollingFeatures(stats=['mean', 'mean'], window_sizes=[7, 14]),
                 encoding           = "ordinal", 
                 dropna_from_series = False
             )

cv = TimeSeriesFold(
         steps                 = 24,
         initial_train_size    = len(series_dict_train["id_1000"]),
         refit                 = False,
         allow_incomplete_fold = True,
     )

metrics_levels, backtest_predictions = backtesting_forecaster_multiseries(
    forecaster            = forecaster,
    series                = series_dict,
    exog                  = exog_dict,
    cv                    = cv,
    levels                = None,
    metric                = "mean_absolute_error",
    add_aggregated_metric = True,
    n_jobs                ="auto",
    verbose               = False,
    interval              = "bootstrapping",
    n_boot                = 25,
    show_progress         = True,
    suppress_warnings     = True
)

display(metrics_levels)
print("")
display(backtest_predictions)

  0%|          | 0/7 [00:00<?, ?it/s]

Unnamed: 0,levels,mean_absolute_error
0,id_1000,167.502214
1,id_1001,1103.313887
2,id_1002,
3,id_1003,280.492603
4,id_1004,711.078359
5,average,565.596766
6,weighted_average,535.467442
7,pooling,572.944127





Unnamed: 0,level,pred,pred_boot_0,pred_boot_1,pred_boot_2,pred_boot_3,pred_boot_4,pred_boot_5,pred_boot_6,pred_boot_7,...,pred_boot_15,pred_boot_16,pred_boot_17,pred_boot_18,pred_boot_19,pred_boot_20,pred_boot_21,pred_boot_22,pred_boot_23,pred_boot_24
2016-08-01,id_1000,1453.312971,1173.586189,1484.675570,1418.862097,1583.837825,1351.306370,1372.982760,1439.512959,1409.814539,...,1323.631696,1485.943585,1132.281444,983.341389,1398.443783,1483.429494,1474.022549,1409.243375,1470.410114,1470.410114
2016-08-01,id_1001,2849.347882,2690.225531,3315.267219,2119.125183,2462.458516,2708.835260,2462.458516,2690.225531,2577.980086,...,2708.835260,2738.406500,2738.406500,3315.267219,2888.212764,3081.756898,3081.756898,3210.573857,3133.946060,2888.212764
2016-08-01,id_1003,2706.851726,2584.527246,2539.871235,2919.298994,2643.660797,2532.615558,2790.712069,2756.217796,2869.734475,...,2598.919573,2680.117097,2688.405280,2471.135342,2782.541300,2832.232915,2843.289713,2287.494729,2770.632998,2786.290979
2016-08-01,id_1004,7496.555367,7288.915846,5797.501530,7654.538247,7309.451525,7555.799319,7211.963211,7210.689038,7569.243608,...,7746.280512,7309.451525,7203.459456,7206.599391,7607.784914,7566.729479,5797.501530,7288.915846,7218.055198,8310.772954
2016-08-02,id_1000,1440.763196,1434.802833,1500.851630,1448.720409,1479.964783,1440.970575,1402.795567,1531.175270,1404.024956,...,1450.468310,1382.930068,1443.716395,1426.529660,1474.445573,1387.315108,1161.036414,1362.734887,1457.344704,1397.264764
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-30,id_1001,1132.535774,486.690637,980.143039,1150.541243,1344.130896,759.371090,1096.960855,1212.352374,622.022276,...,1212.352374,1213.542041,1481.881637,1577.641217,1031.205842,1209.206300,390.432962,755.528966,1031.205842,1470.150355
2016-12-30,id_1003,2089.261345,1898.669067,2026.453904,1996.942433,2118.391255,1823.392510,1772.466935,2109.059187,1687.628235,...,2335.792632,1820.908274,2102.769041,2101.640683,2501.867497,1773.001477,1676.982167,2275.556777,1898.023625,1733.417968
2016-12-31,id_1000,1393.128313,1471.673592,1411.773924,1441.976529,1327.751241,1453.778083,1412.582346,1444.307640,1464.359752,...,1398.314933,1363.110982,1068.748507,1430.000054,1394.973479,1380.985564,1317.879488,1377.544932,1381.832287,1389.415499
2016-12-31,id_1001,1106.034061,947.305218,335.753749,1401.327198,1104.841331,1204.858857,1104.841331,1252.284776,1598.398091,...,1405.346151,785.674870,859.657225,1197.637269,1070.366800,1107.787662,1200.654903,1418.268641,670.153300,365.726196


In [8]:
import re 
import pytest
from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.recursive import ForecasterRecursive
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.direct import ForecasterDirectMultiVariate
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.model_selection._split import TimeSeriesFold
from skforecast.preprocessing import RollingFeatures
from sklearn.linear_model import Ridge, LinearRegression
from skforecast.preprocessing import TimeSeriesDifferentiator

# Fixtures
from skforecast.recursive.tests.tests_forecaster_recursive_multiseries.fixtures_forecaster_recursive_multiseries import series, exog, exog_predict, expected_df_to_long_format

In [9]:
import joblib

series_dict = joblib.load(
    r"C:\Users\jaesc2\GitHub\skforecast\skforecast\model_selection\tests\fixture_sample_multi_series.joblib"
)

exog_dict = joblib.load(
    r"C:\Users\jaesc2\GitHub\skforecast\skforecast\model_selection\tests\fixture_sample_multi_series_exog.joblib"
)

In [11]:
from skforecast.recursive.tests.tests_forecaster_recursive_multiseries.fixtures_forecaster_recursive_multiseries import series, exog, exog_predict, expected_df_to_long_format
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression

transformer_exog = ColumnTransformer(
                       [('scale', StandardScaler(), ['exog_1']),
                        ('onehot', OneHotEncoder(), ['exog_2'])],
                       remainder = 'passthrough',
                       verbose_feature_names_out = False
                   )

In [20]:
forecaster = ForecasterRecursiveMultiSeries(
                    regressor          = LinearRegression(),
                    lags               = 3,
                    transformer_series = StandardScaler(),
                    transformer_exog   = transformer_exog,
                )

forecaster.fit(series=series, exog=exog)
forecaster.out_sample_residuals_ = forecaster.in_sample_residuals_
results = forecaster.predict_quantiles(
                steps                   = 2,
                quantiles               = (0.05, 0.55, 0.95),
                levels                  = None,
                exog                    = exog_predict,
                n_boot                  = 4,
                use_in_sample_residuals = False
            )
results

Unnamed: 0,level,q_0.05,q_0.55,q_0.95
50,1,0.128375,0.339203,0.471576
50,2,0.161536,0.655217,0.924551
51,1,0.093859,0.32079,0.62316
51,2,0.07375,0.107499,0.218463


In [22]:
results.index

Index([50, 50, 51, 51], dtype='int64')

In [21]:
results.to_dict(orient='list')

{'level': ['1', '2', '1', '2'],
 'q_0.05': [0.1283750019253314,
  0.1615361493231256,
  0.09385929028369558,
  0.07374959117551036],
 'q_0.55': [0.3392034161273868,
  0.6552169189586099,
  0.3207904249631374,
  0.10749930737109713],
 'q_0.95': [0.47157639833964976,
  0.9245514385384845,
  0.6231596160709784,
  0.2184633069802528]}

In [16]:
expected = pd.DataFrame(
                   data    = np.array([[0.12837500, 0.33920342, 0.47157640],
                                       [0.09385929, 0.32079042, 0.62315962]]),
                   columns = ['1_q_0.05', '1_q_0.55', '1_q_0.95'],
                   index   = pd.RangeIndex(start=50, stop=52)
               )
expected

Unnamed: 0,1_q_0.05,1_q_0.55,1_q_0.95
50,0.128375,0.339203,0.471576
51,0.093859,0.32079,0.62316
