In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Register the parent of the current working directory on the import path
# (presumably so the local skforecast checkout is the one imported -- confirm).
repo_root = Path.cwd().parent
path = str(repo_root)
sys.path.insert(1, path)
print(path)

import numpy as np
import pandas as pd

c:\Users\jaesc2\GitHub\skforecast


In [2]:
# Libraries
# ==============================================================================
import pandas as pd
import matplotlib.pyplot as plt
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error

from skforecast.datasets import fetch_dataset
from skforecast.preprocessing import RollingFeatures
from skforecast.recursive import ForecasterRecursive

In [3]:
# Download data
# ==============================================================================
data = fetch_dataset(
    name="h2o",
    raw=True,
    kwargs_read_csv={"names": ["y", "datetime"], "header": 0},
)

# Data preprocessing
# ==============================================================================
# Parse the date column, index by it at month-start ('MS') frequency and keep
# only the target column `y` as a chronologically sorted Series.
data = (
    data
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime'], format='%Y-%m-%d'))
    .set_index('datetime')
    .asfreq('MS')['y']
    .sort_index()
)

# Split train-test
# ==============================================================================
# The last `steps` observations are held out as the test partition.
steps = 36
data_train = data.iloc[:-steps]
data_test = data.iloc[-steps:]


h2o
---
Monthly expenditure ($AUD) on corticosteroid drugs that the Australian health
system had between 1991 and 2008.
Hyndman R (2023). fpp3: Data for Forecasting: Principles and Practice(3rd
Edition). http://pkg.robjhyndman.com/fpp3package/,https://github.com/robjhyndman
/fpp3package, http://OTexts.com/fpp3.
Shape of the dataset: (204, 2)


In [5]:
# Create and fit forecaster
# ==============================================================================
# Build the components first so each one is named, then assemble them.
lgbm_regressor = LGBMRegressor(random_state=123, verbose=-1)
rolling_mean = RollingFeatures(stats=['mean'], window_sizes=10)

forecaster = ForecasterRecursive(
    regressor=lgbm_regressor,
    lags=15,
    window_features=rolling_mean,
)
forecaster.fit(y=data_train)

# Display the training range stored on the fitted forecaster.
forecaster.training_range_

DatetimeIndex(['1991-07-01', '2005-06-01'], dtype='datetime64[ns]', name='datetime', freq=None)

In [24]:
# Call the forecaster's private training-matrix builder and destructure its
# result by position: elements 0 and 1 are kept as X_train / y_train, element 5
# as the list of feature names, and everything else is discarded.
train_outputs = forecaster._create_train_X_y(y=data_train)
X_train, y_train, _, _, _, X_train_features_names_out_, *_ = train_outputs

# Show the feature names that were unpacked above.
X_train_features_names_out_

['lag_1',
 'lag_2',
 'lag_3',
 'lag_4',
 'lag_5',
 'lag_6',
 'lag_7',
 'lag_8',
 'lag_9',
 'lag_10',
 'lag_11',
 'lag_12',
 'lag_13',
 'lag_14',
 'lag_15',
 'roll_mean_10']

In [27]:
# Alternative unpacking of the same call: keep the first two elements, bind the
# second-to-last element to `a`, and discard the rest (the middle elements via
# the starred target, the last element via the trailing `_`).
# NOTE(review): assumes the second-to-last returned element is the feature-name
# list -- confirm against skforecast's `_create_train_X_y` return signature.
(
    X_train,
    y_train,
    *_,
    a,
    _
) = forecaster._create_train_X_y(y=data_train)

# Display the value unpacked in THIS cell. The original displayed
# `X_train_features_names_out_`, a variable left over from an earlier cell, so
# the output did not reflect this cell's unpacking at all.
a

['lag_1',
 'lag_2',
 'lag_3',
 'lag_4',
 'lag_5',
 'lag_6',
 'lag_7',
 'lag_8',
 'lag_9',
 'lag_10',
 'lag_11',
 'lag_12',
 'lag_13',
 'lag_14',
 'lag_15',
 'roll_mean_10']

In [9]:
from skforecast.utils import preprocess_y

# Compare the training range stored on the fitted forecaster with the first and
# last entries of the index returned by `preprocess_y` for the training series.
expected_training_range = forecaster.training_range_
training_range = preprocess_y(y=data_train, return_values=False)[1][[0, -1]]

if not expected_training_range.equals(training_range):
    # Same exception type as before, now carrying both values so that a
    # failure can be diagnosed without re-running the cell.
    raise AssertionError(
        f"Training range mismatch: forecaster stored {expected_training_range}, "
        f"but the training series index gives {training_range}."
    )

In [16]:
# Inspect the first entry of the forecaster's stored training range.
expected_training_range[0]

Timestamp('1991-07-01 00:00:00')

In [12]:
# Take the second element returned by `preprocess_y` for the training series
# and select its first and last entries.
preprocess_y(y=data_train, return_values=False)[1][[0, -1]]

DatetimeIndex(['1991-07-01', '2005-06-01'], dtype='datetime64[ns]', name='datetime', freq=None)

In [21]:
# Three 3x3 float matrices: the point values and their lower / upper bounds.
point_forecast = np.array(
    [
        [1, 10, 100],
        [2, 20, 200],
        [3, 30, 300],
    ],
    dtype=float,
)

lower_bound = np.array(
    [
        [0.1, 1, 10],
        [0.2, 2, 20],
        [0.3, 3, 30],
    ],
    dtype=float,
)

upper_bound = np.array(
    [
        [1.1, 11, 101],
        [2.2, 22, 202],
        [3.3, 33, 303],
    ],
    dtype=float,
)

# Stack the matrices along a new leading axis, then reverse the axis order so
# that predictions[j, i, k] is element [i, j] of the k-th matrix
# (k = 0: point value, 1: lower bound, 2: upper bound).
predictions = np.stack([point_forecast, lower_bound, upper_bound]).transpose(2, 1, 0)
predictions

array([[[1.00e+00, 1.00e-01, 1.10e+00],
        [2.00e+00, 2.00e-01, 2.20e+00],
        [3.00e+00, 3.00e-01, 3.30e+00]],

       [[1.00e+01, 1.00e+00, 1.10e+01],
        [2.00e+01, 2.00e+00, 2.20e+01],
        [3.00e+01, 3.00e+00, 3.30e+01]],

       [[1.00e+02, 1.00e+01, 1.01e+02],
        [2.00e+02, 2.00e+01, 2.02e+02],
        [3.00e+02, 3.00e+01, 3.03e+02]]])

In [23]:
# Print the 2-D slice found at each of the first three positions along the
# leading axis of `predictions`.
for idx in range(3):
    print(predictions[idx])

[[1.  0.1 1.1]
 [2.  0.2 2.2]
 [3.  0.3 3.3]]
[[10.  1. 11.]
 [20.  2. 22.]
 [30.  3. 33.]]
[[100.  10. 101.]
 [200.  20. 202.]
 [300.  30. 303.]]
