In [1]:
import pandas as pd

from sklearn.metrics import root_mean_squared_error  # type: ignore
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

from skforecast.Sarimax import Sarimax
from skforecast.ForecasterSarimax import ForecasterSarimax
from skforecast.model_selection_sarimax import backtesting_sarimax

In [2]:
# Load the train / validation / test splits in chronological order and put each
# one on a daily ("D") frequency index. `asfreq` reindexes to that frequency, so
# any calendar day missing from a file appears as a NaN row rather than being
# silently skipped.
# NOTE(review): assumes every parquet file is indexed by date - TODO confirm.
train_df, validation_df, test_df = (
    pd.read_parquet(split_path).asfreq("D")
    for split_path in (
        "../../../data/time_series/train_df.parquet",
        "../../../data/time_series/validation_df.parquet",
        "../../../data/time_series/test_df.parquet",
    )
)

In [3]:
# Stack the splits row-wise, in the order train -> validation -> test.
chronological_splits = [train_df, validation_df, test_df]

# Train + validation only (the first two splits).
train_and_validation_df = pd.concat(chronological_splits[:2])

# Full series: every split, used as the `y` source for backtesting.
df = pd.concat(chronological_splits)

In [4]:
# SARIMA hyper-parameters passed to the `Sarimax` regressor.
# The series is put on a daily frequency when loaded, so a seasonal period of 7
# corresponds to a weekly cycle.
days_per_week = 7

# Non-seasonal order, in the conventional (p, d, q) layout.
arima_order = (0, 1, 3)

# Seasonal order, in the conventional (P, D, Q, s) layout; s is the period.
seasonal_order = (2, 0, 0, days_per_week)

In [5]:
# Build the SARIMA forecaster from the orders chosen above.
sarimax_regressor = Sarimax(order=arima_order, seasonal_order=seasonal_order, maxiter=200)  # type: ignore
forecaster = ForecasterSarimax(regressor=sarimax_regressor)  # type: ignore

# Error metrics, in the order they are reported below (RMSE, MAE, MAPE).
evaluation_metrics = [
    root_mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
]

# The backtest starts three weeks (7 * 3 rows) before the end of train + validation.
# NOTE(review): because the start is pulled back by 21 rows while `steps` is
# len(test_df), the backtested span covers the tail of the validation split plus
# the test split, and fold boundaries do not line up with the test split. If the
# goal is a pure hold-out score on `test_df`, the start should presumably be
# len(train_and_validation_df) - confirm the intent before relying on these metrics.
backtest_start = len(train_and_validation_df) - 7 * 3
forecast_horizon = len(test_df)

# Expanding-window backtest (fixed_train_size=False) that refits the model
# before every fold (refit=True).
result = backtesting_sarimax(
    forecaster=forecaster,
    y=df["gap"],
    initial_train_size=backtest_start,
    fixed_train_size=False,
    steps=forecast_horizon,
    metric=evaluation_metrics,
    refit=True,
    n_jobs="auto",
    suppress_warnings_fit=True,
    verbose=True,
    show_progress=True,
)

# result[0] holds the metric values, in the same order as `evaluation_metrics`.
metric_values = result[0]
print(f"RMSE: {metric_values[0]}")
print(f"MAE: {metric_values[1]}")
print(f"MAPE: {metric_values[2]}")

Information of backtesting process
----------------------------------
Number of observations used for initial training: 5001
Number of observations used for backtesting: 51
    Number of folds: 2
    Number of steps per fold: 30
    Number of steps to exclude from the end of each train set before test (gap): 0
    Last fold only includes 21 observations.

Fold: 0
    Training:   2001-11-01 00:00:00 -- 2015-07-11 00:00:00  (n=5001)
    Validation: 2015-07-12 00:00:00 -- 2015-08-10 00:00:00  (n=30)
Fold: 1
    Training:   2001-11-01 00:00:00 -- 2015-08-10 00:00:00  (n=5031)
    Validation: 2015-08-11 00:00:00 -- 2015-08-31 00:00:00  (n=21)



  0%|          | 0/2 [00:00<?, ?it/s]

RMSE: 11647.80376617601
MAE: 9122.899726245927
MAPE: 0.11621801161370673
