# Benchmarking skforecast Recursive Forecasters

This notebook benchmarks the execution speed of `skforecast` across its different versions and keeps track of the results.

**Notes**

+ In version `0.15.0` the binning of residuals was introduced in multi-series forecasters. This explains the increase in the time taken to fit the model.
+ Since version `0.17.0`, `ForecasterRecursiveMultiSeries` only accepts as input a long-format DataFrame with a MultiIndex or a dictionary of series. Wide-format DataFrames, where each column is a different time series, are no longer supported. If the input is a pandas DataFrame with a MultiIndex, it is internally converted to a dictionary of series, which notably increases the computation time.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically, without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Insert the parent of the current working directory into the module search
# path (at position 1) so that modules located there can be imported by the
# cells below. The resolved path is printed for traceability.
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

c:\Users\jaesc2\GitHub\skforecast


In [2]:
# !pip install --upgrade "numpy<2.0" && \
# pip install --upgrade pandas && \
# pip install --upgrade scikit-learn

In [3]:
# !pip install skforecast==0.15.1
# !pip install skforecast==0.14.0
# !pip install skforecast==0.13.0

In [2]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import sklearn
import joblib
from benchmarking import (
    plot_benchmark_results,
    run_benchmark_ForecasterRecursiveMultiSeries,
    run_benchmark_ForecasterRecursive
)
import skforecast
import platform
import psutil
import plotly.express as px

In [3]:
# Environment report
# ==============================================================================
# Collect the library versions and hardware/OS details of the machine running
# the benchmark, then print them one per line as "<label>: <value>".
environment_report = {
    "Python version": platform.python_version(),
    "skforecast version": skforecast.__version__,
    "scikit-learn version": sklearn.__version__,
    "pandas version": pd.__version__,
    "numpy version": np.__version__,
    "Computer network name": platform.node(),
    "Processor type": platform.processor(),
    "Platform type": platform.platform(),
    "Operating system": platform.system(),
    "Operating system release": platform.release(),
    "Operating system version": platform.version(),
    "Number of physical cores": psutil.cpu_count(logical=False),
    "Number of logical cores": psutil.cpu_count(logical=True),
}
print("\n".join(f"{label}: {value}" for label, value in environment_report.items()))

Python version: 3.12.11
skforecast version: 0.17.0
scikit-learn version: 1.7.1
pandas version: 2.3.1
numpy version: 2.1.3
Computer network name: ITES015-NB0029
Processor type: Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
Platform type: Windows-11-10.0.26100-SP0
Operating system: Windows
Operating system release: 11
Operating system version: 10.0.26100
Number of physical cores: 8
Number of logical cores: 16


In [4]:
import warnings
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message="'force_all_finite' was renamed to 'ensure_all_finite'"
)

# ForecasterRecursiveMultiSeries

In [5]:
# Mock data for benchmarking
# ==========================================================
# The same synthetic data is exposed in several container formats:
#   - series_dict / series_df_wide / series_df_long: the target series.
#   - exog_dict / exog_df_wide / exog_df_long: exogenous variables (training).
#   - exog_dict_prediction / exog_df_wide_prediction / exog_df_long_prediction:
#     exogenous variables for the 100 steps following the training range.
#   - series_dict_different_length: series_dict with a random number of
#     trailing rows removed from each series so that lengths differ.

n_series = 600
len_series = 2000


def _calendar_features(index):
    """Return a DataFrame on `index` with day_of_week, week_of_year and month columns."""
    return pd.DataFrame(index=index).assign(
        day_of_week=index.dayofweek,
        week_of_year=index.isocalendar().week.astype(int),
        month=index.month,
    )


def _dict_to_long(frames_by_series):
    """Stack a {series_id: DataFrame} mapping into one frame indexed by (series_id, datetime)."""
    tagged_frames = [
        frame.assign(series_id=series_id)
        for series_id, frame in frames_by_series.items()
    ]
    return (
        pd.concat(tagged_frames)
        .reset_index()
        .rename(columns={'index': 'datetime'})
        .set_index(['series_id', 'datetime'])
    )


# Target series: one hourly series of Gaussian noise per id. Each series gets
# its own freshly built DatetimeIndex object.
rng = np.random.default_rng(321)
series_dict = {
    f'series_{i}': pd.Series(
        data=rng.normal(loc=20, scale=5, size=len_series),
        index=pd.date_range(start='2010-01-01', periods=len_series, freq='h'),
        name=f'series_{i}',
    )
    for i in range(n_series)
}

# Same series with between 1 and 999 trailing observations dropped, drawn from
# a re-seeded generator.
rng = np.random.default_rng(321)
series_dict_different_length = {}
for series_id, series in series_dict.items():
    n_dropped = rng.integers(low=1, high=1000)
    series_dict_different_length[series_id] = series.iloc[:-n_dropped].copy()

series_df_wide = pd.DataFrame(series_dict, index=series_dict['series_0'].index)

# Long format: MultiIndex (series_id, datetime) with a single 'value' column.
series_long = series_df_wide.stack()
series_long.index = series_long.index.set_names(['datetime', 'series_id'])
series_df_long = series_long.swaplevel().sort_index().to_frame(name='value')

# Exogenous variables for the training range.
exog_df_wide = _calendar_features(series_df_wide.index)
exog_dict = {series_id: exog_df_wide.copy() for series_id in series_dict}
exog_df_long = _dict_to_long(exog_dict)

# Exogenous variables for the 100 hours right after the training range.
prediction_index = pd.date_range(
    start=series_df_wide.index.max() + pd.Timedelta(hours=1),
    periods=100,
    freq='h',
)
exog_df_wide_prediction = _calendar_features(prediction_index)
exog_dict_prediction = {
    series_id: exog_df_wide_prediction.copy() for series_id in series_dict
}
exog_df_long_prediction = _dict_to_long(exog_dict_prediction)

In [8]:
# Run the ForecasterRecursiveMultiSeries benchmarks, passing the mock data in
# every container format built above (dict, long DataFrame, wide DataFrame).
# NOTE(review): results are presumably persisted to ./benchmark.joblib by the
# benchmarking module - confirm there.
run_benchmark_ForecasterRecursiveMultiSeries(
    series_dict=series_dict,
    series_df_long=series_df_long,
    series_dict_different_length=series_dict_different_length,
    exog_dict=exog_dict,
    exog_df_long=exog_df_long,
    exog_df_wide=exog_df_wide,
    exog_dict_prediction=exog_dict_prediction,
    exog_df_long_prediction=exog_df_long_prediction,
    exog_df_wide_prediction=exog_df_wide_prediction,
)

Running benchmarks for ForecasterRecursiveMultiSeries...
skforecast version: 0.17.0
Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_dict_no_exog
Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_dict_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_dict_different_length_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_dict_exog_is_df_wide


Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_df_long_no_exog


Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_df_long_exog_is_df_long


Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_series_is_df_long_exog_is_df_wide
Benchmarking function: ForecasterRecursiveMultiSeries__create_train_X_y_single_series
Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dict_no_exog
Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dict_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dict_different_length_exog_is_dict


Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dataframe_no_exog


Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dataframe_exog_is_dataframe


Benchmarking function: ForecasterRecursiveMultiSeries_fit_series_is_dataframe_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries_predict_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries_predict_exog_is_df_long
Benchmarking function: ForecasterRecursiveMultiSeries_predict_interval_exog_is_dict_conformal
Benchmarking function: ForecasterRecursiveMultiSeries__create_predict_inputs_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries__create_predict_inputs_exog_is_df_long
Benchmarking function: ForecasterRecursiveMultiSeries__check_predict_inputs
Benchmarking function: ForecasterRecursiveMultiSeries__create_predict_inputs_exog_is_df_wide
Benchmarking function: ForecasterRecursiveMultiSeries_backtesting_series_is_dict_no_exog
Benchmarking function: ForecasterRecursiveMultiSeries_backtesting_series_is_dict_exog_is_dict
Benchmarking function: ForecasterRecursiveMultiSeries_backtesting_series_is_dict_no_exog_conformal
Benchmarking function: Forec

In [9]:
# Plot results
# ==============================================================================
# Optional settings: leave the filters as None to plot every stored run; set
# display_df to True to also show the last three rows for each function.
display_df = False
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None

results_benchmark_all = joblib.load("./benchmark.joblib")

# Keep only the multi-series forecasters (both class names appear in the
# stored results) benchmarked with LGBMRegressor.
is_multiseries = results_benchmark_all['forecaster_name'].isin(
    ['ForecasterRecursiveMultiSeries', 'ForecasterAutoregMultiSeries']
)
results_benchmark = results_benchmark_all[is_multiseries]
results_benchmark = results_benchmark[
    results_benchmark['regressor_name'] == 'LGBMRegressor'
]

# One figure per benchmarked function.
for function_name in results_benchmark['function_name'].unique():
    df = results_benchmark[results_benchmark['function_name'] == function_name]
    if selected_date:
        selected_day = pd.to_datetime(selected_date).date()
        df = df[df['datetime'].dt.date == selected_day]
    if selected_platform:
        df = df[df['platform'] == selected_platform]
    if display_df:
        display(df.tail(3))
    plot_benchmark_results(df.copy(), function_name, add_median=True, add_mean=True)

# ForecasterRecursive

In [6]:
# Mock data for benchmarking
# ==========================================================
# y: one hourly series of Gaussian noise.
# exog / exog_prediction: calendar features on the training range and on the
# 100 hours that follow it.
len_series = 2000
rng = np.random.default_rng(321)
y = pd.Series(
    data=rng.normal(loc=20, scale=5, size=len_series),
    index=pd.date_range(start='2010-01-01', periods=len_series, freq='h'),
    name='y',
)
# The generator is re-seeded here; it is not used again in this cell.
rng = np.random.default_rng(321)

exog = pd.DataFrame(index=y.index)
exog_prediction = pd.DataFrame(
    index=pd.date_range(
        start=exog.index.max() + pd.Timedelta(hours=1),
        periods=100,
        freq='h',
    )
)
# Both frames get the same calendar features, derived from their own index.
for calendar_frame in (exog, exog_prediction):
    calendar_frame['day_of_week'] = calendar_frame.index.dayofweek
    calendar_frame['week_of_year'] = calendar_frame.index.isocalendar().week.astype(int)
    calendar_frame['month'] = calendar_frame.index.month

In [11]:
# Run the ForecasterRecursive benchmarks on the single-series mock data.
run_benchmark_ForecasterRecursive(
    y=y,
    exog=exog,
    exog_prediction=exog_prediction,
)

Benchmarking function: ForecasterRecursive__create_train_X_y
Benchmarking function: ForecasterRecursive_fit
Benchmarking function: ForecasterRecursive_predict
Benchmarking function: ForecasterRecursive_predict_interval_conformal
Benchmarking function: ForecasterRecursive__create_predict_inputs
Benchmarking function: ForecasterRecursive_backtesting
Benchmarking function: ForecasterRecursive_backtesting_conformal


In [9]:
# Plot results
# ==============================================================================
# Optional settings: leave the filters as None to plot every stored run; set
# display_df to True to also show the last three rows for each function.
display_df = False
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None

results_benchmark_all = joblib.load("./benchmark.joblib")

# Keep only the single-series forecasters (both class names appear in the
# stored results) benchmarked with LGBMRegressor.
is_single_series = results_benchmark_all['forecaster_name'].isin(
    ['ForecasterRecursive', 'ForecasterAutoreg']
)
results_benchmark = results_benchmark_all[is_single_series]
results_benchmark = results_benchmark[
    results_benchmark['regressor_name'] == 'LGBMRegressor'
]

# One figure per benchmarked function.
for function_name in results_benchmark['function_name'].unique():
    df = results_benchmark[results_benchmark['function_name'] == function_name]
    if selected_date:
        selected_day = pd.to_datetime(selected_date).date()
        df = df[df['datetime'].dt.date == selected_day]
    if selected_platform:
        df = df[df['platform'] == selected_platform]
    if display_df:
        display(df.tail(3))
    plot_benchmark_results(df.copy(), function_name, add_median=True, add_mean=True)

# Summary of historical results

In [13]:
# Summary of historical results: median run time per function and version
# ==============================================================================
# Optional filters: leave as None to summarise every stored run.
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None


def _version_sort_key(version):
    """
    Sort key that orders version strings numerically rather than
    lexicographically (so that '0.9.0' comes before '0.17.0').

    Numeric components sort by value and before non-numeric ones
    (e.g. 'dev0'), which sort alphabetically among themselves.
    """
    return tuple(
        (0, int(part)) if part.isdigit() else (1, part)
        for part in str(version).split('.')
    )


def _median_run_time_bar(summary, forecaster_names, title, version_order):
    """
    Build a grouped bar chart of median run time per function, with one bar
    colour per skforecast version, restricted to `forecaster_names`.

    Parameters
    ----------
    summary : pandas DataFrame
        Frame with columns 'forecaster_name', 'skforecast_version',
        'function_name' and 'run_time_avg'.
    forecaster_names : list of str
        Values of 'forecaster_name' to keep.
    title : str
        Figure title.
    version_order : list
        Order in which the versions are laid out and listed in the legend.

    Returns
    -------
    plotly Figure
    """
    return px.bar(
        summary[summary['forecaster_name'].isin(forecaster_names)],
        x='function_name',
        y='run_time_avg',
        color='skforecast_version',
        barmode='group',
        title=title,
        labels={'run_time_avg': 'Median Run Time (s)', 'function_name': 'Function'},
        category_orders={'skforecast_version': version_order},
    )


# NOTE: joblib.load unpickles the file; only load results files you trust.
results_benchmark_all = joblib.load("./benchmark.joblib")

# Drop everything up to the first underscore of the function name, so the same
# function lines up on the x axis regardless of the prefix it was stored with.
results_benchmark_all['function_name'] = (
    results_benchmark_all['function_name'].str.split('_', n=1).str[1]
)

if selected_date:
    # Convert to a datetime.date first, as the per-forecaster plotting cells do;
    # comparing the date objects against a raw string would match no rows.
    selected_day = pd.to_datetime(selected_date).date()
    results_benchmark_all = results_benchmark_all[
        results_benchmark_all['datetime'].dt.date == selected_day
    ]
if selected_platform:
    results_benchmark_all = results_benchmark_all[
        results_benchmark_all['platform'] == selected_platform
    ]

results_benchmark_all = (
    results_benchmark_all
    .groupby(['forecaster_name', 'skforecast_version', 'function_name'])['run_time_avg']
    .agg('median')
    .reset_index()
    .sort_values(by=['function_name'])
)

# The same version order is used in both figures.
version_order = sorted(
    results_benchmark_all['skforecast_version'].unique(),
    key=_version_sort_key,
)

fig = _median_run_time_bar(
    results_benchmark_all,
    ['ForecasterRecursiveMultiSeries', 'ForecasterAutoregMultiSeries'],
    title='MultiSeries Forecasters - Median Run Time by Function and skforecast Version',
    version_order=version_order,
)
fig.update_layout(xaxis_tickangle=-45, height=600)
fig.show()

fig = _median_run_time_bar(
    results_benchmark_all,
    ['ForecasterRecursive', 'ForecasterAutoreg'],
    title='Single-Series Forecasters - Median Run Time by Function and skforecast Version',
    version_order=version_order,
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
