# Benchmarking skforecast Direct Forecasters

This notebook benchmarks the execution speed of the `skforecast` direct forecasters across different versions of the library and keeps track of the results.

**Notes**

+ In version `0.15.0` the binning of residuals was introduced in multi-series forecasters. This explains the increase in the time taken to fit the model.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

/home/joaquin/Documents/GitHub/skforecast


In [None]:
# Optional environment setup: uncomment the lines below to upgrade the
# numerical stack before running the benchmarks.
# The version specifier is quoted because an unquoted `<` would be interpreted
# by the shell as input redirection.
# !pip install --upgrade "numpy<2.0" && \
# pip install --upgrade pandas && \
# pip install --upgrade scikit-learn

# Uncomment one line to install the skforecast version to benchmark.
# !pip install skforecast==0.15.1
# !pip install skforecast==0.14.0
# !pip install skforecast==0.13.0

In [4]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import sklearn
import joblib
from benchmarking import (
    plot_benchmark_results,
    run_benchmark_ForecasterDirectMultiVariate,
    run_benchmark_ForecasterDirect
)
import skforecast
import platform
import psutil
import plotly.express as px

In [5]:
# Report the software and hardware environment the benchmarks run on, one
# "label: value" line per entry, in the order listed below.
environment_info = {
    "Python version": platform.python_version(),
    "skforecast version": skforecast.__version__,
    "scikit-learn version": sklearn.__version__,
    "pandas version": pd.__version__,
    "numpy version": np.__version__,
    "Computer network name": platform.node(),
    "Processor type": platform.processor(),
    "Platform type": platform.platform(),
    "Operating system": platform.system(),
    "Operating system release": platform.release(),
    "Operating system version": platform.version(),
    "Number of physical cores": psutil.cpu_count(logical=False),
    "Number of logical cores": psutil.cpu_count(logical=True),
}
for label, value in environment_info.items():
    print(f"{label}: {value}")

Python version: 3.12.9
skforecast version: 0.16.0
scikit-learn version: 1.6.1
pandas version: 2.2.3
numpy version: 2.2.5
Computer network name: joaquin-HP-ProBook-440-G6
Processor type: x86_64
Platform type: Linux-6.11.0-24-generic-x86_64-with-glibc2.39
Operating system: Linux
Operating system release: 6.11.0-24-generic
Operating system version: #24~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Mar 25 20:14:34 UTC 2
Number of physical cores: 4
Number of logical cores: 8


In [6]:
import warnings
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message="'force_all_finite' was renamed to 'ensure_all_finite'"
)

# ForecasterDirectMultiVariate

In [7]:
# Mock data for benchmarking
# ==========================================================
# Gaussian noise series on an hourly index, plus calendar features used as
# exogenous variables for training and for the prediction horizon.
n_series = 13
len_series = 1000
rng = np.random.default_rng(321)

train_index = pd.date_range(start='2020-01-01', periods=len_series, freq='h')
series = pd.DataFrame(
    data=rng.normal(loc=10, scale=3, size=(len_series, n_series)),
    index=train_index,
    columns=[f'series_{i}' for i in range(n_series)],
)

# Calendar features are derived from the index of whichever frame they are
# attached to, so the same recipe serves both exogenous frames.
calendar_features = {
    'day_of_week': lambda frame: frame.index.dayofweek,
    'week_of_year': lambda frame: frame.index.isocalendar().week.astype(int),
    'month': lambda frame: frame.index.month,
}

exog = pd.DataFrame(index=series.index).assign(**calendar_features)

# Exogenous values for the 10 hourly steps that follow the training data.
future_index = pd.date_range(
    start=series.index.max() + pd.Timedelta(hours=1),
    periods=10,
    freq='h',
)
exog_prediction = pd.DataFrame(index=future_index).assign(**calendar_features)

In [8]:
# Run the ForecasterDirectMultiVariate benchmarks on the mock data above;
# the names of the benchmarked functions are printed as they run.
run_benchmark_ForecasterDirectMultiVariate(
    series=series,
    exog=exog,
    exog_prediction=exog_prediction,
)

Benchmarking function: ForecasterDirectMultiVariate__create_train_X_y
Benchmarking function: ForecasterDirectMultiVariate__create_train_X_y_no_exog
Benchmarking function: ForecasterDirectMultiVariate_fit
Benchmarking function: ForecasterDirectMultiVariate_fit_series_no_exog
Benchmarking function: ForecasterDirectMultiVariate_predict
Benchmarking function: ForecasterDirectMultiVariate_predict_interval_conformal
Benchmarking function: ForecasterDirectMultiVariate__create_predict_inputs
Benchmarking function: ForecasterDirectMultiVariate__check_predict_inputs




Benchmarking function: ForecasterDirectMultiVariate_backtesting
Benchmarking function: ForecasterDirectMultiVariate_backtesting_no_exog
Benchmarking function: ForecasterDirectMultiVariate_backtesting_conformal


In [39]:
# Plot results
# ==============================================================================
# Loads the stored benchmark history and, for each benchmarked function of the
# multivariate direct forecaster, optionally shows the latest rows and plots
# the recorded run times.
# Filters: leave `selected_date` / `selected_platform` as None to keep all runs.
display_df = True
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None

results_benchmark_all = joblib.load("./benchmark.joblib")
# Keep both the current class name and its pre-0.14 name
# (ForecasterAutoregMultiVariate) so results recorded with older skforecast
# versions are not dropped; the original list repeated the current name twice.
results_benchmark = results_benchmark_all.query(
    "forecaster_name in ['ForecasterDirectMultiVariate', 'ForecasterAutoregMultiVariate']"
)
results_benchmark = results_benchmark.query("regressor_name == 'LGBMRegressor'")
for function_name in results_benchmark['function_name'].unique():
    df = results_benchmark.query(f"function_name == '{function_name}'")
    if selected_date:
        # Compare calendar dates only, ignoring the time of day of each run.
        df = df[df['datetime'].dt.date == pd.to_datetime(selected_date).date()]
    if selected_platform:
        df = df[df['platform'] == selected_platform]
    if display_df:
        # Only the most recent rows, to keep the output short.
        display(df.tail(3))
    plot_benchmark_results(df.copy(), function_name, add_median=True, add_mean=True)

Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
372,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate__create_train_X_y,7c6a60e7bf98bddb8e7beb94e7776c31,,,2025-04-27 23:25:41.215516,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64
376,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate__create_train_X_y,7c6a60e7bf98bddb8e7beb94e7776c31,0.071376,0.013139,2025-04-27 23:26:22.174143,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
373,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate__create_train_X_y...,be7e3e65c7b0f0f469fa2453e0641212,,,2025-04-27 23:25:41.225178,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64
377,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate__create_train_X_y...,be7e3e65c7b0f0f469fa2453e0641212,0.065658,0.005512,2025-04-27 23:26:22.836576,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
374,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_fit,16dfbbc3a78ab63d1c3e363011a74168,,,2025-04-27 23:25:41.233077,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64
378,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_fit,16dfbbc3a78ab63d1c3e363011a74168,31.97374,2.835506,2025-04-27 23:29:02.711156,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
375,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_fit_series_no_exog,6444339f3f447d1afa03b5044099fa9a,,,2025-04-27 23:25:41.241039,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64
379,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_fit_series_no_exog,6444339f3f447d1afa03b5044099fa9a,33.387298,2.240629,2025-04-27 23:31:49.656099,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
380,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_predict,0ffb9da1d6c8abea84c52163dd47e452,,,2025-04-27 23:32:24.713236,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
381,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate_predict_interval_...,e779cc954984524d1af5f1c4d4685454,,,2025-04-27 23:32:24.719948,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


Unnamed: 0,forecaster_name,regressor_name,function_name,function_hash,run_time_avg,run_time_std_dev,datetime,python_version,skforecast_version,numpy_version,pandas_version,sklearn_version,lightgbm_version,platform,processor,cpu_count,memory_gb
382,ForecasterDirectMultiVariate,LGBMRegressor,ForecasterDirectMultiVariate__create_predict_i...,9bb10749b62da6821c5f38ada3494060,,,2025-04-27 23:32:24.727043,3.12.9,0.14.0,2.2.5,2.2.3,1.6.1,4.6.0,Linux-6.11.0-24-generic-x86_64-with-glibc2.39,x86_64,8,16.64


# ForecasterDirect

In [8]:
# Mock data for benchmarking
# ==========================================================
# A single Gaussian noise series on an hourly index, plus calendar features
# used as exogenous variables for training and for the prediction horizon.
len_series = 2000
rng = np.random.default_rng(321)

train_index = pd.date_range(start='2010-01-01', periods=len_series, freq='h')
y = pd.Series(
    data=rng.normal(loc=20, scale=5, size=len_series),
    index=train_index,
    name='y',
)

# The generator is re-created with the same seed, as in the original cell;
# it is not used again below.
rng = np.random.default_rng(321)

# Calendar features are derived from the index of whichever frame they are
# attached to, so the same recipe serves both exogenous frames.
calendar_features = {
    'day_of_week': lambda frame: frame.index.dayofweek,
    'week_of_year': lambda frame: frame.index.isocalendar().week.astype(int),
    'month': lambda frame: frame.index.month,
}

exog = pd.DataFrame(index=y.index).assign(**calendar_features)

# Exogenous values for the 100 hourly steps that follow the training data.
future_index = pd.date_range(
    start=exog.index.max() + pd.Timedelta(hours=1),
    periods=100,
    freq='h',
)
exog_prediction = pd.DataFrame(index=future_index).assign(**calendar_features)

In [9]:
# Run the ForecasterDirect benchmarks on the mock data above; the names of
# the benchmarked functions are printed as they run.
run_benchmark_ForecasterDirect(
    y=y,
    exog=exog,
    exog_prediction=exog_prediction,
)

Benchmarking function: ForecasterDirect__create_train_X_y
Benchmarking function: ForecasterDirect_fit




Benchmarking function: ForecasterDirect_predict
Benchmarking function: ForecasterDirect_predict_interval_conformal
Benchmarking function: ForecasterDirect__create_predict_inputs
Benchmarking function: ForecasterDirect_backtesting
Benchmarking function: ForecasterDirect_backtesting_conformal


In [12]:
# Plot results
# ==============================================================================
# Loads the stored benchmark history and plots, per benchmarked function, the
# run times recorded for the single-series direct forecaster (current and
# legacy class names) with the LGBMRegressor.
# Filters: leave `selected_date` / `selected_platform` as None to keep all runs.
display_df = False
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None

results_benchmark_all = joblib.load("./benchmark.joblib")
results_benchmark = (
    results_benchmark_all
    .query("forecaster_name in ['ForecasterDirect', 'ForecasterAutoregDirect']")
    .query("regressor_name == 'LGBMRegressor'")
)

# The requested day does not change between functions, so normalise it once.
target_day = pd.to_datetime(selected_date).date() if selected_date else None

for function_name in results_benchmark['function_name'].unique():
    df = results_benchmark.query(f"function_name == '{function_name}'")
    if selected_date:
        ran_on_target_day = df['datetime'].dt.date == target_day
        df = df[ran_on_target_day]
    if selected_platform:
        on_selected_platform = df['platform'] == selected_platform
        df = df[on_selected_platform]
    if display_df:
        display(df)
    plot_benchmark_results(df.copy(), function_name, add_median=True, add_mean=True)

# Summary of historical results

In [13]:
# Summary of the median run time per forecaster, skforecast version and
# benchmarked function, shown as one grouped bar chart per forecaster family.
# Filters: leave `selected_date` / `selected_platform` as None to keep all runs.
selected_date = None
# 'Linux-6.11.0-24-generic-x86_64-with-glibc2.39'
# 'Windows-10-10.0.19045-SP0'
selected_platform = None

results_benchmark_all = joblib.load("./benchmark.joblib")
# Split on the first underscore and keep the remainder, e.g.
# 'ForecasterDirect_fit' -> 'fit', so current and legacy class names share
# the same function labels.
results_benchmark_all['function_name'] = (
    results_benchmark_all['function_name'].str.split('_', n=1).str[1]
)
if selected_date:
    # Normalise to a datetime.date first: comparing the date objects against a
    # raw string such as '2025-04-27' never matches and silently empties the
    # data. This mirrors the filter used in the per-forecaster plot cells.
    selected_day = pd.to_datetime(selected_date).date()
    results_benchmark_all = results_benchmark_all[
        results_benchmark_all['datetime'].dt.date == selected_day
    ]
if selected_platform:
    results_benchmark_all = results_benchmark_all.query("platform == @selected_platform")
results_benchmark_all = (
    results_benchmark_all
    .groupby(['forecaster_name', 'skforecast_version', 'function_name'])['run_time_avg']
    .agg('median')
    .reset_index()
)
results_benchmark_all = results_benchmark_all.sort_values(by=['function_name'])

# Same version ordering in both charts.
version_order = sorted(results_benchmark_all['skforecast_version'].unique())

# Include the pre-0.14 class name (ForecasterAutoregMultiVariate) so older
# results are part of the comparison; the original list repeated the current
# name twice.
fig = px.bar(
    results_benchmark_all.query(
        "forecaster_name in ['ForecasterDirectMultiVariate', 'ForecasterAutoregMultiVariate']"
    ),
    x='function_name',
    y='run_time_avg',
    color='skforecast_version',
    barmode='group',
    title='MultiVariate Forecasters - Median Run Time by Function and skforecast Version',
    labels={'run_time_avg': 'Median Run Time (s)', 'function_name': 'Function'},
    category_orders={'skforecast_version': version_order}
)
fig.update_layout(xaxis_tickangle=-45, height=600)
fig.show()

fig = px.bar(
    results_benchmark_all.query("forecaster_name in ['ForecasterDirect', 'ForecasterAutoregDirect']"),
    x='function_name',
    y='run_time_avg',
    color='skforecast_version',
    barmode='group',
    title='Single-Series Forecasters - Median Run Time by Function and skforecast Version',
    labels={'run_time_avg': 'Median Run Time (s)', 'function_name': 'Function'},
    category_orders={'skforecast_version': version_order}
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
