# Benchmarking - comparing estimator performance

This notebook shows how to compare the performance of estimators using sktime objects and interfaces.

In [1]:
from sktime.benchmarking.forecasting import ForecastingBenchmark
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanSquaredPercentageError

In [2]:
# Benchmark object on which the estimators and tasks below are registered
benchmark = ForecastingBenchmark()

### Add estimators to compare

In [3]:
# Register two NaiveForecaster configurations (both with sp=12) that differ
# only in their strategy; each gets its own estimator id.
naive_variants = {
    "NaiveForecaster-mean-v1": "mean",
    "NaiveForecaster-last-v1": "last",
}
for estimator_id, strategy in naive_variants.items():
    benchmark.add_estimator(
        estimator_entrypoint=NaiveForecaster,
        estimator_kwargs={"strategy": strategy, "sp": 12},
        estimator_id=estimator_id,
    )

### Add prediction/validation tasks on which to compare estimator performance

In [4]:
# Metrics evaluated on every cross-validation fold
scorers = [MeanSquaredPercentageError()]

# Functions that load the datasets to benchmark on
dataset_loaders = [load_airline]

# Expanding-window cross-validation configuration shared by every task
cv_splitter = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=12)

# Register one task per dataset loader; extend `dataset_loaders` to add more
for dataset_loader in dataset_loaders:
    benchmark.add_task(dataset_loader, cv_splitter, scorers)

### Run task-estimator combinations and store results

Note that `run` won't recompute results that already exist, so adding
a new estimator and running again will only run the tasks for that
new estimator.

In [5]:
# Run every task-estimator combination; the path is where results are stored
results_path = "./forecasting_results.csv"
results_df = benchmark.run(results_path)
results_df

Unnamed: 0,validation_id,model_id,runtime_secs,MeanSquaredPercentageError_fold_0_test,MeanSquaredPercentageError_fold_1_test,MeanSquaredPercentageError_fold_2_test,MeanSquaredPercentageError_fold_3_test,MeanSquaredPercentageError_fold_4_test,MeanSquaredPercentageError_fold_5_test,MeanSquaredPercentageError_fold_6_test,MeanSquaredPercentageError_fold_7_test,MeanSquaredPercentageError_fold_8_test,MeanSquaredPercentageError_fold_9_test,MeanSquaredPercentageError_mean,MeanSquaredPercentageError_std
0,[dataset=load_airline]_[cv_splitter=ExpandingW...,NaiveForecaster-last-v1,0.121201,0.024532,0.020831,0.001213,0.01495,0.031067,0.008373,0.007972,9e-06,0.028191,0.003906,0.014104,0.011451
1,[dataset=load_airline]_[cv_splitter=ExpandingW...,NaiveForecaster-mean-v1,0.126832,0.049681,0.0737,0.05352,0.081063,0.138163,0.145125,0.154337,0.123298,0.185644,0.184654,0.118918,0.051265
