# Compare autoarima from Skforecast, pmdarima and statsforecast

In [7]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)
import skforecast

print(skforecast.__version__)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/home/joaquin/Documents/GitHub/skforecast
0.20.0


In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from skforecast.stats import Arima
from statsforecast.models import AutoARIMA
from statsforecast.arima import arima_string
from statsforecast import StatsForecast
from skforecast.datasets import fetch_dataset
from skforecast.plot import set_dark_theme
import warnings
import timeit

In [9]:
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
passengers_data = pd.read_csv(url)
passengers_data = passengers_data['Passengers']

from statsmodels.datasets import sunspots
sunspots_data = sunspots.load_pandas().data['SUNACTIVITY']

from statsmodels.datasets import co2
co2_data = co2.load_pandas().data['co2'].resample('M').mean().dropna()

from statsmodels.datasets import macrodata
macrodata_data = macrodata.load_pandas().data['realgdp'].pct_change().dropna()

from statsmodels.datasets import nile
nile_data = nile.load_pandas().data['volume']


  co2_data = co2.load_pandas().data['co2'].resample('M').mean().dropna()


In [None]:
data = fetch_dataset(name='fuel_consumption', raw=True, verbose=False)
data = data[['Fecha', 'Gasolinas']]
data = data.rename(columns={'Fecha':'date', 'Gasolinas':'litters'})
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data = data.set_index('date')
data = data.loc[:'1990-01-01 00:00:00']
data = data.asfreq('MS')
data = data['litters']

In [11]:
datasets = [data, passengers_data, sunspots_data, co2_data, macrodata_data, nile_data]
# Seasonal period (m) for each dataset
seasonal_periods = [12, 12, 1, 12, 4, 1]

## pmdarima

In [None]:
warnings.filterwarnings("ignore", message=".*force_all_finite.*", category=FutureWarning)

i = 0
results = []
for data, m in zip(datasets, seasonal_periods):
    print(f"Dataset {i+1} of {len(datasets)}")
    i += 1
    # pmdarima
    try:
        print("    pmdarima...")
        start = timeit.default_timer()
        res = auto_arima(
                    y                 = data,
                    start_p           = 0,
                    start_q           = 0,
                    max_p             = 3,
                    max_q             = 3,
                    max_P             = 2,
                    max_D             = 1,
                    max_Q             = 2,
                    seasonal          = True,
                    test              = 'kpss',
                    m                 = m,   # periodicidad de la estacionalidad
                    d                 = None, # El algoritmo determina 'd'
                    D                 = None, # El algoritmo determina 'D'
                    trace             = False,
                    error_action      = 'ignore',
                    suppress_warnings = True,
                    stepwise          = True
                )
        end = timeit.default_timer()
        time_pmdarima = end - start
        best_model = f"{res.order}, {res.seasonal_order[:3]}[{res.seasonal_order[3]}]"
    except Exception as e:
        time_pmdarima = np.nan
        best_model = "(nan, nan, nan),(nan, nan, nan)[nan]"
    results.append(['pmdarima', best_model, time_pmdarima])

    # skforecast
    print("    skforecast...")
    try:
        start = timeit.default_timer()
        res = Arima(
                    order             = None, # El algoritmo determina 'order'
                    seasonal_order    = None, # El algoritmo determina 'seasonal_order'
                    start_p           = 0,
                    start_q           = 0,
                    max_p             = 3,
                    max_q             = 3,
                    max_P             = 2,
                    max_Q             = 2,
                    max_order         = 5,
                    max_d             = 2,
                    max_D             = 1,
                    seasonal          = True,
                    test              = 'kpss',
                    m                 = m,   # periodicidad de la estacionalidad
                    trace             = False,
                    stepwise          = True
                )
        res.fit(y=data, suppress_warnings= True)
        end = timeit.default_timer()
        time_skforecast = end - start
        best_model = f"{res.best_params_['order']}, {res.best_params_['seasonal_order']}[{res.best_params_['m']}]"
    except Exception as e:
        time_skforecast = np.nan
        best_model = "(nan, nan, nan),(nan, nan, nan)[nan]"
    results.append(['skforecast', best_model, time_skforecast])

    # statsforecast
    print("    statsforecast...")
    try:
        start = timeit.default_timer()
        res = AutoARIMA(
            start_p=0,
            start_q=0,
            max_p=3,
            max_q=3,
            max_P=2,
            max_D=1,
            max_Q=2,
            test='kpss',
            d=None,               # choose differencing automatically
            D=None,               # choose seasonal differencing automatically
            seasonal=True,        # include seasonal part
            season_length=m,     # periodicidad de la estacionalidad
            stepwise=True,        # stepwise search
            trace=False,           # show progress
        )
        res.fit(y=data)
        end = timeit.default_timer()
        time_statsforecast = end - start
        best_model = arima_string(res.model_)[5:]
    except Exception as e:
        time_statsforecast = np.nan
        best_model = "(nan, nan, nan),(nan, nan, nan)[nan]"
    results.append(['statsforecast', best_model, time_statsforecast])

Dataset 1 of 6
    pmdarima...
    skforecast...
    statsforecast...


  return np.all(x[0] == x)


Dataset 2 of 6
    pmdarima...
    skforecast...
    statsforecast...
Dataset 3 of 6
    pmdarima...
    skforecast...
    statsforecast...
Dataset 4 of 6
    pmdarima...
    skforecast...
    statsforecast...


  return np.all(x[0] == x)


Dataset 5 of 6
    pmdarima...
    skforecast...
    statsforecast...


  $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
  $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
  $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
  \end{cases}


KeyError: 0

In [22]:
# Table of results
# ==============================================================================
results_df = pd.DataFrame(
    results,
    columns=["library", "Best Model", "Time"],
    #index=[f"Dataset {i+1}" for i in range(6) for _ in range(3)]
)
results_df['Best Model'] = results_df['Best Model'].str.replace(', ', ',')
results_df

Unnamed: 0,library,Best Model,Time
0,pmdarima,"(1,1,1),(0,1,1)[12]",18.81742
1,skforecast,"(1,1,1),(0,1,1)[12]",5.585826
2,statsforecast,"(1,1,1)(0,1,1)[12]",1.872145
3,pmdarima,"(0,1,1),(2,1,0)[12]",10.097812
4,skforecast,"(2,1,1),(0,1,0)[12]",2.49657
5,statsforecast,"(1,1,0)(0,1,0)[12]",0.359847
6,pmdarima,"(2,1,3),(0,0,0)[0]",6.632005
7,skforecast,"(3,1,0),(0,0,0)[1]",0.278504
8,statsforecast,"(2,1,3)",2.575222
9,pmdarima,"(3,1,1),(2,0,1)[12]",145.435878
