## Benchmark Nixtla implementation single-process

In [1]:
!pip install hierarchicalforecast
!pip install -U numba statsforecast datasetsforecast

In [11]:
import numpy as np
import pandas as pd

#obtain hierarchical data
from datasetsforecast.hierarchical import HierarchicalData

# compute base forecasts (not yet coherent)
from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

#obtain hierarchical reconciliation methods and evaluation
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut

from timeit import default_timer as timer

In this example we will use the `TourismSmall` dataset. The following cell loads the time series for every level of the hierarchy (`Y_df`), the summing matrix `S` (`S_df`), which recovers the full dataset from the bottom-level series, and `tags`, which maps each hierarchy level to the identifiers of the series at that level.

In [3]:
# Load the TourismSmall hierarchy into ./data: the long-format series table,
# the summing matrix and the level -> series-id mapping.
Y_df, S_df, tags = HierarchicalData.load('./data', 'TourismSmall')
# Parse the timestamp column so 'ds' holds datetimes.
Y_df = Y_df.assign(ds=pd.to_datetime(Y_df['ds']))

100%|████████████████████████████████████████████████████████████████████| 1.30M/1.30M [00:00<00:00, 2.86MiB/s]
INFO:datasetsforecast.utils:Successfully downloaded datasets.zip, 1297279, bytes.
INFO:datasetsforecast.utils:Decompressing zip file...
INFO:datasetsforecast.utils:Successfully decompressed data/hierarchical/datasets.zip


In [4]:
# Preview the first rows of the long-format series table (unique_id, ds, y).
Y_df.head()

Unnamed: 0,unique_id,ds,y
0,total,1998-03-31,84503
1,total,1998-06-30,65312
2,total,1998-09-30,72753
3,total,1998-12-31,70880
4,total,1999-03-31,86893


In [5]:
# Preview the top-left 5x5 corner of the summing matrix
# (rows: series in the hierarchy, columns: bottom-level series).
S_df.iloc[:5, :5]

Unnamed: 0,nsw-hol-city,nsw-hol-noncity,vic-hol-city,vic-hol-noncity,qld-hol-city
total,1.0,1.0,1.0,1.0,1.0
hol,1.0,1.0,1.0,1.0,1.0
vfr,0.0,0.0,0.0,0.0,0.0
bus,0.0,0.0,0.0,0.0,0.0
oth,0.0,0.0,0.0,0.0,0.0


In [6]:
# Show the mapping from each hierarchy level to the ids of the series in it.
tags

{'Country': array(['total'], dtype=object),
 'Country/Purpose': array(['hol', 'vfr', 'bus', 'oth'], dtype=object),
 'Country/Purpose/State': array(['nsw-hol', 'vic-hol', 'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol',
        'nt-hol', 'nsw-vfr', 'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr',
        'tas-vfr', 'nt-vfr', 'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus',
        'wa-bus', 'tas-bus', 'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth',
        'sa-oth', 'wa-oth', 'tas-oth', 'nt-oth'], dtype=object),
 'Country/Purpose/State/CityNonCity': array(['nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',
        'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity',
        'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity',
        'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity',
        'nsw-vfr-city', 'nsw-vfr-noncity', 'vic-vfr-city',
        'vic-vfr-noncity', 'qld-vfr-city', 'qld-vfr-noncity',
        'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city', 'wa-vfr-noncity',
     

We split the dataframe into train and test sets, holding out the last 12 observations of each series for testing.

In [7]:
# Test set: the last 12 observations of every series.
Y_test_df = Y_df.groupby('unique_id').tail(12)
# Training set: every row of Y_df whose index label is not in the test set.
Y_train_df = Y_df.loc[~Y_df.index.isin(Y_test_df.index)]

In [8]:
# Re-index both splits by series id. Note that this cell is not re-runnable
# as-is: after the first run 'unique_id' is no longer a column.
Y_test_df, Y_train_df = (
    split.set_index('unique_id') for split in (Y_test_df, Y_train_df)
)

The following cell computes the *base forecast* for each time series using the `AutoARIMA` and `Naive` models. Observe that `Y_hat_df` contains the forecasts, but they are not yet coherent.

In [9]:
%%capture
fcst = StatsForecast(
    df=Y_train_df, 
    models=[AutoARIMA(season_length=12), Naive()], 
    freq='M', 
    n_jobs=-1
)
Y_hat_df = fcst.forecast(h=12)

In [13]:
# Inspect the base forecasts: one column per model (AutoARIMA, Naive).
Y_hat_df

Unnamed: 0_level_0,ds,AutoARIMA,Naive
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bus,2004-01-31,11017.998047,11020.0
bus,2004-02-29,11017.998047,11020.0
bus,2004-03-31,11017.997070,11020.0
bus,2004-04-30,11017.997070,11020.0
bus,2004-05-31,11017.997070,11020.0
...,...,...,...
wa-vfr-noncity,2004-08-31,1012.761719,1177.0
wa-vfr-noncity,2004-09-30,1012.761719,1177.0
wa-vfr-noncity,2004-10-31,1012.761719,1177.0
wa-vfr-noncity,2004-11-30,1012.761719,1177.0


In [15]:
# Inspect the training split, indexed by unique_id.
Y_train_df

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
total,1998-03-31,84503
total,1998-06-30,65312
total,1998-09-30,72753
total,1998-12-31,70880
total,1999-03-31,86893
...,...,...
nt-oth-noncity,2002-12-31,248
nt-oth-noncity,2003-03-31,13
nt-oth-noncity,2003-06-30,104
nt-oth-noncity,2003-09-30,345


The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The methods available for making the forecasts coherent are listed below; only `BottomUp` is enabled in the timed cell, while the other two are commented out:
- `BottomUp`: Reconciles by simply summing the bottom-level forecasts up to the upper levels.
- `TopDown`: Constrains the base-level predictions to the top-most aggregate-level series and then distributes it to the disaggregate series through the use of proportions.
- `MiddleOut`: Anchors the base predictions in a middle level.

In [12]:
# Only BottomUp is benchmarked. Disabled alternatives, kept for reference:
#   TopDown(method='forecast_proportions')
#   MiddleOut(middle_level='Country/Purpose/State',
#             top_down_method='forecast_proportions')
reconcilers = [BottomUp()]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# Time only the reconcile call; setup above is excluded from the measurement.
start = timer()
Y_rec_df = hrec.reconcile(
    Y_hat_df=Y_hat_df,
    Y_df=Y_train_df,
    S=S_df,
    tags=tags,
)
end = timer()
# Elapsed time in seconds.
print(end - start)

0.02190937499995016


In [16]:
# Inspect the full dataset (both the training and the held-out periods).
Y_df

Unnamed: 0,unique_id,ds,y
0,total,1998-03-31,84503
1,total,1998-06-30,65312
2,total,1998-09-30,72753
3,total,1998-12-31,70880
4,total,1999-03-31,86893
...,...,...,...
3199,nt-oth-noncity,2005-12-31,59
3200,nt-oth-noncity,2006-03-31,25
3201,nt-oth-noncity,2006-06-30,52
3202,nt-oth-noncity,2006-09-30,72


In [17]:
# Base forecasts again, for comparison with the reconciled forecasts.
Y_hat_df

Unnamed: 0_level_0,ds,AutoARIMA,Naive
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bus,2004-01-31,11017.998047,11020.0
bus,2004-02-29,11017.998047,11020.0
bus,2004-03-31,11017.997070,11020.0
bus,2004-04-30,11017.997070,11020.0
bus,2004-05-31,11017.997070,11020.0
...,...,...,...
wa-vfr-noncity,2004-08-31,1012.761719,1177.0
wa-vfr-noncity,2004-09-30,1012.761719,1177.0
wa-vfr-noncity,2004-10-31,1012.761719,1177.0
wa-vfr-noncity,2004-11-30,1012.761719,1177.0


In [18]:
# Reconciled output: the base model columns plus one '<model>/BottomUp'
# column per base model.
Y_rec_df

Unnamed: 0_level_0,ds,AutoARIMA,Naive,AutoARIMA/BottomUp,Naive/BottomUp
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bus,2004-01-31,11017.998047,11020.0,11038.705078,11020.0
bus,2004-02-29,11017.998047,11020.0,11028.713867,11020.0
bus,2004-03-31,11017.997070,11020.0,11026.210938,11020.0
bus,2004-04-30,11017.997070,11020.0,11018.519531,11020.0
bus,2004-05-31,11017.997070,11020.0,11012.330078,11020.0
...,...,...,...,...,...
wa-vfr-noncity,2004-08-31,1012.761719,1177.0,1012.761719,1177.0
wa-vfr-noncity,2004-09-30,1012.761719,1177.0,1012.761719,1177.0
wa-vfr-noncity,2004-10-31,1012.761719,1177.0,1012.761719,1177.0
wa-vfr-noncity,2004-11-30,1012.761719,1177.0,1012.761719,1177.0
