## Benchmark Nixtla implementation single-process on M5

In [None]:
!pip install hierarchicalforecast
!pip install -U numba statsforecast datasetsforecast

In [12]:
import numpy as np
import pandas as pd

#obtain hierarchical data
from datasetsforecast.hierarchical import HierarchicalData

# compute base forecast no coherent
from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

#obtain hierarchical reconciliation methods and evaluation
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut

from timeit import default_timer as timer

# Base directory; paths are built by plain string concatenation, so the
# trailing slash is required.
ROOT = "/data/cmu/large-scale-hts-reconciliation/"
# Directory under which the m5_prediction_raw/ input files are read.
data_dir = f"{ROOT}notebooks/"

In [13]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that you produced yourself.
# The `with` blocks close the file handles as soon as each object is loaded.
with open(data_dir + 'm5_prediction_raw/tags.pkl', 'rb') as fh:
    tags = pickle.load(fh)
with open(data_dir + 'm5_prediction_raw/nixtla_s.pkl', 'rb') as fh:
    S_df = pickle.load(fh)

# Ground truth and base forecasts are stored as CSV.
Y_df = pd.read_csv(data_dir + 'm5_prediction_raw/nixtla_y_gt.csv')
Y_hat_df = pd.read_csv(data_dir + 'm5_prediction_raw/nixtla_y_hat.csv')

# Parse the timestamp column of the ground truth into datetimes.
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

In [14]:
# Index both frames by series id.
# Re-binding instead of inplace=True, plus the column guard, keeps this cell
# safe to re-run: set_index raises KeyError once 'unique_id' is no longer a column.
if 'unique_id' in Y_df.columns:
    Y_df = Y_df.set_index('unique_id')
if 'unique_id' in Y_hat_df.columns:
    Y_hat_df = Y_hat_df.set_index('unique_id')

In [15]:
# Preview the first ground-truth rows (index: unique_id; columns: ds, y).
Y_df.head()

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_001_CA_1,2021-01-17,0
HOBBIES_1_001_CA_1,2021-01-18,0
HOBBIES_1_001_CA_1,2021-01-19,0
HOBBIES_1_001_CA_1,2021-01-20,0


In [16]:
# Top-left 5x5 corner of S_df; both its rows and its columns are labelled with series ids.
S_df.iloc[:5, :5]

Unnamed: 0,HOBBIES_1_001_CA_1,HOBBIES_1_002_CA_1,HOBBIES_1_003_CA_1,HOBBIES_1_004_CA_1,HOBBIES_1_005_CA_1
HOBBIES,1,1,1,1,1
HOUSEHOLD,0,0,0,0,0
FOODS,0,0,0,0,0
HOBBIES_1,1,1,1,1,1
HOBBIES_2,0,0,0,0,0


In [17]:
# Mapping from hierarchy level name to the array of series ids at that level.
tags

{'Cat': array(['HOBBIES', 'HOUSEHOLD', 'FOODS'], dtype=object),
 'Cat/Dept': array(['HOBBIES_1', 'HOBBIES_2', 'HOUSEHOLD_1', 'HOUSEHOLD_2', 'FOODS_1',
        'FOODS_2', 'FOODS_3'], dtype=object),
 'Cat/Dept/Item': array(['HOBBIES_1_001', 'HOBBIES_1_002', 'HOBBIES_1_003', ...,
        'FOODS_3_825', 'FOODS_3_826', 'FOODS_3_827'], dtype=object),
 'Cat/Dept/Item/State': array(['HOBBIES_1_001_CA_1', 'HOBBIES_1_002_CA_1', 'HOBBIES_1_003_CA_1',
        ..., 'FOODS_3_825_WI_3', 'FOODS_3_826_WI_3', 'FOODS_3_827_WI_3'],
       dtype=object)}

In [18]:
# Convert the forecast timestamps to datetimes, as was done for Y_df, then preview.
Y_hat_df = Y_hat_df.assign(ds=pd.to_datetime(Y_hat_df['ds']))
Y_hat_df.head()

Unnamed: 0_level_0,ds,prophet
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144
HOBBIES_1_001_CA_1,2021-01-17,0.855605
HOBBIES_1_001_CA_1,2021-01-18,0.713611
HOBBIES_1_001_CA_1,2021-01-19,1.213304
HOBBIES_1_001_CA_1,2021-01-20,1.142696


The following cell makes the base forecasts coherent using the `HierarchicalReconciliation` class. The candidate reconciliation methods are listed below; note that only `BottomUp` is enabled in the benchmark cell (the other two are commented out):
- `BottomUp`: Reconciles by simply summing the bottom-level forecasts up to the upper levels.
- `TopDown`: Constrains the base-level predictions to the top-most aggregate-level series, then distributes that forecast to the disaggregated series using proportions.
- `MiddleOut`: Anchors the base predictions at a middle level of the hierarchy.

In [19]:
# Benchmark the reconciliation for a single forecast date.
date = '2021-01-16'

# Keep only the rows of the forecasts and of the ground truth for that date.
Y_hat = Y_hat_df.loc[Y_hat_df['ds'] == date]
Y = Y_df.loc[Y_df['ds'] == date]

# Only BottomUp is active; the other reconcilers are disabled.
# NOTE(review): the commented-out middle_level 'Country/Purpose/State' is not one
# of the keys of `tags` ('Cat', 'Cat/Dept', ...), so it looks like a leftover from
# another dataset -- pick a valid level before re-enabling MiddleOut.
reconcilers = [
    BottomUp(),
    #TopDown(method='forecast_proportions'),
    #MiddleOut(middle_level='Country/Purpose/State', 
    #          top_down_method='forecast_proportions')
]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# Time only the reconcile() call; the printed number is the elapsed time in seconds.
start = timer()
Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat, Y_df=Y, 
                          S=S_df, tags=tags)
end = timer()
print(end - start)

146.78287241235375


In [20]:
# Full ground-truth frame (all dates); the notebook truncates the display.
Y_df

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_001_CA_1,2021-01-17,0
HOBBIES_1_001_CA_1,2021-01-18,0
HOBBIES_1_001_CA_1,2021-01-19,0
HOBBIES_1_001_CA_1,2021-01-20,0
...,...,...
FOODS,2021-04-21,24790
FOODS,2021-04-22,24737
FOODS,2021-04-23,28136
FOODS,2021-04-24,33599


In [21]:
# Reconciliation result: the base 'prophet' column next to the reconciled 'prophet/BottomUp' column.
Y_rec_df

Unnamed: 0_level_0,ds,prophet,prophet/BottomUp
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144,0.641144
HOBBIES_1_001,2021-01-16,4.989419,4.911092
HOBBIES_1_002,2021-01-16,2.136553,2.076482
HOBBIES_1_002_CA_1,2021-01-16,0.226241,0.226241
HOBBIES_1_003_CA_1,2021-01-16,0.407938,0.407938
...,...,...,...
FOODS_3_823_WI_3,2021-01-16,0.464440,0.464440
FOODS_3_824_WI_3,2021-01-16,0.398286,0.398286
FOODS_3_825_WI_3,2021-01-16,1.145024,1.145024
FOODS_3_826_WI_3,2021-01-16,1.467588,1.467588


In [22]:
# Full S_df; the display is truncated in both rows and columns.
S_df

Unnamed: 0,HOBBIES_1_001_CA_1,HOBBIES_1_002_CA_1,HOBBIES_1_003_CA_1,HOBBIES_1_004_CA_1,HOBBIES_1_005_CA_1,HOBBIES_1_006_CA_1,HOBBIES_1_007_CA_1,HOBBIES_1_008_CA_1,HOBBIES_1_009_CA_1,HOBBIES_1_010_CA_1,...,FOODS_3_818_WI_3,FOODS_3_819_WI_3,FOODS_3_820_WI_3,FOODS_3_821_WI_3,FOODS_3_822_WI_3,FOODS_3_823_WI_3,FOODS_3_824_WI_3,FOODS_3_825_WI_3,FOODS_3_826_WI_3,FOODS_3_827_WI_3
HOBBIES,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
HOUSEHOLD,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FOODS,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
HOBBIES_1,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
HOBBIES_2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FOODS_3_823_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
FOODS_3_824_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
FOODS_3_825_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
FOODS_3_826_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [23]:
def smape(a, f):
    """Symmetric mean absolute percentage error (sMAPE), in percent.

    Every element contributes ``2 * |f - a| / (|a| + |f| + 1e-9) * 100`` and the
    result is the mean of those terms over *all* elements, so the score is
    correctly normalised for inputs of any dimensionality.

    Args:
        a: Actual values (array-like).
        f: Forecast values (array-like), broadcastable against ``a``.

    Returns:
        The sMAPE in percent.

    Raises:
        ZeroDivisionError: If the inputs contain no elements.
    """
    a = np.asarray(a)
    f = np.asarray(f)
    # The 1e-9 keeps the ratio finite when actual and forecast are both zero.
    pct_err = 2 * np.abs(f - a) / (np.abs(a) + np.abs(f) + 1e-9) * 100
    # Normalise by the element count rather than len(a): len() only counts rows,
    # which would inflate the score by the number of columns for 2-D input.
    return 1 / pct_err.size * np.sum(pct_err)

# Score the columns of Y_rec_df against the single-date ground truth Y using smape.
# NOTE(review): with benchmark='prophet' the metric is reported as 'smape-scaled' and
# the 'prophet' column is 1.0 at every level, so values appear to be relative to
# the base forecast -- confirm against the library documentation.
evaluator = HierarchicalEvaluation(evaluators=[smape])
evaluator.evaluate(Y_hat_df=Y_rec_df, Y_test_df=Y, 
                   tags=tags, benchmark='prophet')

Unnamed: 0_level_0,Unnamed: 1_level_0,prophet,prophet/BottomUp
level,metric,Unnamed: 2_level_1,Unnamed: 3_level_1
Overall,smape-scaled,1.0,0.999735
Cat,smape-scaled,1.0,1.018164
Cat/Dept,smape-scaled,1.0,1.06845
Cat/Dept/Item,smape-scaled,1.0,0.993646
Cat/Dept/Item/State,smape-scaled,1.0,1.0


In [24]:
# Ground truth restricted to the benchmark date (one row per series).
Y

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_002_CA_1,2021-01-16,0
HOBBIES_1_003_CA_1,2021-01-16,1
HOBBIES_1_004_CA_1,2021-01-16,2
HOBBIES_1_005_CA_1,2021-01-16,5
...,...,...
FOODS_2,2021-01-16,6932
FOODS_3,2021-01-16,23751
HOBBIES,2021-01-16,4965
HOUSEHOLD,2021-01-16,13064


In [25]:
# Reconciled forecasts for the same date, presumably shown for comparison with Y.
Y_rec_df

Unnamed: 0_level_0,ds,prophet,prophet/BottomUp
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144,0.641144
HOBBIES_1_001,2021-01-16,4.989419,4.911092
HOBBIES_1_002,2021-01-16,2.136553,2.076482
HOBBIES_1_002_CA_1,2021-01-16,0.226241,0.226241
HOBBIES_1_003_CA_1,2021-01-16,0.407938,0.407938
...,...,...,...
FOODS_3_823_WI_3,2021-01-16,0.464440,0.464440
FOODS_3_824_WI_3,2021-01-16,0.398286,0.398286
FOODS_3_825_WI_3,2021-01-16,1.145024,1.145024
FOODS_3_826_WI_3,2021-01-16,1.467588,1.467588
