## Benchmark Nixtla implementation single-process on M5

In [None]:
!pip install hierarchicalforecast
!pip install -U numba statsforecast datasetsforecast

In [1]:
import numpy as np
import pandas as pd

# obtain hierarchical data
from datasetsforecast.hierarchical import HierarchicalData

# compute base forecasts (not yet coherent)
from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

# obtain hierarchical reconciliation methods and evaluation
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut

from timeit import default_timer as timer

# Input location: files are read from <ROOT>notebooks/<DATA_ROOT>/...
# ROOT keeps its trailing slash because paths are built by plain string concatenation.
ROOT = "/data/cmu/large-scale-hts-reconciliation/"
DATA_ROOT = "m5_hobbies_nixtla"
data_dir = f"{ROOT}notebooks/"

  from tqdm.autonotebook import tqdm


In [2]:
import pickle

# Folder holding this benchmark's precomputed inputs.
dataset_dir = data_dir + DATA_ROOT

# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
# The context managers close each handle deterministically (the bare open() calls
# previously left the files open until garbage collection).
with open(dataset_dir + "/tags.pkl", "rb") as tags_file:
    # dict: hierarchy level name -> array of series ids at that level
    tags = pickle.load(tags_file)
with open(dataset_dir + "/nixtla_s.pkl", "rb") as s_file:
    # aggregation matrix: rows are hierarchy nodes, columns are bottom-level series
    S_df = pickle.load(s_file)

# Observed values (column `y`) and base forecasts (column `prophet`).
Y_df = pd.read_csv(dataset_dir + "/nixtla_y_gt.csv")
Y_hat_df = pd.read_csv(dataset_dir + "/nixtla_y_hat.csv")

# Y_df, S_df, tags = HierarchicalData.load('./data', 'TourismSmall')
Y_df["ds"] = pd.to_datetime(Y_df["ds"])

In [3]:
# Index both frames by series id.
# Rebinding (instead of inplace=True) together with the column check keeps this cell
# re-runnable: a second run used to raise KeyError because "unique_id" had already
# been moved from the columns into the index.
if "unique_id" in Y_df.columns:
    Y_df = Y_df.set_index("unique_id")
if "unique_id" in Y_hat_df.columns:
    Y_hat_df = Y_hat_df.set_index("unique_id")

In [4]:
# Preview the first rows of the observed series (index: unique_id; columns: ds, y).
Y_df.head()

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_001_CA_1,2021-01-17,0
HOBBIES_1_001_CA_1,2021-01-18,0
HOBBIES_1_001_CA_1,2021-01-19,0
HOBBIES_1_001_CA_1,2021-01-20,0


In [5]:
# Top-left 5x5 corner of S_df: rows are hierarchy nodes, columns are bottom-level
# series, and a 1 marks that the column's series belongs to the row's node.
S_df.iloc[:5, :5]

Unnamed: 0,HOBBIES_1_001_CA_1,HOBBIES_1_002_CA_1,HOBBIES_1_003_CA_1,HOBBIES_1_004_CA_1,HOBBIES_1_005_CA_1
HOBBIES,1,1,1,1,1
HOBBIES_1,1,1,1,1,1
HOBBIES_2,0,0,0,0,0
HOBBIES_1_001,1,0,0,0,0
HOBBIES_1_002,0,1,0,0,0


In [6]:
# Show the mapping from hierarchy level name (e.g. 'Cat', 'Cat/Dept') to the array
# of series ids at that level. The repr is long for the item-level entries.
tags

{'Cat': array(['HOBBIES'], dtype=object),
 'Cat/Dept': array(['HOBBIES_1', 'HOBBIES_2'], dtype=object),
 'Cat/Dept/Item': array(['HOBBIES_1_001', 'HOBBIES_1_002', 'HOBBIES_1_003', 'HOBBIES_1_004',
        'HOBBIES_1_005', 'HOBBIES_1_006', 'HOBBIES_1_007', 'HOBBIES_1_008',
        'HOBBIES_1_009', 'HOBBIES_1_010', 'HOBBIES_1_011', 'HOBBIES_1_012',
        'HOBBIES_1_013', 'HOBBIES_1_014', 'HOBBIES_1_015', 'HOBBIES_1_016',
        'HOBBIES_1_017', 'HOBBIES_1_018', 'HOBBIES_1_019', 'HOBBIES_1_020',
        'HOBBIES_1_021', 'HOBBIES_1_022', 'HOBBIES_1_023', 'HOBBIES_1_024',
        'HOBBIES_1_025', 'HOBBIES_1_026', 'HOBBIES_1_027', 'HOBBIES_1_028',
        'HOBBIES_1_029', 'HOBBIES_1_030', 'HOBBIES_1_031', 'HOBBIES_1_032',
        'HOBBIES_1_033', 'HOBBIES_1_034', 'HOBBIES_1_035', 'HOBBIES_1_036',
        'HOBBIES_1_037', 'HOBBIES_1_038', 'HOBBIES_1_039', 'HOBBIES_1_040',
        'HOBBIES_1_041', 'HOBBIES_1_042', 'HOBBIES_1_043', 'HOBBIES_1_044',
        'HOBBIES_1_045', 'HOBBIES_1_046', '

In [7]:
# Parse the forecast timestamps to datetimes, as was already done for Y_df["ds"],
# then preview the base forecasts (column `prophet`).
Y_hat_df["ds"] = pd.to_datetime(Y_hat_df["ds"])
Y_hat_df.head()

Unnamed: 0_level_0,ds,prophet
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144
HOBBIES_1_001_CA_1,2021-01-17,0.855605
HOBBIES_1_001_CA_1,2021-01-18,0.713611
HOBBIES_1_001_CA_1,2021-01-19,1.213304
HOBBIES_1_001_CA_1,2021-01-20,1.142696


The following cell makes the previous forecasts coherent using the `HierarchicalReconciliation` class. The methods available to make the forecasts coherent are listed below (only `BottomUp` is enabled in the cell; the other two are commented out):
- `BottomUp`: Reconciles by simply summing the bottom-level forecasts up to the upper levels.
- `TopDown`: Constrains the base-level predictions to the top-most aggregate-level series and then distributes it to the disaggregate series through the use of proportions.
- `MiddleOut`: Anchors the base predictions in a middle level.

In [13]:
# Benchmark cell: time one in-process reconciliation of the `prophet` base forecasts.
# NOTE(review): `date` is referenced only by the commented-out filters below, so it is
# currently unused and the full frames are reconciled. This cell's execution count is
# out of order with the cells after it — re-run the notebook top to bottom.
date = "2021-01-16"

Y_hat = Y_hat_df #.loc[Y_hat_df["ds"] == date]
Y = Y_df #.loc[Y_df["ds"] == date]

# Only BottomUp is active; the TopDown / MiddleOut variants are left commented out.
reconcilers = [
    BottomUp(),
    # TopDown(method='forecast_proportions'),
    # MiddleOut(middle_level='Cat/Dept/Item',
    #          top_down_method='forecast_proportions')
]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# Elapsed seconds for reconcile() alone; constructing the reconciler is not timed.
start = timer()
Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat, Y_df=Y, S=S_df, tags=tags)
end = timer()
print(end - start)

1.5916450209915638


In [9]:
# Display the observed-values frame; the rendered output elides the middle rows.
Y_df

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_001_CA_1,2021-01-17,0
HOBBIES_1_001_CA_1,2021-01-18,0
HOBBIES_1_001_CA_1,2021-01-19,0
HOBBIES_1_001_CA_1,2021-01-20,0
...,...,...
HOBBIES,2021-04-21,3740
HOBBIES,2021-04-22,3475
HOBBIES,2021-04-23,4143
HOBBIES,2021-04-24,5333


In [10]:
# Display the reconciled forecasts: the base `prophet` column alongside the
# `prophet/BottomUp` column added by reconcile().
Y_rec_df

Unnamed: 0_level_0,ds,prophet,prophet/BottomUp
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144,0.641144
HOBBIES_1_001,2021-01-16,4.989419,4.911092
HOBBIES_1_002,2021-01-16,2.136553,2.076482
HOBBIES_1_002_CA_1,2021-01-16,0.226241,0.226241
HOBBIES_1_003_CA_1,2021-01-16,0.407938,0.407938
...,...,...,...
HOBBIES_2_145_WI_3,2021-01-16,0.461117,0.461117
HOBBIES_2_146_WI_3,2021-01-16,0.002087,0.002087
HOBBIES_2_147_WI_3,2021-01-16,0.247244,0.247244
HOBBIES_2_148_WI_3,2021-01-16,0.104610,0.104610


In [11]:
# Display the full aggregation matrix (rows: hierarchy nodes, columns: bottom-level
# series); the rendered output elides the middle rows and columns.
S_df

Unnamed: 0,HOBBIES_1_001_CA_1,HOBBIES_1_002_CA_1,HOBBIES_1_003_CA_1,HOBBIES_1_004_CA_1,HOBBIES_1_005_CA_1,HOBBIES_1_006_CA_1,HOBBIES_1_007_CA_1,HOBBIES_1_008_CA_1,HOBBIES_1_009_CA_1,HOBBIES_1_010_CA_1,...,HOBBIES_2_140_WI_3,HOBBIES_2_141_WI_3,HOBBIES_2_142_WI_3,HOBBIES_2_143_WI_3,HOBBIES_2_144_WI_3,HOBBIES_2_145_WI_3,HOBBIES_2_146_WI_3,HOBBIES_2_147_WI_3,HOBBIES_2_148_WI_3,HOBBIES_2_149_WI_3
HOBBIES,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
HOBBIES_1,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
HOBBIES_2,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
HOBBIES_1_001,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
HOBBIES_1_002,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HOBBIES_2_145_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
HOBBIES_2_146_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
HOBBIES_2_147_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
HOBBIES_2_148_WI_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [12]:
def smape(a, f):
    """Symmetric mean absolute percentage error, expressed in percent.

    Args:
        a: Actual values (array-like of any shape).
        f: Forecast values, broadcastable against ``a``.

    Returns:
        The mean, over every element, of ``200 * |f - a| / (|a| + |f| + 1e-9)``.
    """
    a = np.asarray(a)
    f = np.asarray(f)
    # The 1e-9 term keeps the ratio finite (and equal to 0) where both the actual
    # and the forecast are 0.
    pct_err = 2 * np.abs(f - a) / (np.abs(a) + np.abs(f) + 1e-9) * 100
    # Average over all elements. The previous `1 / len(a) * np.sum(...)` divided by
    # the first-axis length only, which inflates the result for multi-dimensional
    # input by the size of the remaining axes.
    return np.mean(pct_err)


# Score each forecast column of Y_rec_df against the observed values in Y with smape,
# per hierarchy level in `tags`. With benchmark="prophet" the recorded output labels
# the metric "smape-scaled" and shows the prophet column as 1.0, so the values are
# presumably ratios relative to the base forecast — confirm against the library docs.
evaluator = HierarchicalEvaluation(evaluators=[smape])
evaluator.evaluate(Y_hat_df=Y_rec_df, Y_test_df=Y, tags=tags, benchmark="prophet")

Unnamed: 0_level_0,Unnamed: 1_level_0,prophet,prophet/BottomUp
level,metric,Unnamed: 2_level_1,Unnamed: 3_level_1
Overall,smape-scaled,1.0,0.999528
Cat,smape-scaled,1.0,1.046039
Cat/Dept,smape-scaled,1.0,1.101575
Cat/Dept/Item,smape-scaled,1.0,0.988191
Cat/Dept/Item/State,smape-scaled,1.0,1.0


In [13]:
# Display the observed-values frame that was passed to reconcile() and evaluate().
# NOTE(review): the recorded output lists only 2021-01-16 rows, which matches the
# commented-out date filter rather than the current `Y = Y_df`; the output is likely
# stale — re-run after a kernel restart.
Y

Unnamed: 0_level_0,ds,y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1
HOBBIES_1_001_CA_1,2021-01-16,4
HOBBIES_1_002_CA_1,2021-01-16,0
HOBBIES_1_003_CA_1,2021-01-16,1
HOBBIES_1_004_CA_1,2021-01-16,2
HOBBIES_1_005_CA_1,2021-01-16,5
...,...,...
HOBBIES_2_148,2021-01-16,7
HOBBIES_2_149,2021-01-16,9
HOBBIES_1,2021-01-16,4473
HOBBIES_2,2021-01-16,492


In [14]:
# Display the reconciled forecasts again (same frame as shown earlier in the notebook).
Y_rec_df

Unnamed: 0_level_0,ds,prophet,prophet/BottomUp
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HOBBIES_1_001_CA_1,2021-01-16,0.641144,0.641144
HOBBIES_1_001,2021-01-16,4.989419,4.911092
HOBBIES_1_002,2021-01-16,2.136553,2.076482
HOBBIES_1_002_CA_1,2021-01-16,0.226241,0.226241
HOBBIES_1_003_CA_1,2021-01-16,0.407938,0.407938
...,...,...,...
HOBBIES_2_145_WI_3,2021-01-16,0.461117,0.461117
HOBBIES_2_146_WI_3,2021-01-16,0.002087,0.002087
HOBBIES_2_147_WI_3,2021-01-16,0.247244,0.247244
HOBBIES_2_148_WI_3,2021-01-16,0.104610,0.104610
