In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

from sktime.forecasting.compose import ForecastingPipeline, make_reduction
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.transformations.series.date import DateTimeFeatures

from sktime.forecasting.arima import ARIMA
from sktime.forecasting.naive import NaiveForecaster

from sktime.transformations.hierarchical.reconcile import Reconciler
from sktime.transformations.hierarchical.aggregate import Aggregator

In [3]:
data = pd.read_csv("global_fc/train.csv")
data['date'] = pd.to_datetime(data['date'])

In [4]:
stores = [5, 10]
items = [40, 50]

In [5]:
#### Prepare data for Hierarchical Modeling ----
subset = data.query("store in @stores and item in @items")

y_hier = subset[["store", "item", "date", "sales"]]
X_hier = subset.drop(columns=["sales"])

y_hier.set_index(["store", "item", "date"], inplace=True)
X_hier.set_index(["store", "item", "date"], inplace=True)

# y_hier.info(), X_hier.info()
# display(y_hier.head()), display(X_hier.head())

In [6]:
y_train_hier, y_test_hier, X_train_hier, X_test_hier = temporal_train_test_split(y_hier, X_hier)
# display(y_train_hier.head(5), X_train_hier.head(5))
# display(y_test_hier.head(5), X_test_hier.head(5))

In [7]:
FH = np.arange(1, len(y_test_hier.loc[5, 40])+1)
# FH 

In [8]:
steps=[
        ("daily_season", DateTimeFeatures(ts_freq="D")),  
        ("daily_season2", DateTimeFeatures(manual_selection=["week_of_month", "day_of_quarter"])),
]  

forecaster_hier = NaiveForecaster()

# Version 1A: Local Vectorized (Hierarchical) Forecasts (without reconciliation)
pipe_hier_norecon = ForecastingPipeline(steps= steps + [("forecaster", forecaster_hier)])

# Version 1B: Local Vectorized (Hierarchical) Forecasts (with reconciliation)
pipe_hier_recon = Aggregator() * pipe_hier_norecon * Reconciler(method="ols")

In [9]:
# Version 1A: Local Vectorized (Hierarchical) Forecasts (without reconciliation)
_ = pipe_hier_norecon.fit(y_train_hier, X_train_hier, fh=FH)

In [10]:
# Version 1B: Local Vectorized (Hierarchical) Forecasts (with reconciliation)
# Does not work with X_train (is it because of empty X dataframe?)
# If 1A works with X_train, shouldn't 1B also work?
_ = pipe_hier_recon.fit(y_train_hier, X_train_hier, fh=FH)

# This works 
# _ = pipe_hier_recon.fit(y_train_hier, fh=FH)

ValueError: (time) indices of series 0 are not contained in index of series 0, extra indices are: MultiIndex([(        5, '__total', '2013-01-01'),
            (        5, '__total', '2013-01-02'),
            (        5, '__total', '2013-01-03'),
            (        5, '__total', '2013-01-04'),
            (        5, '__total', '2013-01-05'),
            (        5, '__total', '2013-01-06'),
            (        5, '__total', '2013-01-07'),
            (        5, '__total', '2013-01-08'),
            (        5, '__total', '2013-01-09'),
            (        5, '__total', '2013-01-10'),
            ...
            ('__total', '__total', '2016-09-21'),
            ('__total', '__total', '2016-09-22'),
            ('__total', '__total', '2016-09-23'),
            ('__total', '__total', '2016-09-24'),
            ('__total', '__total', '2016-09-25'),
            ('__total', '__total', '2016-09-26'),
            ('__total', '__total', '2016-09-27'),
            ('__total', '__total', '2016-09-28'),
            ('__total', '__total', '2016-09-29'),
            ('__total', '__total', '2016-09-30')],
           names=['store', 'item', 'date'], length=4107)