In [1]:
# https://doi.org/10.1016/j.ijforecast.2008.07.004
# https://github.com/robjhyndman/reconciliation_review_talk/blob/main/10years_reconciliation.pdf
# https://otexts.com/fpp3/single-level.html


import pandas as pd

from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.transformations.hierarchical.aggregate import Aggregator
from sktime.transformations.hierarchical.reconcile import Reconciler
from sktime.utils._testing.hierarchical import _bottom_hier_datagen

# Reconciliation method names, read from the Reconciler class attribute.
# NOTE(review): METHOD_LIST is not referenced by any of the cells shown here.

METHOD_LIST = Reconciler.METHOD_LIST

In [2]:
# Example 1: a hierarchy with 3 bottom nodes under a single level.
# NOTE(review): no random_seed is passed here, unlike the later 5-node example;
# confirm the generated data is reproducible across kernel restarts.
agg = Aggregator(flatten_single_levels=True)

X = _bottom_hier_datagen(
    no_bottom_nodes=3,
    no_levels=1,
)
# add aggregate levels
X = agg.fit_transform(X)

# forecast all levels
# horizon is the single step [1], expressed relative to the end of the data
fh = ForecastingHorizon([1], is_relative=True)
# additive trend and additive seasonality, seasonal period 12
forecaster = ExponentialSmoothing(trend="add", seasonal="additive", sp=12)
# base forecasts for every row of the aggregated frame (totals included)
prds = forecaster.fit(X).predict(fh)

In [3]:
# Bottom-up benchmark: re-aggregating the base forecasts rebuilds the "__total"
# row from the bottom-level forecasts, so it differs from the directly
# forecast total while the bottom rows stay the same.
# Only the "passengers" column is passed to the aggregator, which keeps the
# cell re-runnable: once "bu" exists, aggregating the whole frame would feed
# "bu" back in and return two columns for a single-column assignment.
prds["bu"] = Aggregator().fit_transform(prds[["passengers"]])
prds

  warn(


Unnamed: 0_level_0,Unnamed: 1_level_0,passengers,bu
l1_agg,timepoints,Unnamed: 2_level_1,Unnamed: 3_level_1
__total,1961-01,13485.52155,13479.326262
l1_node01,1961-01,451.577217,451.577217
l1_node02,1961-01,8958.082274,8958.082274
l1_node03,1961-01,4069.66677,4069.66677


In [4]:
# Example 2: 5 bottom nodes under 2 hierarchy levels, with a fixed seed.
# NOTE: this rebinds agg, X, fh, forecaster and prds from the first example;
# every later cell works on these two-level objects.
agg = Aggregator(flatten_single_levels=True)

X = _bottom_hier_datagen(no_bottom_nodes=5, no_levels=2, random_seed=100)
# add aggregate levels
X = agg.fit_transform(X)

# forecast all levels
# horizon is the single step [1], expressed relative to the end of the data
fh = ForecastingHorizon([1], is_relative=True)
# additive trend and additive seasonality, seasonal period 12
forecaster = ExponentialSmoothing(trend="add", seasonal="additive", sp=12)
# base forecasts for every row of the aggregated frame (totals included)
prds = forecaster.fit(X).predict(fh)
prds

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,passengers
l2_agg,l1_agg,timepoints,Unnamed: 3_level_1
__total,__total,1961-01,28141.714272
l2_node01,__total,1961-01,15266.353526
l2_node01,l1_node03,1961-01,8630.641055
l2_node01,l1_node05,1961-01,6631.856038
l2_node02,__total,1961-01,12879.021572
l2_node02,l1_node01,1961-01,451.577217
l2_node02,l1_node02,1961-01,4027.290137
l2_node02,l1_node04,1961-01,8402.154023


In [5]:
import numpy as np

from sktime.transformations.hierarchical.reconcile import _get_g_matrix_bu


def _get_g_matrix_template(X):

    # get bottom level indexes
    bl_inds = (
        X.loc[~(X.index.get_level_values(level=-2).isin(["__total"]))]
        .index.droplevel(level=-1)
        .unique()
    )

    # get all level indexes
    al_inds = X.droplevel(level=-1).index.unique()

    g_matrix = pd.DataFrame(
        [[0.0 for i in range(len(bl_inds))] for i in range(len(al_inds))],
        index=al_inds,
    )
    g_matrix.columns = bl_inds
    g_matrix = g_matrix.transpose()

    return g_matrix

In [6]:
# Zero-filled template: rows are the bottom-level nodes, columns are all nodes.
gmat = _get_g_matrix_template(prds[["passengers"]])

gmat

Unnamed: 0_level_0,l2_agg,__total,l2_node01,l2_node01,l2_node01,l2_node02,l2_node02,l2_node02,l2_node02
Unnamed: 0_level_1,l1_agg,__total,__total,l1_node03,l1_node05,__total,l1_node01,l1_node02,l1_node04
l2_agg,l1_agg,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
l2_node01,l1_node03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node01,l1_node05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
def _update_g_matrix_td_fcst(g_mat, X_vector, g_mat_bu=_get_g_matrix_bu(X)):

    bl_preds = g_mat_bu.dot(X_vector)

    for i in g_mat.index:
        g_mat.loc[i, "__total"] = (bl_preds.loc[i] / bl_preds.sum()).values[0]

    return g_mat


# X_vector drops the last index level ("timepoints") so the forecasts are
# indexed by node only. The call fills gmat's "__total" column in place and
# returns that same object, which is what gets displayed.
_update_g_matrix_td_fcst(g_mat=gmat, X_vector=prds[["passengers"]].droplevel(-1))

Unnamed: 0_level_0,l2_agg,__total,l2_node01,l2_node01,l2_node01,l2_node02,l2_node02,l2_node02,l2_node02
Unnamed: 0_level_1,l1_agg,__total,__total,l1_node03,l1_node05,__total,l1_node01,l1_node02,l1_node04
l2_agg,l1_agg,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
l2_node01,l1_node03,0.306665,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node01,l1_node05,0.235644,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node01,0.016046,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node02,0.143098,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node04,0.298547,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Build a parent/child edge table from the summing matrix fitted on X.
smat = Reconciler().fit(X).s_matrix
parent_child = []

for bottom_node in smat.columns:
    # Rows flagged with 1 in this column, ordered by descending row total.
    # NOTE(review): this presumably orders the chain from the overall total
    # down to the bottom node itself; ties in the row totals would make the
    # order ambiguous.
    chain = (
        smat[(smat[bottom_node] == 1)]
        .sum(axis=1)
        .sort_values(ascending=False)
        .index
    )

    # Each pair of neighbours in the chain is one (parent, child) edge.
    parent_child.extend(
        [upper, lower] for upper, lower in zip(chain[:-1], chain[1:])
    )

df = pd.DataFrame(parent_child)
df.columns = ["parent", "child"]

# The same edge is found once per bottom node beneath it, so de-duplicate.
df = df.drop_duplicates().sort_values(["parent", "child"]).reset_index(drop=True)
df
# number of rows must be equal to number unique nodes -1 (i.e. number of edges)

Unnamed: 0,parent,child
0,"(__total, __total)","(l2_node01, __total)"
1,"(__total, __total)","(l2_node02, __total)"
2,"(l2_node01, __total)","(l2_node01, l1_node03)"
3,"(l2_node01, __total)","(l2_node01, l1_node05)"
4,"(l2_node02, __total)","(l2_node02, l1_node01)"
5,"(l2_node02, __total)","(l2_node02, l1_node02)"
6,"(l2_node02, __total)","(l2_node02, l1_node04)"


In [9]:
# gmat was updated in place by the earlier _update_g_matrix_td_fcst call, so
# its "__total" column now holds proportions rather than zeros.
gmat

Unnamed: 0_level_0,l2_agg,__total,l2_node01,l2_node01,l2_node01,l2_node02,l2_node02,l2_node02,l2_node02
Unnamed: 0_level_1,l1_agg,__total,__total,l1_node03,l1_node05,__total,l1_node01,l1_node02,l1_node04
l2_agg,l1_agg,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
l2_node01,l1_node03,0.306665,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node01,l1_node05,0.235644,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node01,0.016046,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node02,0.143098,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node04,0.298547,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
def _update_g_matrix_td_fcst(g_mat, X_vector, look_up=df):

    for i in g_mat.index:
        child = i
        parent = look_up.loc[look_up["child"] == child, "parent"].values[0]
        props = []
        # print(sum([j == "__total" for j in list(child)]) < len(parent))
        while sum([j == "__total" for j in list(child)]) < len(child):
            # now need to find nodes directly connected to ("l2_node01", "__total")
            # print(parent)
            # print(child)
            children = look_up.loc[look_up["parent"] == parent, "child"].unique()
            # print(children)
            props.append((X_vector.loc[child] / X_vector.loc[children].sum()).values[0])
            # print(props)
            child = parent
            # print(child)
            if sum([j == "__total" for j in list(child)]) == len(child):
                break
            else:
                parent = look_up.loc[look_up["child"] == child, "parent"].values[0]
            # print(sum([j == "__total" for j in list(prent)]) < len(parent))

        # print(props)
        g_mat.loc[i, "__total"] = np.prod(props)

    return g_mat


# Re-fill gmat's "__total" column, this time with the hierarchy-aware
# proportions; gmat is overwritten in place and the same object is displayed.
_update_g_matrix_td_fcst(g_mat=gmat, X_vector=prds[["passengers"]].droplevel(-1))

Unnamed: 0_level_0,l2_agg,__total,l2_node01,l2_node01,l2_node01,l2_node02,l2_node02,l2_node02,l2_node02
Unnamed: 0_level_1,l1_agg,__total,__total,l1_node03,l1_node05,__total,l1_node01,l1_node02,l1_node04
l2_agg,l1_agg,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
l2_node01,l1_node03,0.306723,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node01,l1_node05,0.235688,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node01,0.016042,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node02,0.143067,0.0,0.0,0.0,0.0,0.0,0.0,0.0
l2_node02,l1_node04,0.298481,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Hand-computed cross-check of the proportions, indexing prds by row position
# as displayed earlier: row 0 is the overall total, rows 1 and 4 are the two
# l2 aggregates, rows 2-3 and 5-7 are their respective children.
x = prds.values

upper_sum = x[1] + x[4]
first_group_sum = x[2] + x[3]
second_group_sum = x[5] + x[6] + x[7]

# (bottom row, sum over its siblings, row of its l2 aggregate)
props = [
    ((x[bottom_row] / sibling_sum) * (x[upper_row] / upper_sum))[0]
    for bottom_row, sibling_sum, upper_row in [
        (2, first_group_sum, 1),
        (3, first_group_sum, 1),
        (5, second_group_sum, 4),
        (6, second_group_sum, 4),
        (7, second_group_sum, 4),
    ]
]
pd.Series(props)

0    0.306723
1    0.235688
2    0.016042
3    0.143067
4    0.298481
dtype: float64

In [None]:
# Scratch: the hand-computed proportion for row 2 only -- its share within
# rows 2-3, times the share of row 1 within rows 1 and 4.
x = prds.values
(x[2] / (x[2] + x[3])) * (x[1] / (x[1] + x[4]))

In [None]:
# Scratch: share of row 2 among rows 2, 3, 5, 6 and 7 taken together, with no
# intermediate level. Relies on x assigned in an earlier cell.
x[2] / (x[2] + x[3] + x[5] + x[6] + x[7])