In [None]:
import numpy as np
import pandas as pd
from numpy.linalg import inv

from sktime.datatypes import get_examples

# https://otexts.com/fpp3/hierarchical.html
# https://github.com/robjhyndman/reconciliation_review_talk/blob/main/10years_reconciliation.pdf

# Hierarchical dataset

In [None]:
# first hierarchical example frame returned by sktime's example generator
df = get_examples(mtype="pd_multiindex_hier", as_scitype="Hierarchical")[0]

df

## Aggregate Hierarchy

In [None]:
def _has_sibling_rows(df_hier, levels):
    """Flag the rows of df_hier whose group over `levels` has more than one row."""
    counts = df_hier.groupby(level=levels).transform("count")
    return (counts > 1).all(axis=1).to_numpy()


def aggregate_hierarchy(df_hier, flatten_single_levels=True):
    """Add the aggregate ("__total") series to a hierarchical data frame.

    For every prefix of the node levels (all index levels except
    "timepoints") the bottom-level rows are summed per timepoint, and the
    levels that were summed over are labelled "__total".

    Parameters
    ----------
    df_hier : pd.DataFrame
        Bottom-level data with a MultiIndex that contains a level named
        "timepoints"; the remaining levels are read as the hierarchy, ordered
        from the top level to the bottom level.
    flatten_single_levels : bool, default=True
        If True, an aggregate is only created for groups that contain more
        than one bottom-level row per timepoint, so a node with a single child
        is not repeated as a "__total" row. If False, every aggregate is
        created.

    Returns
    -------
    pd.DataFrame
        The rows of df_hier plus the aggregate rows, with the same index
        levels as df_hier, sorted by index.
    """
    hier_names = list(df_hier.index.names)
    node_levels = [name for name in hier_names if name != "timepoints"]

    # depth 0 is the grand total; depth k keeps the first k node levels
    aggregates = []
    for depth in range(len(node_levels)):
        agg_levels = node_levels[:depth] + ["timepoints"]

        source = df_hier
        if flatten_single_levels:
            # Group membership is evaluated jointly over all aggregated
            # levels, so a label that is reused under another parent cannot
            # keep a single-child group alive.
            source = df_hier.loc[_has_sibling_rows(df_hier, agg_levels)]

        agg = source.groupby(level=agg_levels).sum()

        # the levels that were summed over are marked as totals
        summed_levels = node_levels[depth:]
        for name in summed_levels:
            agg[name] = "__total"
        agg = agg.set_index(summed_levels, append=True).reorder_levels(hier_names)
        aggregates.append(agg)

    df_out = pd.concat([*aggregates[:1], df_hier, *aggregates[1:]])
    return df_out.sort_index()

Now we have the full forecasting dataset

In [None]:
# add the "__total" aggregate rows to the example frame (flatten_single_levels defaults to True)
aggregate_hierarchy(df)

Let's test with bottom levels that span two nodes

- i.e. mid levels that are only present at a subset of bottom nodes

In [None]:
cols = ["foo", "foo2", "bar", "timepoints", "var_0", "var_1"]

# one entry per bottom-level series: (foo, foo2, bar, var_1 at timepoints 0..2);
# var_0 is always timepoint + 1
series_spec = [
    ("a", "a1", 0, [4, 5, 6]),
    ("a", "a1", 1, [4, 55, 6]),
    ("a", "a2", 2, [42, 5, 6]),
    ("b", "b1", 0, [4, 5, 6]),
    ("b", "b2", 1, [4, 55, 6]),
    ("b", "b2", 2, [42, 5, 6]),
]

Xlist = [
    pd.DataFrame(
        [[foo, foo2, bar, t, t + 1, v] for t, v in enumerate(var_1)],
        columns=cols,
    )
    for foo, foo2, bar, var_1 in series_spec
]
X = pd.concat(Xlist).set_index(["foo", "foo2", "bar", "timepoints"])

X

Note flatten single levels is the default option

- see that no `(a, a2, __total, *)` or `(b, b1, __total, *)` rows are created: `a2` and `b1` each have a single child, so their aggregate would only duplicate it

In [None]:
# aggregate the test frame, skipping aggregates for groups with a single bottom-level row
aggregate_hierarchy(X, flatten_single_levels=True)

# Forecasting Example

Let's generate a hierarchical dataset similar to the last example from the flights dataset

- Generate dataset
- Generate full hierarchy
- Forecast each level
- Reconcile

## Generate Dataset

In [None]:
from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series

In [None]:
# base series; the other zones in the cells below are transformations of it
zone1 = load_airline()

zone1

In [None]:
# plotting for visualization
# the transforms mirror the zone2..zone6 columns of the dataset built in the
# next cell (zone5 was plotted with an offset of -20 while the data uses -500)
plot_series(
    zone1,
    10 + zone1 * 5,
    -50 + zone1 * 0.9,
    zone1 ** 1.5,
    -500 + 10 * zone1,
    10 + (10 * zone1) + (0.05 * (zone1 ** 2)),
    labels=["zone1", "zone2", "zone3", "zone4", "zone5", "zone6"],
)

In [None]:
# wide frame: one column per zone, every zone is a transformation of zone1
df = pd.DataFrame(zone1, index=zone1.index).rename(
    columns={"Number of airline passengers": "zone1"}
)

df["zone2"] = 10 + zone1 * 5
df["zone3"] = zone1 * 0.9 - 50
df["zone4"] = zone1 ** 1.5
df["zone5"] = zone1 * 10 - 500
df["zone6"] = 10 + (10 * zone1) + (0.05 * (zone1 ** 2))

# wide -> long: the zone name ("variable") and the original time index become
# the two index levels (airport, timepoints); the values become "passengers"
df = (
    df.melt(ignore_index=False)
    .set_index(["variable", df.melt(ignore_index=False).index])
    .rename_axis(["airport", "timepoints"], axis=0)
    .rename(columns={"value": "passengers"})
)

# df['country'] = "USA"
# zone1-zone3 -> state CA (zone1, zone2 in city LA; zone3 in city SF)
df.loc[
    df.index.get_level_values(level="airport").isin(["zone1", "zone2", "zone3"]),
    "state",
] = "CA"
df.loc[
    df.index.get_level_values(level="airport").isin(["zone1", "zone2"]), "city"
] = "LA"
df.loc[df.index.get_level_values(level="airport").isin(["zone3"]), "city"] = "SF"


# zone4-zone6 -> state NY (zone4, zone5 in city NYC; zone6 in city BF)
df.loc[
    df.index.get_level_values(level="airport").isin(["zone4", "zone5", "zone6"]),
    "state",
] = "NY"
df.loc[
    df.index.get_level_values(level="airport").isin(["zone4", "zone5"]), "city"
] = "NYC"
df.loc[df.index.get_level_values(level="airport").isin(["zone6"]), "city"] = "BF"

# move state and city in front of the existing (airport, timepoints) index
df = df.set_index(["state", "city", df.index])
df


# df.droplevel(level=-1).index.unique()

## Generate full hierarchy

In [None]:
# bottom-level series plus their "__total" aggregates; single-row groups are not aggregated
df_fh = aggregate_hierarchy(df, flatten_single_levels=True)

df_fh

## Forecast each level

here we will forecast each unique level outside `timepoints`

In [None]:
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

In [None]:
# one entry per series to forecast: the unique index tuples once "timepoints" is dropped
model_ids = df_fh.droplevel(level="timepoints").index.unique()

model_ids

Now set up loop for forecasting

In [None]:
# fitted forecaster and test-period forecast for every series, keyed by model id
mods = {}
prds = {}

for model_id in model_ids:
    # hold out the last 36 timepoints of this series for evaluation
    y_train, y_test = temporal_train_test_split(df_fh.loc[model_id], test_size=36)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    forecaster = ExponentialSmoothing(trend="add", seasonal="additive", sp=12)
    mods[model_id] = forecaster.fit(y_train)
    prds[model_id] = forecaster.predict(fh)

    # report the symmetric MAPE of the base forecast on the held-out period
    print(model_id)
    print(mean_absolute_percentage_error(y_test, prds[model_id], symmetric=True))

Extract forecasts

In [None]:
# Stack the per-series forecasts into one frame; the index levels are named
# after those of df_fh and the forecast column is renamed to "y_pred".
# NOTE: `prds` is rebound from the dict filled by the forecasting loop to a
# DataFrame, so this cell can only be re-run after re-running that loop.
prds = (
    pd.concat(prds)
    .rename_axis(df_fh.index.names, axis=0)
    .rename(columns={"passengers": "y_pred"})
)

# join with the measured values; the inner join keeps only index entries
# present in both frames, and the measured column becomes "y_true"
prds = pd.concat([prds, df_fh], axis=1, join="inner").rename(
    columns={"passengers": "y_true"}
)

prds

## Reconcile - Bottom Up

Bottom-up is easy: we just sum the bottom levels, much like the aggregate function.

But we want it to be compatible with the other methods, which all follow these steps:
    
    - get y 'base' forecasts for all series (previous section)
    - get S matrix from df index (defined by hierarchy structure)
    - get G matrix for recon (defined by recon method)
    - reconcile forecasts - SGy (all methods)


    - Here is the S matrix for our example

In [None]:
def get_s_matrix(df):
    """Build the summation matrix S of the hierarchy found in the index of df.

    A node is a unique index entry once the "timepoints" level is dropped. A
    node whose last level is not "__total" is a bottom-level (leaf) node.
    S has one row per node and one column per leaf; an entry is 1.0 when the
    node aggregates the leaf, i.e. when every level of the node is either
    "__total" or equal to the leaf's value at that level, and 0.0 otherwise.

    Leaves are compared on their full index tuple rather than on the label of
    the last level, so a leaf label that is reused under different parents
    still gets its own, correct column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose index contains a level named "timepoints" and uses
        "__total" to mark aggregated levels.

    Returns
    -------
    pd.DataFrame
        Float matrix indexed by the nodes (in order of first appearance in
        df); the columns are labelled with the last-level value of each leaf
        (in order of first appearance).
    """
    nodes = df.index.droplevel("timepoints").unique()

    # a single-level hierarchy yields scalars instead of tuples; normalise
    node_keys = [key if isinstance(key, tuple) else (key,) for key in nodes]
    leaf_keys = [key for key in node_keys if key[-1] != "__total"]

    values = [
        [
            float(
                all(
                    part == "__total" or part == leaf_part
                    for part, leaf_part in zip(node, leaf)
                )
            )
            for leaf in leaf_keys
        ]
        for node in node_keys
    ]

    return pd.DataFrame(
        values, index=nodes, columns=[leaf[-1] for leaf in leaf_keys]
    )


# summation matrix for the hierarchy in the index of the forecast frame
s_test = get_s_matrix(prds)

s_test

- Now the G matrix for the bottom-up method
        
        - note for some reconcilers this will have to access each models residuals
        - the G matrix is used to transform the original forecasts at all levels to new bottom level forecasts
        - it is then combined with the summation matrix S
        - for bottom up the G matrix should be the transpose of the S matrix with `__total` level indicators set to zero

In [None]:
def get_g_matrix_bu(df):
    """Build the bottom-up G matrix of the hierarchy found in the index of df.

    A node is a unique index entry once the "timepoints" level is dropped. A
    node whose last level is not "__total" is a bottom-level (leaf) node.
    G has one row per leaf and one column per node; the row of a leaf holds
    1.0 in the column of that very node and 0.0 elsewhere, so multiplying G
    with the forecasts of all nodes selects the bottom-level forecasts.

    Leaves are identified by their position among the nodes rather than by the
    label of the last level, so a leaf label that is reused under different
    parents is not counted twice.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose index contains a level named "timepoints" and uses
        "__total" to mark aggregated levels.

    Returns
    -------
    pd.DataFrame
        Float matrix whose rows are labelled with the last-level value of each
        leaf and whose columns are the nodes, both in order of first
        appearance in df.
    """
    nodes = df.index.droplevel("timepoints").unique()

    is_leaf = np.asarray(nodes.get_level_values(-1) != "__total", dtype=bool)
    leaf_positions = np.flatnonzero(is_leaf)

    # row k selects the k-th leaf among all nodes
    g_values = np.zeros((len(leaf_positions), len(nodes)))
    g_values[np.arange(len(leaf_positions)), leaf_positions] = 1.0

    leaf_labels = list(nodes[is_leaf].get_level_values(-1))
    return pd.DataFrame(g_values, index=leaf_labels, columns=nodes)


# bottom-up G matrix for the hierarchy in the index of the forecast frame
g_test = get_g_matrix_bu(prds)

g_test

    - Now reconcile using SGy
        - this is the same no matter the reconciliation method

In [None]:
# The product is computed with numpy, so inputs are combined by position and
# any pandas index on them is ignored; ideally this would keep the index
# matching of the data frames.
def reconcile(base_fc, s_matrix, g_matrix):
    """Reconcile base forecasts as S (G y).

    Parameters
    ----------
    base_fc : array-like
        Base forecasts y.
    s_matrix : array-like
        Summation matrix S.
    g_matrix : array-like
        Reconciliation matrix G.

    Returns
    -------
    The result of ``np.dot(s_matrix, np.dot(g_matrix, base_fc))``.
    """
    projected_fc = np.dot(g_matrix, base_fc)
    return np.dot(s_matrix, projected_fc)


# Reconcile one timepoint at a time: each group passed to the lambda holds the
# "y_pred" values of that timepoint.
# NOTE(review): reconcile combines its inputs by position, so this assumes the
# rows inside each group are ordered like the rows of s_test and the columns
# of g_test - confirm.
prds["y_reco_bu"] = (
    prds[["y_pred"]]
    .groupby(level="timepoints")
    .transform(lambda x: reconcile(x, s_test, g_test))
)

prds

This seems to work fine

In [None]:
# look at all rows whose last index level equals "1958-01"
prds.loc[prds.index.get_level_values(level=-1) == "1958-01"]

## OLS reconciliation

    - Now all we need is the new g_matrix method
    - now all this method needs is the summation matrix

In [None]:
def get_g_matrix_ols(df):
    """Build the OLS reconciliation matrix G = (S'S)^-1 S'.

    S is the summation matrix returned by get_s_matrix(df).

    Parameters
    ----------
    df : pd.DataFrame
        Frame passed on to get_s_matrix.

    Returns
    -------
    pd.DataFrame
        G, with rows labelled by the columns of S and columns labelled by the
        index of S.
    """
    smat = get_s_matrix(df)
    smat_t = np.transpose(smat)

    projection = np.dot(inv(np.dot(smat_t, smat)), smat_t)

    # label the result directly: rows follow S's columns, columns follow S's index
    return pd.DataFrame(projection, index=smat.columns, columns=smat.index)


# OLS G matrix for the hierarchy in the index of the forecast frame
g_test_ols = get_g_matrix_ols(prds)

g_test_ols

In [None]:
# Same per-timepoint reconciliation as for bottom-up, only the G matrix differs.
# NOTE(review): reconcile combines its inputs by position, so this assumes the
# rows inside each group are ordered like the rows of s_test and the columns
# of g_test_ols - confirm.
prds["y_reco_ols"] = (
    prds[["y_pred"]]
    .groupby(level="timepoints")
    .transform(lambda x: reconcile(x, s_test, g_test_ols))
)

prds

This seems to work fine as well

    - note the bottom level forecasts have now changed as well

In [None]:
# look at all rows whose last index level equals "1958-01", now including the OLS column
prds.loc[prds.index.get_level_values(level=-1) == "1958-01"]

Maybe need some significance testing here :p

In [None]:
# for i in model_ids:
#     # print(i)
#     # print(
#     #     mean_absolute_percentage_error(
#     #         prds.loc[i, "y_true"], prds.loc[i, "y_pred"], symmetric=True
#     #     )
#     # )
#     # print(
#     #     mean_absolute_percentage_error(
#     #         prds.loc[i, "y_true"], prds.loc[i, "y_reco_bu"], symmetric=True
#     #     )
#     # )
#     # print(
#     #     mean_absolute_percentage_error(
#     #         prds.loc[i, "y_true"], prds.loc[i, "y_reco_ols"], symmetric=True
#     #     )
#     # )
#     plot_series(
#         prds.loc[i, 'y_true'],
#         prds.loc[i, 'y_pred'],
#         prds.loc[i, 'y_reco_bu'],
#         prds.loc[i, 'y_reco_ols'],
#         labels=["y_test", "y_pred", "y_pred_bu", "y_pred_ols"],
#     )

So we could maybe work it like this


class (panel_forecaster)
    
    - fit
    - predict
    - train_test_temporal split?
    - list of model specs

class hierarchical_forecaster(panel_forecaster)
    
    Includes the aggregated levels for the panel.
    
    Inherits methods from above and adds g matrix methods that need information from model fits/original data

    - get_g_matrix_wlsvar
    - get_g_matrix_mint
    - get_g_matrix_mint_shrink
    - get_g_matrix_topdown
    - predict generates multiindex

class reconcile(Transformer, predictions: multi-index with '__total' present, method = "bu"):

    Inherits transformer methods? and includes reconciliation methods that don't depend on historic/residual data.

    Checks we have predictions from hierarchical forecaster then

    - fit
    - predict, i.e. reconcile from this notebook
    - get_s_matrix
    - get_g_matrix_bu
    - get_g_matrix_ols
    - get_g_matrix_wlsstr


- generate full hierarchy
- individual forecasts
- Get S (summation) matrix (all recon methods)
- Get G (recon) matrix (method dependent)
- reconcile (all methods)


inherit from base classes (base estimator/forecaster)
    - inheriting will be easier

- initialisation of class
    - recon method (method = "bu", "ols")
    - work for every dataframe

- call that class on a frame
    - list of forecasters (flexible, to do)

- forecaster method call returns recon predicts
    - data in it
    - list of forecasters (simple for now)

- hidden methods 
    - checks for data 
    - check for final predictions



- split it up into two methods in the same class?
- store predictions in methods of class self.