In [None]:
import pandas as pd

from sktime.datatypes import get_examples
from sktime.transformations.hierarchical.aggregate import aggregator
from sktime.transformations.hierarchical.reconcile import reconciler

# https://otexts.com/fpp3/hierarchical.html
# https://github.com/robjhyndman/reconciliation_review_talk/blob/main/10years_reconciliation.pdf

# Hierarchical dataset

In [None]:
# Fetch the example fixtures for the hierarchical multiindex mtype and keep the
# first one as the working frame.
hier_examples = get_examples(mtype="pd_multiindex_hier", as_scitype="Hierarchical")
df = hier_examples[0]

df

## Aggregate Hierarchy

Now we have the full forecasting dataset

In [None]:
# Build the aggregation transformer, then fit it and apply it to the example
# frame in a single step; the transformed frame is the cell output.
agg_df = aggregator(
    flatten_single_levels=True,
)
agg_df.fit_transform(
    X=df,
)

Let's test with bottom levels that span two nodes

- i.e. mid levels that are only present at a subset of bottom nodes

In [None]:
# Index levels and value columns of the toy hierarchy.
index_names = ["foo", "foo2", "bar", "timepoints"]
value_names = [f"var_{i}" for i in range(2)]

# One entry per bottom node (foo, foo2, bar) holding its var_1 observations at
# timepoints 0, 1, 2. var_0 is simply timepoint + 1 for every node.
var_1_by_node = {
    ("a", "a1", 0): (4, 5, 6),
    ("a", "a1", 1): (4, 55, 6),
    ("a", "a2", 2): (42, 5, 6),
    ("b", "b1", 0): (4, 5, 6),
    ("b", "b2", 1): (4, 55, 6),
    ("b", "b2", 2): (42, 5, 6),
}

# Flatten to one record per (node, timepoint), preserving node order.
records = [
    [*node, timepoint, timepoint + 1, var_1]
    for node, var_1_values in var_1_by_node.items()
    for timepoint, var_1 in enumerate(var_1_values)
]
X = pd.DataFrame(records, columns=index_names + value_names).set_index(index_names)

X

Note flatten single levels is the default option

- see that `(a, a2, 2, *)` and `(b, b1, 0, *)` don't contain `__total`

In [None]:
# Aggregate the toy hierarchy with single-child levels flattened.
flatten_kwargs = {"flatten_single_levels": True}
agg_df = aggregator(**flatten_kwargs)
agg_df.fit_transform(X=X)

In [None]:
# Same aggregation, this time keeping single-child levels for comparison with
# the previous cell.
flatten_kwargs = {"flatten_single_levels": False}
agg_df = aggregator(**flatten_kwargs)
agg_df.fit_transform(X=X)

# Forecasting Example

Let's generate a hierarchical dataset, similar to the last example, from the airline passengers dataset

- Generate dataset
- Generate full hierarchy
- Forecast each level
- Reconcile

## Generate Dataset

In [None]:
from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series

In [None]:
# Base series: every other "zone" in this section is derived from it.
zone1 = load_airline()

zone1

In [None]:
# Plot the base series next to the five derived "zone" series so the chart shows
# exactly the series that make up the hierarchical dataset built in the next cell.
plot_series(
    zone1,
    10 + zone1 * 5,
    zone1 * 0.9 - 50,
    zone1 ** 1.5,
    # Previously plotted as -20 + 10 * zone1, which did not match the zone5
    # definition used for the dataset (zone1 * 10 - 500).
    zone1 * 10 - 500,
    10 + (10 * zone1) + (0.05 * (zone1 ** 2)),
    labels=["zone1", "zone2", "zone3", "zone4", "zone5", "zone6"],
)

In [None]:
# Wide frame: one column per synthetic airport ("zone"), each a deterministic
# transform of the airline series.
zones_wide = zone1.to_frame(name="zone1")
zones_wide["zone2"] = 10 + zone1 * 5
zones_wide["zone3"] = zone1 * 0.9 - 50
zones_wide["zone4"] = zone1 ** 1.5
zones_wide["zone5"] = zone1 * 10 - 500
zones_wide["zone6"] = 10 + (10 * zone1) + (0.05 * (zone1 ** 2))

# Long frame indexed by (airport, timepoints). The melt is computed once and
# reused, because its time index has to be passed back in as the second level.
zones_long = zones_wide.melt(
    ignore_index=False, var_name="airport", value_name="passengers"
)
df = zones_long.set_index(["airport", zones_long.index]).rename_axis(
    ["airport", "timepoints"], axis=0
)

# df['country'] = "USA"

# Single lookup table for the upper hierarchy levels: airport -> (state, city).
location_by_airport = {
    "zone1": ("CA", "LA"),
    "zone2": ("CA", "LA"),
    "zone3": ("CA", "SF"),
    "zone4": ("NY", "NYC"),
    "zone5": ("NY", "NYC"),
    "zone6": ("NY", "BF"),
}
airports = df.index.get_level_values(level="airport")
df["state"] = [location_by_airport[airport][0] for airport in airports]
df["city"] = [location_by_airport[airport][1] for airport in airports]

# Final index order: state > city > airport > timepoints.
df = df.set_index(["state", "city", df.index])
df

## Generate full hierarchy

In [None]:
# Aggregate the bottom-level frame; the result (df_fh) is the frame that gets
# forecast in the following section.
agg_df = aggregator(
    flatten_single_levels=True,
)
df_fh = agg_df.fit_transform(
    X=df,
)

df_fh

## Forecast each level

Here we forecast one series for each unique combination of index levels other than `timepoints`.

In [None]:
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# from sktime.forecasting.model_selection import temporal_train_test_split
# from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

In [None]:
# One identifier per series: the index with the time level removed, deduplicated.
series_index = df_fh.index.droplevel("timepoints")
model_ids = series_index.unique()

model_ids

In [None]:
# Author's note: fitting on the hierarchical frame is handled automatically as a
# panel type forecaster, so a single fit/predict covers every series in df_fh.
forecast_steps = list(range(1, 12))  # relative steps 1..11
fh = ForecastingHorizon(forecast_steps, is_relative=True)

forecaster = ExponentialSmoothing(
    trend="add",
    seasonal="additive",
    sp=12,
)
mods = forecaster.fit(df_fh)
prds = forecaster.predict(fh)
prds

## Reconcile - Bottom Up

Bottom-up is easy: we just sum the bottom levels, much like the aggregate function.

But we want it to be compatible with other methods which go like
    
    - get y 'base' forecasts for all series (previous section)
    - get S matrix from df index (defined by hierarchy structure)
    - get G matrix for recon (defined by recon method)
    - reconcile forecasts - SGy (all methods)


In [None]:
# Fit the bottom-up ("bu") reconciler on the base forecasts and show its S matrix.
# Reference kept from the original cell, presumably for the "indexing past
# lexsort depth" warning raised here (confirm):
# https://stackoverflow.com/questions/54307300/what-causes-indexing-past-lexsort-depth-warning-in-pandas
base_forecasts = prds[["passengers"]]
transformer = reconciler(method="bu")
fitted_transfrom = transformer.fit(X=base_forecasts)

fitted_transfrom.s_matrix

In [None]:
# G matrix of the fitted bottom-up reconciler (third step of the recipe listed above).
fitted_transfrom.g_matrix

In [None]:
# Reconcile the base forecasts bottom-up and store the result next to them.
recon_bu = fitted_transfrom.transform(X=prds[["passengers"]])
prds["y_recon_bu"] = recon_bu

prds

This seems to work fine

In [None]:
# Inspect one forecast period across every node of the hierarchy.
forecast_periods = prds.index.get_level_values(level=-1)
prds.loc[forecast_periods == "1961-01"]

## OLS reconciliation

    - Now all we need is the new g_matrix method

In [None]:
# Fit the OLS reconciler on the base forecasts and show its G matrix.
base_forecasts = prds[["passengers"]]
transformer_ols = reconciler(method="ols")
fitted_transfrom_ols = transformer_ols.fit(X=base_forecasts)

fitted_transfrom_ols.g_matrix

In [None]:
# Reconcile the base forecasts with OLS and store the result as a new column.
recon_ols = fitted_transfrom_ols.transform(X=prds[["passengers"]])
prds["y_recon_ols"] = recon_ols

prds

This seems to work fine as well

    - note the bottom level forecasts have now changed as well

In [None]:
# Same single-period view, now including the OLS-reconciled column.
period_labels = prds.index.get_level_values(level=-1)
in_first_month = period_labels == "1961-01"
prds.loc[in_first_month]

## WLS structural reconciliation

    - Now all we need is the new g_matrix method

In [None]:
# Fit the structural WLS ("wls_str") reconciler on the base forecasts and show
# its G matrix.
base_forecasts = prds[["passengers"]]
transformer_wls = reconciler(method="wls_str")
fitted_transfrom_wls = transformer_wls.fit(X=base_forecasts)

fitted_transfrom_wls.g_matrix

In [None]:
# Reconcile the base forecasts with structural WLS and store the result.
recon_wls = fitted_transfrom_wls.transform(X=prds[["passengers"]])
prds["y_recon_wls"] = recon_wls

prds

In [None]:
# Single-period view with all reconciled columns added so far.
time_level = prds.index.get_level_values(level=-1)
prds.loc[time_level == "1961-01"]

# Pipeline

here is the aggregator/forecaster/reconciler in a pipeline 

In [None]:
# from sktime.forecasting.compose import TransformedTargetForecaster

In [None]:
# forecaster = TransformedTargetForecaster(
#     [
#         ("aggregate", aggregator(flatten_single_levels=True)),
#         ("forecast", ExponentialSmoothing(trend="add", seasonal="additive", sp=12))
#     ]
# )

# forecaster.fit(df_fh)

In [None]:
# fh = ForecastingHorizon([*range(1, 12)], is_relative=True)
# prds = forecaster.predict(fh)
# prds

# Introduce tests

ok for aggregator check that

    -    test that "__total" is not named in index
    -    test that the final index is timestamp type - this is done in the package elsewhere (:)
    -    test that the index is actually named
    -    that we actually have two indexes - why is this not working from rest of package?

In [None]:
# Give the time index of the raw series a name before it is passed to the
# aggregator in the next cell. Note: this renames the index of zone1 in place.
zone1.index.name = "time"
zone1

In [None]:
# this shouldn't work - BUT DOES?!
# (zone1 is the single raw series, presumably lacking the hierarchy levels the
# aggregator expects - confirm the intended validation.)
agg_df = aggregator(
    flatten_single_levels=True,
)
agg_df.fit_transform(
    X=zone1,
)

In [None]:
# Forecast frame (base plus reconciled columns), shown for reference before it
# is fed to the aggregator in the next cell.
prds

In [None]:
# this shouldn't work
# (prds is built from the already aggregated frame - presumably the reason it
# should be rejected; confirm.)
agg_df = aggregator(
    flatten_single_levels=True,
)
agg_df.fit_transform(
    X=prds,
)

In [None]:
# Output of the earlier aggregation step, shown for reference before it is fed
# to the aggregator a second time in the next cell.
df_fh

In [None]:
# this shouldn't work
# (df_fh is itself the output of the aggregator - presumably re-aggregating it
# should be rejected; confirm.)
agg_df = aggregator(
    flatten_single_levels=True,
)
agg_df.fit_transform(
    X=df_fh,
)

In [None]:
# test reconciliation works
# Idea: take only the rows whose airport level is not "__total" from the
# reconciled forecasts, re-aggregate them, and compare against the reconciled
# values stored in prds. Coherent forecasts should give differences of ~0.

# Name the reconciled columns explicitly instead of slicing prds.columns[1:4],
# which silently selects the wrong columns if the cells above ran in another
# order or further columns were added.
recon_cols = ["y_recon_bu", "y_recon_ols", "y_recon_wls"]
level_names = ["state", "city", "airport", "timepoints"]

not_total = prds.index.get_level_values(level=-2) != "__total"
bottom_df = prds.loc[not_total, recon_cols].copy()
bottom_df.index.names = level_names

agg_df = aggregator(flatten_single_levels=True)
test_df = agg_df.fit_transform(X=bottom_df)
# Reset the time level name so the index names line up with prds for the
# subtraction (presumably the time level of prds is unnamed - confirm).
test_df.index.names = level_names[:-1] + [None]
# test_df.equals(prds[recon_cols])
(test_df - prds[recon_cols]).apply(lambda x: x.round(6).unique())