In [None]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate

from statsforecast import StatsForecast
from statsforecast.models import CrostonOptimized, ADIDA, TSB, HoltWinters
from statsforecast.core import StatsForecast

from hierarchicalforecast.utils import aggregate
from hierarchicalforecast.methods import BottomUp, MinTrace
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation


# Notebook-wide settings.
# Silence every warning so cell output stays readable.
warnings.simplefilter("ignore")
# Environment switch read by the Nixtla libraries; presumably makes them
# return `unique_id` as a regular column -- TODO confirm for the installed versions.
os.environ.update({"NIXTLA_ID_AS_COL": "true"})
# Render floats with three digits of precision in DataFrame output.
pd.options.display.precision = 3

In [None]:
# Download the example CSV and parse the `ds` column as datetimes.
url = "https://raw.githubusercontent.com/marcopeix/AppliedTimeSeriesForecastingInPython/refs/heads/master/data/intermittent_time_series.csv"
df = pd.read_csv(url, parse_dates=['ds'])
# Preview the first rows.
df.head()

In [None]:
# Bar chart of the observed values, zoomed to time steps 40-100.
fig, ax = plt.subplots(figsize=(10,8))

# Give the bars a label: the legend call below previously had no labelled
# artist to show and therefore rendered nothing.
ax.bar(df.index, df['y'], color='lightgray', label='Actual')
ax.set_ylabel('Value')
ax.set_xlabel('Time steps')
ax.legend(loc='best')
ax.set_xlim(40, 100)

fig.tight_layout()

## Croston

In [None]:
# Croston model
# One-step-ahead rolling cross-validation over the last 50 time steps.

# `models` was never defined in this cell. The plots read the
# 'CrostonOptimized' column, so that is the model to fit here.
models = [CrostonOptimized()]

# The data is handed to `cross_validation` below; passing `df` to the
# constructor as well is deprecated in recent statsforecast releases.
sf = StatsForecast(
    models=models,
    freq='H',
    n_jobs=-1
)

cv_df = sf.cross_validation(
    df=df,
    h=1,
    step_size=1,
    n_windows=50
)

# Re-index the 50 forecasts so they sit on time steps 50-99 of `df`
# (assumes `df` is a single series of 100 rows with a default RangeIndex -- TODO confirm).
cv_df.index = np.arange(50, 100, 1)

In [None]:
# Overlay the Croston cross-validation forecasts on the observed values.
fig, ax = plt.subplots(figsize=(10,8))

ax.bar(df.index, df['y'], color='lightgray')
ax.plot(
    cv_df.index,
    cv_df['CrostonOptimized'],
    ls='--',
    label='Croston (optimized)',
)

ax.set(xlabel='Time steps', ylabel='Value')
ax.set_xlim(40, 100)
ax.legend(loc='best')

fig.tight_layout()

## ADIDA

In [None]:
# ADIDA
# One-step-ahead rolling cross-validation over the last 50 time steps.

# `models` was never defined in this cell. The comparison plot reads both
# the 'CrostonOptimized' and 'ADIDA' columns, so both models are fitted.
models = [CrostonOptimized(), ADIDA()]

# The data is handed to `cross_validation` below; passing `df` to the
# constructor as well is deprecated in recent statsforecast releases.
sf = StatsForecast(
    models=models,
    freq='H',
    n_jobs=-1
)

cv_df = sf.cross_validation(
    df=df,
    h=1,
    step_size=1,
    n_windows=50
)

# Re-index the 50 forecasts so they sit on time steps 50-99 of `df`
# (assumes `df` is a single series of 100 rows with a default RangeIndex -- TODO confirm).
cv_df.index = np.arange(50, 100, 1)

In [None]:
# Compare the Croston and ADIDA one-step forecasts with the observed values.
fig, ax = plt.subplots(figsize=(10,8))

ax.bar(df.index, df['y'], color='lightgray')
# (forecast column, line style, legend entry)
for column, line_style, legend_name in (
    ('CrostonOptimized', '--', 'Croston'),
    ('ADIDA', ':', 'ADIDA'),
):
    ax.plot(cv_df.index, cv_df[column], ls=line_style, label=legend_name)

ax.set(xlabel='Time steps', ylabel='Value')
ax.set_xlim(40, 100)
ax.legend(loc='best')

fig.tight_layout()


## TSB

In [None]:
# TSB
# One-step-ahead rolling cross-validation over the last 50 time steps.

# `models` was never defined in this cell. The comparison plot reads the
# 'TSB', 'CrostonOptimized' and 'ADIDA' columns, so all three are fitted.
# TSB has no defaults for its two smoothing parameters; 0.1 / 0.1 are
# illustrative starting values -- NOTE(review): tune or confirm.
models = [
    TSB(alpha_d=0.1, alpha_p=0.1),
    CrostonOptimized(),
    ADIDA(),
]

# The data is handed to `cross_validation` below; passing `df` to the
# constructor as well is deprecated in recent statsforecast releases.
sf = StatsForecast(
    models=models,
    freq='H',
    n_jobs=-1
)

cv_df = sf.cross_validation(
    df=df,
    h=1,
    step_size=1,
    n_windows=50
)

# Re-index the 50 forecasts so they sit on time steps 50-99 of `df`
# (assumes `df` is a single series of 100 rows with a default RangeIndex -- TODO confirm).
cv_df.index = np.arange(50, 100, 1)

In [None]:
# Compare the TSB, Croston and ADIDA one-step forecasts with the observed values.
fig, ax = plt.subplots(figsize=(10,8))

ax.bar(df.index, df['y'], color='lightgray')
# (forecast column, line style, legend entry), drawn in this order.
for column, line_style, legend_name in (
    ('TSB', '--', 'TSB'),
    ('CrostonOptimized', ':', 'Croston'),
    ('ADIDA', '-.', 'ADIDA'),
):
    ax.plot(cv_df.index, cv_df[column], ls=line_style, label=legend_name)

ax.set(xlabel='Time steps', ylabel='Value')
ax.set_xlim(40, 100)
ax.legend(loc='best')

fig.tight_layout()

## Error metrics (CFE, CFE_min, CFE_max, PIS, NOS)

In [None]:
def intermittent_errors(y, y_hat):
    """Summarise the cumulative forecast error of one forecast column.

    The per-step error is ``y - y_hat`` and ``cfe_t`` is its running sum.
    NOTE(review): the original cell called an `errors` helper that is not
    defined anywhere in the notebook. The definitions below follow the usual
    intermittent-demand conventions -- confirm they match the intended ones.

    Parameters
    ----------
    y, y_hat : array-like of equal length
        Observed values and the matching forecasts.

    Returns
    -------
    dict
        'CFE'      final cumulative forecast error,
        'CFE_min'  smallest running value of the cumulative error,
        'CFE_max'  largest running value of the cumulative error,
        'PIS'      periods in stock, ``-sum(cfe_t)``,
        'NOS'      number of shortages, count of steps with ``cfe_t > 0``.

    Raises
    ------
    ValueError
        If the inputs are empty.
    """
    step_errors = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    if step_errors.size == 0:
        raise ValueError("y and y_hat must contain at least one value")
    running = np.cumsum(step_errors)
    return {
        'CFE': float(running[-1]),
        'CFE_min': float(running.min()),
        'CFE_max': float(running.max()),
        'PIS': float(-running.sum()),
        'NOS': int((running > 0).sum()),
    }


croston_errors = intermittent_errors(cv_df['y'], cv_df['CrostonOptimized'])
adida_errors = intermittent_errors(cv_df['y'], cv_df['ADIDA'])
tsb_errors = intermittent_errors(cv_df['y'], cv_df['TSB'])

In [None]:
# Final CFE per model, with whiskers built from CFE_min / CFE_max.
fig, ax = plt.subplots(figsize=(9,6))

x = ['Croston', 'ADIDA', 'TSB']
y = [croston_errors['CFE'], adida_errors['CFE'], tsb_errors['CFE']]

# Do not call this list `errors`: rebinding that name would shadow any
# metric helper called `errors` and break re-running earlier cells.
# Row 0 is the downward whisker length, row 1 the upward one.
# NOTE(review): matplotlib expects non-negative whisker lengths; CFE_max is
# used as-is, so confirm it cannot be negative for these models.
cfe_whiskers = [
    [abs(croston_errors['CFE_min']), abs(adida_errors['CFE_min']), abs(tsb_errors['CFE_min'])],
    [croston_errors['CFE_max'], adida_errors['CFE_max'], tsb_errors['CFE_max']]
]

ax.errorbar(x, y, yerr=cfe_whiskers, fmt='o')
ax.set_xlabel('Models')
ax.set_ylabel('CFE')

# Print each CFE value just to the right of its marker.
for i, v in enumerate(y):
    ax.text(x=i+0.03, y=v, s=str(round(v,2)), va='center')

fig.tight_layout()

## Hierarchical forecasting

In [None]:
# Load the tourism table and reshape it to the columns used below.
tourism_raw = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')

tourism = tourism_raw.rename(columns={'Trips': 'y', 'Quarter': 'ds'})
# Constant top-level key so that every row belongs to the same country.
tourism.insert(0, 'Country', 'Australia')

hierarchy_columns = ['Country', 'Region', 'State', 'Purpose']
Y_df = tourism[hierarchy_columns + ['ds', 'y']].copy()

# Turn '<digits> Q<digit>' into '<digits>-Q<digit>' before parsing to datetimes.
quarter_labels = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True)
Y_df['ds'] = pd.to_datetime(quarter_labels)
Y_df.head()

In [None]:
# Column groupings passed to `aggregate`, one inner list per level,
# ordered from the coarsest (country only) to the finest combination.
spec = [
    ['Country'],                                # country total
    ['Country', 'State'],                       # by state
    ['Country', 'Purpose'],                     # by purpose
    ['Country', 'State', 'Region'],             # by region within state
    ['Country', 'State', 'Purpose'],            # by purpose within state
    ['Country', 'State', 'Region', 'Purpose'],  # finest level
]

In [None]:
# Aggregate the bottom-level rows into every level listed in `spec`.
# Presumably returns the long-format series, the summing matrix and a
# mapping from level name to series ids -- TODO confirm against the library docs.
# NOTE(review): `Y_df` is rebound here, so this cell cannot be re-run without
# re-running the loading cell first.
Y_df, S_df, tags = aggregate(Y_df, spec)
# Move the current index into a column. NOTE(review): if the index is already
# a plain RangeIndex this adds an extra 'index' column -- verify.
Y_df = Y_df.reset_index()

In [None]:
# Preview the aggregated long-format frame.
Y_df.head()

In [None]:
# Top-left 5x5 corner of `S_df`.
S_df.iloc[:5, :5]

In [None]:
# NOTE(review): same preview as two cells earlier; nothing rebinds `Y_df` in between.
Y_df.head()

In [None]:
# Entries stored under the 'Country/State' key of `tags`.
tags['Country/State']

In [None]:
# One panel per state-level series.
states = tags['Country/State']

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(10,8))
axes = axes.flatten()

# The zip used to be wrapped in enumerate(), but the counter was never used.
for ax, state in zip(axes, states):
    plot_df = Y_df.query("unique_id == @state")

    ax.plot(plot_df['ds'], plot_df['y'])
    ax.set_xlabel('Date')
    ax.set_ylabel('Visits')
    ax.set_title(state)

# Hide panels that received no series (only matters with fewer than 8 states).
for unused_ax in axes[len(states):]:
    unused_ax.set_visible(False)

fig.autofmt_xdate()
fig.tight_layout()

In [None]:
# Hold out the last 8 rows of every series for testing; the rest is training data.
holdout_rows = Y_df.groupby('unique_id').tail(8)
is_holdout = Y_df.index.isin(holdout_rows.index)

Y_test_df = holdout_rows
Y_train_df = Y_df.loc[~is_holdout]

In [None]:
# Index both splits by `unique_id`.
# Guarded so the cell can be re-run: once `unique_id` has become the index it
# is no longer a column, and an unconditional set_index would raise KeyError.
if 'unique_id' in Y_test_df.columns:
    Y_test_df = Y_test_df.set_index('unique_id')
if 'unique_id' in Y_train_df.columns:
    Y_train_df = Y_train_df.set_index('unique_id')

In [None]:
# Holt-Winters
# `fcst` was never created in this cell, so the forecast call raised NameError.
# season_length=4 matches the `seasonality=4` default of the `mase` helper;
# freq='QS' assumes `ds` holds quarter-start timestamps -- TODO confirm.
fcst = StatsForecast(
    models=[HoltWinters(season_length=4)],
    freq='QS',
    n_jobs=-1
)

# An earlier cell moved `unique_id` into the index; hand it back as a column,
# which is the layout StatsForecast expects for its input frame.
train_df = (
    Y_train_df.reset_index()
    if Y_train_df.index.name == 'unique_id'
    else Y_train_df
)

# Forecast the 8 held-out steps; fitted=True keeps the in-sample fitted
# values available for the call on the next line.
Y_hat_df = fcst.forecast(df=train_df, h=8, fitted=True)
Y_fitted_df = fcst.forecast_fitted_values()

In [None]:
# Held-out actuals against the Holt-Winters forecast, one panel per state.
states = tags['Country/State']

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(10,8))
axes = axes.flatten()

# The zip used to be wrapped in enumerate(), but the counter was never used.
for ax, state in zip(axes, states):
    pred_plot_df = Y_hat_df.query("unique_id == @state")
    actual_plot_df = Y_test_df.query("unique_id == @state")

    ax.plot(actual_plot_df['ds'], actual_plot_df['y'], label='Actual')
    ax.plot(pred_plot_df['ds'], pred_plot_df['HoltWinters'], ls='--', label='Holt-Winters')

    ax.set_xlabel('Date')
    ax.set_ylabel('Visits')
    ax.set_title(state)
    # The forecast line had a label but no legend was drawn, so the two
    # lines could not be told apart.
    ax.legend(loc='best')

fig.autofmt_xdate()
fig.tight_layout()

In [None]:
# Reconcile
# This cell was empty, so `Y_rec_df` (used by every later cell) never existed.
# The column names handled downstream ('.../BottomUp', '.../MinTrace_method-ols')
# pin the two reconcilers to use.
reconcilers = [BottomUp(), MinTrace(method='ols')]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# The in-sample fitted values are supplied as `Y_df`.
# NOTE(review): newer hierarchicalforecast releases may name the summing-matrix
# argument `S_df` instead of `S` -- adjust to the installed version.
Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S_df, tags=tags)

In [None]:
# Remove the leftover 'index' column and its reconciled copies.
# errors='ignore' keeps the cell re-runnable and tolerant of library versions
# that never produce these columns (a plain drop raises KeyError on both).
stray_columns = ['index', 'index/BottomUp', 'index/MinTrace_method-ols']
Y_rec_df = Y_rec_df.drop(columns=stray_columns, errors='ignore')
Y_rec_df.head()

In [None]:
def rmse(y, y_hat):
    """Mean over rows of the per-row root-mean-squared error.

    Squared differences are averaged along axis 1, square-rooted row by row,
    and the resulting values are averaged into a single number.
    """
    squared_error = (y - y_hat) ** 2
    per_row_rmse = np.sqrt(np.mean(squared_error, axis=1))
    return np.mean(per_row_rmse)

def mase(y, y_hat, y_insample, seasonality=4):
    """Mean over rows of the scaled absolute forecast error.

    Each row's mean absolute error is divided by that row's mean absolute
    in-sample difference at lag ``seasonality`` (taken along axis 1 of
    ``y_insample``); the ratios are then averaged into a single number.
    """
    seasonal_diff = y_insample[:, seasonality:] - y_insample[:, :-seasonality]
    naive_scale = np.mean(np.abs(seasonal_diff), axis=1)
    forecast_mae = np.mean(np.abs(y - y_hat), axis=1)
    return np.mean(forecast_mae / naive_scale)

# Evaluation tags
# Friendlier level names for the report, plus an 'All' entry that
# concatenates the ids of every level.
eval_tags = {}
eval_tags['Country'] = tags['Country']
eval_tags['Purpose'] = tags['Country/Purpose']
eval_tags['State'] = tags['Country/State']
eval_tags['Regions'] = tags['Country/State/Region']
eval_tags['Bottom'] = tags['Country/State/Region/Purpose']
eval_tags['All'] = np.concatenate(list(tags.values()))

# Evaluate
# `evaluation` was never computed in this cell, so the lines below raised
# NameError. Score the base and reconciled forecasts with the notebook's own
# `rmse` and `mase`; the training frame is passed as `Y_df`, presumably to
# provide `mase` with its in-sample values -- TODO confirm.
evaluator = HierarchicalEvaluation(evaluators=[rmse, mase])
evaluation = evaluator.evaluate(
    Y_hat_df=Y_rec_df,
    Y_test_df=Y_test_df,
    tags=eval_tags,
    Y_df=Y_train_df
)

evaluation = evaluation.drop('Overall')
evaluation.columns = ['Base', 'BottomUp', 'MinTrace(ols)']
# Format every cell with two decimals. Column-wise Series.map gives the same
# result as the deprecated DataFrame.applymap and works on old and new pandas.
evaluation = evaluation.apply(lambda column: column.map('{:.2f}'.format))

In [None]:
# Keep only the rows whose `metric` equals "rmse".
evaluation.query('metric == "rmse"')