# Reduction-Based Forecasting (Tabularization)

Reduction converts a time series forecasting problem into a **supervised learning** task
by creating lagged feature windows. This is how many machine-learning forecasters in sktime
work internally.


## Core idea
For a window length $p$, build features:

$$\mathbf{x}_t = [y_{t-1}, y_{t-2}, \dots, y_{t-p}]$$

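and train a regression model to predict $y_t$ (or, for longer horizons, values further ahead).
Multi-step forecasting is handled by one of three strategies: **recursive** (a single one-step
model whose predictions are fed back in as inputs), **direct** (a separate model for each step),
or **multioutput** (one model that predicts all steps at once).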


## Create lagged features and fit a simple linear model


In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Synthetic monthly series: constant level + linear trend + 12-month sine
# seasonality + Gaussian noise. The fixed seed makes every run identical.
rng = np.random.default_rng(123)

n = 180
season = 12

trend = np.linspace(0, 8, n)
seasonality = 4 * np.sin(2 * np.pi * np.arange(n) / season)
noise = rng.normal(0, 0.8, n)

# Monthly PeriodIndex starting at 2010-01; the series is named "y".
month_index = pd.period_range("2010-01", periods=n, freq="M")
series = pd.Series(20 + trend + seasonality + noise, index=month_index, name="y")


In [2]:
def make_lag_matrix(y, lags):
    """Build a table of lagged copies of a series.

    Parameters
    ----------
    y : array-like
        Observations in time order. Any existing index is discarded.
    lags : int
        Number of lag columns to create.

    Returns
    -------
    pandas.DataFrame
        One row per observation on a fresh 0..n-1 index, with columns
        ``lag_1`` ... ``lag_<lags>``. Column ``lag_k`` holds the value observed
        ``k`` steps earlier, so its first ``k`` rows are NaN.
    """
    values = pd.Series(y).reset_index(drop=True)
    # Naming each shifted copy up front lets concat produce the column labels.
    shifted = [values.shift(k).rename(f"lag_{k}") for k in range(1, lags + 1)]
    return pd.concat(shifted, axis=1)

# Window length: each target is predicted from its previous `lags` observations.
lags = 12

# The first `lags` rows have incomplete lag windows (NaN), so they are dropped
# from the features and from the target alike to keep the two row-aligned.
X = make_lag_matrix(series, lags).iloc[lags:]
y_target = series.iloc[lags:]

# Chronological 80/20 split: the earliest rows train, the latest rows test.
split = int(0.8 * len(X))
X_train, y_train = X.iloc[:split], y_target.iloc[:split]
X_test, y_test = X.iloc[split:], y_target.iloc[split:]


In [3]:
# Ordinary least squares on the lag features. A leading column of ones makes
# the first coefficient the intercept.
Xb = np.hstack([np.ones((len(X_train), 1)), X_train.to_numpy()])
lstsq_result = np.linalg.lstsq(Xb, y_train.to_numpy(), rcond=None)
coef = lstsq_result[0]

# Recursive multi-step forecast over the test window: start from the
# observations the model was trained on, predict one step, append that
# prediction to the history, and repeat.
history = series.to_numpy()[: lags + split].tolist()
recursive_preds = []
for _step in range(len(y_test)):
    # Newest value first, to match the lag_1..lag_<lags> column order.
    window = history[-lags:]
    window.reverse()
    features = np.array([1.0, *window])
    pred = features @ coef
    recursive_preds.append(pred)
    history.append(pred)

recursive_preds = pd.Series(recursive_preds, index=y_test.index)


## Direct strategy (one model per horizon)
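We train a separate model for each forecast step $h$. Every model sees the same lag window and
predicts its own step directly, so no forecast is ever fed back in as an input.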


In [4]:
def fit_direct_models(series, lags, horizon):
    """Fit one least-squares linear model per forecast step (direct strategy).

    Every model uses the same lag-window features; only the target differs.
    Feature row ``t`` is ``[y[t-1], ..., y[t-lags]]``, i.e. its window ends at
    ``t - 1``, so the value ``h`` steps past the window is ``y[t + h - 1]``.

    Parameters
    ----------
    series : array-like
        Observations to train on, in time order.
    lags : int
        Window length (number of lagged features).
    horizon : int
        Number of forecast steps; one model is fitted for each ``h`` in
        ``1..horizon``.

    Returns
    -------
    list of numpy.ndarray
        ``models[h - 1]`` is the coefficient vector
        ``[intercept, b_1, ..., b_lags]`` for step ``h``. Applied to
        ``[1, y[T], y[T-1], ..., y[T-lags+1]]`` it forecasts ``y[T + h]``.
    """
    models = []
    y = pd.Series(series).reset_index(drop=True)
    X = make_lag_matrix(y, lags)
    for h in range(1, horizon + 1):
        # The lag window already ends one step before row t, so the h-step-ahead
        # target is y[t + h - 1]. Shifting by -h instead would train every model
        # one step too far ahead (the "1-step" model would learn a 2-step forecast).
        target = y.shift(-(h - 1)).rename("target")
        # Drop rows whose window or target falls outside the observed range.
        data = pd.concat([X, target], axis=1).dropna()
        X_h = data.iloc[:, :-1].values
        y_h = data.iloc[:, -1].values
        # Leading column of ones -> first coefficient is the intercept.
        Xb = np.column_stack([np.ones(len(X_h)), X_h])
        coef = np.linalg.lstsq(Xb, y_h, rcond=None)[0]
        models.append(coef)
    return models

horizon = 5

# Fit on the training span only -- the same observations the recursive model
# was fitted on -- so the held-out test values cannot leak into the coefficients.
train_series = series.iloc[: lags + split]
models = fit_direct_models(train_series, lags=lags, horizon=horizon)

# Last `lags` training observations, reversed to newest-first so they line up
# with lag_1..lag_<lags>, with a leading 1 for the intercept.
last_window = series.iloc[split : split + lags].values
x = np.concatenate([[1.0], last_window[::-1]])

# Every horizon model forecasts from that one window; nothing is fed back.
# (A comprehension also avoids rebinding the module-level `coef`.)
direct_preds = [x @ coef_h for coef_h in models]

direct_index = y_test.index[:horizon]
direct_preds = pd.Series(direct_preds, index=direct_index)


In [5]:
# Overlay the observed series with both sets of forecasts.
# Each entry is (data, plotly draw mode, legend label).
trace_specs = [
    (series, "lines", "Actual"),
    (recursive_preds, "lines", "Recursive (reduction)"),
    (direct_preds, "markers", "Direct (first 5 steps)"),
]

fig = go.Figure()
for values, mode, label in trace_specs:
    # The PeriodIndex is converted to timestamps for the x-axis.
    fig.add_trace(go.Scatter(x=values.index.to_timestamp(), y=values, mode=mode, name=label))
fig.update_layout(title="Reduction-Based Forecasting", xaxis_title="Time", yaxis_title="y")
fig.show()


## When reduction shines
- You want to use powerful **tabular regressors** (random forests, gradient boosting).
- You have exogenous features (calendar, price, promotions).
- Forecast horizon is short-to-medium (recursive error compounding is manageable).


## Optional: sktime reduction API


In [6]:
# Optional demo: the same recursive reduction through sktime's API, wrapping a
# random forest. The whole cell is best-effort -- if sktime / scikit-learn is
# missing or anything else fails, it prints why and moves on.
try:
    from sktime.forecasting.compose import make_reduction
    from sktime.forecasting.base import ForecastingHorizon
    from sklearn.ensemble import RandomForestRegressor

    # Newer sktime releases expose the splitter utilities from `sktime.split`;
    # fall back to the older location for older installs.
    try:
        from sktime.split import temporal_train_test_split
    except ImportError:
        from sktime.forecasting.model_selection import temporal_train_test_split

    y = series
    # NOTE: this rebinds y_train / y_test from the manual split above
    # (here the last 24 observations are held out).
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    # Absolute horizon: forecast exactly the held-out periods.
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    reg = RandomForestRegressor(n_estimators=200, random_state=42)
    forecaster = make_reduction(reg, window_length=12, strategy="recursive")

    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    # A bare expression inside `try` is not auto-displayed by the notebook,
    # so print the preview explicitly.
    print(y_pred.head())
except Exception as e:
    print("sktime reduction demo skipped:", e)


sktime reduction demo skipped: No module named 'sktime'
