In [1]:
# =============================================================================
# File: demand_forecasting_dl_tf.py
# Topic: Time Series Demand Forecasting with Deep Neural Networks (TensorFlow)
# Purpose: Train DL models (MLP, LSTM, GRU) to forecast demand using covariates.
# Input: tabular time series (date, sku_id, location_id, demand, price, promo, ...)
# Output: forecasts + business metrics (MAPE, Bias) + service-level impact estimate
# =============================================================================

from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf


In [2]:
# =============================================================================
# ==== 0. TOP-LEVEL CONFIG, CONSTANTS, AND REPRODUCIBILITY =====================
# =============================================================================

SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# ---- Data configuration (adjust to your project structure) -------------------
DATA_PATH = "data/demand.csv"  # if you load real data later
DATE_COL = "date"
TARGET_COL = "demand"

# Entity keys (for aggregation levels)
SKU_COL = "sku_id"
LOC_COL = "location_id"

# Exogenous / covariate features for multivariate forecasting
COVARIATE_COLS = [
    "price",
    "promo_flag",
    "holiday_flag",
    "temp_c",
]

# ---- Windowing / forecasting configuration ----------------------------------
LOOKBACK = 28          # historical days used as input
HORIZON = 7            # days ahead to forecast (multi-step)
STRIDE = 1             # move window by 1 day
TRAIN_RATIO = 0.70
VAL_RATIO = 0.15
BATCH_SIZE = 64
EPOCHS = 10

# ---- Model configuration -----------------------------------------------------
HIDDEN_UNITS = 64
DROPOUT = 0.2
LEARNING_RATE = 1e-3

# ---- Business configuration --------------------------------------------------
SERVICE_LEVEL = 0.95   # target service level for inventory/service impact
LEAD_TIME_DAYS = 7     # used for simplified service level impact estimate

In [3]:
# =============================================================================
# ==== 1. UTILITIES: METRICS AND BUSINESS IMPACT ===============================
# =============================================================================

def mape(y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-6) -> float:
    """
    Mean Absolute Percentage Error (MAPE).

    Why: MAPE is widely understood in demand planning and easy to communicate,
    but it can be unstable when demand approaches zero. We protect with eps.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.abs(y_true), eps)
    return float(np.mean(np.abs(y_true - y_pred) / denom) * 100.0)


def bias_pct(y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-6) -> float:
    """
    Forecast bias in percent.

    Why: Bias shows systematic over/under-forecasting, which directly impacts
    inventory (overstock vs. stockouts). Positive means over-forecasting.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.mean(np.abs(y_true)), eps)
    return float(np.mean(y_pred - y_true) / denom * 100.0)


def z_from_service_level(service_level: float) -> float:
    """
    Approximate z-score for common service levels.

    Why: In SCM, service level is often translated into a z value for safety stock.
    For simplicity we use a small lookup; replace with scipy if desired.
    """
    lookup = {
        0.50: 0.00,
        0.84: 1.00,
        0.90: 1.28,
        0.95: 1.645,
        0.97: 1.88,
        0.98: 2.05,
        0.99: 2.33,
    }
    # Fallback: clamp to nearest known
    keys = np.array(sorted(lookup.keys()))
    nearest = float(keys[np.argmin(np.abs(keys - service_level))])
    return lookup[nearest]


def safety_stock(
    demand_std_per_day: float,
    lead_time_days: int,
    service_level: float,
) -> float:
    """
    Simplified safety stock estimate:
      SS = z * sigma_demand_per_day * sqrt(lead_time)

    Why: This gives learners a direct bridge from forecast quality (variance)
    to inventory/service KPIs. In real life, use demand during lead time and
    include lead time variability as well.
    """
    z = z_from_service_level(service_level)
    return float(z * demand_std_per_day * math.sqrt(max(lead_time_days, 1)))


def service_level_impact_proxy(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    service_level: float,
    lead_time_days: int,
) -> Dict[str, float]:
    """
    Proxy to connect forecast errors to service-level/inventory impact.

    Approach (simplified):
    - Use forecast residual std as a proxy for demand uncertainty.
    - Compute implied safety stock for the target service level.

    Why: This is not a full inventory simulator, but it demonstrates
    how “better forecasts -> lower uncertainty -> lower safety stock”.
    """
    residual = np.asarray(y_true) - np.asarray(y_pred)
    sigma = float(np.std(residual))
    ss = safety_stock(sigma, lead_time_days, service_level)
    return {
        "residual_std": sigma,
        "implied_safety_stock_units": ss,
        "service_level_target": float(service_level),
        "lead_time_days": float(lead_time_days),
    }

In [4]:
# =============================================================================
# ==== 2. DATA: SYNTHETIC GENERATOR (FOR LEARNING / DEMO) ======================
# =============================================================================

def make_synthetic_sku_location_data(
    n_days: int = 365,
    sku_id: str = "SKU_001",
    location_id: str = "LOC_A",
) -> pd.DataFrame:
    """
    Create a realistic-ish demand series with covariates.

    Why: Learners can run end-to-end without needing a private dataset.
    We include:
    - weekly seasonality
    - price elasticity (higher price -> lower demand)
    - promotion lift
    - holidays spikes
    - weather influence
    """
    dates = pd.date_range("2024-01-01", periods=n_days, freq="D")

    weekly = 10 * np.sin(2 * np.pi * np.arange(n_days) / 7.0)
    trend = np.linspace(0, 5, n_days)

    price = 10 + 0.5 * np.sin(2 * np.pi * np.arange(n_days) / 30.0) + np.random.normal(0, 0.2, n_days)
    promo_flag = (np.random.rand(n_days) < 0.10).astype(int)
    holiday_flag = (np.random.rand(n_days) < 0.03).astype(int)
    temp_c = 15 + 10 * np.sin(2 * np.pi * np.arange(n_days) / 365.0) + np.random.normal(0, 1.0, n_days)

    # Demand construction (keep non-negative)
    base = 50 + weekly + trend
    price_effect = -3.0 * (price - np.mean(price))  # elasticity
    promo_lift = 15.0 * promo_flag
    holiday_spike = 20.0 * holiday_flag
    weather_effect = 0.3 * (temp_c - np.mean(temp_c))

    noise = np.random.normal(0, 5, n_days)
    demand = np.maximum(0, base + price_effect + promo_lift + holiday_spike + weather_effect + noise)

    df = pd.DataFrame({
        DATE_COL: dates,
        SKU_COL: sku_id,
        LOC_COL: location_id,
        TARGET_COL: demand.astype(float),
        "price": price.astype(float),
        "promo_flag": promo_flag.astype(int),
        "holiday_flag": holiday_flag.astype(int),
        "temp_c": temp_c.astype(float),
    })
    return df


def make_multi_entity_dataset() -> pd.DataFrame:
    """
    Build a small multi-SKU, multi-location dataset.

    Why: In SCM you rarely forecast a single series. This supports:
    - multiple aggregation levels (SKU-LOC, SKU, LOC, TOTAL)
    """
    frames = []
    for sku in ["SKU_001", "SKU_002", "SKU_003"]:
        for loc in ["LOC_A", "LOC_B"]:
            frames.append(make_synthetic_sku_location_data(n_days=365, sku_id=sku, location_id=loc))
    return pd.concat(frames, ignore_index=True)

In [5]:
# =============================================================================
# ==== 3. AGGREGATION LEVELS (MULTI-LEVEL FORECASTING) ==========================
# =============================================================================

def aggregate_demand(
    df: pd.DataFrame,
    level: str,
) -> pd.DataFrame:
    """
    Aggregate to different hierarchy levels.

    level:
      - "sku_loc": forecast each SKU-Location series
      - "sku":     forecast SKU total across locations
      - "loc":     forecast Location total across SKUs
      - "total":   forecast overall demand

    Why: Businesses forecast at multiple levels for S&OP, replenishment, and capacity.
    """
    df = df.copy()
    df[DATE_COL] = pd.to_datetime(df[DATE_COL])

    group_cols = [DATE_COL]  # always group by time
    if level == "sku_loc":
        group_cols += [SKU_COL, LOC_COL]
    elif level == "sku":
        group_cols += [SKU_COL]
    elif level == "loc":
        group_cols += [LOC_COL]
    elif level == "total":
        pass
    else:
        raise ValueError(f"Unknown level: {level}")

    # For covariates: we use mean as a simple aggregation choice.
    agg_map = {TARGET_COL: "sum"}
    for c in COVARIATE_COLS:
        agg_map[c] = "mean"

    out = df.groupby(group_cols, as_index=False).agg(agg_map).sort_values(group_cols)
    return out

In [6]:
# =============================================================================
# ==== 4. FEATURE ENGINEERING: WINDOWING FOR DL MODELS ==========================
# =============================================================================

@dataclass
class WindowedData:
    X: np.ndarray  # shape: [samples, lookback, features] or flattened for MLP
    y: np.ndarray  # shape: [samples, horizon]
    feature_names: List[str]


def standardize_train_only(
    train_array: np.ndarray,
    val_array: np.ndarray,
    test_array: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Standardize using TRAIN statistics only.

    Why: Prevents leakage. In forecasting, leakage can easily occur if you compute
    normalization on the full timeline.
    """
    mean = train_array.mean(axis=0, keepdims=True)
    std = train_array.std(axis=0, keepdims=True) + 1e-8
    return (train_array - mean) / std, (val_array - mean) / std, (test_array - mean) / std, mean, std


def make_supervised_windows(
    df_series: pd.DataFrame,
    lookback: int,
    horizon: int,
    target_col: str,
    covariate_cols: List[str],
) -> WindowedData:
    """
    Convert a single time series dataframe into supervised learning windows.

    Output:
      X: [N, lookback, F] where F = 1 (demand lag) + len(covariates)
      y: [N, horizon] (multi-step direct forecasting)

    Why:
    - DL models learn patterns across time from sequences.
    - Multi-step direct output is a practical baseline for SCM horizons.
    """
    df_series = df_series.sort_values(DATE_COL).reset_index(drop=True)

    # Input features include lagged demand itself + covariates aligned by date.
    feature_names = [target_col] + covariate_cols
    values = df_series[feature_names].to_numpy(dtype=float)

    X_list, y_list = [], []
    total_len = len(df_series)

    for start in range(0, total_len - lookback - horizon + 1, STRIDE):
        end_x = start + lookback
        end_y = end_x + horizon

        X_list.append(values[start:end_x, :])             # [lookback, F]
        y_list.append(values[end_x:end_y, 0])             # demand only, [horizon]

    X = np.stack(X_list, axis=0)
    y = np.stack(y_list, axis=0)
    return WindowedData(X=X, y=y, feature_names=feature_names)


def train_val_test_split(
    X: np.ndarray,
    y: np.ndarray,
    train_ratio: float,
    val_ratio: float,
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    """
    Time-based split (no shuffling).

    Why: In time series you must preserve ordering; random split breaks the
    forecasting constraint and inflates performance.
    """
    n = X.shape[0]
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)

    X_train, y_train = X[:n_train], y[:n_train]
    X_val, y_val = X[n_train:n_train + n_val], y[n_train:n_train + n_val]
    X_test, y_test = X[n_train + n_val:], y[n_train + n_val:]
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)

In [7]:
# =============================================================================
# ==== 5. MODELS: MLP VS. RNN VS. LSTM VS. GRU =================================
# =============================================================================

def build_mlp(
    lookback: int,
    n_features: int,
    horizon: int,
) -> tf.keras.Model:
    """
    MLP baseline: flatten the lookback window into a single vector.

    Why:
    - Often competitive for short horizons and stable patterns.
    - Simpler and faster than recurrent models.
    Trade-off:
    - Does not explicitly model temporal order beyond what flattening preserves.
    """
    inputs = tf.keras.Input(shape=(lookback, n_features))
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.Dense(HIDDEN_UNITS, activation="relu")(x)
    x = tf.keras.layers.Dropout(DROPOUT)(x)
    x = tf.keras.layers.Dense(HIDDEN_UNITS, activation="relu")(x)
    outputs = tf.keras.layers.Dense(horizon)(x)
    return tf.keras.Model(inputs, outputs, name="MLP_Forecaster")


def build_simple_rnn(
    lookback: int,
    n_features: int,
    horizon: int,
) -> tf.keras.Model:
    """
    Simple RNN: a minimal recurrent baseline.

    Why:
    - Teaches recurrence conceptually.
    Trade-off:
    - Can struggle with longer dependencies due to vanishing gradients.
    """
    inputs = tf.keras.Input(shape=(lookback, n_features))
    x = tf.keras.layers.SimpleRNN(HIDDEN_UNITS, return_sequences=False)(inputs)
    x = tf.keras.layers.Dropout(DROPOUT)(x)
    outputs = tf.keras.layers.Dense(horizon)(x)
    return tf.keras.Model(inputs, outputs, name="SimpleRNN_Forecaster")


def build_lstm(
    lookback: int,
    n_features: int,
    horizon: int,
) -> tf.keras.Model:
    """
    LSTM: handles longer-range dependencies via gates (input/forget/output).

    Why (SCM):
    - Captures lag effects (promo lead/lag, weekly/monthly patterns).
    - More robust than vanilla RNN for multi-week lookbacks.
    """
    inputs = tf.keras.Input(shape=(lookback, n_features))
    x = tf.keras.layers.LSTM(HIDDEN_UNITS, return_sequences=False)(inputs)
    x = tf.keras.layers.Dropout(DROPOUT)(x)
    outputs = tf.keras.layers.Dense(horizon)(x)
    return tf.keras.Model(inputs, outputs, name="LSTM_Forecaster")


def build_gru(
    lookback: int,
    n_features: int,
    horizon: int,
) -> tf.keras.Model:
    """
    GRU: similar to LSTM with fewer gates (often faster, fewer parameters).

    Why:
    - Often a strong default when you want recurrent power with lower complexity.
    """
    inputs = tf.keras.Input(shape=(lookback, n_features))
    x = tf.keras.layers.GRU(HIDDEN_UNITS, return_sequences=False)(inputs)
    x = tf.keras.layers.Dropout(DROPOUT)(x)
    outputs = tf.keras.layers.Dense(horizon)(x)
    return tf.keras.Model(inputs, outputs, name="GRU_Forecaster")


def compile_model(model: tf.keras.Model) -> tf.keras.Model:
    """
    Compile with MSE loss and MAE metric.

    Why:
    - MSE is a stable regression objective.
    - MAE is interpretable; we compute MAPE/Bias separately for business reporting.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss="mse",
        metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")],
    )
    return model


In [8]:
# =============================================================================
# ==== 6. TRAINING AND EVALUATION ==============================================
# =============================================================================

def fit_model(
    model: tf.keras.Model,
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_val: np.ndarray,
    y_val: np.ndarray,
) -> tf.keras.callbacks.History:
    """
    Train with early stopping.

    Why: Prevents overfitting and matches real forecasting practice.
    """
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=3,
            restore_best_weights=True,
        )
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=1,
        callbacks=callbacks,
    )
    return history


def evaluate_business_metrics(
    model: tf.keras.Model,
    X_test: np.ndarray,
    y_test: np.ndarray,
    service_level: float,
    lead_time_days: int,
) -> Dict[str, float]:
    """
    Compute business-facing metrics on the test set.

    Note: y_test and predictions are multi-step. We evaluate:
    - overall MAPE across all horizon points
    - bias across all horizon points
    - service-level impact proxy using residual variability
    """
    y_pred = model.predict(X_test, verbose=0)

    # Flatten across horizon for simple reporting
    y_true_flat = y_test.reshape(-1)
    y_pred_flat = y_pred.reshape(-1)

    out = {
        "MAPE_%": mape(y_true_flat, y_pred_flat),
        "Bias_%": bias_pct(y_true_flat, y_pred_flat),
    }
    out.update(service_level_impact_proxy(y_true_flat, y_pred_flat, service_level, lead_time_days))
    return out

In [9]:
# =============================================================================
# ==== 7. PROGRESSIVE EXAMPLES (IN ORDER) ======================================
# =============================================================================

def run_example_1_single_series_mlp() -> None:
    """
    Example 1: Single SKU-Location series, MLP baseline.

    Why this example first:
    - minimal complexity
    - shows the end-to-end pipeline (window -> train -> metrics)
    """
    df = make_synthetic_sku_location_data(n_days=365, sku_id="SKU_001", location_id="LOC_A")

    windows = make_supervised_windows(df, LOOKBACK, HORIZON, TARGET_COL, COVARIATE_COLS)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = train_val_test_split(
        windows.X, windows.y, TRAIN_RATIO, VAL_RATIO
    )

    # Standardize features (time series safe: fit on train only)
    # We standardize per-feature across the lookback dimension by reshaping.
    n_features = X_train.shape[-1]
    X_train_2d = X_train.reshape(-1, n_features)  # [samples*lookback, features]
    X_val_2d = X_val.reshape(-1, n_features)
    X_test_2d = X_test.reshape(-1, n_features)

    X_train_2d, X_val_2d, X_test_2d, _, _ = standardize_train_only(X_train_2d, X_val_2d, X_test_2d)

    X_train = X_train_2d.reshape(-1, LOOKBACK, n_features)
    X_val = X_val_2d.reshape(-1, LOOKBACK, n_features)
    X_test = X_test_2d.reshape(-1, LOOKBACK, n_features)

    model = compile_model(build_mlp(LOOKBACK, n_features, HORIZON))
    fit_model(model, X_train, y_train, X_val, y_val)

    metrics = evaluate_business_metrics(model, X_test, y_test, SERVICE_LEVEL, LEAD_TIME_DAYS)
    print("\n[Example 1: MLP Single Series] Metrics")
    for k, v in metrics.items():
        print(f"- {k}: {v:.4f}" if isinstance(v, float) else f"- {k}: {v}")


def run_example_2_single_series_lstm_vs_gru() -> None:
    """
    Example 2: Same series, compare LSTM vs GRU.

    Why:
    - introduces recurrence and memory
    - highlights a common practical trade-off: LSTM robustness vs GRU speed
    """
    df = make_synthetic_sku_location_data(n_days=365, sku_id="SKU_001", location_id="LOC_A")

    windows = make_supervised_windows(df, LOOKBACK, HORIZON, TARGET_COL, COVARIATE_COLS)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = train_val_test_split(
        windows.X, windows.y, TRAIN_RATIO, VAL_RATIO
    )

    n_features = X_train.shape[-1]
    X_train_2d = X_train.reshape(-1, n_features)
    X_val_2d = X_val.reshape(-1, n_features)
    X_test_2d = X_test.reshape(-1, n_features)
    X_train_2d, X_val_2d, X_test_2d, _, _ = standardize_train_only(X_train_2d, X_val_2d, X_test_2d)

    X_train = X_train_2d.reshape(-1, LOOKBACK, n_features)
    X_val = X_val_2d.reshape(-1, LOOKBACK, n_features)
    X_test = X_test_2d.reshape(-1, LOOKBACK, n_features)

    for builder in [build_lstm, build_gru]:
        model = compile_model(builder(LOOKBACK, n_features, HORIZON))
        fit_model(model, X_train, y_train, X_val, y_val)

        metrics = evaluate_business_metrics(model, X_test, y_test, SERVICE_LEVEL, LEAD_TIME_DAYS)
        print(f"\n[Example 2: {model.name}] Metrics")
        for k, v in metrics.items():
            print(f"- {k}: {v:.4f}" if isinstance(v, float) else f"- {k}: {v}")


def run_example_3_multivariate_and_multi_level(level: str = "sku_loc") -> None:
    """
    Example 3: Multi-entity dataset + aggregation level.

    Why:
    - introduces realistic SCM structure (many SKUs/locations)
    - teaches aggregation-level forecasting choices

    Note:
    - For clarity, we train on ONE aggregated series (the first group).
    - Extension task: loop through all groups and train per series, or build a
      global model with embeddings for SKU/LOC.
    """
    df_raw = make_multi_entity_dataset()
    df_agg = aggregate_demand(df_raw, level=level)

    # Pick one series group to keep runtime small
    group_cols = [DATE_COL]
    if level == "sku_loc":
        group_cols += [SKU_COL, LOC_COL]
    elif level == "sku":
        group_cols += [SKU_COL]
    elif level == "loc":
        group_cols += [LOC_COL]
    elif level == "total":
        group_cols += []
    else:
        raise ValueError(level)

    if level == "total":
        df_series = df_agg.sort_values(DATE_COL)
        series_name = "TOTAL"
    else:
        key_cols = group_cols[1:]
        first_key = df_agg[key_cols].drop_duplicates().iloc[0].to_dict()
        mask = np.ones(len(df_agg), dtype=bool)
        for k, v in first_key.items():
            mask &= (df_agg[k] == v)
        df_series = df_agg[mask].sort_values(DATE_COL)
        series_name = " | ".join([f"{k}={v}" for k, v in first_key.items()])

    windows = make_supervised_windows(df_series, LOOKBACK, HORIZON, TARGET_COL, COVARIATE_COLS)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = train_val_test_split(
        windows.X, windows.y, TRAIN_RATIO, VAL_RATIO
    )

    n_features = X_train.shape[-1]
    X_train_2d = X_train.reshape(-1, n_features)
    X_val_2d = X_val.reshape(-1, n_features)
    X_test_2d = X_test.reshape(-1, n_features)
    X_train_2d, X_val_2d, X_test_2d, _, _ = standardize_train_only(X_train_2d, X_val_2d, X_test_2d)

    X_train = X_train_2d.reshape(-1, LOOKBACK, n_features)
    X_val = X_val_2d.reshape(-1, LOOKBACK, n_features)
    X_test = X_test_2d.reshape(-1, LOOKBACK, n_features)

    model = compile_model(build_gru(LOOKBACK, n_features, HORIZON))
    fit_model(model, X_train, y_train, X_val, y_val)

    metrics = evaluate_business_metrics(model, X_test, y_test, SERVICE_LEVEL, LEAD_TIME_DAYS)
    print(f"\n[Example 3: Multi-level={level} | Series={series_name} | Model=GRU] Metrics")
    for k, v in metrics.items():
        print(f"- {k}: {v:.4f}" if isinstance(v, float) else f"- {k}: {v}")

In [10]:
# =============================================================================
# ==== 8. FORECAST HORIZON TRADE-OFFS (SHORT VS LONG) ==========================
# =============================================================================

def horizon_tradeoff_note() -> None:
    """
    A small conceptual note printed to console.

    Why:
    - Short horizon: usually higher accuracy, operational replenishment decisions.
    - Long horizon: supports S&OP/capacity planning but has higher uncertainty.
    """
    print("\n[Horizon Trade-off Note]")
    print("- Short horizon (e.g., 1-7 days): better accuracy, tactical decisions.")
    print("- Long horizon (e.g., 8-56 days): more uncertainty, strategic planning.")
    print("- In DL, longer horizons often benefit from more covariates and stronger regularization.")

In [None]:
# =============================================================================
# ==== 9. TRY-YOURSELF TASKS (WITH OPTIONAL SOLUTIONS) =========================
# =============================================================================

# TODO 1: Change HORIZON from 7 to 14 and compare MAPE/Bias and implied safety stock.
# TODO 2: Remove covariates (set COVARIATE_COLS = []) and measure accuracy drop.
# TODO 3: Increase LOOKBACK to 56. Does LSTM/GRU improve more than MLP?
# TODO 4: Change SERVICE_LEVEL from 0.95 to 0.99 and observe safety stock impact.

# --- Optional solution hint (keep commented for learners) ---------------------
# SOLUTION IDEA:
# - Longer horizon usually increases residual_std -> increases implied safety stock.
# - Covariates often reduce bias during promo/holiday periods (less systematic error).


In [11]:
# =============================================================================
# ==== 10. MAIN: RUN DEMOS / SANITY CHECKS =====================================
# =============================================================================

if __name__ == "__main__":
    print("Running Time Series Demand Forecasting with DL (TensorFlow) demos...")

    horizon_tradeoff_note()

    # Example 1: MLP baseline
    run_example_1_single_series_mlp()

    # Example 2: LSTM vs GRU comparison
    run_example_2_single_series_lstm_vs_gru()

    # Example 3: Multi-level aggregation (try: 'sku_loc', 'sku', 'loc', 'total')
    run_example_3_multivariate_and_multi_level(level="sku_loc")

Running Time Series Demand Forecasting with DL (TensorFlow) demos...

[Horizon Trade-off Note]
- Short horizon (e.g., 1-7 days): better accuracy, tactical decisions.
- Long horizon (e.g., 8-56 days): more uncertainty, strategic planning.
- In DL, longer horizons often benefit from more covariates and stronger regularization.
Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 3175.6743 - mae: 55.4011 - val_loss: 2578.5620 - val_mae: 49.7953
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 3141.2490 - mae: 55.0890 - val_loss: 2535.3364 - val_mae: 49.3598
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 3104.4492 - mae: 54.7567 - val_loss: 2486.9299 - val_mae: 48.8663
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 3062.9500 - mae: 54.3711 - val_loss: 2430.9097 - val_mae: 48.2868
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━