In [3]:
# =============================================================================
# File: dl_foundations_scm_data.py
# Purpose: Show how supply-chain data differs from standard ML data and why DL helps
# Topic: Multi-entity + time dependencies (SKU–Location–Customer) with lag effects
# Input: Synthetic order history (date, sku_id, location_id, customer_id, demand)
# Output: Baseline ML vs Deep Learning models + simple evaluation (MAPE)
# =============================================================================

# ==== 0. Imports ==============================================================
from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf

In [4]:
# ==== 1. Top-level config & constants ========================================
SEED = 42
N_DAYS = 240
N_SKUS = 40
N_LOCATIONS = 12
N_CUSTOMERS = 30

# Time-series feature engineering
LAGS = [1, 7, 14]            # Lag effects (yesterday, last week, 2 weeks)
ROLLING_WINDOWS = [7, 28]    # Demand smoothing windows

# Modeling
TRAIN_RATIO = 0.75
BATCH_SIZE = 256
EPOCHS = 10
LEARNING_RATE = 1e-3

# Sequence model config
SEQ_LEN = 28  # how much history we feed to the DL sequence model

# Columns (keep naming consistent across files in your course)
COL_DATE = "date"
COL_SKU = "sku_id"
COL_LOC = "location_id"
COL_CUST = "customer_id"
COL_DEMAND = "demand"


In [5]:
# ==== 2. Reproducibility helpers =============================================
def set_seeds(seed: int = SEED) -> None:
    """Set seeds for reproducible runs."""
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [6]:
# ==== 3. Data generation (SCM-like structure) ================================
def generate_scm_orders(
    n_days: int,
    n_skus: int,
    n_locations: int,
    n_customers: int,
) -> pd.DataFrame:
    """
    Create a synthetic dataset that mimics supply-chain demand drivers:
    - Multi-entity hierarchy: SKU x Location x Customer
    - Calendar seasonality + weekly patterns
    - Promotions/events (semi-structured "event-like" shocks)
    - Lead/lag effects (demand depends on past demand)
    """
    dates = pd.date_range("2024-01-01", periods=n_days, freq="D")

    # Build the Cartesian product: each day has many entity combinations.
    # This is common in SCM: millions of rows across SKU–Location–Customer–Day.
    grid = pd.MultiIndex.from_product(
        [dates, range(n_skus), range(n_locations), range(n_customers)],
        names=[COL_DATE, COL_SKU, COL_LOC, COL_CUST],
    ).to_frame(index=False)

    # Entity “strengths” (nonlinear + heterogenous effects)
    sku_base = np.random.lognormal(mean=2.0, sigma=0.4, size=n_skus)
    loc_factor = np.random.uniform(0.7, 1.3, size=n_locations)
    cust_factor = np.random.uniform(0.6, 1.5, size=n_customers)

    # Calendar patterns
    dow = grid[COL_DATE].dt.dayofweek.values  # 0..6
    week_seasonality = 1.0 + 0.15 * np.sin(2 * np.pi * dow / 7.0)

    day_idx = (grid[COL_DATE] - grid[COL_DATE].min()).dt.days.values
    year_seasonality = 1.0 + 0.20 * np.sin(2 * np.pi * day_idx / 90.0)  # ~quarterly-ish

    # Event shocks (semi-structured "events"): sparse, bursty, non-linear impact
    # Why it matters: SCM often has irregular events (promotions, disruptions).
    event_flag = (np.random.rand(len(grid)) < 0.01).astype(np.float32)
    event_lift = 1.0 + event_flag * np.random.uniform(0.5, 2.0, size=len(grid))

    # Compose demand mean (before lag)
    base_mean = (
        sku_base[grid[COL_SKU].values]
        * loc_factor[grid[COL_LOC].values]
        * cust_factor[grid[COL_CUST].values]
        * week_seasonality
        * year_seasonality
        * event_lift
    )

    # Add stochasticity and clipping at 0 (demand cannot be negative)
    noise = np.random.normal(loc=0.0, scale=0.25, size=len(grid))
    demand = np.maximum(0.0, base_mean * (1.0 + noise))

    df = grid.copy()
    df[COL_DEMAND] = demand.astype(np.float32)
    df["event_flag"] = event_flag

    # Add a realistic “missingness” pattern: not every SKU sells daily in every node.
    # This is a key difference vs many “clean” ML datasets.
    mask = np.random.rand(len(df)) > 0.10  # 10% missing rows
    df = df.loc[mask].reset_index(drop=True)

    return df

In [7]:
# ==== 4. Feature engineering (lags + rolling stats) ==========================
def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add calendar features useful in SCM (dow, month, etc.)."""
    out = df.copy()
    out["dow"] = out[COL_DATE].dt.dayofweek.astype(np.int16)
    out["month"] = out[COL_DATE].dt.month.astype(np.int16)
    out["day_idx"] = (out[COL_DATE] - out[COL_DATE].min()).dt.days.astype(np.int32)
    return out


def add_lag_rolling_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add lag and rolling features per (SKU, Location, Customer).
    Why: SCM demand has temporal dependencies and lag effects (ordering cycles,
    planning calendars, promotions, replenishment patterns).
    """
    out = df.sort_values([COL_SKU, COL_LOC, COL_CUST, COL_DATE]).copy()
    grp = out.groupby([COL_SKU, COL_LOC, COL_CUST], sort=False)[COL_DEMAND]

    for lag in LAGS:
        out[f"lag_{lag}"] = grp.shift(lag)

    for w in ROLLING_WINDOWS:
        # Use past-only windows to avoid leakage.
        out[f"roll_mean_{w}"] = grp.shift(1).rolling(w).mean()

    # A common SCM trick: treat missing history as 0 for intermittent demand.
    # Not always correct, but helpful as a starting educational baseline.
    lag_cols = [f"lag_{l}" for l in LAGS] + [f"roll_mean_{w}" for w in ROLLING_WINDOWS]
    out[lag_cols] = out[lag_cols].fillna(0.0)

    return out


def train_test_split_time(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Time-based split to prevent future leakage."""
    df_sorted = df.sort_values(COL_DATE).copy()
    cutoff_idx = int(len(df_sorted) * TRAIN_RATIO)
    train = df_sorted.iloc[:cutoff_idx].copy()
    test = df_sorted.iloc[cutoff_idx:].copy()
    return train, test

In [8]:
# ==== 5. Metrics ==============================================================
def mape(y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-6) -> float:
    """Mean Absolute Percentage Error with epsilon to avoid div-by-zero."""
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    denom = np.maximum(np.abs(y_true), eps)
    return float(np.mean(np.abs(y_true - y_pred) / denom))

In [9]:
# ==== 6. Example 1: Baseline "classical ML" (linear regression) ===============
def fit_linear_baseline(train: pd.DataFrame, test: pd.DataFrame) -> Dict[str, float]:
    """
    A simple baseline:
    - Use only engineered numeric features (lags/rolling/calendar)
    - Ignore high-cardinality categorical interactions (SKU/LOC/CUST)

    Why it often fails in SCM at scale:
    - Non-linear effects are common (promos, substitution, thresholds)
    - High-cardinality entity interactions explode feature space
    """
    # Minimal, numeric feature set
    features = ["dow", "month", "day_idx", "event_flag"] + \
               [f"lag_{l}" for l in LAGS] + [f"roll_mean_{w}" for w in ROLLING_WINDOWS]

    X_tr = train[features].to_numpy(dtype=np.float32)
    y_tr = train[COL_DEMAND].to_numpy(dtype=np.float32)
    X_te = test[features].to_numpy(dtype=np.float32)
    y_te = test[COL_DEMAND].to_numpy(dtype=np.float32)

    # Closed-form linear regression (least squares)
    # Add bias term
    X_tr_b = np.hstack([np.ones((len(X_tr), 1), dtype=np.float32), X_tr])
    X_te_b = np.hstack([np.ones((len(X_te), 1), dtype=np.float32), X_te])

    # Solve: beta = (X'X)^(-1) X'y
    beta, *_ = np.linalg.lstsq(X_tr_b, y_tr, rcond=None)
    preds = X_te_b @ beta

    return {
        "mape": mape(y_te, preds),
        "y_mean_test": float(np.mean(y_te)),
        "pred_mean_test": float(np.mean(preds)),
    }



In [10]:
# ==== 7. Example 2: DL on tabular SCM data (embeddings + MLP) =================
@dataclass
class TabularDlArtifacts:
    model: tf.keras.Model
    feature_means: np.ndarray
    feature_stds: np.ndarray


def build_tabular_dl_model(
    n_skus: int,
    n_locations: int,
    n_customers: int,
    n_num_features: int,
) -> tf.keras.Model:
    """
    Embeddings for SKU/Location/Customer + numeric features.
    Why this helps in SCM:
    - Learns dense representations for high-cardinality entities
    - Captures non-linear interactions without manual one-hot explosion
    """
    # Categorical inputs (integer ids)
    sku_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_SKU)
    loc_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_LOC)
    cust_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_CUST)

    # Numeric input
    num_in = tf.keras.Input(shape=(n_num_features,), dtype=tf.float32, name="num_features")

    # Embeddings (dimensions are small on purpose for tutorial clarity)
    sku_emb = tf.keras.layers.Embedding(n_skus, 8, name="sku_emb")(sku_in)
    loc_emb = tf.keras.layers.Embedding(n_locations, 4, name="loc_emb")(loc_in)
    cust_emb = tf.keras.layers.Embedding(n_customers, 6, name="cust_emb")(cust_in)

    # Flatten embeddings
    sku_vec = tf.keras.layers.Flatten()(sku_emb)
    loc_vec = tf.keras.layers.Flatten()(loc_emb)
    cust_vec = tf.keras.layers.Flatten()(cust_emb)

    # Combine
    x = tf.keras.layers.Concatenate()([sku_vec, loc_vec, cust_vec, num_in])

    # Non-linear layers
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    x = tf.keras.layers.Dense(32, activation="relu")(x)

    # Positive demand: use softplus to avoid negative predictions
    out = tf.keras.layers.Dense(1, activation="softplus", name="demand_pred")(x)

    model = tf.keras.Model(inputs=[sku_in, loc_in, cust_in, num_in], outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss="mae",
    )
    return model


def standardize_features(
    train_num: np.ndarray,
    test_num: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Standardize numeric features using train statistics to avoid leakage."""
    means = train_num.mean(axis=0, keepdims=True)
    stds = train_num.std(axis=0, keepdims=True) + 1e-6
    return (train_num - means) / stds, (test_num - means) / stds, means.squeeze(), stds.squeeze()


def fit_tabular_dl(train: pd.DataFrame, test: pd.DataFrame) -> Tuple[TabularDlArtifacts, Dict[str, float]]:
    """Train DL model and return artifacts + evaluation metrics."""
    num_cols = ["dow", "month", "day_idx", "event_flag"] + \
               [f"lag_{l}" for l in LAGS] + [f"roll_mean_{w}" for w in ROLLING_WINDOWS]

    tr_num = train[num_cols].to_numpy(dtype=np.float32)
    te_num = test[num_cols].to_numpy(dtype=np.float32)

    tr_num_s, te_num_s, means, stds = standardize_features(tr_num, te_num)

    y_tr = train[COL_DEMAND].to_numpy(dtype=np.float32)
    y_te = test[COL_DEMAND].to_numpy(dtype=np.float32)

    model = build_tabular_dl_model(
        n_skus=N_SKUS,
        n_locations=N_LOCATIONS,
        n_customers=N_CUSTOMERS,
        n_num_features=tr_num_s.shape[1],
    )

    # Build tf.data for efficiency
    ds_tr = tf.data.Dataset.from_tensor_slices(
        (
            {
                COL_SKU: train[COL_SKU].to_numpy(np.int32),
                COL_LOC: train[COL_LOC].to_numpy(np.int32),
                COL_CUST: train[COL_CUST].to_numpy(np.int32),
                "num_features": tr_num_s,
            },
            y_tr,
        )
    ).shuffle(50_000, seed=SEED).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    ds_te = tf.data.Dataset.from_tensor_slices(
        (
            {
                COL_SKU: test[COL_SKU].to_numpy(np.int32),
                COL_LOC: test[COL_LOC].to_numpy(np.int32),
                COL_CUST: test[COL_CUST].to_numpy(np.int32),
                "num_features": te_num_s,
            },
            y_te,
        )
    ).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    model.fit(ds_tr, validation_data=ds_te, epochs=EPOCHS, verbose=0)

    preds = model.predict(ds_te, verbose=0).reshape(-1)
    return (
        TabularDlArtifacts(model=model, feature_means=means, feature_stds=stds),
        {"mape": mape(y_te, preds), "y_mean_test": float(np.mean(y_te)), "pred_mean_test": float(np.mean(preds))},
    )

In [12]:
# ==== 8. Example 3: DL sequence model (LSTM over demand history) ===============
def make_sequences(df: pd.DataFrame, seq_len: int = SEQ_LEN) -> Tuple[Dict[str, np.ndarray], np.ndarray]:
    """
    Build sequences per entity group (SKU, LOC, CUST).
    Input: last seq_len demand values -> predict next-day demand.

    Why: Many SCM signals are truly temporal, not just tabular.
    LSTM/Temporal models can learn:
    - repeated replenishment cycles
    - holiday/weekday patterns
    - intermittent demand regimes
    """
    df = df.sort_values([COL_SKU, COL_LOC, COL_CUST, COL_DATE]).copy()
    sequences = []
    sku_ids, loc_ids, cust_ids = [], [], []
    targets = []

    for (sku, loc, cust), g in df.groupby([COL_SKU, COL_LOC, COL_CUST], sort=False):
        y = g[COL_DEMAND].to_numpy(np.float32)

        # Skip short histories
        if len(y) <= seq_len:
            continue

        for i in range(seq_len, len(y)):
            sequences.append(y[i - seq_len:i])
            targets.append(y[i])
            sku_ids.append(sku)
            loc_ids.append(loc)
            cust_ids.append(cust)

    X_seq = np.stack(sequences, axis=0)  # (N, seq_len)
    y = np.array(targets, dtype=np.float32)

    # Add channel dimension for RNN input: (N, seq_len, 1)
    X_seq = X_seq[..., None]

    return (
        {
            COL_SKU: np.array(sku_ids, dtype=np.int32),
            COL_LOC: np.array(loc_ids, dtype=np.int32),
            COL_CUST: np.array(cust_ids, dtype=np.int32),
            "demand_seq": X_seq,
        },
        y,
    )


def build_sequence_model(n_skus: int, n_locations: int, n_customers: int, seq_len: int) -> tf.keras.Model:
    """Embeddings + LSTM over demand sequences."""
    sku_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_SKU)
    loc_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_LOC)
    cust_in = tf.keras.Input(shape=(), dtype=tf.int32, name=COL_CUST)

    seq_in = tf.keras.Input(shape=(seq_len, 1), dtype=tf.float32, name="demand_seq")

    sku_vec = tf.keras.layers.Flatten()(tf.keras.layers.Embedding(n_skus, 8)(sku_in))
    loc_vec = tf.keras.layers.Flatten()(tf.keras.layers.Embedding(n_locations, 4)(loc_in))
    cust_vec = tf.keras.layers.Flatten()(tf.keras.layers.Embedding(n_customers, 6)(cust_in))

    seq_feat = tf.keras.layers.LSTM(32)(seq_in)

    x = tf.keras.layers.Concatenate()([sku_vec, loc_vec, cust_vec, seq_feat])
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    out = tf.keras.layers.Dense(1, activation="softplus")(x)

    model = tf.keras.Model(inputs=[sku_in, loc_in, cust_in, seq_in], outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss="mae")
    return model


def fit_sequence_dl(train: pd.DataFrame, test: pd.DataFrame) -> Dict[str, float]:
    """Train and evaluate the LSTM model."""
    X_tr, y_tr = make_sequences(train, SEQ_LEN)
    X_te, y_te = make_sequences(test, SEQ_LEN)

    # Standardize sequences (optional, but often helps):
    # Use log1p for demand to stabilize variance (common in SCM).
    # This is a “why” comment: demand tends to be skewed and heteroscedastic.
    def log1p_pack(X: Dict[str, np.ndarray], y: np.ndarray) -> Tuple[Dict[str, np.ndarray], np.ndarray]:
        X2 = dict(X)
        X2["demand_seq"] = np.log1p(X2["demand_seq"])
        y2 = np.log1p(y)
        return X2, y2

    X_tr, y_tr_l = log1p_pack(X_tr, y_tr)
    X_te, y_te_l = log1p_pack(X_te, y_te)

    model = build_sequence_model(N_SKUS, N_LOCATIONS, N_CUSTOMERS, SEQ_LEN)

    ds_tr = tf.data.Dataset.from_tensor_slices((X_tr, y_tr_l)).shuffle(50_000, seed=SEED).batch(BATCH_SIZE)
    ds_te = tf.data.Dataset.from_tensor_slices((X_te, y_te_l)).batch(BATCH_SIZE)

    model.fit(ds_tr, validation_data=ds_te, epochs=EPOCHS, verbose=0)

    # Predict back-transform
    preds_l = model.predict(ds_te, verbose=0).reshape(-1)
    preds = np.expm1(preds_l)

    return {"mape": mape(y_te, preds), "y_mean_test": float(np.mean(y_te)), "pred_mean_test": float(np.mean(preds))}


In [14]:
# ==== 9. When deep learning is justified in SCM ===============================
def deep_learning_justification_checklist() -> None:
    """
    Practical rule-of-thumb checklist (printed as guidance).
    Keep this in code so learners see it near the implementation context.
    """
    print("\nDL is usually justified in SCM when you have at least one of:")
    print("  - High-cardinality entities (thousands+ SKUs/locations/customers)")
    print("  - Non-linear drivers (promos, substitution, constraints, regime shifts)")
    print("  - Strong temporal structure (lags, cycles, intermittent demand)")
    print("  - Multi-modal data (text events, sensor streams, images)")
    print("  - Scale where feature engineering / one-hot becomes impractical")
    print("\nClassical ML may be enough when:")
    print("  - Few entities + stable patterns + clean engineered features")
    print("  - Data volume is limited, or interpretability is primary requirement")


In [15]:
# ==== 10. Built-in checks / demo =============================================
def run_demo() -> None:
    set_seeds(SEED)

    # ---- Load (generate) data
    df = generate_scm_orders(N_DAYS, N_SKUS, N_LOCATIONS, N_CUSTOMERS)
    df = add_time_features(df)
    df = add_lag_rolling_features(df)

    train, test = train_test_split_time(df)

    # ---- Example 1: Linear baseline
    lin_metrics = fit_linear_baseline(train, test)
    print("Example 1 - Linear baseline metrics:", lin_metrics)

    # ---- Example 2: Tabular DL with embeddings
    _, tab_metrics = fit_tabular_dl(train, test)
    print("Example 2 - Tabular DL (embeddings + MLP) metrics:", tab_metrics)

    # ---- Example 3: Sequence DL (LSTM)
    # Note: This is heavier; with large grids it can still be big.
    seq_metrics = fit_sequence_dl(train, test)
    print("Example 3 - Sequence DL (LSTM) metrics:", seq_metrics)

    deep_learning_justification_checklist()

    # ---- Tiny “try yourself” tasks
    # TODO 1: Increase SEQ_LEN to 56 and compare MAPE. Does longer history help?
    # TODO 2: Add a "price_index" synthetic feature and see if DL benefits more than linear.
    # TODO 3: Change missingness rate in generate_scm_orders() and observe performance impact.


if __name__ == "__main__":
    run_demo()

    # ---- Optional: quick sanity print for learners
    print("\nSanity check: script finished successfully.")

Example 1 - Linear baseline metrics: {'mape': 174.2105255126953, 'y_mean_test': 8.37143611907959, 'pred_mean_test': 7.556517601013184}
Example 2 - Tabular DL (embeddings + MLP) metrics: {'mape': 197.01075744628906, 'y_mean_test': 8.37143611907959, 'pred_mean_test': 7.959930896759033}
Example 3 - Sequence DL (LSTM) metrics: {'mape': 140.90504455566406, 'y_mean_test': 7.829433441162109, 'pred_mean_test': 7.357210636138916}

DL is usually justified in SCM when you have at least one of:
  - High-cardinality entities (thousands+ SKUs/locations/customers)
  - Non-linear drivers (promos, substitution, constraints, regime shifts)
  - Strong temporal structure (lags, cycles, intermittent demand)
  - Multi-modal data (text events, sensor streams, images)
  - Scale where feature engineering / one-hot becomes impractical

Classical ML may be enough when:
  - Few entities + stable patterns + clean engineered features
  - Data volume is limited, or interpretability is primary requirement

Sanity che

In [None]:
### Machine Learning vs Deep Learning in Supply Chain – Detailed Summary

**Typical problems and data**  
- In supply chains, Machine Learning (ML) is widely used on structured, tabular data for demand forecasting, inventory optimization, lead‑time prediction, and routing decisions, often using transactional ERP, POS, and logistics data.[web:60][web:61][web:66]  
- Deep Learning (DL) is increasingly applied when supply chains involve large volumes of complex or unstructured data, such as sensor streams from IoT devices, shipment event logs, satellite imagery, scanner images, documents, and text signals from news or social media.[web:56][web:59][web:63][web:65]  

**Model types typically used**  
- ML in supply chain commonly uses gradient boosting, random forests, regularized regression, and classical time‑series models for tasks like demand forecasting, safety stock optimization, supplier risk scoring, and delivery‑time prediction.[web:60][web:61][web:62][web:70]  
- DL uses architectures such as recurrent networks (LSTM/GRU), CNNs, and transformers to capture long‑range temporal dependencies in demand, complex spatial–temporal patterns in logistics networks, and multimodal signals (text, images, sensor data) affecting supply chain performance.[web:56][web:63][web:64][web:65]  

**Feature engineering vs representation learning**  
- In ML‑based supply chain projects, domain experts craft features like promotions flags, holiday calendars, price indices, stockout indicators, lead‑time buckets, and route distance metrics; model quality heavily depends on this manual feature engineering.[web:60][web:61][web:62]  
- DL models can ingest richer raw inputs (e.g. raw time series of orders, event logs, weather grids, shipment status sequences) and automatically learn hierarchical features, which helps capture non‑linear interactions and latent patterns that manual feature sets may miss.[web:56][web:63][web:64][web:65]  

**Demand forecasting examples**  
- ML improves classical demand forecasting by using tree‑based ensembles or boosting models on engineered features to reduce MAPE and RMSE, enabling better safety stock sizing and reducing stockouts and overstock.[web:60][web:61][web:62][web:65][web:70]  
- DL frameworks that combine SOMs, PCA, and deep neural networks, or sequence models like LSTMs and transformers, can further enhance demand and shipment‑time forecasts by modeling complex temporal patterns, regime shifts, and interactions across products, locations, and channels.[web:56][web:63][web:64][web:67]  

**Logistics, routing, and risk**  
- ML is used for ETA prediction, route‑choice modeling, anomaly detection on transport costs, and classification of shipments by delay risk using historical trip data, traffic observations, and basic external signals.[web:60][web:61][web:65]  
- DL extends this by learning from continuous GPS traces, traffic sensor streams, camera images, and text updates, enabling more accurate dynamic routing, early delay detection, and predictive maintenance for fleet and equipment.[web:56][web:59][web:63][web:64]  

**Warehouse and inventory operations**  
- ML supports smart inventory management by predicting fast/slow movers, optimizing reorder policies, and detecting anomalies in picking times or inventory records, generally from structured inventory and operations data.[web:60][web:66][web:69]  
- DL powers advanced warehouse automation such as vision‑based counting and damage detection, robot navigation, automatic pallet recognition, and complex multi‑step decision policies learned from sensor and camera data.[web:56][web:59][web:60][web:64]  

**Data volume, infrastructure, and deployment**  
- ML solutions in supply chain often work well with historical datasets at the scale of thousands or millions of rows and can be deployed within existing planning tools or cloud platforms with moderate compute resources.[web:58][web:60][web:61]  
- DL solutions usually require higher data volumes from multiple systems (IoT, WMS/TMS, external feeds), GPU‑accelerated infrastructure, and more complex MLOps practices, but can yield significant gains in forecasting accuracy, routing efficiency, and automation quality when these prerequisites are met.[web:56][web:63][web:64][web:65]  

**Business trade‑offs and when to use which**  
- For many organizations, ML is a strong first step to modernizing planning and execution, offering interpretable models that integrate relatively easily with existing ERP, APS, and BI stacks for decisions like inventory targets, replenishment, and capacity planning.[web:58][web:60][web:61][web:69]  
- DL becomes attractive when supply chains generate diverse, high‑frequency data and the value of incremental accuracy or automation is high, such as in global e‑commerce networks, real‑time logistics platforms, or highly automated warehouses, where more complex models can materially improve service levels and cost.[web:56][web:63][web:64][web:65]  
