In [1]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, confusion_matrix

# ------------------------------------------------------------
# Config
# ------------------------------------------------------------
DATA_PATH = r"C:\Users\NXTWAVE\Downloads\Stockout Forecasting\archive\supply_chain_dataset1.csv"
OUT_DIR   = Path(r"C:\Users\NXTWAVE\Downloads\Stockout Forecasting")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ------------------------------------------------------------
# Load data
# ------------------------------------------------------------
def load_data(csv_path):
    df = pd.read_csv(csv_path, parse_dates=["Date"])
    print(f"[INFO] Loaded {csv_path} with shape {df.shape}")
    return df

# ------------------------------------------------------------
# Feature Engineering
# ------------------------------------------------------------
def make_features(df):
    df = df.sort_values(["SKU_ID", "Warehouse_ID", "Date"]).copy()

    df["demand_7d"] = df.groupby(["SKU_ID","Warehouse_ID"])["Units_Sold"].transform(lambda x: x.rolling(7, min_periods=1).mean())
    df["demand_14d"] = df.groupby(["SKU_ID","Warehouse_ID"])["Units_Sold"].transform(lambda x: x.rolling(14, min_periods=1).mean())
    df["demand_28d"] = df.groupby(["SKU_ID","Warehouse_ID"])["Units_Sold"].transform(lambda x: x.rolling(28, min_periods=1).mean())

    df["cv_14d"] = (
        df.groupby(["SKU_ID","Warehouse_ID"])["Units_Sold"]
        .transform(lambda x: x.rolling(14, min_periods=1).std())
        / (df["demand_14d"] + 1e-6)
    )

    df["days_of_cover"] = df["Inventory_Level"] / (df["demand_14d"] + 1e-6)

    if "Stockout_Flag" in df.columns:
        df["stockout_flag"] = df["Stockout_Flag"]
    else:
        df["stockout_flag"] = (df["Inventory_Level"] <= 0).astype(int)

    df = df.fillna(0)
    return df

# ------------------------------------------------------------
# Demand Forecasting
# ------------------------------------------------------------
def train_demand_model(df):
    feats = ["demand_7d","demand_14d","demand_28d","cv_14d","days_of_cover","Inventory_Level","Order_Quantity"]
    X = df[feats]
    y = df["Units_Sold"]

    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", RandomForestRegressor(n_estimators=100, random_state=42))
    ])
    pipe.fit(Xtr, ytr)

    preds = pipe.predict(Xte)
    mae = mean_absolute_error(yte, preds)
    rmse = mean_squared_error(yte, preds) ** 0.5   # compatible
    print(f"[DEMAND] MAE={mae:.2f} RMSE={rmse:.2f}")

    return pipe, {"mae": float(mae), "rmse": float(rmse)}

# ------------------------------------------------------------
# Stockout Classifier
# ------------------------------------------------------------
def train_stockout_model(df):
    feats = ["demand_7d","demand_14d","demand_28d","cv_14d","days_of_cover","Inventory_Level","Order_Quantity"]
    X = df[feats]
    y = df["stockout_flag"]

    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", RandomForestClassifier(n_estimators=100, random_state=42))
    ])
    pipe.fit(Xtr, ytr)

    preds = pipe.predict(Xte)
    acc = accuracy_score(yte, preds)
    cm = confusion_matrix(yte, preds)
    print(f"[STOCKOUT] Accuracy={acc:.2f}")

    return pipe, {"accuracy": float(acc), "confusion_matrix": cm.tolist()}

# ------------------------------------------------------------
# Extra Visuals: Accuracy Graph & Confusion Matrix Heatmap
# ------------------------------------------------------------
def save_accuracy_and_cm(demand_metrics, stockout_metrics, outdir):
    # Accuracy graph for demand model (MAE, RMSE)
    plt.figure(figsize=(6,4))
    plt.bar(["MAE","RMSE"], [demand_metrics["mae"], demand_metrics["rmse"]], color=["skyblue","orange"])
    plt.title("Demand Forecast Errors")
    plt.ylabel("Error")
    plt.savefig(outdir / "viz_demand_accuracy.png")
    plt.close()

    # Confusion matrix heatmap
    cm = np.array(stockout_metrics["confusion_matrix"])
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["No Stockout","Stockout"], yticklabels=["No Stockout","Stockout"])
    plt.title("Stockout Classifier Confusion Matrix")
    plt.ylabel("True")
    plt.xlabel("Predicted")
    plt.savefig(outdir / "viz_stockout_confusion_matrix.png")
    plt.close()

# ------------------------------------------------------------
# Run Flow
# ------------------------------------------------------------
df = load_data(DATA_PATH)
df = make_features(df)

# Train models
demand_model, demand_metrics = train_demand_model(df)
stockout_model, stockout_metrics = train_stockout_model(df)

# Save models
joblib.dump(demand_model, OUT_DIR / "demand_model.pkl")
joblib.dump(stockout_model, OUT_DIR / "stockout_model.pkl")

# Save features
df.to_hdf(OUT_DIR / "processed_inventory.h5", key="data", mode="w")

# Save insights
insights = {
    "top_risky_skus": df.groupby("SKU_ID")["stockout_flag"].mean().sort_values(ascending=False).head(10).to_dict(),
    "demand_metrics": demand_metrics,
    "stockout_metrics": stockout_metrics
}
with open(OUT_DIR / "insights.json","w") as f:
    json.dump(insights, f, indent=2)

# Save metadata
metadata = {
    "input_file": DATA_PATH,
    "output_dir": str(OUT_DIR),
    "features": ["demand_7d","demand_14d","demand_28d","cv_14d","days_of_cover","Inventory_Level","Order_Quantity"],
    "versions": {
        "pandas": pd.__version__,
        "numpy": np.__version__
    }
}
with open(OUT_DIR / "build_metadata.yaml","w") as f:
    yaml.dump(metadata, f)

# Save accuracy graph + confusion matrix
save_accuracy_and_cm(demand_metrics, stockout_metrics, OUT_DIR)

print(f"[DONE] Accuracy graph and confusion matrix heatmap saved to {OUT_DIR}")


[INFO] Loaded C:\Users\NXTWAVE\Downloads\Stockout Forecasting\archive\supply_chain_dataset1.csv with shape (91250, 15)
[DEMAND] MAE=4.12 RMSE=5.22




[STOCKOUT] Accuracy=1.00
[DONE] Accuracy graph and confusion matrix heatmap saved to C:\Users\NXTWAVE\Downloads\Stockout Forecasting
