# NB-04 Driver Pareto Story (marts-only)

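This notebook reads the DS2 mart `mart_denial_pareto` (and, when available, the optional DS0 mart `mart_exec_overview_latest_week`, used only for the memo receipt) to produce a driver Pareto chart and a Markdown memo for the latest available period.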

In [1]:
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# Marts-only notebook (DS2 required, DS0 optional): resolve BigQuery coordinates
# from the environment. The project id falls back to GOOGLE_CLOUD_PROJECT when
# BQ_PROJECT_ID is unset or empty.
BQ_PROJECT_ID = os.environ.get("BQ_PROJECT_ID") or os.environ.get("GOOGLE_CLOUD_PROJECT")
BQ_DATASET_ID = os.environ.get("BQ_DATASET_ID")

# Fail fast: both identifiers are needed to build the table references below.
if not (BQ_PROJECT_ID and BQ_DATASET_ID):
    raise RuntimeError("BigQuery not configured. Set BQ_PROJECT_ID and BQ_DATASET_ID.")

import pandas_gbq

def read_bq_table(table_name: str) -> pd.DataFrame:
    """Read every row and column of one table in the configured dataset.

    Args:
        table_name: Bare table name. It is qualified with the module-level
            BQ_PROJECT_ID and BQ_DATASET_ID to form the table reference.

    Returns:
        The result of ``pandas_gbq.read_gbq`` for a ``SELECT *`` on that table.
    """
    fully_qualified = f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}"
    return pandas_gbq.read_gbq(
        f"SELECT * FROM `{fully_qualified}`",
        project_id=BQ_PROJECT_ID,
    )

# DS2 is required: a failed read propagates and stops the notebook.
DS2_TABLE = "mart_denial_pareto"
ds2 = read_bq_table(DS2_TABLE)

# DS0 is optional (used for the memo receipt only), so a failed read falls back
# to an empty frame. The failure is reported rather than swallowed silently, so
# a typo or permissions problem is not mistaken for "DS0 not available".
DS0_TABLE = "mart_exec_overview_latest_week"
try:
    ds0 = read_bq_table(DS0_TABLE)
except Exception as exc:
    print(f"DS0 ({DS0_TABLE}) not loaded; continuing without it: {type(exc).__name__}: {exc}")
    ds0 = pd.DataFrame()




Downloading:   0%|[32m          [0m|

Downloading:  56%|[32m█████▌    [0m|

Downloading: 100%|[32m██████████[0m|

Downloading: 100%|[32m██████████[0m|




Downloading:   0%|[32m          [0m|

Downloading: 100%|[32m██████████[0m|




In [3]:
# Restrict DS2 to its most recent svc_month.
if "svc_month" not in ds2.columns:
    raise ValueError("DS2 missing required column: svc_month")

latest_period = ds2["svc_month"].max()
period_df = ds2.loc[ds2["svc_month"].eq(latest_period)].copy()

# A "driver" is the pair denial_group + next_best_action; both columns and the
# amount column must be present before the label can be built.
required_cols = ["denial_group", "next_best_action", "denied_potential_allowed_proxy_amt"]
missing_cols = [name for name in required_cols if name not in period_df.columns]
if missing_cols:
    raise ValueError(f"DS2 missing required columns: {missing_cols}")

# Label format: "<denial_group> | <next_best_action>", each side cast to str and stripped.
period_df = period_df.assign(
    driver_label=lambda frame: (
        frame["denial_group"].astype(str).str.strip()
        + " | "
        + frame["next_best_action"].astype(str).str.strip()
    )
)

# Rank drivers by denied amount (largest first) and keep the top N for the chart.
N_TOP = 10
period_df = period_df.sort_values("denied_potential_allowed_proxy_amt", ascending=False)

top_df = period_df.head(N_TOP).copy()

# Cumulative share of the period total, stored as a 0-1 fraction (the chart
# multiplies it by 100). Both branches divide by the total over the same
# period rows, so the result has the same scale whichever column is used --
# including when pct_of_month_total is stored as 0-100 rather than 0-1.
if "pct_of_month_total" in period_df.columns:
    total_pct = period_df["pct_of_month_total"].sum()
    if total_pct == 0:
        top_df["cumulative_pct"] = 0.0
    else:
        # top_df is the first N rows of the sorted frame, so its own running
        # sum equals the head of the full running sum (no index alignment needed).
        top_df["cumulative_pct"] = (
            top_df["pct_of_month_total"].cumsum() / total_pct
        )
else:
    total = period_df["denied_potential_allowed_proxy_amt"].sum()
    if total == 0:
        top_df["cumulative_pct"] = 0.0
    else:
        top_df["cumulative_pct"] = (
            top_df["denied_potential_allowed_proxy_amt"].cumsum() / total
        )


In [4]:
# Pareto chart: bars show the denied amount per driver (left axis); the line
# shows the cumulative share in percent (right axis).
fig, ax = plt.subplots(figsize=(10, 4.5))

ax.bar(
    x=range(len(top_df)),
    height=top_df["denied_potential_allowed_proxy_amt"].values,
    color="#4c78a8",
)
ax.set_ylabel("Denied Potential Allowed Proxy ($)")
ax.set_xticks(range(len(top_df)))
ax.set_xticklabels(
    top_df["driver_label"].tolist(),
    rotation=45,
    ha="right",
    fontsize=8,
)

# Secondary axis for the cumulative line; cumulative_pct is scaled by 100 here.
ax2 = ax.twinx()
ax2.plot(
    range(len(top_df)),
    top_df["cumulative_pct"].values * 100,
    color="#f58518",
    marker="o",
)
ax2.set_ylabel("Cumulative % of total")
ax2.set_ylim(0, 110)

ax.set_title("Top Drivers - Denied Potential Allowed Proxy (Latest Period)")
fig.tight_layout()

# Save under docs/images, using ./docs when it exists and ../docs otherwise.
if Path("docs").is_dir():
    img_dir = Path("docs", "images")
else:
    img_dir = Path("..", "docs", "images")
img_dir.mkdir(parents=True, exist_ok=True)
img_path = img_dir.joinpath("nb04_driver_pareto.png")
fig.savefig(img_path, dpi=150, bbox_inches="tight")
plt.close(fig)

print(f"Wrote {img_path}")


Wrote ..\docs\images\nb04_driver_pareto.png


In [5]:
# Memo export (ASCII-only): collect the values interpolated into the memo text.
now = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

# Anchor week comes from the first DS0 row when DS0 was loaded and has the column.
anchor_week = "N/A"
if "week_start" in ds0.columns and not ds0.empty:
    try:
        anchor_week = str(ds0["week_start"].iloc[0])
    except Exception:
        anchor_week = "N/A"

# Top drivers summary: start from placeholders, then fill in whatever exists.
driver_1 = driver_2 = driver_3 = "N/A"
pct_1 = 0.0
if len(top_df) > 0:
    driver_1 = top_df["driver_label"].iloc[0]
    if len(top_df) > 1:
        driver_2 = top_df["driver_label"].iloc[1]
    if len(top_df) > 2:
        driver_3 = top_df["driver_label"].iloc[2]
    # Share of the top driver in the period total; stays 0.0 when the total is zero.
    total = period_df["denied_potential_allowed_proxy_amt"].sum()
    if total != 0:
        pct_1 = (top_df["denied_potential_allowed_proxy_amt"].iloc[0] / total) * 100

# Memo body, one Markdown line per list entry.
memo_lines = [
    "# Driver Memo - Contribution Pareto (Latest Available Period)",
    "",
    "## What this is (no causality)",
    "Contribution/composition view from DS2 marts. This does not establish causality.",
    "",
    "## Receipt",
    f"- Period: {latest_period}",
    f"- Anchor week (if DS0 available): {anchor_week}",
    "- Metric basis: Denied Potential Allowed Proxy (directional prioritization only)",
    "- Source: mart_denial_pareto (DS2)",
    f"- Generated on: {now}",
    "",
    "## Top drivers (contribution)",
    f"- Top driver: {driver_1} - {pct_1:.1f}% of period total",
    f"- Next: {driver_2}, {driver_3}",
    "",
    "## So what (conditional)",
    "- If NB-03 Mix = OK: prioritize next-best-action workflows for top drivers.",
    "- If NB-03 Mix = CHECK SEGMENTS: validate segment mix before acting.",
    "",
    "## Guardrails",
    "- Proxy values are directional prioritization only; not guaranteed recovery.",
    "- This view is contribution/composition, not causality.",
    "",
    "![Driver Pareto](images/nb04_driver_pareto.png)",
]

# Enforce ASCII-only output: any non-ASCII character (e.g. from a driver label)
# is dropped rather than replaced.
memo_lines = [line.encode("ascii", "ignore").decode("ascii") for line in memo_lines]

# Write next to the images: ./docs when it exists, ../docs otherwise. The
# directory is created here so this cell does not depend on another cell
# having created it first.
docs_dir = Path("docs") if Path("docs").is_dir() else Path("..") / "docs"
docs_dir.mkdir(parents=True, exist_ok=True)
memo_path = docs_dir / "driver_pareto_memo_latest_period.md"
with open(memo_path, "w", encoding="utf-8") as handle:
    handle.write("\n".join(memo_lines))

print(f"Wrote {memo_path}")


Wrote ..\docs\driver_pareto_memo_latest_period.md
