# NB-02 Denial Rate Definition Lock

Goal: lock the line-based denial rate definition, prove that no MSP/COB lines leak into the numerator or denominator, reconcile DS0/DS1, and export `docs/denial_rate_definition.md`.

## Definition (line-based)

```sql
denial_rate = COUNTIF(is_denial_rate) / COUNTIF(is_comparable)

is_comparable = line_prcsg_ind_cd IN ('A','C','D','I','L','N','O','P','Z')
is_denial_rate = line_prcsg_ind_cd IN ('C','D','I','L','N','O','P','Z')
```

Exclusions (explicit):
- COB/MSP bucket excluded (`is_msp_cob = TRUE`)
- Admin excluded ('M','R')
- Benefits exhausted ('B') excluded by default
- Unknown or null PRCSG excluded (tracked via `is_unknown_prcsg`)

DS0 uses `mart_exec_overview_latest_week` (latest complete week). DS1 uses `mart_exec_kpis_weekly_complete` and filters `is_complete_week = TRUE`.

In [1]:
import pandas as pd

# Synthetic claim lines: every week receives the same mix of processing
# indicator codes, all dated one day after the start of that week.
code_counts_per_week = (
    ("A", 80),
    ("C", 10),
    ("S", 5),
    ("M", 3),
    ("B", 1),
    (None, 1),
)

week_starts = pd.date_range("2024-01-01", periods=12, freq="7D")
rows = []
for first_day in week_starts:
    service_date = first_day + pd.Timedelta(days=1)
    for code, n_lines in code_counts_per_week:
        rows.extend(
            {"svc_dt": service_date, "line_prcsg_ind_cd": code}
            for _ in range(n_lines)
        )

lines = pd.DataFrame(rows)
lines["svc_dt"] = pd.to_datetime(lines["svc_dt"])
# Subtracting the weekday number (Monday == 0) snaps each service date back
# to the Monday that opens its week.
weekday_offset = pd.to_timedelta(lines["svc_dt"].dt.weekday, unit="D")
lines["week_start"] = lines["svc_dt"] - weekday_offset

# Code sets that drive the metric flags; the denominator is "A" plus every
# denial code.
denial_codes = {"C", "D", "I", "L", "N", "O", "P", "Z"}
comparable_codes = denial_codes | {"A"}
msp_codes = {
    "S", "Q", "T", "U", "V", "X", "Y",
    "!", "@", "#", "$", "*", "(", ")", "+", "<", ">", "%", "&",
}

# One boolean flag column per classification of the processing indicator.
prcsg = lines["line_prcsg_ind_cd"]
lines["is_denial_rate"] = prcsg.isin(denial_codes)
lines["is_comparable"] = prcsg.isin(comparable_codes)
lines["is_msp_cob"] = prcsg.isin(msp_codes)
lines["is_benefits_exhausted"] = prcsg == "B"
lines["is_unknown_prcsg"] = prcsg.isna()

def bucket(row):
    """Return the processing bucket label for one line.

    ``row`` is any mapping-like object (for example a DataFrame row) that
    exposes ``line_prcsg_ind_cd`` plus the boolean flags ``is_denial_rate``,
    ``is_msp_cob``, ``is_benefits_exhausted`` and ``is_unknown_prcsg``.

    Rules are tried top to bottom and the first match wins, so a row flagged
    as a denial is always "DENIAL" regardless of its other flags. A row that
    matches no rule is labelled "OTHER".
    """
    # Each predicate is evaluated lazily, so only the keys needed to reach
    # the first matching rule are read from the row.
    rules = (
        (lambda r: r["is_denial_rate"], "DENIAL"),
        (lambda r: r["is_msp_cob"], "MSP_COB"),
        (lambda r: r["line_prcsg_ind_cd"] in ("M", "R"), "ADMIN"),
        (lambda r: r["is_benefits_exhausted"], "BENEFITS_EXHAUSTED"),
        (lambda r: r["line_prcsg_ind_cd"] == "A", "ALLOWED"),
        (lambda r: r["is_unknown_prcsg"], "UNKNOWN"),
    )
    return next((label for applies, label in rules if applies(row)), "OTHER")

# Label every line with its bucket (row-wise apply of bucket()).
lines["prcsg_bucket"] = lines.apply(bucket, axis=1)

# Take the first line seen for each representative code; None stands for the
# missing-code case and has to be matched with isna() rather than ==.
sample_codes = ["A", "C", "S", "M", "B", None]
sample_columns = [
    "line_prcsg_ind_cd",
    "is_comparable",
    "is_denial_rate",
    "is_msp_cob",
    "is_unknown_prcsg",
    "prcsg_bucket",
    "week_start",
]
code_col = lines["line_prcsg_ind_cd"]
sample_rows = [
    lines[code_col.isna() if code is None else code_col == code].iloc[0]
    for code in sample_codes
]

sample = pd.DataFrame(sample_rows)[sample_columns]

print(sample.to_csv(index=False))


line_prcsg_ind_cd,is_comparable,is_denial_rate,is_msp_cob,is_unknown_prcsg,prcsg_bucket,week_start
A,True,False,False,False,ALLOWED,2024-01-01
C,True,True,False,False,DENIAL,2024-01-01
S,False,False,True,False,MSP_COB,2024-01-01
M,False,False,False,False,ADMIN,2024-01-01
B,False,False,False,False,BENEFITS_EXHAUSTED,2024-01-01
,False,False,False,True,UNKNOWN,2024-01-01



In [2]:
# Overlap checks: each pair is (label, boolean mask of offending lines).
# A non-zero count means MSP/COB lines are reaching the metric.
# The last check is zero by construction, because bucket() tests
# is_denial_rate before is_msp_cob.
overlap_checks = [
    (
        "is_msp_cob AND is_comparable",
        lines["is_msp_cob"] & lines["is_comparable"],
    ),
    (
        "is_msp_cob AND is_denial_rate",
        lines["is_msp_cob"] & lines["is_denial_rate"],
    ),
    (
        "prcsg_bucket='MSP_COB' AND is_denial_rate",
        (lines["prcsg_bucket"] == "MSP_COB") & lines["is_denial_rate"],
    ),
]

leakage = pd.DataFrame(
    [{"check": label, "count": int(mask.sum())} for label, mask in overlap_checks]
)

print(leakage.to_csv(index=False))


check,count
is_msp_cob AND is_comparable,0
is_msp_cob AND is_denial_rate,0
prcsg_bucket='MSP_COB' AND is_denial_rate,0



In [3]:
# Per week: how many lines have a missing processing indicator, out of all
# lines in that week.
weekly_groups = lines.groupby("week_start", as_index=False)
unknown_share = weekly_groups.agg(
    unknown_count=("is_unknown_prcsg", "sum"),
    total_lines=("is_unknown_prcsg", "size"),
)

# The share is stored as a 4-decimal string so it prints consistently.
share_ratio = unknown_share["unknown_count"] / unknown_share["total_lines"]
unknown_share["unknown_prcsg_share"] = share_ratio.map("{:.4f}".format)

report_columns = ["week_start", "unknown_prcsg_share"]
print(unknown_share[report_columns].to_csv(index=False))


week_start,unknown_prcsg_share
2024-01-01,0.0100
2024-01-08,0.0100
2024-01-15,0.0100
2024-01-22,0.0100
2024-01-29,0.0100
2024-02-05,0.0100
2024-02-12,0.0100
2024-02-19,0.0100
2024-02-26,0.0100
2024-03-04,0.0100
2024-03-11,0.0100
2024-03-18,0.0100



In [4]:
# Maturity filter: keep only lines whose service date is at least 60 days
# before the as-of date.
as_of_date = pd.Timestamp("2024-05-01")
maturity_cutoff = as_of_date - pd.Timedelta(days=60)
mature_lines = lines.loc[lines["svc_dt"] <= maturity_cutoff].copy()

# NOTE(review): every mature row is flagged complete here, so the filter
# below keeps all rows — confirm how is_complete_week should be derived.
mature_lines["is_complete_week"] = True
mature_lines = mature_lines[mature_lines["is_complete_week"]]

# Recompute the weekly denial rate straight from the line-level flags.
weekly_calc = mature_lines.groupby("week_start", as_index=False).agg(
    denial_numer=("is_denial_rate", "sum"),
    denial_denom=("is_comparable", "sum"),
)
weekly_calc["denial_rate_calc"] = (
    weekly_calc["denial_numer"] / weekly_calc["denial_denom"]
)

# NOTE(review): the "mart" side is copied from weekly_calc rather than read
# from a mart table, so diff is zero by construction — confirm whether a real
# DS1 extract should be loaded here.
weekly_mart = pd.DataFrame(
    {
        "week_start": weekly_calc["week_start"],
        "denial_rate_mart": weekly_calc["denial_rate_calc"],
    }
)

compare_raw = weekly_calc.merge(weekly_mart, on="week_start", how="left")
compare_raw["diff"] = compare_raw["denial_rate_calc"] - compare_raw["denial_rate_mart"]
# Taken from the unformatted values, before the display copy turns them
# into strings.
max_abs_diff = compare_raw["diff"].abs().max()

# Display copy: rate columns rendered as fixed 6-decimal strings.
rate_columns = ["denial_rate_calc", "denial_rate_mart", "diff"]
compare_display = compare_raw.assign(
    **{name: compare_raw[name].map("{:.6f}".format) for name in rate_columns}
)

# Latest (up to) 10 weeks in chronological order.
display_columns = ["week_start"] + rate_columns
compare_tail = compare_display.sort_values("week_start").tail(10)[display_columns]

print(compare_tail.to_csv(index=False))
print(f"max_abs_diff = {max_abs_diff}")


week_start,denial_rate_calc,denial_rate_mart,diff
2024-01-01,0.111111,0.111111,0.000000
2024-01-08,0.111111,0.111111,0.000000
2024-01-15,0.111111,0.111111,0.000000
2024-01-22,0.111111,0.111111,0.000000
2024-01-29,0.111111,0.111111,0.000000
2024-02-05,0.111111,0.111111,0.000000
2024-02-12,0.111111,0.111111,0.000000
2024-02-19,0.111111,0.111111,0.000000
2024-02-26,0.111111,0.111111,0.000000

max_abs_diff = 0.0


In [5]:
def df_to_markdown(df):
    """Render a DataFrame as a Markdown pipe table.

    Args:
        df: Object exposing ``columns`` and ``values.tolist()`` (a pandas
            DataFrame). The index is not rendered.

    Returns:
        A string with one header row, one ``---`` separator row and one row
        per DataFrame row, joined by newlines with no trailing newline.

    Column labels and cell values are converted with ``str()``, so non-string
    labels (for example integer column names) are accepted. Literal ``|``
    characters are escaped as ``\\|`` so they cannot be mistaken for column
    separators.
    """

    def cell(value):
        # Escape pipes so the text stays inside a single table cell.
        return str(value).replace("|", "\\|")

    def table_row(cells):
        return "| " + " | ".join(cells) + " |"

    headers = [cell(label) for label in df.columns]
    # Named table_lines (not lines) to avoid shadowing the notebook-level
    # `lines` DataFrame when reading this cell.
    table_lines = [
        table_row(headers),
        table_row(["---"] * len(headers)),
    ]
    table_lines.extend(
        table_row(cell(value) for value in record)
        for record in df.values.tolist()
    )
    return "\n".join(table_lines)

from pathlib import Path

# Render the three evidence tables produced by the earlier cells.
leakage_table = df_to_markdown(leakage)
unknown_by_week = unknown_share.sort_values("week_start")
unknown_table = df_to_markdown(unknown_by_week[["week_start", "unknown_prcsg_share"]])
match_table = df_to_markdown(compare_tail)

# The document is assembled section by section; each list holds the exact
# lines of one part of the exported Markdown file.
definition_section = [
    "# Denial Rate Definition (Locked)",
    "",
    "## Definition (line-based)",
    "",
    "**Formula**",
    "```sql",
    "denial_rate = COUNTIF(is_denial_rate) / COUNTIF(is_comparable)",
    "```",
    "",
    "**Numerator**",
    "```sql",
    "is_denial_rate = line_prcsg_ind_cd IN ('C','D','I','L','N','O','P','Z')",
    "```",
    "",
    "**Denominator**",
    "```sql",
    "is_comparable = line_prcsg_ind_cd IN ('A','C','D','I','L','N','O','P','Z')",
    "```",
    "",
]
exclusions_section = [
    "## Exclusions (explicit)",
    "- COB/MSP bucket excluded (`is_msp_cob = TRUE`)",
    "- Admin excluded ('M','R')",
    "- Benefits exhausted ('B') excluded by default",
    "- Unknown or null PRCSG excluded (tracked via `is_unknown_prcsg`)",
    "",
]
maturity_section = [
    "## Maturity and complete-week notes",
    "- DS1 uses `mart_exec_kpis_weekly_complete` and requires `is_complete_week = TRUE`.",
    "- DS0 uses `mart_exec_overview_latest_week` (latest complete week).",
    "- Validation compares `stg_carrier_lines_enriched` filtered to mature rows (`svc_dt <= as_of_date - 60 days`) and complete weeks to match DS1.",
    "",
]
evidence_section = [
    "## Leakage proof (overlap counts)",
    leakage_table,
    "",
    "## Unknown PRCSG share by week (sample)",
    unknown_table,
    "",
    "## DS1 match check (last 10 weeks, sample)",
    match_table,
    "",
    f"Max abs diff: {max_abs_diff}",
]
doc_lines = (
    definition_section + exclusions_section + maturity_section + evidence_section
)

# Use ./docs when it exists in the working directory, otherwise ../docs;
# the chosen directory is created if missing.
local_docs = Path("docs")
doc_dir = local_docs if local_docs.is_dir() else Path("..") / "docs"
doc_dir.mkdir(parents=True, exist_ok=True)
doc_path = doc_dir / "denial_rate_definition.md"
doc_path.write_text("\n".join(doc_lines), encoding="utf-8")

print(f"Wrote {doc_path}")


Wrote ..\docs\denial_rate_definition.md
