<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/diagnostic_2x2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title
# Fragility Metrics Toolkit: Diagnostic Metrics (Benchmark Framework)
# 20-NOV-2025
# Fully aligned with FRAGILITY_METRICS.md v9.5 §3.4 DFQ and §4 DNB
#
# Input: TP, FN, FP, TN + chosen metric + benchmark p₀
# Output: p (one-sided exact binomial vs p₀), fr (DFI/DFQ), nb (DNB)
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo. https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0

try:
    from scipy.stats import binomtest
    from math import log, sqrt
    import numpy as np
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "scipy"])
    from scipy.stats import binomtest
    from math import log, sqrt
    import numpy as np

# Significance threshold for the one-sided exact binomial test:
# compute_dfi_dfq treats a result as significant when p <= ALPHA.
ALPHA = 0.05

# ---------- DNB (exact v9.5 formula with documented Haldane–Anscombe 0.5 correction) ----------
def compute_dnb(tp, fn, fp, tn):
    """Return the DNB value for a 2x2 diagnostic table.

    Every cell is first shifted by 0.5 (Haldane–Anscombe correction). The
    diagnostic odds ratio (DOR) and the standard error of its natural log are
    computed from the corrected cells, and the result is

        |ln DOR| / (|ln DOR| + SE)

    Args:
        tp, fn, fp, tn: Cell counts of the 2x2 table.

    Returns:
        0.0 when |ln DOR| is exactly zero, otherwise the ratio above.
    """
    # Haldane–Anscombe correction applied to all four cells.
    corrected = [count + 0.5 for count in (tp, fn, fp, tn)]
    tp_c, fn_c, fp_c, tn_c = corrected

    odds_ratio = (tp_c * tn_c) / (fn_c * fp_c)
    std_err = sqrt(sum(1 / cell for cell in corrected))
    effect = abs(log(odds_ratio))

    if effect == 0:
        return 0.0
    return effect / (effect + std_err)

# ---------- Metric-specific (k, n_relevant, toggle direction) ----------
def get_diagnostic_params(tp, fn, fp, tn, metric):
    """Map a metric name to its binomial inputs and toggle descriptions.

    Args:
        tp, fn, fp, tn: Cell counts of the 2x2 diagnostic table.
        metric: One of "sensitivity", "specificity", "ppv", "npv", "accuracy".

    Returns:
        A 4-tuple ``(k, n_rel, toggle_increases_k, toggle_decreases_k)``:
        ``k`` is the numerator count of the metric, ``n_rel`` its denominator,
        and the last two items are zero-argument callables returning a text
        label for the cell move that raises or lowers ``k``.

    Raises:
        ValueError: If ``metric`` is not one of the supported names.
    """
    if metric == "sensitivity":
        return tp, tp + fn, (lambda: "FN → TP"), (lambda: "TP → FN")
    if metric == "specificity":
        return tn, fp + tn, (lambda: "FP → TN"), (lambda: "TN → FP")
    if metric == "ppv":
        return tp, tp + fp, (lambda: "FP → TP"), (lambda: "TP → FP")
    if metric == "npv":
        return tn, fn + tn, (lambda: "FN → TN"), (lambda: "TN → FN")
    if metric == "accuracy":
        return (
            tp + tn,
            tp + fn + fp + tn,
            (lambda: "any misclassification → correct"),
            (lambda: "any correct → misclassification"),
        )
    raise ValueError("metric must be sensitivity/specificity/ppv/npv/accuracy")

# ---------- DFI/DFQ (strict v9.5 one-sided binomial benchmark) ----------
def compute_dfi_dfq(k, n_rel, p0, direction="greater", alpha=None):
    """Find how many single-count toggles flip the benchmark decision.

    The current decision is "significant" when the one-sided exact binomial
    p-value of ``k`` successes out of ``n_rel`` against ``p0`` is <= ``alpha``.
    ``k`` is then moved one count at a time in the direction that can change
    that decision until the decision flips.

    Args:
        k: Observed number of successes for the chosen metric.
        n_rel: Number of observations relevant to the metric (denominator).
        p0: Benchmark proportion under the null hypothesis.
        direction: "greater" tests whether the proportion exceeds ``p0``; any
            other value is treated as "less".
        alpha: Significance threshold. ``None`` (the default) uses the
            module-level ``ALPHA``, so existing callers are unaffected.

    Returns:
        ``(DFI, DFQ)`` where DFI is the number of toggles needed and
        DFQ = DFI / n_rel. ``(None, None)`` when ``n_rel`` is 0, ``k`` lies
        outside ``[0, n_rel]``, or no number of toggles flips the decision.
    """
    if alpha is None:
        alpha = ALPHA
    if n_rel == 0 or k > n_rel or k < 0:
        return None, None

    alternative = "greater" if direction == "greater" else "less"

    def is_significant(successes):
        p_value = binomtest(successes, n_rel, p0, alternative=alternative).pvalue
        return p_value <= alpha

    currently_significant = is_significant(k)

    # Under "greater", raising k strengthens the evidence; under "less",
    # lowering k does. A significant result is pushed back toward the null,
    # a non-significant result is pushed away from it.
    strengthening_step = +1 if alternative == "greater" else -1
    step = -strengthening_step if currently_significant else strengthening_step

    # k can only move until it reaches 0 or n_rel.
    max_steps = n_rel - k if step == +1 else k
    for d in range(1, max_steps + 1):
        if is_significant(k + step * d) != currently_significant:
            return d, d / n_rel
    return None, None

# ---------- High-level ----------
def diagnostic_benchmark(tp, fn, fp, tn, metric="accuracy", p0=0.50, direction="greater"):
    """Compute the p / fr / nb triplet for one metric of a 2x2 diagnostic table.

    Args:
        tp, fn, fp, tn: Cell counts of the 2x2 table.
        metric: Metric name accepted by ``get_diagnostic_params``.
        p0: Benchmark proportion the metric is tested against.
        direction: "greater" for a one-sided test above ``p0``; any other
            value selects the "less" alternative.

    Returns:
        A dict with the metric name, the benchmark, the success count ``k``,
        the denominator ``n_relevant``, the observed proportion, the one-sided
        exact binomial p-value under "p", and nested dicts "fr" (DFI, DFQ)
        and "nb" (DNB).

    Raises:
        ValueError: If the metric's denominator is zero (also propagated from
            ``get_diagnostic_params`` for an unknown metric).
    """
    successes, denominator = get_diagnostic_params(tp, fn, fp, tn, metric)[:2]
    if denominator == 0:
        raise ValueError(f"{metric.capitalize()} undefined (denominator = 0)")

    if direction == "greater":
        alternative = "greater"
    else:
        alternative = "less"
    exact_p = binomtest(successes, denominator, p0, alternative=alternative).pvalue

    fragility = dict(zip(("DFI", "DFQ"), compute_dfi_dfq(successes, denominator, p0, direction)))
    benefit = {"DNB": compute_dnb(tp, fn, fp, tn)}

    summary = {}
    summary["metric"] = metric
    summary["benchmark_p0"] = p0
    summary["k"] = successes
    summary["n_relevant"] = denominator
    summary["observed_proportion"] = successes / denominator
    summary["p"] = exact_p
    summary["fr"] = fragility
    summary["nb"] = benefit
    return summary

# ---------- Narrative ----------
def narrative(res, tp, fn, fp, tn):
    """Render a result dict as a multi-line, human-readable summary.

    Args:
        res: Dict with the keys read below: "metric", "benchmark_p0",
            "observed_proportion", "k", "n_relevant", "p", "fr" (holding
            "DFI" and "DFQ") and "nb" (holding "DNB").
        tp, fn, fp, tn: Cell counts echoed on the first line.

    Returns:
        The summary lines joined with newlines (no trailing newline).
    """
    report = []
    report.append(f"Diagnostic table: TP={tp}, FN={fn}, FP={fp}, TN={tn}")
    report.append(
        f"Metric assessed: {res['metric'].upper()} (benchmark p₀ = {res['benchmark_p0']})"
    )
    report.append(
        f"Observed = {res['observed_proportion']:.4f} ({res['k']}/{res['n_relevant']})"
    )
    report.append(f"One-sided exact binomial p = {res['p']:.6f}")

    # A missing DFI means no number of toggles flips the benchmark decision.
    if res['fr']['DFI'] is None:
        fragility_line = "DFQ = 1.000000 (cannot flip benchmark decision)"
    else:
        fragility_line = f"DFI = {res['fr']['DFI']} → DFQ = {res['fr']['DFQ']:.6f}"
    report.append(fragility_line)

    report.append(f"DNB = {res['nb']['DNB']:.6f}")
    return "\n".join(report)

# ---------- CLI ----------
def main():
    """Interactive entry point: prompt for a 2x2 table and print p / fr / nb.

    Reads TP, FN, FP, TN, the metric choice, the benchmark p₀ and the test
    direction from standard input, runs ``diagnostic_benchmark`` and prints
    the numeric summary followed by the narrative text.

    Raises:
        ValueError: If a count or p₀ entry cannot be parsed as a number.
        KeyError: If the metric choice is not one of "1".."5".
    """
    print("Diagnostic Benchmark Calculator (v9.5 compliant)\n")
    tp = int(input("TP: "))
    fn = int(input("FN: "))
    fp = int(input("FP: "))
    tn = int(input("TN: "))

    print("\nMetric: 1=sensitivity, 2=specificity, 3=ppv, 4=npv, 5=accuracy")
    m = {"1": "sensitivity", "2": "specificity", "3": "ppv", "4": "npv", "5": "accuracy"}
    # Strip so that a whitespace-only entry falls through to the default.
    metric = m[input("Choose (1–5) [default 5]: ").strip() or "5"]
    p0 = float(input("Benchmark p₀ [default 0.50]: ").strip() or "0.50")

    print("Direction: 1 = greater than p₀, 2 = less than p₀")
    # Resolve the default first, then compare. Writing
    # `input(...) or "1" == "1"` parses as `input(...) or ("1" == "1")`,
    # which is always truthy and made "less" impossible to select.
    direction_choice = input("Choose (1–2) [default 1]: ").strip() or "1"
    direction = "greater" if direction_choice == "1" else "less"

    res = diagnostic_benchmark(tp, fn, fp, tn, metric, p0, direction)

    print("\n================ p–fr–nb ================")
    print(f"p (one-sided exact binomial) = {res['p']:.6f}")
    print(f"DFI = {res['fr']['DFI']}")
    # DFQ is reported as 1.0 when the decision cannot be flipped (DFQ is None).
    print(f"DFQ = {1.0 if res['fr']['DFQ'] is None else res['fr']['DFQ']:.6f}")
    print(f"DNB = {res['nb']['DNB']:.6f}")
    print("=========================================\n")
    print(narrative(res, tp, fn, fp, tn))

if __name__ == "__main__":
    main()

Diagnostic Benchmark Calculator (v9.5 compliant)

TP: 5
FN: 25
FP: 3
TN: 7

Metric: 1=sensitivity, 2=specificity, 3=ppv, 4=npv, 5=accuracy
Choose (1–5) [default 5]: 1
Benchmark p₀ [default 0.50]: 
Direction: 1 = greater than p₀, 2 = less than p₀
Choose (1–2) [default 1]: 1

p (one-sided exact binomial) = 0.999970
DFI = 15
DFQ = 0.500000
DNB = 0.491010

Diagnostic table: TP=5, FN=25, FP=3, TN=7
Metric assessed: SENSITIVITY (benchmark p₀ = 0.5)
Observed = 0.1667 (5/30)
One-sided exact binomial p = 0.999970
DFI = 15 → DFQ = 0.500000
DNB = 0.491010
