<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/diagnostic_2x2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# @title
# Fragility Metrics Toolkit: Diagnostic Metrics (2×2 with Ground Truth)
# 20-NOV-2025
# Input: TP, FN, FP, TN
# Output: p (Fisher’s exact), DOR, fr (DFI/DFQ), nb (DNB)
#
# Fully aligned with FRAGILITY_METRICS.md v9.5
# - DFI = minimum success toggles to flip two-sided Fisher’s exact significance
# - DFQ = DFI / N  (N = TP+FN+FP+TN)
# - DNB = |ln(DOR)| / (|ln(DOR)| + SE_lnDOR) with Haldane-Anscombe 0.5 correction
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo. https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0

# Import the numerical dependencies. If any of these imports fails, install
# scipy with pip for the running interpreter and retry the same imports.
try:
    from scipy.stats import fisher_exact
    from math import log, sqrt
    import numpy as np
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "scipy"])
    from scipy.stats import fisher_exact
    from math import log, sqrt
    import numpy as np

# Significance threshold: a result with p <= ALPHA is treated as significant.
ALPHA = 0.05

# ---------- Core utilities ----------
def fisher_p(tp, fn, fp, tn):
    """Return the two-sided Fisher's exact p-value and the sample odds ratio.

    The counts are arranged as the 2x2 table ``[[tp, fn], [fp, tn]]`` and
    passed to ``scipy.stats.fisher_exact``.

    Args:
        tp: True-positive count.
        fn: False-negative count.
        fp: False-positive count.
        tn: True-negative count.

    Returns:
        A ``(p, oddsratio)`` tuple: the two-sided p-value first, then the
        odds ratio reported by ``fisher_exact`` for the same table.
    """
    table = np.array([[tp, fn], [fp, tn]])
    # A single call yields both statistics (odds ratio first, p-value
    # second); running the exact test twice only doubled the work.
    oddsratio, p = fisher_exact(table, alternative="two-sided")
    return p, oddsratio

def diagnostic_neutrality_boundary(tp, fn, fp, tn):
    """Compute the Diagnostic Neutrality Boundary (DNB) of a 2x2 table.

    Every cell is increased by 0.5 before the diagnostic odds ratio (DOR)
    and the standard error of ln(DOR) are computed. The result is
    ``|ln(DOR)| / (|ln(DOR)| + SE)``, or ``0.0`` when ``ln(DOR)`` is zero.
    """
    adj_tp = tp + 0.5
    adj_fn = fn + 0.5
    adj_fp = fp + 0.5
    adj_tn = tn + 0.5

    odds_ratio = (adj_tp * adj_tn) / (adj_fn * adj_fp)
    std_err = sqrt(1/adj_tp + 1/adj_fn + 1/adj_fp + 1/adj_tn)
    effect = abs(log(odds_ratio))

    # A corrected DOR of exactly 1 means no separation at all.
    if effect == 0:
        return 0.0
    return effect / (effect + std_err)

# ---------- DFI/DFQ – corrected & robust ----------
def diagnostic_fragility(tp: int, fn: int, fp: int, tn: int, alpha: float = ALPHA):
    """Search for the smallest cell shift that flips Fisher significance.

    The table counts as significant when its two-sided Fisher p-value is
    ``<= alpha``. For each shift size from 1 up to N (the table total), a
    fixed set of moves is tried; each move takes that many cases out of one
    cell and adds them to another. The first shift size at which any move
    lands on the other side of ``alpha`` is returned.

    Args:
        tp, fn, fp, tn: Cell counts of the 2x2 table.
        alpha: Significance threshold.

    Returns:
        ``(DFI, DFQ)`` where DFI is the shift size found and DFQ is
        ``DFI / N``; ``(None, None)`` when N is 0 or no tried move flips
        significance.
    """
    total = tp + fn + fp + tn
    if total == 0:
        return None, None

    baseline_p, _ = fisher_p(tp, fn, fp, tn)
    starts_significant = baseline_p <= alpha

    # Each direction is a per-case change to (tp, fn, fp, tn).
    # These four move cases between the two cells of a single row.
    within_row = (
        (-1, +1, 0, 0),
        (0, 0, -1, +1),
        (+1, -1, 0, 0),
        (0, 0, +1, -1),
    )
    # These four move cases between rows; they are only tried when the
    # starting table is not significant.
    across_rows = (
        (+1, 0, 0, -1),
        (0, +1, 0, -1),
        (-1, 0, +1, 0),
        (0, -1, 0, +1),
    )
    directions = within_row if starts_significant else within_row + across_rows

    origin = (tp, fn, fp, tn)
    for step in range(1, total + 1):
        for direction in directions:
            shifted = tuple(
                count + step * delta for count, delta in zip(origin, direction)
            )
            # A move is admissible only if no cell would go negative.
            if min(shifted) < 0:
                continue
            shifted_p, _ = fisher_p(*shifted)
            if starts_significant:
                crossed = shifted_p > alpha
            else:
                crossed = shifted_p <= alpha
            if crossed:
                return step, step / total

    return None, None  # no tried move flips significance

# ---------- High-level ----------
def diagnostic_metrics(tp: int, fn: int, fp: int, tn: int):
    """Bundle the p, fragility and neutrality-boundary metrics for one table.

    Returns:
        A dict with keys ``"p"`` (Fisher p-value), ``"DOR"`` (odds ratio),
        ``"fr"`` (a dict holding ``"DFI"`` and ``"DFQ"``) and ``"nb"``
        (a dict holding ``"DNB"``).
    """
    counts = (tp, fn, fp, tn)

    p_value, odds_ratio = fisher_p(*counts)
    fragility_index, fragility_quotient = diagnostic_fragility(*counts)
    boundary = diagnostic_neutrality_boundary(*counts)

    summary = {"p": p_value, "DOR": odds_ratio}
    summary["fr"] = {"DFI": fragility_index, "DFQ": fragility_quotient}
    summary["nb"] = {"DNB": boundary}
    return summary

# ---------- Narrative ----------
def narrative(tp, fn, fp, tn, res):
    """Render a plain-text, one-metric-per-line summary of a 2x2 table.

    Args:
        tp, fn, fp, tn: Cell counts of the table.
        res: Metrics dict with the keys ``"p"``, ``"DOR"``, ``"fr"``
            (holding ``"DFI"``/``"DFQ"``) and ``"nb"`` (holding ``"DNB"``).

    Returns:
        The summary lines joined with newlines.
    """
    def grade(value, bands, otherwise):
        # Label of the first band whose upper bound is above ``value``.
        for ceiling, label in bands:
            if value < ceiling:
                return label
        return otherwise

    total = tp + fn + fp + tn
    positives = tp + fn
    negatives = fp + tn
    # Report 0 instead of dividing by zero when a row of the table is empty.
    sensitivity = tp / positives if positives > 0 else 0
    specificity = tn / negatives if negatives > 0 else 0

    p_value = res['p']
    verdict = 'significant' if p_value <= ALPHA else 'not significant'
    report = [
        f"TP={tp}, FN={fn}, FP={fp}, TN={tn}  (N={total})",
        f"Sensitivity = {sensitivity:.4f}, Specificity = {specificity:.4f}",
        f"Fisher’s exact p = {p_value:.6f}  ({verdict})",
        f"DOR = {res['DOR']:.3f}",
    ]

    fragility = res['fr']
    if fragility['DFI'] is None:
        report.append("DFQ = 1.000000 (cannot flip significance)")
    else:
        dfq = fragility['DFQ']
        stability = grade(
            dfq,
            (
                (0.01, "extremely fragile"),
                (0.05, "very fragile"),
                (0.10, "fragile"),
                (0.25, "moderately stable"),
            ),
            "very stable",
        )
        report.append(f"DFI = {fragility['DFI']} → DFQ = {dfq:.6f} ({stability})")

    dnb = res['nb']['DNB']
    separation = grade(
        dnb,
        (
            (0.05, "at neutrality boundary"),
            (0.10, "near neutrality"),
            (0.25, "moderately separated"),
            (0.50, "clearly separated"),
        ),
        "far from neutrality",
    )
    report.append(f"DNB = {dnb:.6f} → {separation}")

    return "\n".join(report)

# ---------- CLI ----------
def main():
    """Prompt for TP, FN, FP and TN, then print the metrics and narrative."""
    print("Diagnostic Metrics Calculator\n")
    # The generator is consumed in order, so the prompts appear as
    # TP, FN, FP, TN.
    tp, fn, fp, tn = (
        int(input(f"{label}: ")) for label in ("TP", "FN", "FP", "TN")
    )

    res = diagnostic_metrics(tp, fn, fp, tn)
    fragility = res['fr']
    # A table whose significance cannot be flipped is displayed as DFQ = 1.
    shown_dfq = 1.0 if fragility['DFQ'] is None else fragility['DFQ']

    print("\n================ p–fr–nb ================")
    print(f"p (Fisher’s exact) = {res['p']:.6f}")
    print(f"DFI = {fragility['DFI']}")
    print(f"DFQ = {shown_dfq:.6f}")
    print(f"DNB = {res['nb']['DNB']:.6f}")
    print("=========================================\n")
    print(narrative(tp, fn, fp, tn, res))

if __name__ == "__main__":
    main()

Diagnostic Metrics Calculator

TP: 15
FN: 4
FP: 31
TN: 5

p (Fisher’s exact) = 0.703163
DFI = 3
DFQ = 0.054545
DNB = 0.418225

TP=15, FN=4, FP=31, TN=5  (N=55)
Sensitivity = 0.7895, Specificity = 0.1389
Fisher’s exact p = 0.703163  (not significant)
DOR = 0.605
DFI = 3 → DFQ = 0.054545 (fragile)
DNB = 0.418225 → clearly separated
