<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/proportion_vs_benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title
# Fragility Metrics Toolkit: Proportion vs Benchmark
# 20-NOV-2025
# Layout:
# Single arm (or agreement vs benchmark when only k, n, p₀ are available)
# k = number of successes
# n = relevant denominator (n_relevant)
# p₀ = benchmark proportion
#
# Input: {k, n, p₀, direction}
# Output: p-value (one-sided exact binomial p-value)
# fr (BFI/BFQ)
# nb (Proportion-NBF)
#
# Notes
# - One-sided exact binomial test with context-aware calculation
# - User chooses direction based on research question
# - BFQ is the primary fragility quotient (fr) for this design
# - Proportion-NBF is the primary robustness metric (nb) for single-arm benchmarks
# - α = 0.05 fixed (change ALPHA below if needed)
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo. https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0
# ---------- SciPy dependency ----------
# Make sure SciPy is importable before anything from `scipy.stats` is used.
try:
    import scipy
except ImportError:
    # SciPy is missing: try a quiet pip install, then retry the import.
    try:
        import subprocess
        import sys
        # `sys.executable -m pip` runs pip with the interpreter executing this code.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "scipy"])
        import scipy
    except Exception:
        # The install or the second import failed: print a manual-install hint,
        # then re-raise so the underlying error is not hidden.
        print("Please install scipy: pip install scipy")
        raise
from math import sqrt
from scipy.stats import binomtest, binom
ALPHA = 0.05

# ---------- Core utilities ----------
def compute_context_aware_pvalue(k: int, n: int, p0: float, alternative: str = "greater"):
    """Return the one-sided exact binomial p-value used for display.

    With ``alternative="greater"`` the upper-tail test is always used. Any
    other value is treated as ``"less"``; in that case the tail follows the
    data: the upper-tail test when the observed proportion ``k / n`` is
    strictly above ``p0``, otherwise the lower-tail test.
    """
    observed = k / n
    # Short-circuit keeps the comparison with p0 out of the "greater" path.
    use_upper_tail = alternative == "greater" or observed > p0
    tail = "greater" if use_upper_tail else "less"
    return binomtest(k, n, p0, alternative=tail).pvalue

# ---------- BFI/BFQ : "at or above" / "at or below" ----------
def compute_bfi_bfq(k: int, n: int, p0: float, alpha: float = ALPHA, alternative: str = "greater"):
    """Compute the fragility index (BFI) and quotient (BFQ) against a benchmark.

    The claim ("at or above p0" for ``alternative="greater"``, "at or below
    p0" for any other value) is challenged by the one-sided exact binomial
    test in the *opposite* direction, evaluated at level ``alpha``.

    - If the opposite test is already significant (``p <= alpha``), successes
      are moved toward the claimed side one at a time until it no longer is.
    - Otherwise successes are moved away from the claimed side one at a time
      until the opposite test becomes significant.

    Args:
        k: Observed number of successes.
        n: Number of trials.
        p0: Benchmark proportion.
        alpha: Significance threshold for the opposite one-sided test.
        alternative: ``"greater"`` for the "at or above" claim; anything
            else is handled as the "at or below" claim.

    Returns:
        ``(d, d / n)`` where ``d`` is the smallest number of changed outcomes
        that flips the status of the opposite test, or ``(None, None)`` when
        no success count between 0 and ``n`` flips it.
    """
    claim_above = alternative == "greater"
    # The claim is challenged by the test pointing the other way.
    opposite = "less" if claim_above else "greater"

    def opposite_proven(successes: int) -> bool:
        return binomtest(successes, n, p0, alternative=opposite).pvalue <= alpha

    refuted_now = opposite_proven(k)

    # Move toward the claimed side when the claim is currently refuted,
    # away from it when the claim is currently supported.
    toward_claim = 1 if claim_above else -1
    step = toward_claim if refuted_now else -toward_claim
    # Success counts must stay within 0..n.
    max_shift = n - k if step > 0 else k

    for d in range(1, max_shift + 1):
        if opposite_proven(k + step * d) != refuted_now:
            return d, d / n
    return None, None

# ---------- Proportion-NBF (unchanged) ----------
def compute_proportion_nbf(k: int, n: int, p0: float):
    """Return the Proportion-NBF for ``k`` successes out of ``n`` versus ``p0``.

    The value is ``|k/n - p0| / (|k/n - p0| + S)`` with
    ``S = sqrt(p0 * (1 - p0) / n)``. Returns ``None`` when ``n <= 0`` and
    ``0.0`` when both the gap and ``S`` are zero (avoiding 0/0).
    """
    if n <= 0:
        return None

    gap = abs(k / n - p0)
    spread = sqrt(p0 * (1 - p0) / n)
    denominator = gap + spread

    # Both terms are non-negative, so a zero sum means both are zero.
    if denominator == 0:
        return 0.0
    return gap / denominator

# ---------- High-level calculator ----------
def single_proportion_benchmark(k: int, n: int, p0: float, alpha: float = ALPHA, alternative: str = "greater"):
    """Validate the inputs and compute the p / fr / nb triplet.

    Args:
        k: Number of successes; must satisfy ``0 <= k <= n``.
        n: Number of trials; must be at least 1.
        p0: Benchmark proportion; must satisfy ``0 < p0 < 1``.
        alpha: Significance threshold passed to the BFI/BFQ computation.
        alternative: ``"greater"`` (claim: at or above ``p0``) or ``"less"``
            (claim: at or below ``p0``).

    Returns:
        A dict with keys ``"p"`` (displayed p-value), ``"fr"`` (dict with
        ``"BFI"`` and ``"BFQ"``) and ``"nb"`` (dict with ``"Proportion-NBF"``).

    Raises:
        ValueError: If any input is outside the ranges above.
    """
    if not (0 <= k <= n):
        raise ValueError("Invalid inputs: require 0 <= k <= n.")
    # k == n == 0 passes the check above but makes k / n undefined downstream.
    if n < 1:
        raise ValueError("Invalid inputs: require n >= 1.")
    if not (0 < p0 < 1):
        raise ValueError("Invalid benchmark p₀: require 0 < p₀ < 1.")
    if alternative not in {"greater", "less"}:
        raise ValueError('alternative must be "greater" or "less".')

    p_val = compute_context_aware_pvalue(k, n, p0, alternative)
    bfi, bfq = compute_bfi_bfq(k, n, p0, alpha=alpha, alternative=alternative)
    prop_nbf = compute_proportion_nbf(k, n, p0)
    return {
        "p": p_val,
        "fr": {"BFI": bfi, "BFQ": bfq},
        "nb": {"Proportion-NBF": prop_nbf},
    }

# ---------- Narrative interpretation ----------
def generate_narrative(k, n, p0, p_val, bfi, bfq, prop_nbf, alternative, alpha=ALPHA):
    """Build the multi-line plain-text interpretation of the results.

    Args:
        k: Number of successes.
        n: Number of trials.
        p0: Benchmark proportion.
        p_val: Displayed p-value.
        bfi: Fragility index, or ``None`` when no flip was found.
        bfq: Fragility quotient, or ``None`` when ``bfi`` is ``None``.
        prop_nbf: Proportion-NBF, or ``None``.
        alternative: ``"greater"`` for the "at or above" claim; anything
            else is described as "at or below".
        alpha: Significance threshold used for the decisions and shown in
            the text. Defaults to the module-level ``ALPHA``.

    Returns:
        Five newline-separated lines: claim, p-value, BFI, BFQ and
        Proportion-NBF. The BFQ and Proportion-NBF lines are empty strings
        when the corresponding value is ``None``.
    """
    claim = "at or above" if alternative == "greater" else "at or below"
    opp_alt = "less" if alternative == "greater" else "greater"
    # The claim counts as rejected when the opposite one-sided test is significant.
    opposite_proven = binomtest(k, n, p0, alternative=opp_alt).pvalue <= alpha

    p_narrative = (
        f"Displayed p = {p_val:.6f} "
        f"{'supports' if p_val <= alpha else 'does not support'} evidence in claimed direction."
    )
    # Report the threshold actually used instead of a hard-coded 0.05.
    claim_narrative = (
        f"Claim 'p̂ {claim} p₀={p0:.3f}' is "
        f"{'rejected' if opposite_proven else 'not rejected'} at α={alpha:g}."
    )

    if bfi is not None:
        # When the opposite is already proven, BFI counts the changes needed
        # to undo that, not to establish it.
        if opposite_proven:
            effect = "means the opposite claim is no longer proven"
        else:
            effect = "proves the opposite claim"
        fragility_narrative = f"BFI = {bfi}: toggling {bfi} event(s) {effect}."

        if bfq < 0.01:
            stability = "extremely fragile"
        elif bfq < 0.05:
            stability = "very fragile"
        elif bfq < 0.10:
            stability = "fragile"
        elif bfq < 0.25:
            stability = "moderately stable"
        else:
            stability = "highly stable"
        bfq_narrative = f"BFQ = {bfq:.6f} → {stability}."
    else:
        fragility_narrative = "Maximally stable."
        bfq_narrative = ""

    if prop_nbf is not None:
        if prop_nbf < 0.05:
            sep = "at neutrality"
        elif prop_nbf < 0.10:
            sep = "near neutrality"
        elif prop_nbf < 0.25:
            sep = "moderate separation"
        elif prop_nbf < 0.50:
            sep = "clear separation"
        else:
            sep = "far from neutrality"
        nbf_narrative = f"Proportion-NBF = {prop_nbf:.6f} → {sep} from p₀."
    else:
        nbf_narrative = ""

    return f"{claim_narrative}\n{p_narrative}\n{fragility_narrative}\n{bfq_narrative}\n{nbf_narrative}"

# ---------- CLI ----------
def main():
    """Run the interactive calculator: prompt for inputs, print the results.

    Reads ``k``, ``n``, ``p₀`` and the claim direction from standard input,
    then prints the p / fr / nb values, the narrative interpretation, and the
    exact binomial probability of observing ``k`` successes under ``p₀``.

    Raises:
        ValueError: If a number cannot be parsed, ``n`` is not positive, the
            claim choice is not "1" or "2", or the inputs fail the checks in
            ``single_proportion_benchmark``.
    """
    print("Proportion vs Benchmark Calculator\n")
    k = int(input("k (successes): ").strip())
    n = int(input("n (denominator): ").strip())
    # Reject a non-positive denominator here; otherwise k / n below fails
    # with ZeroDivisionError before any input validation runs.
    if n <= 0:
        raise ValueError("Invalid inputs: require n >= 1.")
    p0 = float(input("p₀ (benchmark proportion, e.g. 0.15): ").strip())

    print("\nChoose claim:")
    print("1. p ≥ p₀ (at or above benchmark)")
    print("2. p ≤ p₀ (at or below benchmark)")
    choice = input("Enter 1 or 2: ").strip()
    # Do not silently treat a typo or empty answer as option 2.
    alternatives = {"1": "greater", "2": "less"}
    if choice not in alternatives:
        raise ValueError('Invalid choice: enter "1" or "2".')
    alternative = alternatives[choice]

    direction_desc = "p ≥ p₀ (at or above)" if alternative == "greater" else "p ≤ p₀ (at or below)"
    phat = k / n
    print(f"\nObserved p̂ = {phat:.6f}, benchmark p₀ = {p0:.6f}")
    print(f"Claim: {direction_desc}\n")

    res = single_proportion_benchmark(k, n, p0, alpha=ALPHA, alternative=alternative)

    print("================ p–fr–nb ================")
    print(f"Displayed p-value = {res['p']:.6f}")
    print(f"BFI = {res['fr']['BFI']}")
    if res['fr']['BFQ'] is not None:
        print(f"BFQ = {res['fr']['BFQ']:.6f}")
    print(f"Proportion-NBF = {res['nb']['Proportion-NBF']:.6f}")
    print("=========================================\n")

    print("Interpretation:")
    print(generate_narrative(k, n, p0, res['p'], res['fr']['BFI'], res['fr']['BFQ'], res['nb']['Proportion-NBF'], alternative))

    # Point probability of exactly k successes under the benchmark.
    pmf_exact = binom.pmf(k, n, p0)
    print(f"\nExact prob of {k} successes under p₀: {pmf_exact:.8f}")

# Start the interactive calculator only when this code is executed directly
# (module name "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Proportion vs Benchmark Calculator

