<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/binary_2x2_independent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# @title
# Fragility Metrics Toolkit: Independent Binary Outcomes
# 19-NOV-2025
# Layout:
#      | Outcome A | Outcome B |
# ArmA |     a     |    b      | n1 = sample size of ArmA
# ArmB |     c     |    d      | n2 = sample size of ArmB
#                                 N = total sample size
#
# Input: {a, b, c, d}
# Output: p (baseline two-sided p-value: Fisher's exact or Pearson's χ², depending on N and cell counts), fr (FI/MFQ and GFI/GFQ), nb (RQ)
#
# Notes
# GFI/GFQ preferred over FI/MFQ for N ≤ 4000
# Program uses Pearson's χ² (two-sided) for N > 4000 AND min(a,b,c,d) ≥ 50; α = 0.05.
# You can modify the p-value threshold below by changing the value of ALPHA from 0.05 to a custom threshold
# For speed of computation, Fisher's exact test is replaced by Pearson's chi-square test when N > 4000 and the minimum cell count is ≥ 50. You can change these thresholds below if you wish.
# Due to the exponential increase in calculation requirements, the N threshold for GFI is set at 4000 below. For faster computers, this may be increased.
# GFI/GFQ can be "None" when N exceeds the threshold.
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo. https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0
#
# Install SciPy if missing
try:
    import scipy
except ImportError:
    try:
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "scipy"])
        import scipy
    except Exception:
        print("Please install scipy: pip install scipy")
        raise

from collections import deque
from scipy.stats import fisher_exact, chi2_contingency

# Tunable thresholds shared by every calculator in this file.
# A p-value counts as significant when p <= ALPHA (see is_significant).
ALPHA = 0.05
N_THRESHOLD = 4000       # χ² is considered only when N is strictly greater than this
MIN_CELL_THRESHOLD = 50  # ...and only when every cell count is at least this large
GFI_THRESHOLD = 4000     # GFI/GFQ are not computed (returned as None) when N exceeds this; use RQ instead


# ---------- Core utilities ----------

def test_p(a, b, c, d, use_chi2=False):
    """Return the two-sided p-value for the 2×2 table [[a, b], [c, d]].

    Args:
        a, b: Counts in the first row (Arm A).
        c, d: Counts in the second row (Arm B).
        use_chi2: When true, the p-value comes from
            ``scipy.stats.chi2_contingency``; otherwise it comes from
            ``scipy.stats.fisher_exact`` with ``alternative="two-sided"``.

    Returns:
        The p-value reported by the selected SciPy routine.

    NOTE(review): ``chi2_contingency`` is called with its default arguments.
    SciPy documents that the default ``correction=True`` applies Yates'
    continuity correction when the table has one degree of freedom, which is
    the case for every 2×2 table. The χ² branch is therefore
    continuity-corrected rather than plain Pearson — confirm this is intended.
    """
    table = [[a, b], [c, d]]
    if use_chi2:
        # Element 1 of the SciPy result is the p-value.
        return chi2_contingency(table)[1]
    return fisher_exact(table, alternative="two-sided")[1]


# This helper's name matches pytest's default ``test_*`` collection pattern.
# Without the opt-out below, importing it into a test module makes pytest try
# to run it as a test and fail on the missing fixtures ``a, b, c, d``.
test_p.__test__ = False


def is_significant(p, alpha=ALPHA):
    """Report whether p-value *p* counts as significant at level *alpha*.

    The comparison is inclusive: a p-value exactly equal to *alpha* is
    treated as significant.
    """
    return alpha >= p


def n_total(a, b, c, d):
    """Return the total sample size N, i.e. the sum of the four cell counts."""
    first_row = a + b
    return first_row + c + d


# ---------- Walsh-compliant FI / FQ / MFQ ----------

def choose_arm(a, b, c, d):
    """Pick the arm ('A' or 'B') whose counts will be toggled.

    Selection rule, in order:
      1. the arm with fewer events (a for arm A, c for arm B);
      2. on equal events, the arm with the smaller total (a + b vs c + d);
      3. on a full tie, arm 'A'.
    """
    # Rule 1: the event counts differ, so the smaller one decides.
    if a < c or c < a:
        return 'A' if a < c else 'B'

    # Rules 2 and 3: only a strictly smaller arm B beats the default of 'A'.
    size_a = a + b
    size_b = c + d
    if size_a < size_b:
        return 'A'
    return 'B' if size_b < size_a else 'A'


def toggle_once(a, b, c, d, arm, direction):
    """
    Move a single subject between the event and non-event cells of one arm.

    arm: 'A' selects cells (a, b); any other value selects cells (c, d).
    direction: "up" turns one non-event into an event; any other value
      ("down") turns one event into a non-event.

    Returns the new (a, b, c, d) tuple, or None when the cell that would
    lose a subject is already empty (<= 0).
    """
    raising = direction == 'up'
    in_arm_a = arm == 'A'

    events, non_events = (a, b) if in_arm_a else (c, d)

    # The cell giving up a subject must be non-empty.
    donor = non_events if raising else events
    if donor <= 0:
        return None

    shift = 1 if raising else -1
    events += shift
    non_events -= shift

    if in_arm_a:
        return events, non_events, c, d
    return a, b, events, non_events


def steps_to_cross(a, b, c, d, arm, direction, use_chi2=False, alpha=ALPHA, max_iter=10**6):
    """
    Count the same-direction toggles in one arm needed to flip significance.

    Starting from (a, b, c, d), toggle_once is applied repeatedly with the
    given arm and direction. After each toggle the p-value is recomputed and
    compared with the significance state of the starting table.

    Returns:
      (steps, p) as soon as the significance state differs from the start,
        where p is the p-value of the table that produced the flip;
      (None, p) when no further toggle is possible or max_iter toggles were
        applied without a flip, where p is the p-value of the last table
        reached.
    """
    started_significant = is_significant(test_p(a, b, c, d, use_chi2), alpha)

    table = (a, b, c, d)
    for taken in range(1, max_iter + 1):
        moved = toggle_once(*table, arm, direction)
        if moved is None:
            # The donor cell is exhausted; no more toggles in this direction.
            break
        table = moved
        p_after = test_p(*table, use_chi2)
        if is_significant(p_after, alpha) != started_significant:
            return taken, p_after

    return None, test_p(*table, use_chi2)


def compute_fi_fq_mfq(a, b, c, d, alpha=ALPHA):
    """
    Walsh-compliant FI, FQ and MFQ for a 2×2 table.

    The arm to modify is selected by choose_arm. Toggles are tried in that
    arm in both directions, and the direction needing fewer toggles wins
    (on a tie, "non-events → events" is reported).

    Returns a dict with:
      FI   - number of toggles needed to flip significance (None if no flip)
      FQ   - FI / N
      MFQ  - FI / n_mod, where n_mod is the size of the modified arm
    plus the baseline and final p-values, the baseline state, the arm, the
    direction, n_mod and the test used. "target_state" is present only when
    a flip was found; "note" only when it was not.
    """
    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_name = "chi2" if use_chi2 else "fisher_exact"

    base_p = test_p(a, b, c, d, use_chi2)
    base_state = "significant" if is_significant(base_p, alpha) else "non-significant"

    arm = choose_arm(a, b, c, d)
    n_mod = (a + b) if arm == 'A' else (c + d)

    # Try both directions in the chosen arm, keeping only those that flip.
    labelled_directions = (
        ("up", "non-events → events"),
        ("down", "events → non-events"),
    )
    candidates = []
    for direction_key, label in labelled_directions:
        taken, p_after = steps_to_cross(a, b, c, d, arm, direction_key, use_chi2, alpha)
        if taken is not None:
            candidates.append((label, taken, p_after))

    if not candidates:
        return {
            "FI": None,
            "FQ": None,
            "MFQ": None,
            "baseline_p": base_p,
            "baseline_state": base_state,
            "arm": arm,
            "direction": None,
            "n_mod": n_mod,
            "final_p": base_p,
            "test_used": test_name,
            "note": "FI not attainable under Walsh constraints."
        }

    # min() keeps the first entry on ties, so "up" wins when both need equal steps.
    direction, steps, p_final = min(candidates, key=lambda cand: cand[1])
    FI = int(steps)
    target_state = "non-significant" if base_state == "significant" else "significant"

    return {
        "FI": FI,
        "FQ": (FI / N) if N > 0 else None,
        "MFQ": (FI / n_mod) if n_mod > 0 else None,
        "baseline_p": base_p,
        "baseline_state": base_state,
        "target_state": target_state,
        "arm": arm,
        "direction": direction,
        "n_mod": n_mod,
        "final_p": p_final,
        "test_used": test_name
    }


# ---------- Modified Fragility Index (MFI) - retained for completeness ----------

def compute_mfi(a, b, c, d, alpha=ALPHA):
    """
    Modified FI (R package variant).

    Toggles are tried in each arm and in each direction, four runs in total.
    The run needing the fewest toggles to flip significance is reported. On
    ties the earliest run wins, in the order A-up, A-down, B-up, B-down.

    Returns a dict with "MFI" (None if no run flips significance), the
    baseline and final p-values, the baseline state, the winning arm and
    direction, and the test used. "target_state" is present only when a flip
    was found; "note" only when it was not.
    """
    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_name = "chi2" if use_chi2 else "fisher_exact"

    base_p = test_p(a, b, c, d, use_chi2)
    base_state = "significant" if is_significant(base_p, alpha) else "non-significant"

    labelled_directions = (
        ("up", 'non-events → events'),
        ("down", 'events → non-events'),
    )
    candidates = []
    for which_arm in ('A', 'B'):
        for direction_key, label in labelled_directions:
            taken, p_after = steps_to_cross(a, b, c, d, which_arm, direction_key, use_chi2, alpha)
            if taken is not None:
                candidates.append((which_arm, label, taken, p_after))

    if not candidates:
        return {
            "MFI": None,
            "baseline_p": base_p,
            "baseline_state": base_state,
            "arm": None,
            "direction": None,
            "final_p": base_p,
            "test_used": test_name,
            "note": "MFI not attainable (no arm can flip significance)."
        }

    # min() keeps the first entry on ties, preserving the A-up ... B-down priority.
    arm, direction, steps, p_final = min(candidates, key=lambda cand: cand[2])
    target_state = "non-significant" if base_state == "significant" else "significant"

    return {
        "MFI": int(steps),
        "baseline_p": base_p,
        "baseline_state": base_state,
        "target_state": target_state,
        "arm": arm,
        "direction": direction,
        "final_p": p_final,
        "test_used": test_name
    }


# ---------- Standardized Fragility Index (SFI) - retained for completeness ----------

def compute_sfi(a, b, c, d, alpha=ALPHA):
    """
    SFI: number of toggles in the LARGER arm needed to flip significance.

    The larger arm is the one with more subjects (a + b vs c + d). When the
    arms are the same size, arm A is used if a < c and arm B otherwise.
    BFU is reported as 1 / (size of the selected arm).

    Toggles are tried in the selected arm in both directions, and the
    direction needing fewer toggles wins (on a tie, "non-events → events").

    Returns a dict with "SFI" (None if no flip is reachable or both arms are
    empty), "BFU", the baseline p-value, the selected arm and its size, and
    the test used. State, direction and final p-value fields are included
    when the arms are not both empty.
    """
    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_name = "chi2" if use_chi2 else "fisher_exact"

    n_A = a + b
    n_B = c + d

    # Larger arm wins; equal sizes fall back to comparing event counts.
    if n_A > n_B:
        larger_arm = 'A'
    elif n_B > n_A:
        larger_arm = 'B'
    else:
        larger_arm = 'A' if a < c else 'B'
    n_large = n_A if larger_arm == 'A' else n_B

    if not n_large > 0:
        # No subjects in the selected arm, so the unit size is undefined.
        return {
            "SFI": None,
            "BFU": None,
            "baseline_p": test_p(a, b, c, d, use_chi2),
            "larger_arm": None,
            "n_large": 0,
            "test_used": test_name,
            "note": "Empty arm."
        }

    BFU = 1.0 / n_large

    base_p = test_p(a, b, c, d, use_chi2)
    base_state = "significant" if is_significant(base_p, alpha) else "non-significant"

    labelled_directions = (
        ("up", "non-events → events"),
        ("down", "events → non-events"),
    )
    candidates = []
    for direction_key, label in labelled_directions:
        taken, p_after = steps_to_cross(a, b, c, d, larger_arm, direction_key, use_chi2, alpha)
        if taken is not None:
            candidates.append((label, taken, p_after))

    if not candidates:
        return {
            "SFI": None,
            "BFU": BFU,
            "baseline_p": base_p,
            "baseline_state": base_state,
            "larger_arm": larger_arm,
            "n_large": n_large,
            "final_p": base_p,
            "test_used": test_name,
            "note": "SFI not attainable (larger arm cannot flip significance)."
        }

    # min() keeps the first entry on ties, so "up" wins when both need equal steps.
    direction, steps, p_final = min(candidates, key=lambda cand: cand[1])
    target_state = "non-significant" if base_state == "significant" else "significant"

    return {
        "SFI": int(steps),
        "BFU": BFU,
        "baseline_p": base_p,
        "baseline_state": base_state,
        "target_state": target_state,
        "larger_arm": larger_arm,
        "n_large": n_large,
        "direction": direction,
        "final_p": p_final,
        "test_used": test_name
    }


# ---------- Risk Quotient (RQ) ----------

def compute_rq(a, b, c, d):
    """
    Risk Quotient (RQ): normalized distance of the table from independence.

    RRI = (1/k) * sum(|O - E|) over the k = 4 cells, where E is the expected
    count under independence (row total * column total / N).
    RQ  = RRI / (N/k).

    Returns {"RQ": ..., "RRI": ...}; for an empty table (N == 0) both values
    are None and a "note" entry is added.
    """
    N = n_total(a, b, c, d)
    if N == 0:
        return {"RQ": None, "RRI": None, "note": "Empty table."}

    k = 4
    observed = ((a, b), (c, d))
    row_totals = (a + b, c + d)        # arm sizes
    column_totals = (a + c, b + d)     # events, non-events

    # Accumulate |O - E| cell by cell in the order a, b, c, d.
    abs_deviation = 0
    for row_index, row_total in enumerate(row_totals):
        for column_index, column_total in enumerate(column_totals):
            expected = (row_total * column_total) / N
            abs_deviation += abs(observed[row_index][column_index] - expected)

    RRI = (1 / k) * abs_deviation
    RQ = RRI / (N / k)

    return {
        "RQ": RQ,
        "RRI": RRI
    }


# ---------- Global Fragility Index (GFI) / GFQ for 2×2 ----------

def _neighbors_all_moves(state, N):
    """
    Yield every table reachable by moving one subject from one cell to another.

    state is (a, b, c); the fourth cell is implied as d = N - a - b - c.
    Empty cells cannot give up a subject, so no count ever becomes negative.

    Yields (new_state, (src, dst)) pairs, where new_state is the (a, b, c)
    triple after the move and src/dst are cell indices 0..3 (a, b, c, d).
    Moves are produced in increasing src order, then increasing dst order.
    """
    first, second, third = state
    counts = (first, second, third, N - first - second - third)

    for donor, available in enumerate(counts):
        if available == 0:
            continue
        for receiver in range(4):
            if receiver == donor:
                continue

            shifted = list(counts)
            shifted[donor] -= 1
            shifted[receiver] += 1

            yield tuple(shifted[:3]), (donor, receiver)


def compute_gfi_gfq(a, b, c, d, alpha=ALPHA, store_path=True):
    """
    Global Fragility Index (GFI) and quotient (GFQ = GFI / N) for a 2×2 table.

    Runs a breadth-first search over tables with the same total N, where one
    move transfers a single subject from any cell to any other cell. GFI is
    the depth of the first table found whose significance state differs from
    the baseline.

    Only computed for N ≤ GFI_THRESHOLD; for larger N the result carries
    GFI = GFQ = None and a note recommending RQ.

    store_path: when true, "witness_path" lists the (src, dst) cell-index
      moves leading to the flipping table; otherwise it is an empty list.

    Raises ValueError if any cell is not a non-negative int.
    """
    if any(not (isinstance(cell, int) and cell >= 0) for cell in (a, b, c, d)):
        raise ValueError("Cells a,b,c,d must be non-negative integers.")

    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_name = "chi2" if use_chi2 else "fisher_exact"

    if N == 0:
        return {
            "GFI": None,
            "GFQ": None,
            "baseline_p": None,
            "baseline_state": None,
            "final_p": None,
            "witness_path": [],
            "test_used": test_name,
            "note": "Empty table."
        }

    if N > GFI_THRESHOLD:
        # Too large for the lattice search; report the baseline p-value only.
        return {
            "GFI": None,
            "GFQ": None,
            "baseline_p": test_p(a, b, c, d, use_chi2),
            "baseline_state": None,
            "final_p": None,
            "witness_path": [],
            "test_used": test_name,
            "note": f"GFI not computed for N > {GFI_THRESHOLD}. Use RQ for robustness assessment."
        }

    base_p = test_p(a, b, c, d, use_chi2)
    base_is_significant = is_significant(base_p, alpha)
    base_label = "significant" if base_is_significant else "non-significant"

    # Breadth-first search: the first flip encountered is at minimal depth.
    origin = (a, b, c)
    seen = {origin}
    frontier = deque([(origin, 0, [] if store_path else None)])

    while frontier:
        current, moves_so_far, trail = frontier.popleft()
        next_depth = moves_so_far + 1

        for neighbor, move in _neighbors_all_moves(current, N):
            if neighbor in seen:
                continue
            seen.add(neighbor)

            extended_trail = (trail + [move]) if store_path else None

            cell_a, cell_b, cell_c = neighbor
            cell_d = N - cell_a - cell_b - cell_c
            p_here = test_p(cell_a, cell_b, cell_c, cell_d, use_chi2)

            if is_significant(p_here, alpha) == base_is_significant:
                # Same state as the baseline: keep exploring from here.
                frontier.append((neighbor, next_depth, extended_trail))
                continue

            return {
                "GFI": next_depth,
                "GFQ": next_depth / N,
                "baseline_p": base_p,
                "baseline_state": base_label,
                "final_p": p_here,
                "witness_path": extended_trail if store_path else [],
                "test_used": test_name
            }

    return {
        "GFI": None,
        "GFQ": None,
        "baseline_p": base_p,
        "baseline_state": base_label,
        "final_p": base_p,
        "witness_path": [],
        "test_used": test_name,
        "note": "No flip found (study appears highly robust)."
    }


# ---------- High-level calculator: binary 2×2 independent ----------

def binary_2x2_independent(a, b, c, d, alpha=ALPHA):
    """
    High-level calculator for independent 2×2 binary outcomes.

    Runs the baseline test, the Walsh-compliant FI calculation, the GFI
    search (without storing the witness path) and the RQ calculation, then
    returns a minimal p–fr–nb structure:
      - "p"  : baseline p-value (χ² when N > N_THRESHOLD and every cell is
               at least MIN_CELL_THRESHOLD, otherwise Fisher's exact)
      - "fr" : {"FI", "MFQ", "GFI", "GFQ"}
      - "nb" : {"RQ"}
    Any metric that could not be computed is None.
    """
    chi2_applies = (n_total(a, b, c, d) > N_THRESHOLD) and (
        min(a, b, c, d) >= MIN_CELL_THRESHOLD
    )
    baseline_p = test_p(a, b, c, d, chi2_applies)

    walsh = compute_fi_fq_mfq(a, b, c, d, alpha=alpha)
    global_fragility = compute_gfi_gfq(a, b, c, d, alpha=alpha, store_path=False)
    quotient = compute_rq(a, b, c, d)

    fragility = {key: walsh.get(key) for key in ("FI", "MFQ")}
    fragility.update((key, global_fragility.get(key)) for key in ("GFI", "GFQ"))

    return {
        "p": baseline_p,
        "fr": fragility,
        "nb": {"RQ": quotient.get("RQ")},
    }


# ---------- CLI: minimal p–fr–nb output ----------

def main():
    """Prompt for the four cell counts and print the p–fr–nb summary."""

    def six_places(value):
        # Quotients are shown with six decimals; missing values print as None.
        return "None" if value is None else f"{value:.6f}"

    print("Enter 2×2 table cells as integers.")
    prompts = (
        "a (Arm A events): ",
        "b (Arm A non-events): ",
        "c (Arm B events): ",
        "d (Arm B non-events): ",
    )
    # The generator is consumed in order, so the prompts appear as a, b, c, d.
    a, b, c, d = (int(input(prompt).strip()) for prompt in prompts)

    res = binary_2x2_independent(a, b, c, d, alpha=ALPHA)
    fr = res["fr"]
    nb = res["nb"]

    report = [
        "\n================ p–fr–nb =================",
        f"p  = {res['p']:.6f}",
        "fr:",
        f"  FI  = {fr['FI']}",
        f"  MFQ = {six_places(fr['MFQ'])}",
        f"  GFI = {fr['GFI']}",
        f"  GFQ = {six_places(fr['GFQ'])}",
        "nb:",
        f"  RQ  = {six_places(nb['RQ'])}",
        "==========================================",
    ]
    for line in report:
        print(line)

if __name__ == "__main__":
    main()


Enter 2×2 table cells as integers.
a (Arm A events): 5
b (Arm A non-events): 18
c (Arm B events): 14
d (Arm B non-events): 28

p  = 0.400577
fr:
  FI  = 3
  MFQ = 0.130435
  GFI = 3
  GFQ = 0.046154
nb:
  RQ  = 0.106036
