<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/Fragility_Index_calculator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# @title
# Fragility metrics for a 2×2 table, Google Colab ready
# v.11-NOV-2025
# Walsh-compliant FI + FQ + MFQ  ➕  Standardized Fragility Index (SFI)  ➕  Global Fragility Index (GFI) + Global Fragility Quotient (GFQ)  ➕  Risk Quotient (RQ)
#
# Layout:
#   |  Events | Non-events |
# A |    a    |     b      |
# B |    c    |     d      |
#
# Tests: Fisher's exact (two-sided) for N ≤ 5000 OR min(a,b,c,d) < 50; Pearson's χ² (two-sided) for N > 5000 AND min(a,b,c,d) ≥ 50; α = 0.05.
#
# FI (Walsh rules): toggles only within the arm with FEWER events at baseline;
# if tied, choose arm with FEWER total; if still tied, choose A.
# A toggle is an event↔non-event flip within that arm. Row totals fixed.
#
# FQ  = FI / N
# MFQ = (fragility count) / n_mod   (n_mod = size of the modified arm at baseline; by default fragility count = FI)
#
# SFI (Standardized Fragility Index): Number of BFU-sized toggles in the LARGER arm required to flip significance.
# BFU = 1/max(n₁, n₂). If arms are equal size, choose arm with fewer events. Label-invariant, allocation-fair.
#
# GFI (Global Fragility Index): Minimum number of single-observation moves between ANY two cells (N fixed)
#       required to flip significance. Path-independent under multinomial model. Computed only for N ≤ 5000.
# GFQ = GFI / N
#
# RQ (Risk Quotient): Normalized distance from independence. RQ = RRI/(N/k). Used for N > 5000.
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit (Version 2.2.0) [Software]. Zenodo. https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0

# Install SciPy if missing
try:
    import scipy
except ImportError:
    !pip -q install scipy

from collections import deque
from scipy.stats import fisher_exact, chi2_contingency

# Two-sided significance level used by every metric in this notebook.
ALPHA = 0.05
# Pearson's χ² is considered only when N exceeds this value (and every cell
# meets MIN_CELL_THRESHOLD); otherwise Fisher's exact test is used.
# The header, docstrings and printed messages document this cut-off as 5000.
N_THRESHOLD = 5000  # Sample size threshold for considering χ²
MIN_CELL_THRESHOLD = 50  # Minimum cell count threshold for χ² validity
# GFI's lattice search is skipped above this N; RQ is reported instead.
GFI_THRESHOLD = 5000  # Don't compute GFI for N > 5000, use RQ instead

# ---------- Core utilities ----------

def test_p(a, b, c, d, use_chi2=False):
    """Return the two-sided p-value for the 2×2 table [[a, b], [c, d]].

    Args:
        a, b: events / non-events in arm A.
        c, d: events / non-events in arm B.
        use_chi2: when true, use ``scipy.stats.chi2_contingency``; otherwise
            use ``scipy.stats.fisher_exact`` with ``alternative="two-sided"``.

    Returns:
        The p-value reported by the selected SciPy test.

    NOTE(review): ``chi2_contingency`` is called with its defaults, and SciPy's
    default ``correction=True`` applies Yates' continuity correction to 2×2
    tables. Confirm this matches the intended "Pearson's χ²".
    """
    table = [[a, b], [c, d]]
    if use_chi2:
        # Result unpacks as (statistic, p, dof, expected); only p is needed.
        _stat, p_value, _dof, _expected = chi2_contingency(table)
        return p_value
    _odds_ratio, p_value = fisher_exact(table, alternative="two-sided")
    return p_value


# The name matches pytest's default ``test_*`` pattern. Opt out of collection so
# importing this helper into a test module does not register it as a test case
# that fails on the "missing fixtures" a, b, c, d.
test_p.__test__ = False

def is_significant(p, alpha=ALPHA):
    """Tell whether p-value *p* counts as significant at level *alpha*.

    The boundary is inclusive: ``p == alpha`` is treated as significant.
    """
    at_or_below_alpha = p <= alpha
    return at_or_below_alpha

def n_total(a, b, c, d):
    """Return the grand total N of the four cells of the 2×2 table."""
    grand_total = a + b + c + d
    return grand_total

# ---------- Walsh-compliant FI / FQ / MFQ ----------

def choose_arm(a, b, c, d):
    """Pick the arm to modify: fewer events, then fewer total, then 'A'.

    Each arm is ranked by the pair (events, arm total). Arm B is chosen only
    when its pair is strictly smaller; every full tie resolves to 'A'.
    """
    rank_a = (a, a + b)
    rank_b = (c, c + d)
    # Tuple comparison is lexicographic: events first, arm size as tie-break.
    return 'B' if rank_b < rank_a else 'A'

def toggle_once(a, b, c, d, arm, direction):
    """
    Flip a single observation inside one arm, keeping that arm's total fixed.

    direction "up" turns a non-event into an event; any other value is
    treated as "down" (event -> non-event). arm 'A' edits (a, b); any other
    value edits (c, d).

    Returns the new (a, b, c, d), or None when the cell that must give up an
    observation is already empty.
    """
    raising = direction == 'up'
    shift = 1 if raising else -1  # signed change applied to the event cell

    if arm == 'A':
        donor = b if raising else a
        if donor <= 0:
            return None
        return a + shift, b - shift, c, d

    donor = d if raising else c
    if donor <= 0:
        return None
    return a, b, c + shift, d - shift

def steps_to_cross(a, b, c, d, arm, direction, use_chi2=False, alpha=ALPHA, max_iter=10**6):
    """
    Count same-direction toggles in *arm* needed to change the significance verdict.

    The starting verdict is taken from the untouched table. One toggle is
    applied per iteration and the table is re-tested after each.

    Returns (steps, p_after_flip) on success. Returns (None, p_of_last_table)
    when the arm runs out of observations to move or max_iter toggles have
    been applied without a flip.
    """
    started_significant = is_significant(test_p(a, b, c, d, use_chi2), alpha)

    table = (a, b, c, d)
    for count in range(1, max_iter + 1):
        moved = toggle_once(*table, arm, direction)
        if moved is None:
            # Donor cell exhausted: no further toggle is possible.
            break
        table = moved
        p_after = test_p(*table, use_chi2)
        if is_significant(p_after, alpha) != started_significant:
            return count, p_after

    # Either the arm was exhausted or the iteration cap was hit.
    return None, test_p(*table, use_chi2)

def compute_fi_fq_mfq(a, b, c, d, alpha=ALPHA):
    """
    Compute the Walsh-compliant Fragility Index with FQ and MFQ.

    The arm returned by choose_arm is toggled in both directions; FI is the
    smaller step count that flips significance ("up" wins a tie because it is
    evaluated first). FQ = FI / N and MFQ = FI / n_mod, where n_mod is the
    baseline size of the modified arm.

    Returns a dict with keys: FI, FQ, MFQ, baseline_p, baseline_state,
    target_state, arm, direction, n_mod, final_p, test_used. When neither
    direction can flip significance, FI/FQ/MFQ/direction are None, final_p
    equals baseline_p, and a "note" key explains why.
    """
    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_used = "chi2" if use_chi2 else "fisher_exact"

    base_p = test_p(a, b, c, d, use_chi2)
    base_state = "significant" if is_significant(base_p, alpha) else "non-significant"
    target_state = "non-significant" if base_state == "significant" else "significant"

    arm = choose_arm(a, b, c, d)
    n_mod = (a + b) if arm == 'A' else (c + d)

    # Try "up" before "down" so that min() below keeps the original tie-break.
    candidates = []
    for label, direction in (("non-events → events", "up"),
                             ("events → non-events", "down")):
        steps, p_after = steps_to_cross(a, b, c, d, arm, direction, use_chi2, alpha)
        if steps is not None:
            candidates.append((label, steps, p_after))

    if not candidates:
        # Same schema as the success result so callers can index every key.
        return {
            "FI": None, "FQ": None, "MFQ": None,
            "baseline_p": base_p, "baseline_state": base_state,
            "target_state": target_state,
            "arm": arm, "direction": None, "n_mod": n_mod,
            "final_p": base_p, "test_used": test_used,
            "note": "FI not attainable under Walsh constraints."
        }

    direction, steps, p_final = min(candidates, key=lambda x: x[1])
    FI = int(steps)
    FQ = (FI / N) if N > 0 else None
    MFQ = (FI / n_mod) if n_mod > 0 else None

    return {
        "FI": FI, "FQ": FQ, "MFQ": MFQ,
        "baseline_p": base_p, "baseline_state": base_state,
        "target_state": target_state,
        "arm": arm, "direction": direction, "n_mod": n_mod, "final_p": p_final,
        "test_used": test_used
    }

# ---------- Standardized Fragility Index (SFI) ----------

def compute_sfi(a, b, c, d, alpha=ALPHA):
    """
    SFI: Number of BFU-sized toggles in the LARGER arm to flip significance.

    BFU = 1/max(n_A, n_B). When the arms have equal size, arm A is used if it
    has strictly fewer events, otherwise arm B. The chosen arm is toggled in
    both directions and the smaller step count is reported ("up" wins a tie
    because it is evaluated first).

    Returns a dict with keys: SFI, BFU, baseline_p, baseline_state,
    target_state, larger_arm, n_large, direction, final_p, test_used.
    When SFI cannot be computed, SFI and direction are None, final_p equals
    baseline_p, and a "note" key explains why.
    """
    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_used = "chi2" if use_chi2 else "fisher_exact"

    n_A = a + b
    n_B = c + d

    # Identify larger arm; if tied, choose arm with fewer events
    if n_A > n_B or (n_A == n_B and a < c):
        larger_arm, n_large = 'A', n_A
    else:
        larger_arm, n_large = 'B', n_B

    base_p = test_p(a, b, c, d, use_chi2)
    base_state = "significant" if is_significant(base_p, alpha) else "non-significant"
    target_state = "non-significant" if base_state == "significant" else "significant"

    if n_large <= 0:
        # The larger arm has no observations, so BFU is undefined.
        # Keys mirror the success result so callers can index them safely.
        return {
            "SFI": None, "BFU": None,
            "baseline_p": base_p, "baseline_state": base_state,
            "target_state": target_state,
            "larger_arm": None, "n_large": 0,
            "direction": None, "final_p": base_p,
            "test_used": test_used,
            "note": "Empty arm."
        }

    BFU = 1.0 / n_large

    # Toggle larger arm in both directions ("up" first to keep the tie-break).
    candidates = []
    for label, direction in (("non-events → events", "up"),
                             ("events → non-events", "down")):
        steps, p_after = steps_to_cross(a, b, c, d, larger_arm, direction, use_chi2, alpha)
        if steps is not None:
            candidates.append((label, steps, p_after))

    if not candidates:
        return {
            "SFI": None, "BFU": BFU,
            "baseline_p": base_p, "baseline_state": base_state,
            "target_state": target_state,
            "larger_arm": larger_arm, "n_large": n_large,
            "direction": None, "final_p": base_p,
            "test_used": test_used,
            "note": "SFI not attainable (larger arm cannot flip significance)."
        }

    direction, steps, p_final = min(candidates, key=lambda x: x[1])
    SFI = int(steps)

    return {
        "SFI": SFI, "BFU": BFU,
        "baseline_p": base_p, "baseline_state": base_state,
        "target_state": target_state,
        "larger_arm": larger_arm, "n_large": n_large,
        "direction": direction, "final_p": p_final,
        "test_used": test_used
    }

# ---------- Risk Quotient (RQ) ----------

def compute_rq(a, b, c, d):
    """
    Risk Quotient (RQ) for a 2×2 table.

    RRI is the mean absolute deviation of the k = 4 observed cells from their
    expected counts under independence (row total × column total / N), and
    RQ = RRI / (N/k).

    Returns {"RQ", "RRI", "baseline_p"}, or {"RQ": None, "RRI": None, "note"}
    for an empty table.
    """
    N = n_total(a, b, c, d)
    if N == 0:
        return {"RQ": None, "RRI": None, "note": "Empty table."}

    k = 4
    row_totals = (a + b, c + d)        # arm A, arm B
    col_totals = (a + c, b + d)        # events, non-events

    # Expected counts in cell order a, b, c, d.
    expected = [(row * col) / N for row in row_totals for col in col_totals]
    dev_a, dev_b, dev_c, dev_d = (
        abs(obs - exp) for obs, exp in zip((a, b, c, d), expected)
    )

    # Explicit left-to-right addition keeps the floating-point result unchanged.
    RRI = (1/k) * (dev_a + dev_b + dev_c + dev_d)
    RQ = RRI / (N / k)

    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    return {
        "RQ": RQ,
        "RRI": RRI,
        "baseline_p": test_p(a, b, c, d, use_chi2)
    }

# ---------- Global Fragility Index (GFI) / GFQ for 2×2 ----------

def _neighbors_all_moves(state, N):
    """
    Yield every table reachable by moving one observation between two cells.

    *state* is (a, b, c); the fourth cell is implied as N - a - b - c.
    Each item is ((a', b', c'), (src, dst)) with cell indices 0..3 standing
    for a, b, c, d. Empty source cells are skipped so counts stay nonnegative.
    Moves are produced in ascending src, then ascending dst order.
    """
    a, b, c = state
    counts = (a, b, c, N - a - b - c)

    for giver, available in enumerate(counts):
        if available == 0:
            continue
        for taker in range(4):
            if taker == giver:
                continue
            moved = list(counts)
            moved[giver] -= 1
            moved[taker] += 1
            yield tuple(moved[:3]), (giver, taker)

def compute_gfi_gfq(a, b, c, d, alpha=ALPHA, store_path=True):
    """
    Global Fragility Index: fewest single-observation moves between any two
    cells (N fixed) that change the two-sided significance verdict.

    A breadth-first search starts from the observed table and expands one-move
    neighbours level by level, so the first table with a different verdict is
    at minimal distance. Tables are keyed as (a, b, c); d is N - a - b - c.

    store_path=True records the (src, dst) cell-index moves leading to the
    flipping table as "witness_path"; with store_path=False that list is empty.

    Returns a dict with GFI, GFQ (= GFI / N), baseline_p, baseline_state,
    final_p, witness_path, test_used, plus "note" when no GFI is produced
    (empty table, N above GFI_THRESHOLD, or no flip found).

    Raises ValueError if any cell is not a non-negative int.
    """
    if any(not isinstance(cell, int) or cell < 0 for cell in (a, b, c, d)):
        raise ValueError("Cells a,b,c,d must be non-negative integers.")

    total = n_total(a, b, c, d)
    chi2_mode = (total > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    test_label = "chi2" if chi2_mode else "fisher_exact"

    if total == 0:
        return {
            "GFI": None, "GFQ": None, "baseline_p": None, "baseline_state": None,
            "final_p": None, "witness_path": [],
            "test_used": test_label,
            "note": "Empty table."
        }

    if total > GFI_THRESHOLD:
        # Search space is too large; only the baseline p-value is reported.
        return {
            "GFI": None, "GFQ": None,
            "baseline_p": test_p(a, b, c, d, chi2_mode),
            "baseline_state": None,
            "final_p": None, "witness_path": [],
            "test_used": test_label,
            "note": f"GFI not computed for N > {GFI_THRESHOLD}. Use RQ for robustness assessment."
        }

    p_start = test_p(a, b, c, d, chi2_mode)
    started_significant = is_significant(p_start, alpha)
    state_label = "significant" if started_significant else "non-significant"

    origin = (a, b, c)
    seen = {origin}
    frontier = deque([(origin, 0, [] if store_path else None)])

    while frontier:
        current, dist, trail = frontier.popleft()

        for neighbor, move in _neighbors_all_moves(current, total):
            if neighbor in seen:
                continue
            seen.add(neighbor)

            # Rebuild the implied fourth cell before testing.
            na, nb, nc = neighbor
            p_here = test_p(na, nb, nc, total - na - nb - nc, chi2_mode)
            extended = (trail + [move]) if store_path else None

            if is_significant(p_here, alpha) != started_significant:
                moves_needed = dist + 1
                return {
                    "GFI": moves_needed, "GFQ": moves_needed / total,
                    "baseline_p": p_start,
                    "baseline_state": state_label,
                    "final_p": p_here,
                    "witness_path": extended if store_path else [],
                    "test_used": test_label
                }

            frontier.append((neighbor, dist + 1, extended))

    # Every reachable table kept the starting verdict.
    return {
        "GFI": None, "GFQ": None,
        "baseline_p": p_start,
        "baseline_state": state_label,
        "final_p": p_start, "witness_path": [],
        "test_used": test_label,
        "note": "No flip found (study appears highly robust)."
    }

# ---------- CLI ----------

def main():
    """
    Interactive entry point: read a 2×2 table from stdin and print all metrics.

    Reports the baseline test, the Walsh FI/FQ/MFQ, then GFI/GFQ when
    N <= GFI_THRESHOLD or RQ otherwise, and finally SFI.

    Raises ValueError if an entry is not an integer or is negative.
    """
    print("Enter 2×2 table cells as integers.")
    a = int(input("a (Arm A events): ").strip())
    b = int(input("b (Arm A non-events): ").strip())
    c = int(input("c (Arm B events): ").strip())
    d = int(input("d (Arm B non-events): ").strip())

    # Fail early with a clear message instead of an error from deep inside SciPy.
    if min(a, b, c, d) < 0:
        raise ValueError("Cells a,b,c,d must be non-negative integers.")

    N = n_total(a, b, c, d)
    use_chi2 = (N > N_THRESHOLD) and (min(a, b, c, d) >= MIN_CELL_THRESHOLD)
    use_gfi = N <= GFI_THRESHOLD

    # FI / FQ / MFQ (Walsh)
    res_fi = compute_fi_fq_mfq(a, b, c, d, alpha=ALPHA)

    # SFI
    res_sfi = compute_sfi(a, b, c, d, alpha=ALPHA)

    # GFI / GFQ or RQ depending on N
    if use_gfi:
        res_gfi = compute_gfi_gfq(a, b, c, d, alpha=ALPHA, store_path=True)
        res_rq = None
    else:
        res_gfi = None
        res_rq = compute_rq(a, b, c, d)

    print("\n" + "="*60)
    print("FRAGILITY METRICS RESULTS")
    print("="*60)

    print(f"\n--- Baseline (N = {N}) ---")
    print(f"Test used: {'Pearson χ²' if use_chi2 else 'Fisher exact'} (two-sided)")
    # compute_fi_fq_mfq already ran the same test on the same table; reuse it.
    print(f"p-value = {res_fi['baseline_p']:.6f} (α={ALPHA})")

    print("\n--- Walsh-compliant Fragility Index (FI) ---")
    if res_fi["FI"] is None:
        print("FI: not attainable under Walsh constraints.")
        if res_fi.get("note"):
            print(f"Note: {res_fi['note']}")
    else:
        print(f"FI  = {res_fi['FI']}")
        print(f"FQ  = {res_fi['FQ']:.6f}")
        print(f"MFQ = {res_fi['MFQ']:.6f}")
        print(f"Modified arm: {res_fi['arm']}, direction: {res_fi['direction']}, n_mod = {res_fi['n_mod']}")
        print(f"Final p-value = {res_fi['final_p']:.6f}")

    if use_gfi:
        print("\n--- Global Fragility Index (GFI) ---")
        print("(Path-independent, N fixed, multinomial model)")
        if res_gfi["GFI"] is None:
            print("GFI: not attained.")
            if res_gfi.get("note"):
                print(f"Note: {res_gfi['note']}")
        else:
            print(f"GFI = {res_gfi['GFI']}")
            print(f"GFQ = {res_gfi['GFQ']:.6f}")
            print(f"Final p-value = {res_gfi['final_p']:.6f}")
            # Show witness path
            if res_gfi["witness_path"]:
                idx_to_name = {0: "a", 1: "b", 2: "c", 3: "d"}
                path_str = " → ".join(
                    f"{idx_to_name[i]}→{idx_to_name[j]}" for (i, j) in res_gfi["witness_path"]
                )
                print(f"Witness path: {path_str}")
    else:
        print("\n--- Risk Quotient (RQ) ---")
        # Report the cut-off that actually selected this branch.
        print(f"(Neutrality Boundary Framework metric, used for N > {GFI_THRESHOLD})")
        if res_rq["RQ"] is None:
            print("RQ: not computable.")
            if res_rq.get("note"):
                print(f"Note: {res_rq['note']}")
        else:
            print(f"RQ  = {res_rq['RQ']:.6f}")
            print(f"RRI = {res_rq['RRI']:.6f}")
            print("Interpretation: RQ measures geometric distance from neutrality (independence).")
            print("  0.00-0.05: Extremely fragile")
            print("  0.05-0.10: Fragile")
            print("  0.10-0.25: Moderately robust")
            print("  0.25-0.50: Robust")
            print("  >0.50: Very robust")

    print("\n--- Standardized Fragility Index (SFI) ---")
    if res_sfi["SFI"] is None:
        print("SFI: not attainable.")
        if res_sfi.get("note"):
            print(f"Note: {res_sfi['note']}")
    else:
        print(f"SFI = {res_sfi['SFI']}")
        print(f"BFU = {res_sfi['BFU']:.6f}")
        print(f"Larger arm: {res_sfi['larger_arm']}, n_large = {res_sfi['n_large']}")
        print(f"Direction: {res_sfi['direction']}")
        print(f"Final p-value = {res_sfi['final_p']:.6f}")

    print("\n" + "="*60)

if __name__ == "__main__":
    main()

Enter 2×2 table cells as integers.
a (Arm A events): 30
b (Arm A non-events): 681
c (Arm B events): 4
d (Arm B non-events): 684

FRAGILITY METRICS RESULTS

--- Baseline (N = 1399) ---
Test used: Fisher exact (two-sided)
p-value = 0.000005 (α=0.05)

--- Walsh-compliant Fragility Index (FI) ---
FI  = 12
FQ  = 0.008578
MFQ = 0.017442
Modified arm: B, direction: non-events → events, n_mod = 688
Final p-value = 0.051639

--- Global Fragility Index (GFI) ---
(Path-independent, N fixed, multinomial model)
GFI = 7
GFQ = 0.005004
Final p-value = 0.054785
Witness path: a→c → a→c → a→c → a→c → a→c → a→c → a→c

--- Standardized Fragility Index (SFI) ---
SFI = 18
BFU = 0.001406
Larger arm: A, n_large = 711
Direction: events → non-events
Final p-value = 0.075776

