<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/anova.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title
# Complete Evidence Framework: ANOVA (k ≥ 2 groups)
# v.23-NOV-2025
# One-way ANOVA with complete p-fr-nb triplet
#
# Input options:
#   1. F-statistic with degrees of freedom
#   2. Group summary statistics (means, SDs, sample sizes)
#
# Output:
#   p  = p-value from F-test
#   fr = ANOVA-FQ (fragility quotient, stability of F-classification)
#   nb = η² (robustness, proportion of variance explained)
#
# ANOVA-FQ Formula:
#   Let F = observed F-statistic, F* = critical F at α=0.05
#   ANOVA-FS = |√F - √F*|  (SE-equivalent distance to significance boundary)
#   ANOVA-FQ = ANOVA-FS / (1 + ANOVA-FS)  (normalized [0,1])
#
# Key property: For k=2, F = t², so √F = |t|
#   → ANOVA-FQ reduces exactly to CFQ (continuous fragility quotient)
#
# η² Formula:
#   η² = df_b·F / (df_b·F + df_w)
#   where df_b = k-1, df_w = N-k
#
# Interpretation:
#   fr ∈ [0,1]: 0 = fragile, 1 = stable
#   nb ∈ [0,1]: 0 = no effect, 1 = all variance explained
#
# Limits:
#   k ≤ 20 groups (configurable via MAX_GROUPS)
#   No raw data required (computes from summary statistics)
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo.
#   https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0

# ----- SciPy availability guard -----
# Import scipy; if it is not installed, try to install it with pip for the
# interpreter that is currently running, then import it again.
try:
    import scipy
except ImportError:
    try:
        import subprocess
        import sys
        # sys.executable ensures pip installs into this interpreter's environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "scipy"])
        import scipy
    except Exception:
        # The install or the second import failed: print a hint, then re-raise
        # the original error so the failure is not hidden.
        print("Please install scipy: pip install scipy")
        raise

import numpy as np
from scipy import stats
import sys

# ========== CONFIGURABLE PARAMETERS ==========
# Significance level. Used as the default `alpha` argument of the calculation
# functions below (Python binds that default when the functions are defined)
# and read by anova_summary() / main() when reporting.
ALPHA = 0.05
# Upper bound on the number of groups accepted by
# calculate_anova_from_summary() and by the interactive prompt in main().
MAX_GROUPS = 20  # Practical limit for number of groups
# =============================================

# ---------- Core Functions ----------

def calculate_anova_complete(F, df_b, df_w, alpha=ALPHA):
    """
    Calculate complete p-fr-nb triplet from F-statistic and degrees of freedom.

    Parameters:
    -----------
    F : float
        F-statistic from one-way ANOVA. May be +inf (zero within-group
        variance); NaN is rejected.
    df_b : int
        Degrees of freedom between groups (k - 1)
    df_w : int
        Degrees of freedom within groups (N - k)
    alpha : float
        Significance level, strictly between 0 and 1 (default 0.05)

    Returns:
    --------
    dict : Dictionary containing:
        - p: p-value from F-test
        - fr: ANOVA-FQ (fragility quotient)
        - nb: η² (eta-squared, robustness)
        - ANOVA_FS: raw fragility score (SE-equivalent units)
        - F: F-statistic
        - F_crit: critical F at alpha
        - df_b: between-group df
        - df_w: within-group df
        - alpha: significance level used for F_crit

    Raises:
    -------
    ValueError
        If F is negative or NaN, if either df is not positive, or if alpha
        is not strictly between 0 and 1.

    Reference:
    ----------
    FRAGILITY_METRICS v9.6+ (extended), §4.6
    """
    if np.isnan(F):
        raise ValueError("F-statistic must be a number (got NaN)")
    if F < 0:
        raise ValueError("F-statistic must be non-negative")
    if df_b <= 0 or df_w <= 0:
        raise ValueError("Degrees of freedom must be positive")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    # p-value via the survival function: 1 - cdf(F) rounds to exactly 0
    # once cdf(F) is within machine epsilon of 1, losing very small p-values.
    p_value = stats.f.sf(F, df_b, df_w)

    # Critical F at alpha
    F_crit = stats.f.ppf(1 - alpha, df_b, df_w)

    if np.isinf(F):
        # Limiting values as F -> inf. Evaluating the formulas directly
        # would give inf/inf = NaN for both fr and nb.
        ANOVA_FS = float('inf')
        ANOVA_FQ = 1.0
        eta_squared = 1.0
    else:
        # ANOVA-FS and ANOVA-FQ (fragility): distance between sqrt(F) and
        # sqrt(F_crit), then squashed into [0, 1).
        ANOVA_FS = abs(np.sqrt(F) - np.sqrt(F_crit))
        ANOVA_FQ = ANOVA_FS / (1 + ANOVA_FS)

        # η² (robustness)
        eta_squared = (df_b * F) / (df_b * F + df_w)

    return {
        'p': p_value,
        'fr': ANOVA_FQ,
        'nb': eta_squared,
        'ANOVA_FS': ANOVA_FS,
        'F': F,
        'F_crit': F_crit,
        'df_b': df_b,
        'df_w': df_w,
        'alpha': alpha,
    }


def calculate_anova_from_summary(group_means, group_stds, group_sizes, alpha=ALPHA):
    """
    Calculate complete p-fr-nb triplet from group summary statistics.

    Parameters:
    -----------
    group_means : list of float
        Mean for each group
    group_stds : list of float
        Standard deviation for each group (non-negative)
    group_sizes : list of int
        Sample size for each group (each at least 1; total must exceed the
        number of groups so that df_within > 0)
    alpha : float
        Significance level (default 0.05)

    Returns:
    --------
    dict : Dictionary containing complete ANOVA results with p-fr-nb, plus
        k_groups, N_total, the three input lists, SS_between, SS_within,
        MS_between and MS_within.

    Raises:
    -------
    ValueError
        If there are fewer than 2 or more than MAX_GROUPS groups, the input
        lists differ in length, a sample size is below 1, a standard
        deviation is negative, or the total sample size does not exceed the
        number of groups.
    """
    k = len(group_means)
    if k < 2:
        raise ValueError("Need at least 2 groups")
    if k > MAX_GROUPS:
        raise ValueError(f"Maximum {MAX_GROUPS} groups allowed (configurable via MAX_GROUPS)")
    if not (len(group_means) == len(group_stds) == len(group_sizes)):
        raise ValueError("All input lists must have same length")
    if any(n < 1 for n in group_sizes):
        raise ValueError("Each group must have a sample size of at least 1")
    if any(s < 0 for s in group_stds):
        raise ValueError("Standard deviations must be non-negative")

    N = sum(group_sizes)

    # Degrees of freedom
    df_b = k - 1
    df_w = N - k
    if df_w <= 0:
        # Without this guard MS_within = SS_within / 0 raises ZeroDivisionError.
        raise ValueError("Total sample size must exceed the number of groups (N - k > 0)")

    # Calculate grand mean (size-weighted mean of the group means)
    grand_mean = sum(n * m for n, m in zip(group_sizes, group_means)) / N

    # Calculate between-group sum of squares
    SS_between = sum(n * (m - grand_mean)**2 for n, m in zip(group_sizes, group_means))

    # Calculate within-group sum of squares
    SS_within = sum((n - 1) * s**2 for n, s in zip(group_sizes, group_stds))

    # Mean squares
    MS_between = SS_between / df_b
    MS_within = SS_within / df_w

    # F-statistic
    if MS_within > 0:
        F_stat = MS_between / MS_within
    elif MS_between > 0:
        # Groups differ but have no within-group spread: F diverges.
        F_stat = float('inf')
    else:
        # No variation between or within groups (0/0): there is no evidence
        # of a group difference, so report F = 0 rather than infinity.
        F_stat = 0.0

    # Calculate complete triplet
    result = calculate_anova_complete(F_stat, df_b, df_w, alpha=alpha)

    # Add summary statistics
    result.update({
        'k_groups': k,
        'N_total': N,
        'group_means': group_means,
        'group_stds': group_stds,
        'group_sizes': group_sizes,
        'SS_between': SS_between,
        'SS_within': SS_within,
        'MS_between': MS_between,
        'MS_within': MS_within
    })

    return result


# ---------- Interpretation Functions ----------

def interpret_fragility(fr):
    """
    Map an ANOVA-FQ value to its verbal fragility category.

    The bands are checked from lowest to highest; the first exclusive upper
    bound that exceeds ``fr`` selects the label. Values of 0.40 and above
    (and anything that matches no band) are labelled "very stable".

    Reference: FRAGILITY_METRICS v9.6, Part VIII
    """
    bands = (
        (0.01, "extremely fragile"),
        (0.05, "very fragile"),
        (0.10, "fragile"),
        (0.25, "mildly stable"),
        (0.40, "moderate stability"),
    )
    for upper_bound, label in bands:
        if fr < upper_bound:
            return label
    return "very stable"


def interpret_robustness(nb):
    """
    Map an η² value to its verbal robustness / effect-size category.

    The bands are checked from lowest to highest; the first exclusive upper
    bound that exceeds ``nb`` selects the label. Values of 0.50 and above
    (and anything that matches no band) fall into the final category.

    Reference: FRAGILITY_METRICS v9.6, Part VIII
    """
    bands = (
        (0.05, "at neutrality (no group effect)"),
        (0.10, "near neutrality (minimal group differences)"),
        (0.25, "moderate distance from neutrality"),
        (0.50, "clear separation (substantial group differences)"),
    )
    for upper_bound, label in bands:
        if nb < upper_bound:
            return label
    return "far from neutrality (large group effect)"


def eta_squared_to_cohens_f(eta_squared):
    """Return Cohen's f = sqrt(η² / (1 - η²)); infinite once η² reaches 1."""
    saturated = eta_squared >= 1
    if not saturated:
        unexplained = 1 - eta_squared
        return np.sqrt(eta_squared / unexplained)
    # All variance explained: the ratio's denominator would be zero.
    return float('inf')


# ---------- Summary Interface ----------

def anova_summary(result):
    """
    Display complete p-fr-nb evidence assessment.

    Prints the F-test values, the p-fr-nb triplet, verbal interpretations,
    the raw fragility score, Cohen's f, the significance classification and,
    when present, the per-group summary. Returns nothing.

    Parameters:
    -----------
    result : dict
        Output from calculate_anova_complete or calculate_anova_from_summary.
        If it carries an 'alpha' entry, that level is used for the reported
        critical value and the classification; otherwise the module-level
        ALPHA is used.
    """
    def fmt_f(x):
        return "NA" if x is None or not np.isfinite(x) else f"{x:.6f}"

    def fmt_i(x):
        return "NA" if x is None else str(int(x))

    # Report against the significance level the result was computed with,
    # so F_crit and the classification stay consistent with each other.
    alpha = result['alpha'] if 'alpha' in result else ALPHA

    # Group information if available
    if 'k_groups' in result:
        print(f"k_groups = {fmt_i(result['k_groups'])}")
        print(f"N_total = {fmt_i(result['N_total'])}")
        print()

    # F-test results
    print(f"F = {fmt_f(result['F'])}")
    print(f"F_crit (α={alpha}) = {fmt_f(result['F_crit'])}")
    print(f"df_between = {fmt_i(result['df_b'])}")
    print(f"df_within = {fmt_i(result['df_w'])}")
    print()

    # Complete evidence triplet
    print("=" * 50)
    print("COMPLETE EVIDENCE ASSESSMENT (p-fr-nb)")
    print("=" * 50)
    print(f"p = {fmt_f(result['p'])}")
    print(f"fr (ANOVA-FQ) = {fmt_f(result['fr'])}")
    print(f"nb (eta_squared) = {fmt_f(result['nb'])}")
    print("=" * 50)
    print()

    # Interpretations
    interp_fr = interpret_fragility(result['fr'])
    interp_nb = interpret_robustness(result['nb'])

    print("INTERPRETATION:")
    print(f"Fragility: {interp_fr}")
    print(f"Robustness: {interp_nb}")
    print()

    # Additional metrics
    cohens_f = eta_squared_to_cohens_f(result['nb'])
    print(f"ANOVA-FS (raw fragility score) = {fmt_f(result['ANOVA_FS'])}")
    print(f"Cohen's f (effect size) = {fmt_f(cohens_f)}")

    # Significance classification
    is_sig = result['p'] <= alpha
    print()
    print(f"Classification: {'SIGNIFICANT' if is_sig else 'NON-SIGNIFICANT'} at α={alpha}")

    # Group details if available
    if 'group_means' in result:
        print()
        print("GROUP SUMMARY:")
        for i, (m, s, n) in enumerate(zip(result['group_means'],
                                          result['group_stds'],
                                          result['group_sizes']), 1):
            print(f"  Group {i}: mean={fmt_f(m)}, sd={fmt_f(s)}, n={fmt_i(n)}")


# ---------- CLI Entry Point ----------

def main():
    """
    Interactive entry point: prompt for ANOVA inputs and print the summary.

    Asks whether the user will supply an F-statistic with its degrees of
    freedom (choice 1) or per-group means, SDs and sample sizes (choice 2),
    computes the p-fr-nb triplet and prints it with anova_summary().

    An invalid menu choice, an out-of-range group count, a value that cannot
    be parsed as a number, or a ValueError raised by the calculation
    functions prints a message and exits with status 1.
    """
    print("Complete Evidence Framework: ANOVA Analysis")
    print("=" * 50)
    print("Calculate p-fr-nb triplet for multi-group comparisons")
    print()
    print("Choose input type:")
    print("1. F-statistic and degrees of freedom")
    print("2. Group summary statistics (means, SDs, sample sizes)")
    print()

    choice = input("Enter choice (1 or 2): ").strip()
    print()

    if choice not in ("1", "2"):
        print("Invalid choice. Please enter 1 or 2.")
        sys.exit(1)

    try:
        if choice == "1":
            # Input: F-statistic and df
            F = float(input("F-statistic: ").strip())
            df_b = int(input("df_between (k-1): ").strip())
            df_w = int(input("df_within (N-k): ").strip())
            print()

            result = calculate_anova_complete(F, df_b, df_w, alpha=ALPHA)
        else:
            # Input: Group summaries
            k = int(input("Number of groups: ").strip())
            if k < 2:
                print("Error: Need at least 2 groups")
                sys.exit(1)
            if k > MAX_GROUPS:
                print(f"Error: Maximum {MAX_GROUPS} groups allowed")
                sys.exit(1)

            print()
            group_means = []
            group_stds = []
            group_sizes = []

            for i in range(k):
                print(f"Group {i+1}:")
                mean = float(input("  Mean: ").strip())
                sd = float(input("  SD: ").strip())
                n = int(input("  Sample size: ").strip())
                group_means.append(mean)
                group_stds.append(sd)
                group_sizes.append(n)
                print()

            result = calculate_anova_from_summary(group_means, group_stds, group_sizes, alpha=ALPHA)
    except ValueError as exc:
        # Covers both unparseable numbers (float()/int()) and validation
        # errors from the calculation functions; report them like the other
        # input errors instead of ending in a traceback.
        print(f"Error: {exc}")
        sys.exit(1)

    anova_summary(result)


if __name__ == "__main__":
    main()


Complete Evidence Framework: ANOVA Analysis
Calculate p-fr-nb triplet for multi-group comparisons

Choose input type:
1. F-statistic and degrees of freedom
2. Group summary statistics (means, SDs, sample sizes)

