<a href="https://colab.research.google.com/github/tomheston/fragility-metrics/blob/main/notebooks/correlation_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fragility Metrics Toolkit: Correlation Analysis — COMPLETE p–fr–nb
# v.25-NOV-2025
# Now with Zerko Fragility Quotient (ZFQ) → full triplet achieved
#
# Input: r (Pearson or Spearman), n (sample size)
# Output: p, fr (ZFQ), nb (DTI)
#
# ZFQ Formula (framework-consistent):
#   Let z_r = atanh(r)  (Fisher z-transform), and
#       Z   = |z_r| * sqrt(n − 3)
#   Under H0 (ρ = 0), z_r * sqrt(n − 3) ~ N(0, 1) approximately for moderate/large n,
#   so Z is the absolute value of an approximately standard-normal statistic.
#   Zerko Fragility Quotient (ZFQ) is defined as:
#       ZFQ = 0, if Z ≤ Z_crit
#       ZFQ = (Z − Z_crit) / (1 + Z − Z_crit), if Z > Z_crit
#   where Z_crit = 1.96 for a two-sided α = 0.05.
#
#   This makes ZFQ:
#   - 0 at or below the conventional significance threshold (Z ≤ Z_crit),
#   - increasing in both |r| and n once the result is beyond that threshold,
#   - bounded in [0, 1), like other FQ metrics in the framework.
#
# DTI:
#   Distance to Independence in Fisher-z space:
#       z_r = |atanh(r)|
#       DTI = z_r / (1 + z_r)
#
# DTI is the robustness (nb) metric; ZFQ is the fragility (fr) metric.
#
# IF YOU USE THIS CALCULATOR PLEASE CITE:
# Heston, T. F. (2025). Fragility Metrics Toolkit [Software]. Zenodo.
# https://doi.org/10.5281/zenodo.17254763
#
# © Thomas F. Heston 2025. CC-BY 4.0

import numpy as np
from scipy.stats import t as tdist

# Significance level that print_correlation_results compares the p-value against.
ALPHA = 0.05
# Critical value that compute_zfq_corrected subtracts from the Fisher-z statistic.
# It is a hard-coded literal, not derived from ALPHA, so keep the two in sync.
Z_CRIT = 1.96  # Two-sided α=0.05 in z-space

# ==================== Zerko Fragility Quotient (ZFQ) ====================

def compute_zfq_corrected(r: float, n: int):
    """
    Return the Zerko Fragility Quotient (ZFQ) for correlation r at sample size n.

    The correlation is standardized in Fisher-z space,
        Z = |atanh(r)| * sqrt(n − 3),
    and whatever exceeds the critical value is squashed toward 1:
        ZFQ = 0                      when Z <= Z_CRIT
        ZFQ = excess / (1 + excess)  when Z >  Z_CRIT, with excess = Z − Z_CRIT

    Returns None when n < 4 (n − 3 must be positive) and 1.0 when |r| >= 1,
    where atanh(r) is not finite.
    """
    # The sample-size guard comes first, so a perfect correlation with
    # n < 4 still reports None rather than 1.0.
    if n < 4:
        return None
    if abs(r) >= 1.0:
        return 1.0

    standardized = abs(np.arctanh(r)) * np.sqrt(n - 3)
    if standardized <= Z_CRIT:
        return 0.0

    # Zerko Fragility Score: how far the statistic sits beyond the threshold.
    excess = standardized - Z_CRIT
    return excess / (1.0 + excess)

# ==================== DTI — Distance to Independence (official nb) ====================

def compute_dti(r: float):
    """
    Return the Distance to Independence (DTI) for correlation r.

    The magnitude of the Fisher z-transform, d = |atanh(r)|, is mapped to
    d / (1 + d), so the result is 0 at r = 0 and approaches 1 as |r| → 1.
    When |r| >= 1 the transform is not finite and 1.0 is returned directly.
    """
    if abs(r) >= 1.0:
        return 1.0
    fisher_distance = abs(np.arctanh(r))
    return fisher_distance / (1.0 + fisher_distance)

# ==================== Classic p-value (for reference) ====================

def classic_pvalue(r: float, n: int):
    """
    Return the two-sided p-value for correlation r via the t-test with n − 2
    degrees of freedom.

    Returns None when n < 3 (no positive degrees of freedom) and 0.0 when
    |r| >= 1, where the t statistic would divide by zero.
    """
    if n < 3:
        return None
    if abs(r) >= 1.0:
        return 0.0

    dof = n - 2
    t_stat = r * np.sqrt(dof / (1.0 - r**2))
    # Double the upper-tail probability of |t| for a two-sided test.
    upper_tail = tdist.sf(abs(t_stat), dof)
    return 2.0 * upper_tail

# ==================== Interpretation ====================

def interpret_zfq(zfq):
    """
    Map a ZFQ value to its qualitative label.

    None maps to "not computed". Otherwise the label of the first band whose
    exclusive upper bound exceeds zfq is returned; values at or above the
    last bound are "very stable".
    """
    if zfq is None:
        return "not computed"

    # (exclusive upper bound, label), in ascending order.
    bands = (
        (0.01, "extremely fragile"),
        (0.05, "very fragile"),
        (0.10, "fragile"),
        (0.25, "mildly stable"),
        (0.40, "moderate stability"),
    )
    for upper_bound, label in bands:
        if zfq < upper_bound:
            return label
    return "very stable"

def interpret_dti(dti):
    """
    Map a DTI value to its qualitative label.

    None maps to "not computed". Otherwise the label of the first band whose
    exclusive upper bound exceeds dti is returned; values at or above the
    last bound are "far from independence (strong relationship)".
    """
    if dti is None:
        return "not computed"

    # (exclusive upper bound, label), in ascending order.
    bands = (
        (0.05, "at independence (no relationship)"),
        (0.10, "near independence"),
        (0.25, "moderate distance"),
        (0.50, "clear separation"),
    )
    for upper_bound, label in bands:
        if dti < upper_bound:
            return label
    return "far from independence (strong relationship)"

# ==================== Complete Evidence ====================

def correlation_complete(r: float, n: int):
    """
    Compute the complete p–fr–nb triplet for a correlation.

    Parameters
    ----------
    r : float
        Pearson or Spearman correlation coefficient, in [-1, 1].
    n : int
        Sample size (pairwise complete).

    Returns
    -------
    dict with keys:
        'r'  : correlation
        'n'  : sample size
        'p'  : classical two-sided p-value (t-test), or None if n < 3
        'fr' : ZFQ (fragility), or None if n < 4
        'nb' : DTI (robustness)
        'z'  : Fisher z-transform atanh(r); +inf for r = 1, -inf for r = -1

    Raises
    ------
    ValueError
        If r is outside [-1, 1] (NaN included) or n < 2.
    """
    # Written as a negated chained comparison so that NaN is rejected too.
    if not (-1.0 <= r <= 1.0):
        raise ValueError("r must be in [-1, 1]")
    if n < 2:
        raise ValueError("n must be ≥ 2")

    p_val = classic_pvalue(r, n)
    zfq = compute_zfq_corrected(r, n)
    dti = compute_dti(r)

    # atanh diverges at ±1; report the limit with the sign of r so a perfect
    # negative correlation is not given a positive transform.
    if abs(r) < 1.0:
        fisher_z = np.arctanh(r)
    else:
        fisher_z = float("-inf") if r < 0 else float("inf")

    return {
        "r": r,
        "n": n,
        "p": p_val,
        "fr": zfq,
        "nb": dti,
        "z": fisher_z,
    }

# ==================== Output ====================

def print_correlation_results(res):
    """
    Pretty-print the complete evidence assessment for correlation.

    Parameters
    ----------
    res : dict
        Result with the keys 'r', 'n', 'p', 'fr' and 'nb', as returned by
        correlation_complete. 'p' and 'fr' may be None, in which case they
        are shown as n/a.

    Prints the numeric triplet, a qualitative reading of each metric, a
    strength label based on |r|, and a one-line verdict. Returns None.
    """
    # "\n" is a real newline here; an escaped backslash ("\\n") would print
    # the two characters \ and n instead of a blank line.
    print("\n" + "=" * 60)
    print("COMPLETE EVIDENCE ASSESSMENT: CORRELATION (p–fr–nb)")
    print("=" * 60)
    print(f"Pearson r = {res['r']:+.6f}  (n = {res['n']})")

    # p-value
    if res["p"] is not None:
        print(f"p-value   = {res['p']:.6f}")
    else:
        print("p-value   = n/a (n < 3)")

    # fragility (ZFQ)
    zfq = res["fr"]
    if zfq is None:
        zfq_str = "n/a (n < 4)"
    else:
        zfq_str = f"{zfq:.6f}"

    print(f"fr (ZFQ)  = {zfq_str}  →  {interpret_zfq(zfq)}")

    # robustness (DTI)
    dti = res["nb"]
    dti_str = "n/a" if dti is None else f"{dti:.6f}"
    print(f"nb (DTI)  = {dti_str}  →  {interpret_dti(dti)}")
    print("=" * 60)

    # Strength verdict based on |r|
    abs_r = abs(res["r"])
    if abs_r >= 0.7:
        strength = "very strong"
    elif abs_r >= 0.5:
        strength = "strong"
    elif abs_r >= 0.3:
        strength = "moderate"
    elif abs_r >= 0.1:
        strength = "weak"
    else:
        strength = "negligible"

    print("\nInterpretation:")
    print(f"• Observed correlation: {strength} (r = {res['r']:+.3f})")
    print(f"• Fragility (ZFQ): {interpret_zfq(zfq)}")
    print(f"• Robustness (DTI): {interpret_dti(dti)}")

    # Flag the two discordant cases; everything else is reported as concordant.
    p = res["p"]
    if p is not None and p <= ALPHA and (zfq is not None) and zfq < 0.10:
        print("→ Significant but fragile — interpret with caution")
    elif p is not None and p > ALPHA and (dti is not None) and dti > 0.25:
        print("→ Non-significant p-value, but meaningful correlation exists (likely small sample or low power)")
    else:
        print("→ p–fr–nb triplet is concordant")

    print("\nZerko Fragility Quotient (ZFQ) completes the p–fr–nb triplet for correlation.")
    print("Reference: FRAGILITY_METRICS.md v9.7 + Zerko Addendum (2025)")

# ==================== CLI ====================

def main():
    """
    Interactive entry point: prompt for r and n, then print the assessment.

    Reads the correlation and the sample size from standard input, passes
    them to correlation_complete, and prints the result with
    print_correlation_results. A ValueError from parsing or validating the
    input propagates to the caller.
    """
    # "\n" is a real newline; the escaped form "\\n" would print a literal
    # backslash followed by n after the banner.
    print("Correlation Complete Evidence Calculator (p–fr–nb)\n")
    r = float(input("Enter Pearson/Spearman r (-1 to +1): ").strip())
    n = int(input("Enter sample size n: ").strip())

    result = correlation_complete(r, n)
    print_correlation_results(result)

# Start the interactive calculator only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Correlation Analysis

Pearson r (-1 to +1): .1
Sample size n: 500

p  = 0.025347 by two-sided t-test for Pearson r
fr = n/a for correlations*
nb (DTI) = 0.091186

Interpretation:
The p-value is significant at p≤0.05**
Pearson r = 0.1000  (n = 500)
DTI (Distance to Independence) = 0.091186

The observed correlation of r = 0.100 and DTI agree: weak, essentially indistinguishable from no relationship (r=0).

*Fragility (fr) is not defined because no unique, model-free way exists to toggle points (see: FRAGILITY_METRICS.md https://doi.org/10.5281/zenodo.17254763)
**The p-value is provided for reference only and should be interpreted cautiously, as it is highly dependent on sample size and can be misleading.
DTI is the framework’s official robustness (nb) metric for correlations.
