In [3]:
# Preregistered α⁻¹ test: fixed rails, iterate cap ≤4, two nulls, BH-FDR, generalization, analytic checks.
# Files are written to /content/.
# You can safely "Run all" this single cell in Colab.

import math, random, os, time
import numpy as np
import pandas as pd

# ============ Parameters (tune here if needed) ============
# Seed passed to both random.seed() and np.random.seed() below.
SEED = 42
# Bases in which each 0/1 prefix is read as a fractional expansion.
BASES = list(range(2, 11))          # 2..10
# Prefix lengths requested from each sequence generator.
N_LIST = [9, 27, 81, 243]           # N = 3^k
# Largest iterate count k tried per rail when searching for the best match.
ITER_CAP = 4                        # prereg: iter cap ≤ 4
# Number of null sequences drawn for each of the two null models.
NULL_REPS = 100                     # per null type, per (seq, base, N)
# Target that relative errors are measured against.
# NOTE(review): presumably the inverse fine-structure constant (CODATA 2018 digits) — confirm the source.
TARGET_ALPHA_INV = 137.035999084    # α^{-1}

# Sequences to test (preregistered small set; extend if desired)
# Maps the label written to the "sequence" column -> key looked up in main()'s SEQ_FUNS.
USE_SEQS = {
    "Fib": "fib_word",
    "PD_COMP": "period_doubling_complement",
}

# Output directory
# main() creates it if missing and writes both CSV reports here.
OUT_DIR = "/content"

# ============ Utilities ============
# Seed both generators once, before any null sequence is drawn, so a full run is
# reproducible from SEED alone.
# NOTE(review): the samplers visible in this file call only the stdlib `random`
# module; the NumPy seed looks precautionary.
random.seed(SEED)
np.random.seed(SEED)

def fib_word(n: int):
    """Return the first `n` symbols of the Fibonacci word as a list of 0/1 ints.

    The word is grown from "0" by repeatedly applying the substitution
    0 -> 01, 1 -> 0 until it is at least `n` characters long, then truncated.
    """
    # Only "0" has an explicit image; every other character rewrites to "0".
    image_of = {"0": "01"}
    word = "0"
    while len(word) < n:
        word = "".join(image_of.get(symbol, "0") for symbol in word)
    return [int(symbol == "1") for symbol in word[:n]]

def period_doubling(n: int):
    """Return the first `n` symbols of the period-doubling word as 0/1 ints.

    The word is grown from "0" by repeatedly applying the substitution
    0 -> 01, 1 -> 00 until it is at least `n` characters long, then truncated.
    """
    # Only "0" has an explicit image; every other character rewrites to "00".
    image_of = {"0": "01"}
    word = "0"
    while len(word) < n:
        word = "".join(image_of.get(symbol, "00") for symbol in word)
    return [int(symbol == "1") for symbol in word[:n]]

def complement_bits(bits):
    """Return a new list holding `1 - b` for every element `b` of `bits`."""
    flipped = []
    for bit in bits:
        flipped.append(1 - bit)
    return flipped

def period_doubling_complement(n: int):
    """Return the first `n` bits of the period-doubling word with every bit flipped."""
    base_word = period_doubling(n)
    return complement_bits(base_word)

def value_from_bits_in_base(bits, base: int) -> float:
    """Read a 0/1 sequence as the digits of a base-`base` fraction.

    The first bit carries weight base**-1, the second base**-2, and so on:
    x = sum_{i>=1} bit_i * base^{-i}. An empty sequence yields 0.0.
    """
    step = 1.0 / base
    weight = 1.0
    total = 0.0
    for bit in bits:
        # Advance the place value first so the leading digit weighs 1/base.
        weight *= step
        if not bit:
            continue
        total += weight
    return total

def run_lengths(bits):
    """Run-length encode a 0/1 sequence.

    Returns a list of (symbol, run_length) pairs in order of appearance;
    an empty input gives an empty list.
    """
    if not bits:
        return []
    runs = []
    run_start = 0
    for pos, bit in enumerate(bits[1:], start=1):
        if bit == bits[run_start]:
            continue
        # Symbol changed: close the run that began at run_start.
        runs.append((bits[run_start], pos - run_start))
        run_start = pos
    # The final run is still open when the loop ends.
    runs.append((bits[run_start], len(bits) - run_start))
    return runs

def sample_from_run_lengths(rl, n_total: int):
    """Draw a 0/1 sequence of length `n_total` whose runs reuse the lengths in `rl`.

    The zero-run lengths and the one-run lengths of `rl` are shuffled
    independently (zeros first, then ones, using the global `random` stream).
    Runs are then emitted with alternating symbols, beginning with the symbol
    of the first run in `rl`, cycling through each shuffled pool as often as
    needed. The result is truncated to exactly `n_total` items. A symbol with
    no runs in `rl` falls back to a single run of length 1. An empty `rl`
    returns an empty list.
    """
    if not rl:
        return []

    pools = {
        0: [length for (symbol, length) in rl if symbol == 0] or [1],
        1: [length for (symbol, length) in rl if symbol == 1] or [1],
    }
    # Shuffle order is part of the contract: it fixes how the RNG stream is consumed.
    random.shuffle(pools[0])
    random.shuffle(pools[1])

    cursor = {0: 0, 1: 0}
    # Anything other than 0 in the first run is treated as the "one" symbol.
    current = 0 if rl[0][0] == 0 else 1
    sample = []
    while len(sample) < n_total:
        pool = pools[current]
        length = pool[cursor[current] % len(pool)]
        sample.extend([current] * length)
        cursor[current] += 1
        current = 1 - current
    return sample[:n_total]

def permuted_bits(bits):
    """Return a shuffled copy of `bits`; the input itself is left untouched.

    Uses the global `random` stream, so results depend on its current state.
    """
    shuffled = bits.copy()
    random.shuffle(shuffled)
    return shuffled

# ============ Fixed rails (preregistered) ============
# R1: pre×3 + (sign → comp): F(x) = 3 * (1 + x)
def rail_R1_iter(x, k: int):
    """Apply the R1 map F(x) = 3 * (1 + x) to `x` exactly `k` times, step by step."""
    value = x
    for _ in range(k):
        shifted = 1.0 + value
        value = 3.0 * shifted
    return value

def rail_R1_closed(x, k: int):
    """Closed form of the k-th R1 iterate: F^k(x) = (x + 1.5) * 3^k - 1.5."""
    # -1.5 is the fixed point of F(x) = 3 * (1 + x); distances to it scale by 3 per step.
    offset_from_fixed_point = x + 1.5
    growth = 3.0 ** k
    return offset_from_fixed_point * growth - 1.5

# R2: pre×3 + post×3 + (comp): F(x) = 3 - 9x
def rail_R2_iter(x, k: int):
    """Apply the R2 map F(x) = 3 - 9x to `x` exactly `k` times, step by step."""
    value = x
    for _ in range(k):
        scaled = 9.0 * value
        value = 3.0 - scaled
    return value

def rail_R2_closed(x, k: int):
    """Closed form of the k-th R2 iterate.

    For the affine map y -> a*y + b with a = -9, b = 3:
    F^k(x) = a^k * x + b * (a^k - 1) / (a - 1).
    """
    slope, intercept = -9.0, 3.0
    slope_pow = slope ** k
    # Geometric-series sum of the intercept contributions over k steps.
    accumulated = intercept * (slope_pow - 1.0) / (slope - 1.0)
    return slope_pow * x + accumulated

# R3: pre×3 + (inv → sign → odds): F(x) = -3 / (x + 1)
def rail_R3_iter(x, k: int):
    """Apply the R3 map F(x) = -3 / (x + 1) to `x` exactly `k` times.

    Returns NaN as soon as an intermediate denominator is within 1e-15 of
    zero, i.e. when the orbit hits the pole at x = -1.
    """
    value = x
    for _ in range(k):
        denominator = value + 1.0
        if abs(denominator) < 1e-15:
            # Landed on the pole: the iterate is undefined from here on.
            return float('nan')
        value = -3.0 / denominator
    return value

def rail_R3_closed(x, k: int):
    """Closed form of the k-th R3 iterate via a 2x2 matrix power.

    F(x) = -3 / (x + 1) is the Möbius map of M = [[0, -3], [1, 1]], so F^k is
    the Möbius map of M^k: F^k(x) = (a*x + b) / (c*x + d) with
    [[a, b], [c, d]] = M^k. Returns NaN when |c*x + d| < 1e-15.
    """
    generator = np.array([[0.0, -3.0], [1.0, 1.0]])
    (a, b), (c, d) = np.linalg.matrix_power(generator, k)
    denominator = c * x + d
    if abs(denominator) < 1e-15:
        return float('nan')
    numerator = a * x + b
    return numerator / denominator

# Registry of rails: name -> (iterative implementation, closed-form implementation).
# main() walks this dict in insertion order; the key is written to the "rail"
# column, the iterative form does the scoring, and the closed form is used only
# for the analytic cross-check.
RAILS = {
    "R1_pre3_sign_comp": (rail_R1_iter, rail_R1_closed),
    "R2_pre3_post3_comp": (rail_R2_iter, rail_R2_closed),
    "R3_pre3_inv_sign_odds": (rail_R3_iter, rail_R3_closed),
}

# Quick internal self-checks: each closed form must reproduce its iterative rail
# for k = 1..4 at a few sample points.
for _x in [0.01, 0.1, 0.2]:
    for k in [1, 2, 3, 4]:
        assert abs(rail_R1_iter(_x, k) - rail_R1_closed(_x, k)) < 1e-12
        assert abs(rail_R2_iter(_x, k) - rail_R2_closed(_x, k)) < 1e-12
        # R3 needs a NaN-aware comparison: x = 0.2 is an exact pole of F^4
        # (the M^4 denominator is 1 - 5x), so both forms return NaN there and
        # a plain abs(a - b) < tol test would fail whatever the tolerance.
        # Finite values are compared relatively because R3 outputs vary in scale.
        _r3_iter = rail_R3_iter(_x, k)
        _r3_closed = rail_R3_closed(_x, k)
        assert (
            (math.isnan(_r3_iter) and math.isnan(_r3_closed))
            or math.isclose(_r3_iter, _r3_closed, rel_tol=1e-9)
        ), f"R3 closed form disagrees with iteration at x={_x}, k={k}: {_r3_iter} vs {_r3_closed}"

# ============ Scoring & stats ============
def best_rel_error_for_combo(x0: float, rail_iter) -> tuple[float, int, float]:
    """Find the iterate count in 1..ITER_CAP that lands closest to TARGET_ALPHA_INV.

    `rail_iter(x0, k)` is evaluated for each k; results that are not finite
    real numbers are skipped. Returns (best_rel_error, best_k, best_value),
    where ties keep the smallest k. If no k gives a usable value the result
    is (inf, None, None).
    """
    best_err = float("inf")
    best_k = None
    best_val = None
    for depth in range(1, ITER_CAP + 1):
        value = rail_iter(x0, depth)
        usable = isinstance(value, (float, int)) and math.isfinite(value)
        if usable:
            err = abs(value - TARGET_ALPHA_INV) / TARGET_ALPHA_INV
            # Strict comparison: an equal error at a larger k never replaces the earlier one.
            if err < best_err:
                best_err, best_k, best_val = err, depth, value
    return (best_err, best_k, best_val)

def empirical_p(test_err: float, null_errs: list[float]) -> float:
    """Conservative one-sided empirical p-value.

    p = (1 + #{null <= test}) / (1 + M), where M counts only the finite
    null errors. Returns 1.0 when no finite null error is available.
    """
    finite_nulls = np.array(null_errs, dtype=float)
    finite_nulls = finite_nulls[np.isfinite(finite_nulls)]
    n_null = finite_nulls.size
    if n_null == 0:
        return 1.0
    # Nulls that did at least as well as the test statistic (smaller error is better).
    n_as_good = np.sum(finite_nulls <= test_err)
    return (1.0 + n_as_good) / (1.0 + n_null)

def benjamini_hochberg(pvals: np.ndarray, alpha: float = 0.05):
    """Benjamini-Hochberg FDR procedure.

    Returns (is_sig, qvals), both aligned with the input order:
    `is_sig` is a bool array marking the rejected hypotheses at level `alpha`,
    `qvals` holds the BH-adjusted p-values (monotone in rank, capped at 1).
    """
    p = np.array(pvals, dtype=float)
    m = len(p)
    order = np.argsort(p)
    ranked = p[order]
    cutoffs = (np.arange(1, m + 1) / m) * alpha

    # Reject everything up to the largest rank whose p-value clears its cutoff.
    reject = np.zeros(m, dtype=bool)
    passing = np.flatnonzero(ranked <= cutoffs)
    if passing.size:
        reject[order[: passing[-1] + 1]] = True

    # q-values: running minimum of p * m / rank, swept from the largest rank down.
    adjusted = np.empty(m, dtype=float)
    running = 1.0
    for rank in range(m, 0, -1):
        running = min(running, ranked[rank - 1] * m / rank)
        adjusted[rank - 1] = running

    qvals = np.empty(m, dtype=float)
    qvals[order] = adjusted
    return reject, qvals

# ============ Main run ============
def main():
    """Run the full sweep, write the two CSV reports, and print short summaries.

    For every (sequence, base, N) combination the sequence prefix is read as a
    base-`base` fraction and scored on each rail in RAILS by its best relative
    error to TARGET_ALPHA_INV. That error is compared against two null
    ensembles built from the same prefix (run-length-shuffled and
    digit-permuted, NULL_REPS draws each). The larger of the two empirical
    p-values is kept, and BH-FDR at 0.05 is applied across all rows.

    Side effects: consumes the global `random` stream, creates OUT_DIR if
    needed, writes two CSV files there, and prints summary tables.
    """
    # `display` is a global injected by IPython/Colab; fall back to print so the
    # summaries still appear (instead of a NameError) when run as a plain script.
    try:
        show = display
    except NameError:
        show = print

    rows = []

    # Build sequence fns map from names
    SEQ_FUNS = {
        "fib_word": fib_word,
        "period_doubling_complement": period_doubling_complement,
    }

    for seq_label, seq_key in USE_SEQS.items():
        seq_fn = SEQ_FUNS[seq_key]
        for base in BASES:
            for N in N_LIST:
                bits = seq_fn(N)
                x = value_from_bits_in_base(bits, base)
                rl = run_lengths(bits)

                # Draw order is part of reproducibility: all run-length nulls
                # first, then all permutation nulls (permuted_bits copies its input).
                nullA_seqs = [sample_from_run_lengths(rl, N) for _ in range(NULL_REPS)]
                nullB_seqs = [permuted_bits(bits) for _ in range(NULL_REPS)]

                # The null x-values do not depend on the rail, so compute them once.
                nullA_x = [value_from_bits_in_base(nb, base) for nb in nullA_seqs]
                nullB_x = [value_from_bits_in_base(nb, base) for nb in nullB_seqs]

                for rail_name, (rail_iter, rail_closed) in RAILS.items():
                    nullA_errs = [best_rel_error_for_combo(xn, rail_iter)[0] for xn in nullA_x]
                    nullB_errs = [best_rel_error_for_combo(xn, rail_iter)[0] for xn in nullB_x]

                    rel_err, kbest, ybest = best_rel_error_for_combo(x, rail_iter)
                    pA = empirical_p(rel_err, nullA_errs)
                    pB = empirical_p(rel_err, nullB_errs)
                    pmax = max(pA, pB)  # conservative

                    # Analytic closed-form check at the winning iterate count.
                    y_pred = rail_closed(x, int(kbest)) if kbest is not None else float('nan')
                    comparable = (
                        kbest is not None
                        and math.isfinite(y_pred)
                        and math.isfinite(ybest)
                    )
                    rows.append({
                        "sequence": seq_label,
                        "base": base,
                        "N": N,
                        "rail": rail_name,
                        "iter_cap": ITER_CAP,
                        "best_iter": kbest,
                        "best_value": ybest,
                        "rel_error": rel_err,
                        "p_runlength": pA,
                        "p_permute": pB,
                        "p_conservative": pmax,
                        "x_value": x,
                        "y_pred_closed": y_pred,
                        "abs_diff_closed_vs_obs": abs(y_pred - ybest) if comparable else float('nan'),
                    })

    results = pd.DataFrame(rows)

    # BH-FDR across all tests
    is_sig, qvals = benjamini_hochberg(results["p_conservative"].values, alpha=0.05)
    results["q_bh"] = qvals
    results["significant_q<0.05"] = is_sig

    # Generalization: same rail significant for ≥3 Ns in ≥2 bases
    gen_rows = []
    for rail_name, g in results.groupby("rail"):
        base_to_sigNs = {
            b: sorted(g[(g["base"] == b) & (g["significant_q<0.05"])]["N"].unique().tolist())
            for b in sorted(g["base"].unique())
        }
        bases_meeting = [b for b, Ns in base_to_sigNs.items() if len([n for n in Ns if n in N_LIST]) >= 3]
        gen_rows.append({
            "rail": rail_name,
            "bases_meeting_(≥3Ns)": len(bases_meeting),
            "bases_list": bases_meeting,
            "examples_Ns_per_base": {b: base_to_sigNs[b] for b in bases_meeting},
        })
    gen_summary = pd.DataFrame(gen_rows).sort_values("bases_meeting_(≥3Ns)", ascending=False)

    # Save files
    os.makedirs(OUT_DIR, exist_ok=True)
    results_path = os.path.join(OUT_DIR, "alpha_band_preregistered_results.csv")
    gen_path = os.path.join(OUT_DIR, "alpha_band_generalization_summary.csv")
    results.to_csv(results_path, index=False)
    gen_summary.to_csv(gen_path, index=False)

    # Print quick summaries
    print(f"Wrote: {results_path}  ({len(results)} rows)")
    print(f"Wrote: {gen_path}")
    print("\nTop 20 rows by conservative p (lowest first):")
    show(results.sort_values("p_conservative").head(20))

    print("\nGeneralization summary (rails that meet ≥3 Ns in ≥2 bases will show bases here):")
    show(gen_summary)

    # Sanity: show closed-form vs observed for best 10 rows
    print("\nClosed-form vs observed (best 10 by p):")
    show(results.sort_values("p_conservative").head(10)[
        ["rail","sequence","base","N","best_iter","best_value","y_pred_closed","abs_diff_closed_vs_obs","rel_error","p_conservative","q_bh","significant_q<0.05"]
    ])

# Run the experiment when executed as a script or notebook cell (where
# __name__ is "__main__"), but not when this file is imported as a module.
if __name__ == "__main__":
    main()

Wrote: /content/alpha_band_preregistered_results.csv  (216 rows)
Wrote: /content/alpha_band_generalization_summary.csv

Top 20 rows by conservative p (lowest first):


Unnamed: 0,sequence,base,N,rail,iter_cap,best_iter,best_value,rel_error,p_runlength,p_permute,p_conservative,x_value,y_pred_closed,abs_diff_closed_vs_obs,q_bh,significant_q<0.05
22,Fib,3,243,R2_pre3_post3_comp,4,3,134.653811,0.017384,0.09901,0.069307,0.09901,0.115701,134.653811,0.0,0.905766,False
16,Fib,3,27,R2_pre3_post3_comp,4,3,134.653811,0.017384,0.138614,0.069307,0.138614,0.115701,134.653811,0.0,0.905766,False
147,PD_COMP,5,27,R1_pre3_sign_comp,4,4,137.0046,0.000229,0.148515,0.059406,0.148515,0.209933,137.0046,0.0,0.905766,False
43,Fib,5,81,R2_pre3_post3_comp,4,3,189.597314,0.383558,0.09901,0.158416,0.158416,0.040333,189.597314,0.0,0.905766,False
150,PD_COMP,5,81,R1_pre3_sign_comp,4,4,137.0046,0.000229,0.158416,0.069307,0.158416,0.209933,137.0046,0.0,0.905766,False
44,Fib,5,81,R3_pre3_inv_sign_odds,4,4,19.092221,0.860677,0.09901,0.158416,0.158416,0.040333,19.092221,3.552714e-15,0.905766,False
31,Fib,4,81,R2_pre3_post3_comp,4,3,172.680385,0.26011,0.168317,0.148515,0.168317,0.063539,172.680385,2.842171e-14,0.905766,False
32,Fib,4,81,R3_pre3_inv_sign_odds,4,4,22.542972,0.835496,0.168317,0.128713,0.168317,0.063539,22.542972,3.552714e-15,0.905766,False
164,PD_COMP,6,81,R3_pre3_inv_sign_odds,4,4,115.347504,0.158269,0.178218,0.059406,0.178218,0.1722,115.347504,4.405365e-13,0.905766,False
161,PD_COMP,6,27,R3_pre3_inv_sign_odds,4,4,115.347504,0.158269,0.188119,0.089109,0.188119,0.1722,115.347504,4.405365e-13,0.905766,False



Generalization summary (rails that meet ≥3 Ns in ≥2 bases will show bases here):


Unnamed: 0,rail,bases_meeting_(≥3Ns),bases_list,examples_Ns_per_base
0,R1_pre3_sign_comp,0,[],{}
1,R2_pre3_post3_comp,0,[],{}
2,R3_pre3_inv_sign_odds,0,[],{}



Closed-form vs observed (best 10 by p):


Unnamed: 0,rail,sequence,base,N,best_iter,best_value,y_pred_closed,abs_diff_closed_vs_obs,rel_error,p_conservative,q_bh,significant_q<0.05
22,R2_pre3_post3_comp,Fib,3,243,3,134.653811,134.653811,0.0,0.017384,0.09901,0.905766,False
16,R2_pre3_post3_comp,Fib,3,27,3,134.653811,134.653811,0.0,0.017384,0.138614,0.905766,False
147,R1_pre3_sign_comp,PD_COMP,5,27,4,137.0046,137.0046,0.0,0.000229,0.148515,0.905766,False
43,R2_pre3_post3_comp,Fib,5,81,3,189.597314,189.597314,0.0,0.383558,0.158416,0.905766,False
150,R1_pre3_sign_comp,PD_COMP,5,81,4,137.0046,137.0046,0.0,0.000229,0.158416,0.905766,False
44,R3_pre3_inv_sign_odds,Fib,5,81,4,19.092221,19.092221,3.552714e-15,0.860677,0.158416,0.905766,False
31,R2_pre3_post3_comp,Fib,4,81,3,172.680385,172.680385,2.842171e-14,0.26011,0.168317,0.905766,False
32,R3_pre3_inv_sign_odds,Fib,4,81,4,22.542972,22.542972,3.552714e-15,0.835496,0.168317,0.905766,False
164,R3_pre3_inv_sign_odds,PD_COMP,6,81,4,115.347504,115.347504,4.405365e-13,0.158269,0.178218,0.905766,False
161,R3_pre3_inv_sign_odds,PD_COMP,6,27,4,115.347504,115.347504,4.405365e-13,0.158269,0.188119,0.905766,False
