In [1]:
# === Emergence Testbed v3 (skeptic-ready, self-contained) ===
# - exact 1/9 check (Fractions / Decimal)
# - triadic scans across bases, sequences
# - tier-2 dyadic micro-envelopes (symmetric, incl. negatives)
# - hold-out selection
# - two nulls (density-matched + runlength-shuffled)
# - envelope-null for the tier-2 tweaks
# - Bonferroni correction across the declared hypothesis class
# - PNGs + CSVs under /content/out_skeptics
# Runtime: ~10–20 min with defaults; set FAST=True to smoke-test.

import math, random, itertools, os, json, zipfile, io, statistics, sys, time
from fractions import Fraction
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---- Config ----
# Master switch: True shrinks the Monte Carlo sizes below for a quick debug run.
FAST = False
SEED = 12345
# Number of digits generated for every sequence.
N_DIGITS = 2000
BASES = [7, 8, 9, 10, 12]
# Triadic exponents k; the scan multiplier is m = 3^k.
K_RANGE = list(range(13))
SEQ_NAMES = ["F", "R", "TM", "FIBBI"]

# Monte Carlo sizes (tune for runtime).
if FAST:
    TRIALS_DENSITY, TRIALS_RUNLEN, TRIALS_ENVELOPE = 400, 200, 200
else:
    TRIALS_DENSITY, TRIALS_RUNLEN, TRIALS_ENVELOPE = 4000, 2000, 2000

# Tier-2 micro-envelope grid: factor = 1 + sign * c * 2^(-ell).
C_LIST = [1, 3]                  # magnitude multipliers c
ELL_LIST = list(range(8, 15))    # dyadic scales ell
SIGNS = [-1, 1]

# Reference alpha (dimensionless); edit here to use a different reference value.
ALPHA_REF = Decimal("0.0072973525693")

# Working precision (significant digits) of the ambient Decimal context.
getcontext().prec = 120

# ---- Utilities ----
# Module-level PRNG seeded with SEED; it is passed explicitly to the Monte Carlo
# helpers below, so the order of calls determines the random draws they see.
rng = random.Random(SEED)

def ensure_dir(path):
    """Create directory ``path`` (and any missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op. Creation is
    delegated to ``os.makedirs(..., exist_ok=True)`` so there is no window
    between an existence check and the creation in which another process
    could create the directory and make this call fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: For other filesystem failures (e.g. missing permissions).
    """
    os.makedirs(path, exist_ok=True)

# Output directory for the CSV and PNG artefacts written below; it is created
# as a side effect of running this cell.
OUTDIR = "/content/out_skeptics"
ensure_dir(OUTDIR)

def to_decimal(x):
    """Coerce ``x`` to a ``Decimal``.

    A ``Decimal`` argument is returned unchanged (same object). Anything else
    is converted through its ``str()`` form, so a float such as ``0.1``
    becomes ``Decimal("0.1")`` rather than its full binary expansion.
    """
    return x if isinstance(x, Decimal) else Decimal(str(x))

def bits_from_phi_minus_1(n):
    """Return the first ``n`` binary digits of phi - 1 = (sqrt(5) - 1) / 2.

    The digits are those of the standard base-2 fractional expansion
    0.b1 b2 b3 ..., most significant first.

    The computation uses exact integer arithmetic, so every returned bit is
    correct for any ``n``. Repeatedly doubling a ``Decimal`` is only reliable
    for roughly ``prec * log2(10)`` bits of the active context precision;
    beyond that the doubled value carries no real information about phi - 1.

    Args:
        n: Number of bits requested. Values <= 0 yield an empty list.

    Returns:
        A list of ``n`` ints, each 0 or 1.
    """
    if n <= 0:
        return []
    # floor(sqrt(5) * 2^n), computed exactly as isqrt(5 * 4^n).
    scaled_sqrt5 = math.isqrt(5 << (2 * n))
    # floor((sqrt(5) - 1) / 2 * 2^n): subtracting the integer 2^n and halving
    # commute with the floor, so the integer shift below is exact.
    scaled_fraction = (scaled_sqrt5 - (1 << n)) >> 1
    # phi - 1 < 1, so the value fits in n bits; left-pad with zeros to width n.
    return [int(ch) for ch in format(scaled_fraction, "b").zfill(n)]

def fib_word_bits(n):
    """Return the first ``n`` symbols of the Fibonacci word as a list of 0/1 ints.

    The word is grown from the seed "0" by repeatedly applying the morphism
    0 -> 01, 1 -> 0 to every symbol at once, until at least ``n`` symbols
    exist; the result is then truncated to ``n``.
    """
    # translate() substitutes all symbols in a single simultaneous pass.
    morphism = str.maketrans({"0": "01", "1": "0"})
    word = "0"
    while len(word) < n:
        word = word.translate(morphism)
    return [int(symbol == "1") for symbol in word[:n]]

def rabbit_bits_from_fib(fbits):
    """Return the bitwise complement of ``fbits`` (each bit b becomes 1 - b).

    The "Rabbit" sequence used in this notebook is defined as this
    complement of the Fibonacci-word bits.
    """
    flipped = []
    for bit in fbits:
        flipped.append(1 - bit)
    return flipped

def thue_morse_bits(n):
    """Return ``n`` Thue-Morse bits for indices 1..n (inclusive).

    Each bit is the parity of the number of 1s in the binary representation
    of its index. Note that indexing starts at 1, not 0.
    """
    return [bin(index).count("1") & 1 for index in range(1, n + 1)]

def baseB_value_from_bits(bits, B):
    """Read ``bits`` as the base-``B`` fraction 0.b1 b2 b3 ... and return it.

    The value is sum(b_i * B^(-i)) for i = 1..len(bits), accumulated as a
    ``Decimal`` under the active decimal context (so it is rounded to that
    context's precision). Any truthy digit counts as 1.
    """
    radix = Decimal(B)
    unit = Decimal(1)
    place = Decimal(1)   # B^i for the digit currently being processed
    total = Decimal(0)
    for digit in bits:
        place = place * radix
        if digit:
            total = total + unit / place
    return total

def triadic_scan(bits, B, alpha_ref, k_list):
    """Score the triadic estimates (3^k / B^3) * DB against ``alpha_ref``.

    ``DB`` is the base-``B`` value of ``bits`` as computed by
    ``baseB_value_from_bits``.

    Returns:
        A tuple ``(df, DB, step)`` where ``df`` has one row per k in
        ``k_list`` with columns k, m (= 3^k), estimate, error
        (estimate - alpha_ref), abs_err and step, sorted by ascending
        abs_err with a fresh 0..n-1 index; ``step`` is DB / B^3.
    """
    DB = baseB_value_from_bits(bits, B)
    base_cubed = Decimal(B) ** 3
    step = DB / base_cubed
    records = []
    for k in k_list:
        estimate = ((Decimal(3) ** k) / base_cubed) * DB
        error = estimate - alpha_ref
        records.append(
            dict(k=k, m=int(3**k), estimate=estimate, error=error, abs_err=abs(error), step=step)
        )
    ranked = pd.DataFrame(records).sort_values("abs_err", ascending=True)
    return ranked.reset_index(drop=True), DB, step

def first_runs(bits):
    """Return the lengths of the maximal runs of equal values in ``bits``.

    The first entry is the length of the run that starts at ``bits[0]``;
    an empty input yields an empty list.
    """
    if not bits:
        return []
    # groupby yields one group per maximal stretch of equal consecutive items.
    return [sum(1 for _ in run) for _, run in itertools.groupby(bits)]

def shuffle_by_runs(bits, rng):
    """Return a surrogate sequence built from a random permutation of the runs.

    All run lengths of ``bits`` (runs of 0s and runs of 1s pooled together)
    are shuffled with ``rng.shuffle`` and laid out again with alternating
    symbols, starting from ``bits[0]``. Because the two kinds of runs are
    pooled, this is only an approximate run-length null: the count of 1s in
    the result can differ from the input. An empty input returns a copy.
    """
    run_lengths = first_runs(bits)
    if not run_lengths:
        return bits[:]
    shuffled = run_lengths[:]
    rng.shuffle(shuffled)
    symbol = bits[0]
    rebuilt = []
    for length in shuffled:
        rebuilt += [symbol] * length
        symbol = 1 - symbol
    return rebuilt[:len(bits)]

def density_null_bits(p, n, rng):
    """Draw ``n`` independent Bernoulli(``p``) bits using ``rng``.

    Exactly one ``rng.random()`` call is made per bit, in order; a bit is 1
    when the draw is strictly below ``p``.
    """
    return [int(rng.random() < p) for _ in range(n)]

def pick_k_on_prefix(bits, B, alpha_ref, k_list, prefix_len):
    """Return the k from ``k_list`` with the smallest abs error on a prefix.

    Only ``bits[:prefix_len]`` is scanned; the caller is expected to score
    the chosen k on other data.
    """
    ranked = triadic_scan(bits[:prefix_len], B, alpha_ref, k_list)[0]
    winner = ranked.iloc[0]   # rows come back sorted by ascending abs_err
    return int(winner["k"])

def best_envelope_error(alpha_star, alpha_ref):
    """Find the tier-2 dyadic tweak of ``alpha_star`` closest to ``alpha_ref``.

    Every factor 1 + sign * c * 2^(-ell) over the grid
    C_LIST x ELL_LIST x SIGNS is applied to ``alpha_star``.

    Returns:
        A dict with keys c, ell, sign, factor, abs_err and estimate for the
        grid point with the smallest |alpha_star * factor - alpha_ref|
        (the first one in grid order on ties), or None if the grid is empty.
    """
    unit = Decimal(1)
    candidates = []
    for c, ell, sgn in itertools.product(C_LIST, ELL_LIST, SIGNS):
        factor = unit + Decimal(sgn) * Decimal(c) * (unit / (Decimal(2)**ell))
        est = alpha_star * factor
        candidates.append(
            dict(c=c, ell=ell, sign=sgn, factor=factor, abs_err=abs(est - alpha_ref), estimate=est)
        )
    # min() keeps the earliest candidate among equal errors.
    return min(candidates, key=lambda cand: cand["abs_err"], default=None)

def envelope_null_errors(alpha_star_dist, alpha_ref, rng, n_trials):
    """Best tier-2 envelope error for each of ``n_trials`` null estimates.

    Trial i uses ``alpha_star_dist[i % len(alpha_star_dist)]``, so the pool
    is walked in order and reused cyclically when it is shorter than
    ``n_trials``. The same C_LIST x ELL_LIST x SIGNS factor grid as
    ``best_envelope_error`` is applied. ``rng`` is accepted but not used.

    Returns:
        A list of ``n_trials`` floats (minimum abs error per trial).
    """
    unit = Decimal(1)
    # The factor grid does not depend on the trial, so build it once.
    factors = [
        unit + Decimal(sgn) * Decimal(c) * (unit / (Decimal(2)**ell))
        for c, ell, sgn in itertools.product(C_LIST, ELL_LIST, SIGNS)
    ]
    best_errors = []
    for trial in range(n_trials):
        draw = alpha_star_dist[trial % len(alpha_star_dist)]
        closest = min(abs(draw * f - alpha_ref) for f in factors)
        best_errors.append(float(closest))   # floats are enough for hist/stats
    return best_errors

def p_value(one_sided_observed, null_samples):
    """Empirical one-sided p-value for an unusually *small* observation.

    Computes (1 + #{null <= observed}) / (1 + len(null)), i.e. the lower-tail
    fraction of null samples at or below the observed value, with the +1
    finite-sample correction so the result is never exactly 0.
    """
    samples = list(null_samples)
    at_or_below = len([value for value in samples if value <= one_sided_observed])
    return (at_or_below + 1) / (len(samples) + 1)

def bonferroni(p, m):
    """Return the Bonferroni-adjusted p-value ``p * m``, capped at 1.0."""
    adjusted = p * m
    return adjusted if adjusted < 1.0 else 1.0

# ---- Build sequences ----
# Each sequence is a list of N_DIGITS 0/1 values; SEQ_MAP keys match SEQ_NAMES.
print("Generating sequences...")
F_bits = fib_word_bits(N_DIGITS)
R_bits = rabbit_bits_from_fib(F_bits)   # bitwise complement of F_bits
TM_bits = thue_morse_bits(N_DIGITS)
FIBBI_bits = bits_from_phi_minus_1(N_DIGITS)
SEQ_MAP = {"F":F_bits, "R":R_bits, "TM":TM_bits, "FIBBI":FIBBI_bits}

# ---- Exact DF+DR vs 1/9 with rationals (uses the finite N_DIGITS prefix only) ----
print("Checking DF+DR vs 1/9 (exact with Fractions on finite prefix)...")
def DF_fraction(bits):
    """Return the exact value of 0.b1 b2 ... (base 10) as a ``Fraction``.

    Each element of ``bits`` contributes one decimal digit: 1 if the element
    is truthy, otherwise 0. An empty input gives ``Fraction(0)``.
    """
    # Accumulate the digit string as one integer, then divide once by 10^len.
    digits_as_int = 0
    n_digits = 0
    for b in bits:
        digits_as_int = 10 * digits_as_int + (1 if b else 0)
        n_digits += 1
    return Fraction(digits_as_int, 10 ** n_digits)

# Exact rational values of the F and R digit strings read in base 10.
DF = DF_fraction(F_bits)
DR = DF_fraction(R_bits)
one_ninth = Fraction(1,9)
# Exact gap between the finite-prefix sum and 1/9. R is the complement of F,
# so DF + DR is the N_DIGITS-digit decimal 0.111...1.
finite_diff = DF + DR - one_ninth

# ---- Main loops: triadic scans and split-half hold-out ----
# For every (base, sequence) pair: keep the best-k triadic row on the full
# sequence, then pick k on the first half and score it on the second half.
rows_best = []
rows_prefix = []
alpha_star_pool_for_env_null = []   # NOTE(review): never appended to or read in the visible code

for B in BASES:
    for seq_name in SEQ_NAMES:
        bits = SEQ_MAP[seq_name]
        # triadic scan over all digits; rows are sorted, so row 0 has the smallest abs_err
        df_scan, DB, step = triadic_scan(bits, B, ALPHA_REF, K_RANGE)
        best_row = df_scan.iloc[0].to_dict()
        # add base/sequence labels; DB and step are stored as floats here
        best_row.update(dict(base=B, sequence=seq_name, DB=float(DB), step=float(step)))
        rows_best.append(best_row)
        # hold-out: choose k on first half; score on second half
        N1 = N_DIGITS//2
        k_hat = pick_k_on_prefix(bits, B, ALPHA_REF, K_RANGE, N1)
        # score chosen k on second half only; bits[N1:] is read as a fresh
        # digit string whose first element is the B^-1 place
        df_h2, DB2, step2 = triadic_scan(bits[N1:], B, ALPHA_REF, [k_hat])
        holdout_err = float(abs(df_h2.iloc[0]["error"]))
        rows_prefix.append(dict(base=B, sequence=seq_name, k_hat=k_hat, holdout_abs_err=holdout_err))

# Save triadic results (one row per sequence/base pair in each CSV)
df_best = pd.DataFrame(rows_best).sort_values(["sequence","base"]).reset_index(drop=True)
df_prefix = pd.DataFrame(rows_prefix)
df_best.to_csv(os.path.join(OUTDIR,"triadic_best.csv"), index=False)
df_prefix.to_csv(os.path.join(OUTDIR,"holdout_selection.csv"), index=False)

# ---- Monte Carlo nulls: density & run-length ----
def mc_best_errors(bits, B, trials, mode, rng):
    """Simulate ``trials`` null sequences and return each one's best abs error.

    Args:
        bits: Observed sequence; its length and fraction of 1s parameterise
            the "density" null, and its runs feed the "runlen" null.
        B: Base passed to ``triadic_scan``.
        trials: Number of surrogate sequences to draw.
        mode: "density" (Bernoulli bits via ``density_null_bits``) or
            "runlen" (``shuffle_by_runs``).
        rng: Random source consumed by the surrogate generators.

    Returns:
        A list of ``trials`` floats: the smallest triadic abs error over
        K_RANGE for each surrogate.

    Raises:
        ValueError: If ``mode`` is not recognised (raised when the first
            surrogate is drawn).
    """
    length = len(bits)
    ones_fraction = sum(bits) / length

    def draw_surrogate():
        if mode == "density":
            return density_null_bits(ones_fraction, length, rng)
        if mode == "runlen":
            return shuffle_by_runs(bits, rng)
        raise ValueError("unknown mode")

    best_errors = []
    for _ in range(trials):
        ranked = triadic_scan(draw_surrogate(), B, ALPHA_REF, K_RANGE)[0]
        best_errors.append(float(ranked.iloc[0]["abs_err"]))
    return best_errors

print("Monte Carlo nulls...")
# One summary dict per base. All draws below come from the shared module-level
# `rng`, so the results depend on the order of these calls.
mc_summary = []
for B in BASES:
    # Only the F sequence is tested against the nulls; the other sequences are
    # reported descriptively elsewhere.
    bits = F_bits
    mc_den = mc_best_errors(bits, B, TRIALS_DENSITY, "density", rng)
    mc_run = mc_best_errors(bits, B, TRIALS_RUNLEN,  "runlen",  rng)
    # Observed best triadic error for F at this base:
    obs = float(df_best[(df_best.sequence=="F") & (df_best.base==B)].iloc[0]["abs_err"])
    p_den = p_value(obs, mc_den)
    p_run = p_value(obs, mc_run)
    mc_summary.append(dict(base=B, obs_abs_err=obs, p_density=p_den, p_runlen=p_run,
                           n_den=len(mc_den), n_run=len(mc_run)))
    # Pool of null alpha* values for the envelope null. mc_best_errors returns
    # only the errors, so a fresh batch of density-null sequences is simulated
    # here and the best triadic *estimate* of each is kept.
    alpha_pool = []
    for t in range(min(TRIALS_ENVELOPE, TRIALS_DENSITY)):
        sim = density_null_bits(sum(bits)/len(bits), len(bits), rng)
        df_scan_sim, _, _ = triadic_scan(sim, B, ALPHA_REF, K_RANGE)
        alpha_pool.append(Decimal(str(df_scan_sim.iloc[0]["estimate"])))
    # Envelope null
    # Observed envelope error: best tier-2 tweak of F's best triadic estimate at this base
    df_f_base = df_best[(df_best.sequence=="F") & (df_best.base==B)]
    est_star = Decimal(str(df_f_base.iloc[0]["estimate"]))
    env_obs = float(best_envelope_error(est_star, ALPHA_REF)["abs_err"])
    # The same envelope grid is applied to every null alpha* in the pool
    env_null = envelope_null_errors(alpha_pool, ALPHA_REF, rng, TRIALS_ENVELOPE)
    p_env = p_value(env_obs, env_null)
    mc_summary[-1].update(dict(env_obs_abs_err=env_obs, p_env=p_env, n_env=len(env_null)))

df_mc = pd.DataFrame(mc_summary)
# Bonferroni correction with m = number of bases tested
df_mc["p_density_bonf"] = df_mc["p_density"].apply(lambda p: bonferroni(p, len(BASES)))
df_mc["p_runlen_bonf"]  = df_mc["p_runlen"].apply(lambda p: bonferroni(p, len(BASES)))
df_mc["p_env_bonf"]     = df_mc["p_env"].apply(lambda p: bonferroni(p, len(BASES)))
df_mc.to_csv(os.path.join(OUTDIR,"mc_pvalues.csv"), index=False)

# ---- Tier-2 best envelopes for the observed triadic best ----
# One row per (sequence, base): the dyadic tweak of the best triadic estimate
# that lands closest to ALPHA_REF.
env_rows = []
for idx, row in df_best.iterrows():
    est = Decimal(str(row["estimate"]))
    best_env = best_envelope_error(est, ALPHA_REF)
    best_env.update(dict(sequence=row["sequence"], base=row["base"]))
    env_rows.append(best_env)
df_env = pd.DataFrame(env_rows)
df_env.to_csv(os.path.join(OUTDIR,"tier2_envelopes.csv"), index=False)

# ---- Base sensitivity for F only (descriptive) ----
# Re-runs the full triadic scan of F at each base and records the winning k
# and its abs error.
base_sens = []
for B in BASES:
    df_scan, _, _ = triadic_scan(F_bits, B, ALPHA_REF, K_RANGE)
    base_sens.append(dict(base=B, k=int(df_scan.iloc[0]["k"]), best_abs_err=float(df_scan.iloc[0]["abs_err"])))
pd.DataFrame(base_sens).to_csv(os.path.join(OUTDIR,"base_sensitivity_F.csv"), index=False)

# ---- Plots (Matplotlib only; one plot per figure; no style/colors set) ----
def save_plot_triads(df_best, seq, path):
    """Save a scatter of the best triadic abs error per base for one sequence.

    One point is drawn for every base in BASES, labelled with the winning k.

    Args:
        df_best: Frame with columns sequence, base, k and abs_err.
        seq: Sequence name to select from ``df_best``.
        path: Destination image path; the figure is written at 160 dpi and
            closed afterwards.
    """
    rows_for_seq = df_best[df_best.sequence==seq].copy()
    fig = plt.figure()
    ax = plt.gca()
    for B in BASES:
        at_base = rows_for_seq[rows_for_seq.base==B]
        abs_err = float(at_base["abs_err"].values[0])
        k_best = int(at_base["k"].values[0])
        ax.scatter([B], [abs_err])
        ax.text(B, abs_err, f'k={k_best}', ha='center', va='bottom', fontsize=9)
    ax.set_xlabel("base B")
    ax.set_ylabel("best |α* - α|")
    ax.set_title(f"Best triadic error per base (sequence={seq})")
    fig.savefig(path, dpi=160, bbox_inches="tight")
    plt.close(fig)

# One best-error-per-base figure for each of the four sequences.
save_plot_triads(df_best, "F",     os.path.join(OUTDIR,"plot_best_by_base_F.png"))
save_plot_triads(df_best, "R",     os.path.join(OUTDIR,"plot_best_by_base_R.png"))
save_plot_triads(df_best, "TM",    os.path.join(OUTDIR,"plot_best_by_base_TM.png"))
save_plot_triads(df_best, "FIBBI", os.path.join(OUTDIR,"plot_best_by_base_FIBBI.png"))

# Monte Carlo histograms (F sequence) for the density and run-length nulls
def save_mc_hist(base_val, df_best, df_mc, OUTDIR):
    """Save histograms of the density and run-length nulls for F at one base.

    Both nulls are re-simulated here with their own fixed seeds (SEED+77 and
    SEED+88) rather than taken from ``df_mc``, which is accepted but unused.
    The observed best abs error is drawn as a dashed vertical line.

    Args:
        base_val: Base whose F row in ``df_best`` supplies the observed error.
        df_best: Frame with columns sequence, base and abs_err.
        df_mc: Unused.
        OUTDIR: Directory that receives ``hist_mc_density_base<base>.png``
            and ``hist_mc_runlen_base<base>.png``.
    """
    f_rows = df_best[(df_best.sequence=="F") & (df_best.base==base_val)]
    obs = float(f_rows.iloc[0]["abs_err"])
    bits = F_bits

    def render(samples, xlabel, title, filename):
        # One histogram per figure, with the observed value marked.
        fig = plt.figure()
        plt.hist(samples, bins=40)
        plt.axvline(obs, linestyle='--')
        plt.xlabel(xlabel)
        plt.ylabel("count")
        plt.title(title)
        fig.savefig(os.path.join(OUTDIR, filename), dpi=160, bbox_inches="tight")
        plt.close(fig)

    # density
    density_trials = TRIALS_DENSITY if not FAST else 400
    mc_den = mc_best_errors(bits, base_val, density_trials, "density", random.Random(SEED+77))
    render(
        mc_den,
        "best |α* - α| under density null",
        f"MC density null (F, base={base_val})",
        f"hist_mc_density_base{base_val}.png",
    )
    # runlength
    runlen_trials = TRIALS_RUNLEN if not FAST else 200
    mc_run = mc_best_errors(bits, base_val, runlen_trials, "runlen", random.Random(SEED+88))
    render(
        mc_run,
        "best |α* - α| under run-length null",
        f"MC run-length null (F, base={base_val})",
        f"hist_mc_runlen_base{base_val}.png",
    )

# Null histograms are produced for base 10 only.
save_mc_hist(10, df_best, df_mc, OUTDIR)

# ---- Baseline summary printout ----
# DF and DR are exact Fractions with N_DIGITS-digit denominators, so the
# "DF + DR" line below prints very long integers.
print("\n=== BASELINE (exact) ===")
print(f"digits_used N                : {N_DIGITS}")
print(f"DF + DR (finite)             : {DF} + {DR} = {DF+DR}")
print(f"1/9 (limit)                  : {one_ninth}")
print(f"finite - 1/9 (exact)         : {DF+DR-one_ninth}  (~{float(DF+DR-one_ninth):+.3e})")

print("\n=== Best triadic per sequence/base (k, abs_err) ===")
print(df_best[["sequence","base","k","abs_err"]].to_string(index=False))

print("\n=== Hold-out (pick k on first half; score on second half) ===")
print(df_prefix.to_string(index=False))

print("\n=== Monte Carlo p-values (F only) ===")
print(df_mc.to_string(index=False))

print("\n=== Tier-2 envelopes (best dyadic tweak per observed triadic) ===")
print(df_env.to_string(index=False))

print(f"\nSaved outputs → {OUTDIR}")


Generating sequences...
Checking DF+DR vs 1/9 (exact with Fractions on finite prefix)...
Monte Carlo nulls...

=== BASELINE (exact) ===
digits_used N                : 2000
DF + DR (finite)             : 10010100100101001010010010100100101001010010010100101001001010010010100101001001010010010100101001001010010100100101001001010010100100101001010010010100100101001010010010100100101001010010010100101001001010010010100101001001010010010100101001001010010100100101001001010010100100101001010010010100100101001010010010100100101001010010010100101001001010010010100101001001010010100100101001001010010100100101001001010010100100101001010010010100100101001010010010100100101001010010010100101001001010010010100101001001010010100100101001001010010100100101001001010010100100101001010010010100100101001010010010100100101001010010010100101001001010010010100101001001010010100100101001001010010100100101001001010010100100101001010010010100100101001010010010100101001001010010010100101001001010010010100101001