In [1]:
# === Emergence Tier-1/2/3 test suite (single cell) ===========================
# Reproducible triadic driver + dyadic envelopes + square-root study + tier-3 probe
# Bases 2..12, sequences F, R, TM, FIBBI, with MC nulls and hold-outs.
# Saves CSVs/PNGs in /content/out_tier3_suite

import math, random, statistics, itertools, os, sys
from fractions import Fraction
from decimal import Decimal, getcontext
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---------- Config ----------
# Directory that receives every CSV/PNG written by this cell.
# It is created immediately (no error if it already exists).
OUTDIR = "/content/out_tier3_suite"
os.makedirs(OUTDIR, exist_ok=True)

# Digits to generate from each sequence (safe default; can push higher if runtime allows)
N_DIGITS = 4000

# Triadic exponents to scan (k from 0..K_MAX -> multiplier 3^k)
K_MAX = 8

# Monte Carlo sizes (dial up if you want; keep modest to avoid timeouts)
MC_DENSITY = 1500
MC_RUNLEN  = 800
SEED = 123456
random.seed(SEED)
np.random.seed(SEED)

# Decimal precision for high-precision ops (binary φ−1 extraction, etc.).
# This MUST be set before any Decimal constant below is derived: Decimal
# arithmetic rounds to the precision active at the time of the operation, so
# computing phi first would freeze it at the default 28 significant digits and
# make every bit of the φ−1 expansion beyond roughly the 90th meaningless.
getcontext().prec = max(60, N_DIGITS + 80)

# Reference constants (CODATA-like for α; update here if you want to test robustness)
alpha_ref = Decimal("0.0072973525693")
inv137    = Decimal(1) / Decimal(137)
# NOTE: the next four go through a Python float, so they carry float precision
# only, regardless of the Decimal context precision.
pi        = Decimal(str(math.pi))
e         = Decimal(str(math.e))
sqrt2     = Decimal(str(math.sqrt(2)))
sqrt3     = Decimal(str(math.sqrt(3)))
phi       = (Decimal(1) + Decimal(5).sqrt()) / 2  # golden ratio via Decimal sqrt, at full context precision

# ---------- Helpers: sequence generators (first N bits) ----------

def fib_word_bits(n):
    """Return the first n bits of the Fibonacci word as a list of 0/1 ints.

    The word is the fixed point of the morphism 0 -> 01, 1 -> 0 started from
    "0". The morphism is applied to all characters simultaneously; chaining
    two ``str.replace`` calls would rewrite the freshly produced 1s as well
    and yield an all-zero string, so a single-pass translation is used.

    For n <= 0 the result is an empty list.
    """
    morphism = {ord("0"): "01", ord("1"): "0"}
    word = "0"
    # Each pass lengthens the word; stop once there are enough characters.
    while len(word) < n:
        word = word.translate(morphism)
    return [1 if ch == "1" else 0 for ch in word[:n]]

def rabbit_bits(n):
    """Return the first n bits of the rabbit sequence: the Fibonacci word with every bit flipped."""
    # XOR with 1 flips a 0/1 bit.
    return [bit ^ 1 for bit in fib_word_bits(n)]

def thue_morse_bits(n):
    """Return the first n Thue–Morse bits: entry i is 1 when i (0-indexed) has an odd number of 1-bits."""
    # Count the set bits of each index in its binary spelling and keep the parity.
    return [format(index, "b").count("1") % 2 for index in range(n)]

def phi_minus_1_binary_bits(n):
    """Return the first n bits of the binary expansion of φ−1 = (√5 − 1)/2.

    Uses the multiply-by-2 method on a Decimal value: double the fraction,
    emit the integer part as the next bit, keep the remainder.

    The value is derived here, inside a local Decimal context whose precision
    is at least n + 30 significant digits, instead of relying on a module-level
    constant. Each doubling amplifies any rounding error by a factor of two,
    so the starting value needs more correct digits than the number of bits
    requested; n + 30 decimal digits comfortably exceeds the ~0.3·n required.
    The caller's Decimal context is left untouched.

    For n <= 0 the result is an empty list.
    """
    from decimal import localcontext  # function-scope: not among the file's top-level imports

    bits = []
    with localcontext() as ctx:
        ctx.prec = max(ctx.prec, n + 30)
        frac = (Decimal(5).sqrt() - 1) / 2  # φ−1 ∈ (0,1)
        for _ in range(n):
            frac *= 2
            if frac >= 1:
                bits.append(1)
                frac -= 1
            else:
                bits.append(0)
    return bits

# ---------- Map bits -> real via "digit map" in base B ----------
def digits_to_real(bits, B=10):
    """Read bits as the base-B fraction 0.b1 b2 b3 ... and return its value as a Decimal.

    Only the truthiness of each entry matters: a truthy entry contributes
    B**-(position) and a falsy one contributes nothing. Arithmetic is done in
    the active Decimal context.
    """
    radix = Decimal(B)
    place = Decimal(1) / radix  # weight of the first digit after the point
    total = Decimal(0)
    for digit in bits:
        total = total + place if digit else total
        place = place / radix
    return total

# ---------- Exact check: complement identity DF + DR = 1/9 (finite) ----------
def complement_check(n):
    """Return (DF, DR, DF + DR) as exact Fractions for the first n digits.

    DF and DR are the decimal numbers 0.d1 d2 ... dn whose digits are the
    first n bits of the Fibonacci word and of the rabbit sequence
    respectively. Because the two sequences are bitwise complements, the sum
    has n digits all equal to 1, i.e. (10**n - 1) / (9 * 10**n); it approaches
    1/9 as n grows but is never equal to it for finite n.
    """
    F = fib_word_bits(n)
    R = rabbit_bits(n)
    # Accumulate the digit strings as integers (Horner), then scale once:
    # exact, with no floating-point carries.
    f_num = 0
    r_num = 0
    for f, r in zip(F, R):
        f_num = 10 * f_num + f
        r_num = 10 * r_num + r
    scale = 10 ** len(F)
    DF = Fraction(f_num, scale)
    DR = Fraction(r_num, scale)
    return DF, DR, DF + DR

# ---------- Triadic scan for a given bit-sequence and base B ----------
def triadic_scan(bits, B, k_max=K_MAX):
    """Scan estimates (3**k / 1000) * D for k = 0..k_max against alpha_ref.

    D is the base-B digit-map value of bits. Returns (best, rows): rows holds
    one dict per k with keys base, k, estimate, abs_err and D_value; best is
    the row with the smallest abs_err (the lowest k wins a tie).
    """
    D = digits_to_real(bits, B)
    thousand = Decimal(1000)

    def row_for(k):
        estimate = (Decimal(3) ** k / thousand) * D
        return {
            'base': B,
            'k': k,
            'estimate': estimate,
            'abs_err': abs(estimate - alpha_ref),
            'D_value': D,
        }

    rows = [row_for(k) for k in range(k_max + 1)]
    best = min(rows, key=lambda row: row['abs_err'])
    return best, rows

# ---------- Hold-out: choose k on first half, score on second half ----------
def triadic_holdout(bits_full, B):
    """Pick the triadic exponent on the first half of the bits, score it on the second half.

    The second half is read as its own digit string starting right after the
    radix point. Returns (k_hat, est2, err2): the exponent chosen on the first
    half, the resulting estimate on the second half, and its absolute
    distance from alpha_ref.
    """
    split = len(bits_full) // 2
    train_bits, test_bits = bits_full[:split], bits_full[split:]
    k_hat = triadic_scan(train_bits, B)[0]['k']
    # Apply the selected multiplier, unchanged, to the held-out digits.
    scale = (Decimal(3) ** k_hat) / Decimal(1000)
    est2 = scale * digits_to_real(test_bits, B)
    return k_hat, est2, abs(est2 - alpha_ref)

# ---------- Monte Carlo nulls ----------
def mc_density_null(bits_ref, B, trials=MC_DENSITY):
    """Best triadic error for `trials` i.i.d. Bernoulli surrogates of bits_ref.

    Each surrogate has the same length as bits_ref and draws every bit as 1
    with probability equal to the mean of bits_ref, using the module-level
    `random` generator. Returns a NumPy array with one best abs_err per trial.
    """
    size = len(bits_ref)
    p = sum(bits_ref) / size

    def one_trial():
        surrogate = [int(random.random() < p) for _ in range(size)]
        return float(triadic_scan(surrogate, B)[0]['abs_err'])

    return np.array([one_trial() for _ in range(trials)])

def run_lengths(bits):
    """Run-length encode bits into a list of (value, length) pairs, in order of appearance."""
    if not bits:
        return []
    # groupby clusters consecutive equal entries; the size of each cluster is the run length.
    return [(value, sum(1 for _ in run)) for value, run in itertools.groupby(bits)]

def from_run_lengths(rls):
    """Expand a list of (value, length) pairs back into a flat list of values."""
    return [value for value, count in rls for _ in range(count)]

def mc_runlen_null(bits_ref, B, trials=MC_RUNLEN)  :
    """Best triadic error for `trials` surrogates built by shuffling the runs of bits_ref.

    bits_ref is cut into (value, length) runs; each trial shuffles the order of
    those runs with the module-level `random` generator and concatenates them
    again. Returns a NumPy array with one best abs_err per trial.

    NOTE(review): the shuffle can place two runs of the same value next to
    each other, in which case they merge into one longer run in the surrogate.
    """
    target_len = len(bits_ref)
    blocks = run_lengths(bits_ref)
    results = []
    for _ in range(trials):
        shuffled = list(blocks)
        random.shuffle(shuffled)
        # The first block after shuffling decides the leading value.
        surrogate = from_run_lengths(shuffled)
        # Safety net: force the surrogate to the reference length (crop, or pad with its last value).
        surplus = len(surrogate) - target_len
        if surplus > 0:
            surrogate = surrogate[:target_len]
        elif surplus < 0:
            surrogate += [surrogate[-1]] * (-surplus)
        winner = triadic_scan(surrogate, B)[0]
        results.append(float(winner['abs_err']))
    return np.array(results)

# ---------- Square-root study ----------
def square_root_panel(bits, B, label):
    """Compare signed square-root transforms of D and of the best triadic estimate with fixed targets.

    D is the base-B digit-map value of bits and alpha* is the estimate of the
    best triadic row. Every form (±sqrt(D), ±sqrt(alpha*), and, when defined,
    ±sqrt(1-D) and ±sqrt(1/D)) is paired with every target (sqrt of alpha_ref,
    sqrt(1/pi), sqrt2, sqrt3). Returns (best, df): the best triadic row and a
    DataFrame of all pairs sorted by abs_err, tagged with `label` and B.
    """
    D = digits_to_real(bits, B)
    best, _ = triadic_scan(bits, B)
    a_star = best['estimate']  # Decimal

    zero = Decimal(0)
    one = Decimal(1)
    targets = {
        'sqrt_alpha' : alpha_ref.sqrt(),
        'sqrt_1_pi'  : (one / pi).sqrt(),
        'sqrt2'      : sqrt2,
        'sqrt3'      : sqrt3
    }

    # Collect (form name, value) pairs first; non-positive radicands fall back to 0 for both signs.
    transforms = []
    if D > 0:
        root_D = D.sqrt()
        transforms += [("sqrt(D)", root_D), ("-sqrt(D)", -root_D)]
    else:
        transforms += [("sqrt(D)", zero), ("-sqrt(D)", zero)]
    if a_star > 0:
        root_a = a_star.sqrt()
        transforms += [("sqrt(alpha*)", root_a), ("-sqrt(alpha*)", -root_a)]
    else:
        transforms += [("sqrt(alpha*)", zero), ("-sqrt(alpha*)", zero)]
    # Complement is only taken for D strictly inside (0, 1); reciprocal only for D > 0.
    if 0 < D < 1:
        root_c = (one - D).sqrt()
        transforms += [("sqrt(1-D)", root_c), ("-sqrt(1-D)", -root_c)]
    if D > 0:
        root_r = (one / D).sqrt()
        transforms += [("sqrt(1/D)", root_r), ("-sqrt(1/D)", -root_r)]

    cands = [
        dict(seq=label, base=B, form=form_name, target=target_name,
             value=form_val, target_val=target_val, abs_err=abs(form_val - target_val))
        for form_name, form_val in transforms
        for target_name, target_val in targets.items()
    ]
    df = pd.DataFrame(cands).sort_values("abs_err")
    return best, df

# ---------- Tier-3 probe ----------
def tier3_probe():
    """Tabulate m * (base**(pi-2) - root) for m = 96..104 in three variants and rank them by distance to alpha_ref.

    Family A uses base phi and root sqrt3, family B swaps the root for sqrt2,
    and family C swaps the base for 1/phi. Returns a DataFrame with columns
    family, m, swap, value and abs_err, sorted by abs_err.
    """
    exponent = pi - Decimal(2)
    phi_term = phi ** exponent
    inv_phi_term = (Decimal(1) / phi) ** exponent
    # (family tag, swap label, bracketed difference that gets multiplied by m)
    variants = (
        ('A', 'none',  phi_term - sqrt3),
        ('B', 'sqrt2', phi_term - sqrt2),
        ('C', '1/phi', inv_phi_term - sqrt3),
    )
    rows = []
    for family, swap, gap in variants:
        for m in range(96, 105):
            val = Decimal(m) * gap
            rows.append({'family': family, 'm': m, 'swap': swap,
                         'value': val, 'abs_err': abs(val - alpha_ref)})
    return pd.DataFrame(rows).sort_values("abs_err")

# ---------- De Bruijn profile (simple, optional) ----------
def kgram_profile(bits, k):
    """Return the relative frequency of every length-k window in bits.

    Windows overlap (stride 1). Keys are the window entries concatenated as a
    string; values are counts divided by the number of windows. A sequence
    shorter than k yields an empty dict.
    """
    tally = Counter()
    for start in range(len(bits) - k + 1):
        tally[tuple(bits[start:start + k])] += 1
    total = sum(tally.values())
    return {"".join(str(sym) for sym in gram): hits / total for gram, hits in tally.items()}

# ---------- Magic grid sketch (row/col sum structure) ----------
def magic_grid_metrics(bits, n):
    """Lay the first n*n bits out row-major in an n×n grid and summarise its row/column sums.

    If fewer than n*n bits are supplied the tail is padded with zeros. The
    returned dict holds n, the standard deviation of the row sums and of the
    column sums, and both sum vectors as comma-separated strings.
    """
    cells_needed = n * n
    shortfall = cells_needed - len(bits)
    if shortfall > 0:
        cells = bits + [0] * shortfall
    else:
        cells = bits[:cells_needed]
    grid = np.array(cells, dtype=int).reshape(n, n)
    per_row = grid.sum(axis=1)
    per_col = grid.sum(axis=0)
    return {
        'n': n,
        'row_std': float(per_row.std()),
        'col_std': float(per_col.std()),
        'row_sums': ",".join(str(v) for v in per_row.tolist()),
        'col_sums': ",".join(str(v) for v in per_col.tolist()),
    }

# ========================== RUN SUITE ==========================
print("Generating sequences...")

# Build every N_DIGITS-long bit sequence once, keyed by its short label.
seqs = {
    'F'    : fib_word_bits(N_DIGITS),
    'R'    : rabbit_bits(N_DIGITS),      # complement of F
    'TM'   : thue_morse_bits(N_DIGITS),
    'FIBBI': phi_minus_1_binary_bits(N_DIGITS),
}

# Individual handles on the same lists, used directly by later sections.
F_bits     = seqs['F']
R_bits     = seqs['R']
TM_bits    = seqs['TM']
FIBBI_bits = seqs['FIBBI']

# --- Baseline exact complement identity on a modest prefix (to keep print readable)
print("\n=== BASELINE: Complement identity (finite, exact) ===")
baseline_n = 120  # 120 digits printed compactly
DF, DR, SUM = complement_check(baseline_n)
# With n complementary 0/1 digits the sum is 0.111...1 (n ones) = (1 - 10^-n)/9.
# It only tends to 1/9 as n grows, so compare against the exact finite value.
baseline_expected = Fraction(10 ** baseline_n - 1, 9 * 10 ** baseline_n)
print("DF (Fraction) =", DF)
print("DR (Fraction) =", DR)
print("DF + DR       =", SUM,
      f"   (finite prefix: expected (1 - 10^-{baseline_n})/9, which tends to 1/9; exact match: {SUM == baseline_expected})")

# --- Triadic scan across bases and sequences
print("\nScanning triadic grid (bases 2..12)...")
triadic_best_rows=[]
triadic_holdout_rows=[]
triadic_full_dump=[]

# Bases vary slowest, sequences fastest (same visiting order as a nested base/sequence loop).
for base, (name, bits) in itertools.product(range(2, 13), seqs.items()):
    best, rows = triadic_scan(bits, base, K_MAX)
    triadic_best_rows.append({
        'sequence': name, 'base': base,
        'k_best': best['k'], 'error_best': float(best['abs_err']),
    })
    # Keep every scanned k, not only the winner, for the full-grid dump.
    triadic_full_dump.extend(
        {'sequence': name, 'base': base, 'k': row['k'], 'estimate': row['estimate'],
         'abs_err': row['abs_err'], 'D_value': row['D_value']}
        for row in rows
    )
    # hold-out: exponent selected on the first half, scored on the second half
    k_hat, est2, err2 = triadic_holdout(bits, base)
    triadic_holdout_rows.append({
        'base': base, 'sequence': name,
        'k_hat': k_hat, 'holdout_abs_err': float(err2),
    })

sort_keys = ['sequence', 'base']
df_best    = pd.DataFrame(triadic_best_rows).sort_values(sort_keys)
df_hold    = pd.DataFrame(triadic_holdout_rows).sort_values(sort_keys)
df_tridump = pd.DataFrame(triadic_full_dump)
for frame, filename in (
    (df_best, "triadic_best.csv"),
    (df_hold, "holdout_selection.csv"),
    (df_tridump, "triadic_full_grid.csv"),
):
    frame.to_csv(f"{OUTDIR}/{filename}", index=False)

print("\n=== Best triadic per sequence/base (k_best, |α*−α|) ===")
print(df_best.to_string(index=False))
print("\n=== Hold-out (k from first half; error on second half) ===")
print(df_hold.to_string(index=False))

# --- Monte Carlo p-values for the flagship case (sequence=F, base=10)
print("\nMonte Carlo nulls (sequence=F, base=10) ...")
best_F10, _ = triadic_scan(F_bits, 10)
obs_err = float(best_F10['abs_err'])

# Density null first, run-length null second: both consume the shared `random` stream.
dens = mc_density_null(F_bits, 10, trials=MC_DENSITY)
runl = mc_runlen_null(F_bits, 10, trials=MC_RUNLEN)

# Empirical p-value: share of surrogates whose best error is at least as small as the observed one.
p_dens = np.mean(dens <= obs_err)
p_run  = np.mean(runl <= obs_err)

for errors, filename in ((dens, "mc_density_errors.csv"), (runl, "mc_runlength_errors.csv")):
    pd.DataFrame({'err': errors}).to_csv(f"{OUTDIR}/{filename}", index=False)

print("\n=== MC p-values (F, base=10) ===")
print(f" observed best error : {obs_err:.6e}")
print(f" p_density (unadj)   : {p_dens:.4g}  over {len(dens)} trials")
print(f" p_runlen (unadj)    : {p_run:.4g}  over {len(runl)} trials")

# quick histograms, each with the observed error marked by a dashed vertical line
for errors, x_label, title, filename in (
    (runl, "best |α* − α| under run-length null", "MC run-length null (F, base=10)", "hist_mc_runlen_base10.png"),
    (dens, "best |α* − α| under density null", "MC density null (F, base=10)", "hist_mc_density_base10.png"),
):
    plt.figure(figsize=(6.2,4.6))
    plt.hist(errors, bins=25)
    plt.axvline(obs_err, linestyle='--', linewidth=2)
    plt.xlabel(x_label)
    plt.ylabel("count")
    plt.title(title)
    plt.tight_layout()
    plt.savefig(f"{OUTDIR}/{filename}")
    plt.close()

# --- Square-root panels: per sequence, base=10 by default
print("\nSquare-root proximity panels (sequence, base=10) ...")
root_tables = []
for name, bits in seqs.items():
    best_root, df_root = square_root_panel(bits, 10, name)
    df_root.to_csv(f"{OUTDIR}/sqrt_panel_{name}.csv", index=False)
    root_tables.append(df_root)

# Pool all sequences, rank by error, then keep the ten closest candidates for each target.
df_roots_all = pd.concat(root_tables, ignore_index=True)
ranked_roots = df_roots_all.sort_values("abs_err")
top_roots = ranked_roots.groupby("target").head(10)
top_roots.to_csv(f"{OUTDIR}/sqrt_panel_TOP.csv", index=False)
print(top_roots.to_string(index=False))

# --- Tier-3 probe
print("\nTier-3 probe around m*(phi^(pi-2) - sqrt3), m=96..104 ...")
df_t3 = tier3_probe()
# Show only the twelve closest rows on screen; the CSV keeps the full table.
t3_preview = df_t3.head(12)
print(t3_preview.to_string(index=False))
df_t3.to_csv(f"{OUTDIR}/tier3_probe.csv", index=False)

# Simple plot: best triadic error per base for each sequence (one PNG per sequence)
for name in seqs:
    sub = df_best[df_best.sequence == name].sort_values("base")
    plt.figure(figsize=(6.8,5.0))
    plt.scatter(sub.base, sub.error_best)
    # Label every point with the exponent that achieved its error.
    for point in sub.itertuples(index=False):
        plt.annotate(f"k={int(point.k_best)}", (point.base, point.error_best),
                     textcoords="offset points", xytext=(4,3), fontsize=9)
    plt.xlabel("base B")
    plt.ylabel("best |α* − α|")
    plt.title(f"Best triadic error per base (sequence={name})")
    plt.tight_layout()
    plt.savefig(f"{OUTDIR}/plot_best_by_base_{name}.png")
    plt.close()

# --- De Bruijn profile (k=3) and "magic grid" sketch (n=16)
print("\nDe Bruijn 3-gram profile deltas vs density-matched random (sequence=F) ...")
k = 3
prof_F = kgram_profile(F_bits, k)


def avg_profile_random(bits, k, trials=200):
    """Average the k-gram profile over `trials` i.i.d. Bernoulli sequences with the same length and mean as bits.

    Returns a defaultdict(float) mapping each k-gram seen in any trial to the
    sum of its per-trial frequencies divided by `trials`.
    """
    size = len(bits)
    p = sum(bits) / size
    totals = defaultdict(float)
    for _ in range(trials):
        surrogate = [int(random.random() < p) for _ in range(size)]
        for gram, freq in kgram_profile(surrogate, k).items():
            totals[gram] += freq
    for gram in totals:
        totals[gram] /= trials
    return totals


prof_Ravg = avg_profile_random(F_bits, k, trials=200)

# deltas: observed frequency minus the random average, over the union of k-grams seen in either
keys = sorted(set(prof_F) | set(prof_Ravg))
rows = [
    {
        'kgram': gram,
        'F': prof_F.get(gram, 0.0),
        'rand_avg': prof_Ravg.get(gram, 0.0),
        'delta': prof_F.get(gram, 0.0) - prof_Ravg.get(gram, 0.0),
    }
    for gram in keys
]
df_kgram = pd.DataFrame(rows).sort_values("kgram")
df_kgram.to_csv(f"{OUTDIR}/debruijn_profile_k{k}.csv", index=False)
print(df_kgram.to_string(index=False))

print("\nMagic-grid sketch (row/col dispersion) ...")
# One row per sequence: its label followed by the 16×16 grid metrics.
mg_rows = [
    {'sequence': name, **magic_grid_metrics(bits, 16)}
    for name, bits in seqs.items()
]
df_mg = pd.DataFrame(mg_rows)
df_mg.to_csv(f"{OUTDIR}/magic_grid_16.csv", index=False)
print(df_mg.to_string(index=False))

print(f"\nSaved outputs → {OUTDIR}")
# ============================================================================



Generating sequences...

=== BASELINE: Complement identity (finite, exact) ===
DF (Fraction) = 10010100100101001010010010100100101001010010010100101001001010010010100101001001010010010100101001001010010100100101001/1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
DR (Fraction) = 10110101101101011010110110101101101011010110110101101011011010110110101101011011010110110101101011011010110101101101011/100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
DF + DR       = 111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111/1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000    (should be exactly 1/9)

Scanning triadic grid (bases 2..12)...

=== Best triadic per sequence/base (k_best, |α*−α|) ===
sequence  base  k_best 