In [1]:
# ================================================================
#  Binary → Decimal Emergence Lab (BDE-Lab v4, π-aware, triadic)
#  Self-contained, bias-controlled suite for Colab
#  --------------------------------------------------------------
#  What it does (no intervention needed):
#   • Builds Fibonacci-word (F), Rabbit (R=¬F), reversals, Thue–Morse (TM)
#   • Verifies complement identity DF + DR = 1/9 (decimal-digit map)
#   • Triadic scan: α* = (3^k / 10^3) · D_B(F) with B∈{10}  (main) + base-B head-to-head
#   • Picks best k by |α*-α|; reports sub-step within one 10^-3 “click”
#   • Prefix scalogram: stability of residual vs prefix length
#   • Monte Carlo controls (pre-registered): density‑matched & runlength‑matched
#   • Sequence specificity (F, R, F_rev, R_rev, TM) in base‑10
#   • Base‑B head-to-head: B ∈ {7,8,9,10,12} with triadic multipliers
#   • π control: triadic n-gon bounds; two “binary chord” toys (clearly marked as toys)
#  Saves results to /content/out as PNGs and CSV/JSON.
# ================================================================

import os, math, json, time, itertools, statistics
from collections import Counter, deque
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -------------------------
# Reproducibility & knobs
# -------------------------
# Fixed seed so the Monte Carlo controls and random baselines are repeatable.
SEED = 137
rng = np.random.default_rng(SEED)  # one shared generator for every random draw in this script

# Core precision knobs (you can leave these alone)
N_DIGITS     = 5000        # length of each 0/1 sequence that gets mapped to base-B digits
K_RANGE      = range(0, 13) # exponents k scanned for the triadic multiplier m = 3^k (k = 0..12)
MC_DENSITY_N = 8000        # number of Monte-Carlo trials for the density-matched control
MC_RUNLEN_N  = 3000        # number of Monte-Carlo trials for the run-length-matched control

BASES = [7, 8, 9, 10, 12]  # bases compared in the base-B head-to-head
SEQ_TAGS = ["F", "R", "F_rev", "R_rev", "TM"]  # sequence labels; each is looked up in SEQ_MAP

# Two close reference α values (they agree to >10 decimals; update when CODATA updates).
# NOTE(review): presumably two CODATA releases of the fine-structure constant — confirm which.
ALPHA_A = 0.0072973525643
ALPHA_B = 0.0072973525693
ALPHAS = [("A", ALPHA_A), ("B", ALPHA_B)]  # (label, value) pairs iterated by summarize_alpha_scan

OUTDIR = "/content/out"  # every CSV / PNG / JSON artifact is written under this directory
os.makedirs(OUTDIR, exist_ok=True)  # create it up front; no error if it already exists

# -------------------------
# Utilities
# -------------------------
def fib_word_bits(N: int) -> np.ndarray:
    """Return the first N symbols of the Fibonacci word as a uint8 array of 0/1.

    The word is grown from the seed "0" by repeatedly applying the
    substitution 0 -> 01, 1 -> 0 to the whole string until at least N
    symbols are available; the result is then cut to N symbols.
    """
    substitution = {"0": "01", "1": "0"}
    word = "0"
    while len(word) < N:
        word = "".join(substitution[symbol] for symbol in word)
    prefix = word[:N]
    return np.array([int(symbol) for symbol in prefix], dtype=np.uint8)

def thue_morse_bits(N: int) -> np.ndarray:
    """Return the first N terms of the Thue-Morse sequence as a uint8 array.

    Term n (starting at n = 0) is the parity of the number of 1-bits in the
    binary expansion of n.  The sequence is built by doubling: if ``t`` holds
    the first 2^j terms, the next 2^j terms are the bitwise complement of
    ``t``.  This keeps the work inside numpy and does not depend on
    ``int.bit_count`` (Python 3.10+).

    Parameters
    ----------
    N : int
        Number of terms wanted.  N <= 0 yields an empty array.

    Returns
    -------
    np.ndarray
        1-D uint8 array of length max(N, 0) holding 0/1 values.
    """
    if N <= 0:
        return np.zeros(0, dtype=np.uint8)
    seq = np.zeros(1, dtype=np.uint8)  # t(0) = 0
    while seq.size < N:
        # XOR with 1 flips 0 <-> 1 and stays in uint8.
        seq = np.concatenate((seq, seq ^ 1))
    return seq[:N]

def digits_value(bits: np.ndarray, base: int) -> float:
    """Read a 0/1 sequence as the fractional expansion 0.b1 b2 b3 ... in base `base`.

    Evaluation runs from the last digit back to the first (Horner's scheme
    on the reversed sequence), dividing by the base at every step, so the
    running value never grows.  An empty sequence gives 0.0.
    """
    radix = float(base)
    acc = 0.0
    for digit in reversed(bits):
        acc += float(digit)
        acc /= radix
    return acc

def run_lengths(bits: np.ndarray):
    """Run-length encode a bit sequence.

    Returns a pair ``(runs, start)`` where ``runs`` is a list of
    ``(bit, length)`` tuples, one per maximal block of equal bits in order
    of appearance, and ``start`` is the first bit as an int.  An empty
    input gives ``([], 0)``.
    """
    if len(bits) == 0:
        return [], 0
    # Grouping on int(...) mirrors comparing the integer value of each element.
    runs = [
        (value, sum(1 for _ in block))
        for value, block in itertools.groupby(bits, key=int)
    ]
    return runs, int(bits[0])

def sample_runlength_shuffled(bits: np.ndarray, rng: np.random.Generator):
    """Build a surrogate sequence with the same run lengths in a random order.

    The lengths of the 0-runs and of the 1-runs are shuffled independently
    (0-runs first, then 1-runs, so the generator is consumed in a fixed
    order).  The runs are then laid down alternately, beginning with the
    same bit the input begins with.  The result has the shape and dtype of
    `bits`.
    """
    runs, start = run_lengths(bits)
    zero_lengths = [length for value, length in runs if value == 0]
    one_lengths = [length for value, length in runs if value == 1]
    rng.shuffle(zero_lengths)
    rng.shuffle(one_lengths)

    pools = (zero_lengths, one_lengths)
    taken = [0, 0]          # how many lengths have been used from each pool
    out = np.empty_like(bits)
    cursor = 0
    bit = start
    for _ in runs:
        which = 0 if bit == 0 else 1
        length = pools[which][taken[which]]
        taken[which] += 1
        out[cursor:cursor + length] = bit
        cursor += length
        bit = 1 - bit       # runs alternate between 0 and 1
    return out

def best_alpha_error_for_sequence(bits, base, k_range, denom_power=3, alpha=ALPHA_B):
    """Find the exponent k whose estimate (3^k / base^denom_power) * D is closest to alpha.

    D is ``digits_value(bits, base)``.  Returns ``(best, D)`` where ``best``
    is the tuple ``(k, m, alpha_star, abs_error)`` with ``m = 3**k``; on a
    tie the earliest k in `k_range` wins.
    """
    D = digits_value(bits, base)
    denom = base**denom_power

    def candidate(k):
        m = 3**k
        a_star = (m/denom)*D
        return (k, m, a_star, abs(a_star-alpha))

    winner = min(map(candidate, k_range), key=lambda entry: entry[3])
    return winner, D

def summarize_alpha_scan(bits, base, k_range, denom_power=3):
    """Run the triadic scan against every reference value in ALPHAS.

    Returns ``(results, D, click)``:
      * results - one dict per (label, alpha) pair with the best k, the
        multiplier m = 3**k, the estimate, its absolute error, and that
        error expressed as a fraction of one click;
      * D       - ``digits_value(bits, base)``;
      * click   - ``D / base**denom_power``, i.e. how far the estimate
        moves when the multiplier m changes by 1.
    """
    D = digits_value(bits, base)
    denom = base**denom_power
    click = D/denom

    results = []
    for label, alpha in ALPHAS:
        best = None
        for k in k_range:
            m = 3**k
            a_star = (m/denom)*D
            trial = (k, m, a_star, abs(a_star-alpha))
            # Strict "<" keeps the earliest k when two errors tie.
            if best is None or trial[3] < best[3]:
                best = trial
        if click > 0:
            substep = best[3]/click
        else:
            substep = float('nan')
        k_win, m_win, a_win, err_win = best
        results.append({
            "alpha_label": label,
            "alpha": alpha,
            "k_best": k_win,
            "m_best": m_win,
            "alpha_star": a_win,
            "abs_error": err_win,
            "substep_within_click": substep,
        })
    return results, D, click

def plot_save(fname):
    """Tighten the layout of the current figure, write it to OUTDIR/fname at 160 dpi, and close it."""
    plt.tight_layout()
    target = os.path.join(OUTDIR, fname)
    plt.savefig(target, dpi=160)
    plt.close()

# -------------------------
# Build sequences (prefix N_DIGITS)
# -------------------------
t0 = time.time()  # start of the timed section; reported as runtime_sec in the summary

# Generated sequences, each N_DIGITS long.
F_bits = fib_word_bits(N_DIGITS)
TM_bits = thue_morse_bits(N_DIGITS)

# Derived sequences: bitwise complement of F ("Rabbit") and the two reversals.
R_bits = 1 - F_bits
F_rev, R_rev = F_bits[::-1], R_bits[::-1]

SEQ_MAP = dict(F=F_bits, R=R_bits, F_rev=F_rev, R_rev=R_rev, TM=TM_bits)

# -------------------------
# Quick identity check: DF + DR = 1/9
#   F and R are complementary, so every decimal place holds exactly one 1
#   between them and the digit-wise sum is 0.111... with no carries.
# -------------------------
DF, DR = (digits_value(seq, 10) for seq in (F_bits, R_bits))
comp_err = DF + DR - (1/9)

# -------------------------
# Main triadic scan (base-10, Fibonacci)
# -------------------------
# Scan F in base 10 against both reference alphas.
scan, D_main, click_main = summarize_alpha_scan(F_bits, base=10, k_range=K_RANGE, denom_power=3)
# Tag every per-alpha record with the base and sequence it was computed for.
scan_records = [dict(rec, base=10, sequence="F") for rec in scan]

df_scan = pd.DataFrame(scan_records)
df_scan.to_csv(os.path.join(OUTDIR, "scan_triadic_F_base10.csv"), index=False)

# Plot error vs k for ALPHA_B (the curve for ALPHA_A is nearly identical at this scale).
k_values = list(K_RANGE)
errs = [abs((3**k / 1000) * D_main - ALPHA_B) for k in k_values]
plt.figure(figsize=(7,4.5))
plt.plot(k_values, errs, marker="o")
plt.xlabel("k (m = 3^k)")
plt.ylabel("absolute error |α* − α|")
plt.title("Error | (3^k / 10^3) · D(F) − α |  (base-10)")
# Highlight best k.  `errs` is ordered like K_RANGE, so look the error up by
# the position of k_best in K_RANGE rather than by its value; the two only
# coincide when K_RANGE starts at 0 with step 1.
k_best = int(df_scan[df_scan.alpha_label=="B"].iloc[0]["k_best"])
plt.scatter([k_best],[errs[k_values.index(k_best)]], s=80)
plot_save("plot_scan_3k_base10.png")

# -------------------------
# Prefix scalogram (stability at fixed m=3^k where k_best from ALPHA_B)
# -------------------------
# Only keep prefix lengths that actually exist.  Slicing past the end of
# F_bits would silently return the whole sequence and report it under a
# larger N than was really used.
prefix_list = [n for n in (100,200,300,400,600,800,1000,1500,2000) if n <= len(F_bits)]
scale_best = (3**k_best) / 1000   # fixed multiplier m / 10^3 at the best k for ALPHA_B
resid = []
for N in prefix_list:
    Dp = digits_value(F_bits[:N], 10)
    resid.append(scale_best * Dp - ALPHA_B)
# NOTE(review): digits_value accumulates in double precision, so digits far
# beyond the ~16th decimal place presumably cannot change Dp; identical
# residuals across these prefix lengths would then reflect float resolution
# rather than a property of the sequence — confirm before interpreting.

df_scalo = pd.DataFrame({"prefix_N": prefix_list, "alpha_star_minus_alpha": resid})
df_scalo.to_csv(os.path.join(OUTDIR, "prefix_scalogram.csv"), index=False)

plt.figure(figsize=(7,4.5))
plt.plot(prefix_list, resid, marker="o")
plt.axhline(0, ls="--", lw=1)
plt.title(f"Prefix scalogram at m=3^{k_best} (base-10)")
plt.xlabel("prefix length N")
plt.ylabel("α* − α")
plot_save("plot_prefix_scalogram.png")

# -------------------------
# Sequence specificity (base-10; best over k)
# -------------------------
# One row per catalogued sequence: the best k (against ALPHA_B, base 10) and its error.
spec_rows = []
for tag in SEQ_TAGS:
    winner, _D_unused = best_alpha_error_for_sequence(
        SEQ_MAP[tag], base=10, k_range=K_RANGE, denom_power=3, alpha=ALPHA_B)
    k_win, m_win, a_win, err_win = winner
    spec_rows.append({
        "sequence": tag,
        "k_best": k_win,
        "m_best": m_win,
        "alpha_star": a_win,
        "abs_error": err_win,
    })
df_spec = pd.DataFrame(spec_rows)
df_spec.to_csv(os.path.join(OUTDIR, "sequence_specificity.csv"), index=False)

plt.figure(figsize=(7.5,4.5))
plt.bar(df_spec["sequence"], df_spec["abs_error"])
# Annotate each bar with its winning k, slightly above the bar top.
for pos, (err_val, k_val) in enumerate(zip(df_spec["abs_error"], df_spec["k_best"])):
    plt.text(pos, err_val+1e-5, f'k={int(k_val)}', ha='center', va='bottom', fontsize=9)
plt.title("Sequence specificity (base-10)")
plt.ylabel("best |α* − α|")
plot_save("plot_invariance.png")

# -------------------------
# Base-B head-to-head (use same triadic rule α*=(3^k / B^3)·D_B(F))
# -------------------------
# For every base B, reuse the shared best-k search instead of repeating it
# inline.  The helper applies the same rule (m = 3^k, denominator B^3,
# earliest k wins on ties), so the rows are the same as a hand-written scan.
bb_rows = []
for B in BASES:
    winner, _D_B = best_alpha_error_for_sequence(
        F_bits, base=B, k_range=K_RANGE, denom_power=3, alpha=ALPHA_B)
    k_win, m_win, a_win, err_win = winner
    bb_rows.append(dict(base=B, k_best=k_win, m_best=m_win, alpha_star=a_win, abs_error=err_win))
df_base = pd.DataFrame(bb_rows)
df_base.to_csv(os.path.join(OUTDIR, "baseB_headtohead.csv"), index=False)

plt.figure(figsize=(7,4.5))
# Bases are passed as strings so the bars are evenly spaced categories.
plt.bar([str(B) for B in df_base["base"]], df_base["abs_error"])
plt.xlabel("base B (digits interpreted in base B)")
plt.ylabel("best |α* − α|")
plt.title("Base sensitivity (F)")
plot_save("plot_baseB_headtohead.png")

# -------------------------
# Monte Carlo controls
#   Hypothesis class (pre-registered):
#    • sequences length N_DIGITS
#    • base = 10
#    • multipliers m = 3^k, k ∈ K_RANGE
#   We compute p = P(err_random ≤ err_F).
# -------------------------
# Target error (F, base-10, best over k)
# Error achieved by F itself (base 10, best k); the bar every random trial is compared to.
err_F_target = float(df_spec.loc[df_spec["sequence"] == "F", "abs_error"].iloc[0])

# Density-matched trials: i.i.d. bits with the same fraction of ones as F,
# each scored with its own best k over K_RANGE.
p1 = float(F_bits.mean())
den_errors = np.empty(MC_DENSITY_N)
for trial in range(MC_DENSITY_N):
    sample = rng.binomial(1, p1, size=N_DIGITS).astype(np.uint8)
    winner, _D_unused = best_alpha_error_for_sequence(
        sample, base=10, k_range=K_RANGE, denom_power=3, alpha=ALPHA_B)
    den_errors[trial] = winner[3]
df_den = pd.DataFrame({"abs_error": den_errors})
df_den.to_csv(os.path.join(OUTDIR, "mc_density_errors.csv"), index=False)
# Empirical p-value: share of random trials that do at least as well as F.
pval_density = float((den_errors <= err_F_target).mean())
med_density  = float(np.median(den_errors))

plt.figure(figsize=(7.5,4.5))
plt.hist(den_errors, bins=60)
plt.axvline(err_F_target, ls="--")
plt.xlabel("abs error")
plt.ylabel("count")
plt.title(f"Monte Carlo (density-matched): errors vs DF (k best per trial)\nN={MC_DENSITY_N}, p≈{pval_density:.4f}")
plot_save("plot_mc_density.png")

# Run-length matched trials
# Run-length-matched trials: surrogates that keep F's run lengths but in a
# shuffled order, each scored with its own best k over K_RANGE.
run_errors = np.empty(MC_RUNLEN_N)
for trial in range(MC_RUNLEN_N):
    surrogate = sample_runlength_shuffled(F_bits, rng)
    winner, _D_unused = best_alpha_error_for_sequence(
        surrogate, base=10, k_range=K_RANGE, denom_power=3, alpha=ALPHA_B)
    run_errors[trial] = winner[3]
df_run = pd.DataFrame({"abs_error": run_errors})
df_run.to_csv(os.path.join(OUTDIR, "mc_runlength_errors.csv"), index=False)
# Empirical p-value: share of surrogates that do at least as well as F.
pval_run = float((run_errors <= err_F_target).mean())
med_run  = float(np.median(run_errors))

plt.figure(figsize=(7.5,4.5))
plt.hist(run_errors, bins=60)
plt.axvline(err_F_target, ls="--")
plt.xlabel("abs error")
plt.ylabel("count")
plt.title(f"Monte Carlo (run-length-matched): errors vs DF (k best per trial)\nN={MC_RUNLEN_N}, p≈{pval_run:.4f}")
plot_save("plot_mc_runlength.png")

# -------------------------
# π MODULE (controls + toys) — clearly separated
# -------------------------
# 1) Triadic polygon bounds to π (pure control, no sequences)
def triadic_bounds_to_pi(k_vals):
    """Tabulate regular-polygon bounds on π for n = 3**k sides.

    For each k the table holds ``pin = n*sin(π/n)`` and ``pout = n*tan(π/n)``
    (half the perimeter of the n-gon inscribed in / circumscribed about the
    unit circle) together with their signed differences from π.  Returns a
    DataFrame with columns k, n, pin, pout, err_in, err_out.
    """
    def bounds_row(k):
        n = 3**k
        pin = n*math.sin(math.pi/n)
        pout = n*math.tan(math.pi/n)
        return {
            "k": k,
            "n": n,
            "pin": pin,
            "pout": pout,
            "err_in": pin-math.pi,
            "err_out": pout-math.pi,
        }

    return pd.DataFrame([bounds_row(k) for k in k_vals])

# Control table for k = 1..8, saved and plotted as signed error against π.
df_bounds = triadic_bounds_to_pi(range(1,9))
df_bounds.to_csv(os.path.join(OUTDIR, "triadic_bounds_control.csv"), index=False)

plt.figure(figsize=(7.5,4.5))
k_axis = df_bounds["k"]
# Circumscribed curve first, then inscribed, so legend order and colours match.
for column, legend_text in (("err_out", "p_out - π (circumscribed)"),
                            ("err_in", "p_in - π (inscribed)")):
    plt.plot(k_axis, df_bounds[column], marker="o", label=legend_text)
plt.axhline(0, ls="--", lw=1)
plt.title("Triadic regular polygons: bounds to π (control)")
plt.xlabel("k  (n = 3^k)")
plt.ylabel("error relative to π")
plt.legend()
plot_save("triadic_bounds_control.png")

# 2) Binary chord (toy): “walk” angles by golden angle and sum chord lengths selected by bits
def binary_chord_pi_estimate(bits, step="golden"):
    """Toy estimate: add up unit-circle chords selected by the 1-bits.

    The angle advances by a fixed increment for every bit: the golden angle
    when ``step == "golden"``, otherwise ``2π / (len(bits)//2 or 1)``.  A
    1-bit adds the chord spanning that increment to a running perimeter.
    The perimeter is divided by the fraction of ones (or by 1.0 when there
    are none) and then halved.  `bits` must provide ``.mean()``.
    """
    M = len(bits)
    if step != "golden":
        dtheta = 2*math.pi/ (M//2 or 1)     # "or 1" avoids dividing by zero for M < 2
    else:
        g = (math.sqrt(5)-1)/2
        dtheta = 2*math.pi*(1-g)            # golden angle, ~137.5 degrees

    theta = 0.0
    perim = 0.0
    for b in bits:
        theta_next = theta + dtheta
        if b == 1:
            # chord between the points at theta and theta_next on the unit circle
            perim += 2*math.sin(abs(theta_next - theta)/2.0)
        theta = theta_next

    # Occupancy = fraction of steps that contributed a chord.
    density = float(bits.mean())
    occ = density if density > 0 else 1.0
    return (perim / occ) / 2.0

# Run toy estimates for F/TM and a random baseline
# Toy estimates for the two structured streams ...
est_F  = binary_chord_pi_estimate(F_bits[:5000])
est_TM = binary_chord_pi_estimate(TM_bits[:5000])
# ... and for 200 uniformly random bit streams of the same length as a baseline.
rand_ests = []
for _trial in range(200):
    stream = rng.integers(0,2, size=5000, dtype=np.uint8)
    rand_ests.append(binary_chord_pi_estimate(stream))
df_ch = pd.DataFrame({"est": [est_F, est_TM], "seq": ["Fibonacci","Thue–Morse"]})
df_rand = pd.DataFrame({"est": rand_ests})

plt.figure(figsize=(7.5,4.5))
plt.hist(df_rand["est"], bins=40, alpha=0.85)
# Reference lines: true π, then the two structured-stream estimates.
plt.axvline(math.pi, ls="--", lw=2, label="π (true)")
plt.axvline(est_F,  lw=2, label=f"Fibonacci est ({est_F:.6f})")
plt.axvline(est_TM, lw=2, ls=":", label=f"Thue–Morse est ({est_TM:.6f})")
plt.xlabel("π estimate (perimeter / 2)")
plt.ylabel("count")
plt.title("Binary-chord circle (toy): Fibonacci vs random streams")
plt.legend()
plot_save("binary_chord_hist.png")

# 3) Carry-cut residual (toy): push tiny “carry” proportional to triadic scale; compare streams
def carry_cut_residual(bits, k_vals):
    """Toy residual: density of the first 3**k bits, scaled down by 3**(-k).

    Parameters
    ----------
    bits : array-like supporting slicing and ``.mean()``
        The 0/1 stream.  If it is shorter than 3**k, the slice simply covers
        every available bit.
    k_vals : iterable of int
        Triadic levels to evaluate.

    Returns
    -------
    pandas.DataFrame
        Columns ``k`` and ``residual``, one row per entry of `k_vals`.
    """
    rows = [
        {"k": k, "residual": (3.0**(-k)) * (bits[: 3**k].mean())}
        for k in k_vals
    ]
    return pd.DataFrame(rows)

# Toy residual at triadic levels k = 1..8 for both structured streams.
levels = range(1,9)
df_cc_F  = carry_cut_residual(F_bits, levels)
df_cc_TM = carry_cut_residual(TM_bits, levels)

plt.figure(figsize=(7.5,4.5))
for frame, legend_text in ((df_cc_F, "Fibonacci stream"),
                           (df_cc_TM, "Thue–Morse stream")):
    plt.plot(frame["k"], frame["residual"], marker="o", label=legend_text)
plt.axhline(0, ls="--", lw=1)
plt.xlabel("k  (n = 3^k)")
plt.ylabel("adjusted perimeter − π  (toy units)")
plt.title('"Carry-cut" residual vs triadic level (toy)')
plt.legend()
plot_save("carry_cut_residual.png")

# -------------------------
# Final summary + JSON
# -------------------------
# Collect every headline number into one JSON-serializable dict.  DataFrames
# are stored as lists of row dicts; scalar results were already converted to
# plain Python floats/ints where they were computed.
summary = {
    "digits_used_N": N_DIGITS,
    "complement_identity_DF_plus_DR_minus_1_over_9": comp_err,
    "alpha_inputs": {lbl: val for lbl,val in ALPHAS},
    "scan_F_base10": df_scan.to_dict(orient="records"),
    "k_best_B": k_best,
    "click_size_D_over_1000": click_main,
    "prefix_scalogram_points": df_scalo.to_dict(orient="records"),
    "sequence_specificity": df_spec.to_dict(orient="records"),
    "baseB_headtohead": df_base.to_dict(orient="records"),
    "mc_density": {"N": MC_DENSITY_N, "p_value": pval_density, "median_error": med_density},
    "mc_runlength": {"N": MC_RUNLEN_N, "p_value": pval_run, "median_error": med_run},
    "pi_control": {
        # last row of the bounds table = the largest k that was tabulated
        "triadic_bounds_last_row": df_bounds.iloc[-1].to_dict(),
        "binary_chord_toy": {"F": est_F, "TM": est_TM, "random_mean": float(df_rand["est"].mean()),
                             "random_sd": float(df_rand["est"].std(ddof=1))}
    },
    # wall-clock seconds since t0 was taken, just before the sequences were built
    "runtime_sec": time.time() - t0,
    "outdir": OUTDIR,
    "notes": [
        "Main α rule: α* = (3^k / 10^3) · D_10(sequence). k chosen by minimizing |α*−α| over K_RANGE.",
        "Monte-Carlo p-values computed inside the pre-registered hypothesis class (seq length N, base=10, multipliers 3^k).",
        "Base-B head-to-head uses α* = (3^k / B^3) · D_B(F) to keep dimensions consistent.",
        "π module is for control/toy intuition only; it does not feed back into α claims."
    ]
}
# json.dump escapes non-ASCII characters by default, so the α/π symbols in
# the notes are written as \u escapes regardless of the file's encoding.
with open(os.path.join(OUTDIR, "summary.json"), "w") as f:
    json.dump(summary, f, indent=2)

# Console report: a human-readable digest of the same results.
print("=== BINARY → DECIMAL EMERGENCE LAB (π-aware, triadic) ===")
print(f"Digits (F prefix) N = {N_DIGITS}")
print(f"DF + DR − 1/9 ≈ {comp_err:.3E}   <-- complement identity (→0 with larger N)")
# Only the scan row for reference value "B" is printed; stop after the first match.
for row in df_scan.itertuples():
    if row.alpha_label=="B":
        print("\n--- Triadic scan for F (base-10) ---")
        print(f"α (B) = {ALPHA_B:.13f}")
        print(f"Best k (base-10) = {int(row.k_best)}  m = {int(row.m_best)}  |Δ| = {row.abs_error:.3E}")
        print(f"Sub-step within one click = {row.substep_within_click:.6f}")
        print(f"α* (at best m) = {row.alpha_star:.15f}")
        break
print("\n--- Prefix scalogram (residual vs N) ---")
# One line per prefix length: signed residual α* − α at the fixed best k.
for N, r in zip(df_scalo["prefix_N"], df_scalo["alpha_star_minus_alpha"]):
    print(f"N={N:4d}: residual={r:.3E}")
print("\n--- Monte Carlo: density-matched ---")
print(f"Trials n={MC_DENSITY_N}, p≈{pval_density:.4f}, median error≈{med_density:.3E}")
print("--- Monte Carlo: run-length-matched ---")
print(f"Trials n={MC_RUNLEN_N}, p≈{pval_run:.4f}, median error≈{med_run:.3E}")
print(f"\nSaved CSVs & figures to: {OUTDIR}")


=== BINARY → DECIMAL EMERGENCE LAB (π-aware, triadic) ===
Digits (F prefix) N = 5000
DF + DR − 1/9 ≈ 0.000E+00   <-- complement identity (→0 with larger N)

--- Triadic scan for F (base-10) ---
α (B) = 0.0072973525693
Best k (base-10) = 6  m = 729  |Δ| = 1.040E-08
Sub-step within one click = 0.001039
α* (at best m) = 0.007297362972974

--- Prefix scalogram (residual vs N) ---
N= 100: residual=1.040E-08
N= 200: residual=1.040E-08
N= 300: residual=1.040E-08
N= 400: residual=1.040E-08
N= 600: residual=1.040E-08
N= 800: residual=1.040E-08
N=1000: residual=1.040E-08
N=1500: residual=1.040E-08
N=2000: residual=1.040E-08

--- Monte Carlo: density-matched ---
Trials n=8000, p≈0.0024, median error≈8.035E-04
--- Monte Carlo: run-length-matched ---
Trials n=3000, p≈0.0313, median error≈6.707E-04

Saved CSVs & figures to: /content/out
