In [1]:
# ===============================
# BINARY → DECIMAL EMERGENCE LAB  (π-free, triadic residual audit)
# ===============================
# Colab-ready, single-cell, self-contained.
# Saves all CSVs/plots/JSON to /content/out.

import os, json, math, random, statistics
from dataclasses import dataclass, asdict
from typing import List, Dict, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --------- Config (you can tweak safely) ----------
# Directory that receives every CSV, plot and JSON file; it is created here,
# at import time, if it does not exist yet.
OUTDIR = "/content/out"
os.makedirs(OUTDIR, exist_ok=True)

# A single seed is applied to both global RNGs: the stdlib `random` module
# and NumPy's legacy global generator.
SEED = 137
random.seed(SEED); np.random.seed(SEED)

# Digits to use from the Fibonacci word when forming decimals:
N_DIGITS       = 1000     # matches your prior runs
K_RANGE        = list(range(0, 9))   # triadic exponents k = 0..8 (k=6 gives m = 3^6 = 729)
DENOM_POWER_P  = 3        # α* = (m / 10^p) * D  (fixed p=3)
BASES_TO_TEST  = [7,8,9,10,12]  # base sensitivity sweep

# Monte Carlo controls (number of random sequences drawn per null model)
MC_DENSITY_N   = 8000   # density-matched (Bernoulli with same mean)
MC_RUNLEN_N    = 3000   # runlength-matched (sample empirical run lengths)
# You can reduce these if runtime is too long.
# --------------------------------------------------

# Two reference α values (A ~ your working, B ~ nearby for robustness)
# NOTE(review): presumably two published values of the fine-structure constant — confirm the sources.
ALPHA_A = 0.0072973525643
ALPHA_B = 0.0072973525693

# ---------- Core: Fibonacci word & companions ----------
def fib_word_bits(n: int) -> List[int]:
    """
    Return the first n symbols of the infinite Fibonacci word as 0/1 integers.

    The finite Fibonacci words are grown by concatenation
    (S0 = '0', S1 = '01', S_k = S_{k-1} + S_{k-2}); every word is a prefix of the
    next one, so the first word that reaches length n is simply truncated.
    The sequence starts 0 1 0 0 1 0 1 0 0 1 0 0 1 ...
    An empty list is returned when n <= 0.
    """
    if n <= 1:
        # The seed word '0' alone is enough (or nothing at all for n <= 0).
        return [0] if n == 1 else []
    shorter, longer = "01", "010"
    while len(longer) < n:
        shorter, longer = longer, longer + shorter
    return [int(symbol) for symbol in longer[:n]]

def complement_bits(bits: List[int]) -> List[int]:
    """Return a new list in which every entry b of ``bits`` is replaced by 1 - b."""
    flipped = []
    for bit in bits:
        flipped.append(1 - bit)
    return flipped

def reverse_bits(bits: List[int]) -> List[int]:
    """Return a new list holding the entries of ``bits`` in reverse order (input untouched)."""
    mirrored = list(bits)
    mirrored.reverse()
    return mirrored

def thue_morse_bits(n: int) -> List[int]:
    """
    First n terms of the Thue-Morse sequence.

    Term i (i = 0, 1, ..., n-1) is the parity (0 or 1) of the number of
    1-digits in the binary representation of i.
    """
    return [bin(index).count("1") % 2 for index in range(n)]

# ---------- Mapping: bits -> decimal-digit number ----------
def bits_to_decimal_digit_number(bits: List[int], base: int = 10) -> float:
    """
    Read ``bits`` as the digits after the radix point of a base-``base`` fraction.

    The result is 0.b1 b2 b3 ... in the given base, i.e.
    sum_{i>=1} bits[i-1] * base^{-i}. It is evaluated with Horner's scheme,
    walking from the last digit towards the first and multiplying by 1/base
    at every step. An empty digit list yields 0.0.
    """
    scale = 1.0 / base
    acc = 0.0
    # Walk the positions right-to-left so the least significant digit is folded in first.
    for pos in range(len(bits) - 1, -1, -1):
        acc = (acc + bits[pos]) * scale
    return acc

# ---------- Triadic scan diagnostics ----------
def triadic_m_list(k_list: List[int]) -> List[int]:
    """Map each exponent k in ``k_list`` to the multiplier m = 3**k, keeping the order."""
    powers = []
    for exponent in k_list:
        powers.append(3 ** exponent)
    return powers

def alpha_star_from_D(D: float, m: int, p: int = 3) -> float:
    """Return the candidate value α* = (m / 10^p) * D for multiplier m and decimal shift p."""
    prefactor = m / (10.0**p)
    return prefactor * D

def delta_one_click(D: float, p: int = 3) -> float:
    """
    Size of one "click": the change in (m / 10^p) * D when m moves by ±1
    while p stays fixed, i.e. D / 10^p.
    """
    denominator = 10.0**p
    return D / denominator

def scan_triadic(D: float, alpha: float, k_list: List[int], p: int = 3) -> Dict:
    """
    Evaluate α* = (3^k / 10^p) * D for every k in ``k_list`` and locate the k
    whose α* lies closest to ``alpha``.

    Returns a dict with:
      "m_list"          - the multipliers 3^k, in the order of ``k_list``
      "alpha_star_list" - NumPy array of the corresponding α* values
      "errors"          - NumPy array of |α* - alpha|
      "best"            - dict describing the closest candidate: "k_best",
                          "m_best", "abs_error", "alpha_star" and
                          "substep_within_one_click" (the signed residual
                          measured in one-click units D / 10^p and wrapped
                          into [-0.5, 0.5)).
    """
    multipliers = triadic_m_list(k_list)
    candidates = np.array([alpha_star_from_D(D, mult, p) for mult in multipliers])
    signed_gap = candidates - alpha
    abs_gap = np.abs(signed_gap)
    winner = int(np.argmin(abs_gap))

    # Express the signed residuals in units of one click (m -> m±1 at fixed p).
    click = delta_one_click(D, p)
    gap_in_clicks = signed_gap / click
    # Wrap the winner's residual so it reports only the offset inside a single click.
    wrapped = ((gap_in_clicks[winner] + 0.5) % 1.0) - 0.5

    best = {
        "k_best": k_list[winner],
        "m_best": multipliers[winner],
        "abs_error": float(abs_gap[winner]),
        "alpha_star": float(candidates[winner]),
        "substep_within_one_click": float(wrapped),
    }
    return {
        "m_list": multipliers,
        "alpha_star_list": candidates,
        "errors": abs_gap,
        "best": best,
    }

# ---------- Run-length helpers for matched nulls ----------
def run_lengths(bits: List[int]) -> List[int]:
    """
    Lengths of the maximal runs of equal consecutive values in ``bits``,
    listed in order of appearance. An empty input gives an empty list.
    """
    if not bits:
        return []
    lengths = []
    run_start = 0
    # A run ends wherever an element differs from its predecessor.
    for pos in range(1, len(bits)):
        if bits[pos] != bits[pos - 1]:
            lengths.append(pos - run_start)
            run_start = pos
    # Close the final run, which has no following boundary.
    lengths.append(len(bits) - run_start)
    return lengths

def empirical_runlength_sampler(len_target: int, rl0: List[int], rl1: List[int], p0start: float = 0.5) -> List[int]:
    """
    Generate a random 0/1 sequence of exactly ``len_target`` entries made of
    alternating runs of 0s and 1s.

    The first run is a 0-run with probability ``p0start`` (otherwise a 1-run).
    Each run length is drawn uniformly with ``random.choice`` from ``rl0`` for
    0-runs and from ``rl1`` for 1-runs; when the relevant list is empty the run
    length is 1. Runs are appended until the target length is reached and the
    overshoot is cut off.
    """
    pools = (rl0, rl1)  # indexed by the symbol currently being emitted
    sequence = []
    current = 0 if random.random() < p0start else 1
    while len(sequence) < len_target:
        pool = pools[current]
        run = random.choice(pool) if pool else 1
        sequence += [current] * run
        current = 1 - current  # alternate 0 <-> 1
    return sequence[:len_target]

# ---------- Suite: build everything, run tests ----------
def main() -> None:
    """
    Run the whole experiment end to end and write every artefact to OUTDIR.

    Steps, in order: build the Fibonacci-word prefix and its companion
    sequences; check the complement identity; run the triadic scan for F
    against ALPHA_A and ALPHA_B; compare sequence variants; sweep the digit
    base; compute the prefix scalogram at m=729; run the density-matched and
    run-length-matched Monte Carlo nulls; probe simple transforms of DF;
    save CSV tables, PNG plots and summary.json; print a final report.

    The Monte Carlo steps consume the global NumPy RNG (density null) and the
    stdlib `random` RNG (run-length null), so their results depend on the
    seeds set at module level and on the order of the calls below.
    """
    print("=== BINARY → DECIMAL EMERGENCE LAB (π-free, triadic) ===")
    print(f"Digits (F prefix) N = {N_DIGITS}")
    # 1) Build sequences
    F_bits = fib_word_bits(N_DIGITS)       # Fibonacci-word prefix
    R_bits = complement_bits(F_bits)       # bitwise complement of F
    Fr_bits = reverse_bits(F_bits)         # F read backwards
    Rr_bits = reverse_bits(R_bits)         # complement read backwards
    TM_bits = thue_morse_bits(N_DIGITS)    # Thue-Morse comparison sequence

    # Exact complement identity check: DF + DR − 1/9 → 0 as N↑
    # (F and R have exactly one 1 between them at every position, so
    #  DF + DR = sum_{i=1..N} 10^-i = (1 − 10^-N) / 9.)
    DF = bits_to_decimal_digit_number(F_bits, base=10)
    DR = bits_to_decimal_digit_number(R_bits, base=10)
    complement_check = DF + DR - (1.0/9.0)
    print(f"DF + DR − 1/9 ≈ {complement_check:.3E}   <-- exact identity in theory")

    # 2) α*, scans for F (base-10)
    print("\n--- Triadic scan for F (base-10) ---")
    scanF_A = scan_triadic(DF, ALPHA_A, K_RANGE, DENOM_POWER_P)
    bestF = scanF_A["best"]
    print(f"α (A) = {ALPHA_A}")
    print(f"Best k (base-10) = {bestF['k_best']}  m = {bestF['m_best']}  |Δ| = {bestF['abs_error']:.3E}")
    print(f"Sub-step within one click = {bestF['substep_within_one_click']:.6f}")
    print(f"α* (at best m) = {bestF['alpha_star']:.18f}")
    # Also compare to ALPHA_B
    scanF_B = scan_triadic(DF, ALPHA_B, K_RANGE, DENOM_POWER_P)
    bestF_B = scanF_B["best"]
    print(f"α (B) = {ALPHA_B}")
    print(f"Best k vs B = {bestF_B['k_best']} (|Δ|={bestF_B['abs_error']:.3E})")

    # 3) Variants (sequence specificity, base-10)
    def seq_report(tag, bits):
        # Best triadic match (against ALPHA_A) for one named sequence, as a flat row.
        D = bits_to_decimal_digit_number(bits, base=10)
        s = scan_triadic(D, ALPHA_A, K_RANGE, DENOM_POWER_P)
        b = s["best"]
        # NOTE: "alpha_star" is already part of **b; the explicit key repeats the same value.
        return {"sequence": tag, **b, "alpha_star": b["alpha_star"]}
    seq_results = []
    seq_results.append(seq_report("F",  F_bits))
    seq_results.append(seq_report("R",  R_bits))
    seq_results.append(seq_report("Fᵣ", Fr_bits))
    seq_results.append(seq_report("Rᵣ", Rr_bits))
    seq_results.append(seq_report("TM", TM_bits))

    # 4) Base sensitivity (map bits as base-B digits, still scale by m/10^p)
    base_rows = []
    for B in BASES_TO_TEST:
        D_B = bits_to_decimal_digit_number(F_bits, base=B)
        sB  = scan_triadic(D_B, ALPHA_A, K_RANGE, DENOM_POWER_P)
        bB  = sB["best"]
        base_rows.append({"base": B, **bB, "alpha_star": bB["alpha_star"]})

    # 5) Prefix scalogram at fixed m=729 (k=6), base-10
    print("\n--- Prefix scalogram (residual vs N) ---")
    # Prefix lengths are hard-coded here and do not depend on N_DIGITS.
    Ns = [100, 200, 300, 400, 600, 800, 1000, 1500, 2000]
    scal_rows = []
    for n in Ns:
        F_n = fib_word_bits(n)
        Dn  = bits_to_decimal_digit_number(F_n, base=10)
        a_star = alpha_star_from_D(Dn, 3**6, DENOM_POWER_P)
        err    = a_star - ALPHA_A                               # signed residual
        sub    = err / delta_one_click(Dn, DENOM_POWER_P)       # residual in one-click units (not wrapped)
        scal_rows.append({"N": n, "alpha_star": a_star, "residual": err, "substep": sub})
        print(f"N={n:4d}: residual={err:.3E}, substep={sub:.6f}")

    # 6) Monte Carlo — density-matched
    print("\n--- Monte Carlo: density-matched ---")
    density_p = float(np.mean(F_bits))  # fraction of 1s in F prefix
    D_target  = DF
    best_error_F = bestF["abs_error"]   # observed statistic both nulls are compared against
    mc_density_errors = []
    m_list = triadic_m_list(K_RANGE)
    # NOTE(review): D_click is assigned but never read afterwards in this function.
    D_click = delta_one_click(D_target, DENOM_POWER_P)

    for i in range(MC_DENSITY_N):
        # i.i.d. Bernoulli(density_p) bits drawn from NumPy's global RNG.
        rnd = (np.random.rand(N_DIGITS) < density_p).astype(int).tolist()
        D_r = bits_to_decimal_digit_number(rnd, base=10)
        # Same statistic as for F: smallest |α* − α| over all triadic multipliers.
        errs = [abs(alpha_star_from_D(D_r, m, DENOM_POWER_P) - ALPHA_A) for m in m_list]
        mc_density_errors.append(min(errs))
        if (i+1) % 1000 == 0:
            print(f"  density MC {i+1}/{MC_DENSITY_N}")

    # Empirical p-value: share of null draws whose best error is at least as small as F's.
    pval_density = float(np.mean(np.array(mc_density_errors) <= best_error_F))

    # 7) Monte Carlo — run-length-matched
    print("\n--- Monte Carlo: run-length-matched ---")
    # Separate empirical 0-runs and 1-runs from F prefix
    runs = []                       # list of (symbol, run length) pairs
    cur = F_bits[0]; cnt = 1
    for b in F_bits[1:]:
        if b == cur: cnt += 1
        else:
            runs.append((cur, cnt))
            cur = b; cnt = 1
    runs.append((cur, cnt))
    rl0 = [l for (sym,l) in runs if sym==0]
    rl1 = [l for (sym,l) in runs if sym==1]
    # Fall back to a single run of length 1 if one symbol never occurs.
    if not rl0: rl0=[1]
    if not rl1: rl1=[1]

    mc_runlen_errors = []
    for i in range(MC_RUNLEN_N):
        # Random sequence with F's run-length distribution; starts with 0 with probability 1 − density_p.
        rnd_bits = empirical_runlength_sampler(N_DIGITS, rl0, rl1, p0start=1.0 - density_p)
        D_r = bits_to_decimal_digit_number(rnd_bits, base=10)
        errs = [abs(alpha_star_from_D(D_r, m, DENOM_POWER_P) - ALPHA_A) for m in m_list]
        mc_runlen_errors.append(min(errs))
        if (i+1) % 500 == 0:
            print(f"  runlen MC {i+1}/{MC_RUNLEN_N}")

    # Same empirical p-value definition as for the density-matched null.
    pval_runlen = float(np.mean(np.array(mc_runlen_errors) <= best_error_F))

    # 8) Simple transforms of DF (sanity/invariance probes)
    transform_rows = []
    def triadic_best_for(value: float, tag: str):
        # Scan one transformed value against ALPHA_A and append its best row to transform_rows.
        s = scan_triadic(value, ALPHA_A, K_RANGE, DENOM_POWER_P)
        b = s["best"]
        transform_rows.append({"transform": tag, **b, "alpha_star": b["alpha_star"]})

    triadic_best_for(DF, "DF")
    triadic_best_for(math.sqrt(DF), "sqrt_DF")
    triadic_best_for(DF**0.25, "fourth_DF")
    triadic_best_for(DF/3.0, "DF_div3")  # should shift k by +1 in principle

    # ---------- Save tables ----------
    # Sequence specificity
    df_seq = pd.DataFrame(seq_results)
    df_seq.to_csv(f"{OUTDIR}/sequence_specificity.csv", index=False)

    # Base head-to-head
    df_base = pd.DataFrame(base_rows)
    df_base.to_csv(f"{OUTDIR}/baseB_headtohead.csv", index=False)

    # Prefix scalogram
    df_scal = pd.DataFrame(scal_rows)
    df_scal.to_csv(f"{OUTDIR}/prefix_scalogram.csv", index=False)

    # Monte Carlo distributions
    pd.DataFrame({"min_error": mc_density_errors}).to_csv(f"{OUTDIR}/mc_density_errors.csv", index=False)
    pd.DataFrame({"min_error": mc_runlen_errors}).to_csv(f"{OUTDIR}/mc_runlength_errors.csv", index=False)

    # Transforms
    df_tr = pd.DataFrame(transform_rows)
    df_tr.to_csv(f"{OUTDIR}/transforms_results.csv", index=False)

    # Full scan for F (per k)
    scan_rows = []
    for k, m, e, a in zip(K_RANGE, scanF_A["m_list"], scanF_A["errors"], scanF_A["alpha_star_list"]):
        # Cast NumPy scalars to plain floats before building the table.
        scan_rows.append({"k": k, "m": m, "abs_error": float(e), "alpha_star": float(a)})
    pd.DataFrame(scan_rows).to_csv(f"{OUTDIR}/scan_triadic_F_base10.csv", index=False)

    # ---------- Plots ----------
    # Every figure is saved to OUTDIR and closed immediately afterwards.
    # 1) Error vs k for F (base-10)
    plt.figure(figsize=(6,4))
    plt.plot(K_RANGE, scanF_A["errors"], marker='o')
    plt.xlabel("k (m=3^k)")
    plt.ylabel("|α* - α|")
    plt.title("Triadic scan (F, base-10)")
    plt.grid(True, alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_scan_3k_base10.png", dpi=160); plt.close()

    # 2) Monte Carlo histograms (dashed vertical line marks the observed F error)
    plt.figure(figsize=(6,4))
    plt.hist(mc_density_errors, bins=50)
    plt.axvline(best_error_F, linestyle='--')
    plt.xlabel("Best |α* - α| (density-matched)")
    plt.ylabel("count")
    plt.title(f"MC density-matched (N={MC_DENSITY_N})")
    plt.grid(True, alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_mc_density.png", dpi=160); plt.close()

    plt.figure(figsize=(6,4))
    plt.hist(mc_runlen_errors, bins=50)
    plt.axvline(best_error_F, linestyle='--')
    plt.xlabel("Best |α* - α| (runlength-matched)")
    plt.ylabel("count")
    plt.title(f"MC runlength-matched (N={MC_RUNLEN_N})")
    plt.grid(True, alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_mc_runlength.png", dpi=160); plt.close()

    # 3) Base head-to-head (bar)
    plt.figure(figsize=(6,4))
    x = np.arange(len(df_base))
    plt.bar(x, df_base["abs_error"])
    plt.xticks(x, [str(b) for b in df_base["base"]])
    plt.xlabel("base B (digits interpreted in base B)")
    plt.ylabel("best |α* - α|")
    plt.title("Base sensitivity (F)")
    plt.grid(True, axis='y', alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_baseB_headtohead.png", dpi=160); plt.close()

    # 4) Invariance plot (sequence variants)
    plt.figure(figsize=(7,4))
    seq_order = ["F","R","Fᵣ","Rᵣ","TM"]
    # Look up each sequence's best error by its tag so the bars follow seq_order.
    vals = [float(df_seq[df_seq["sequence"]==s]["abs_error"].iloc[0]) for s in seq_order]
    plt.bar(np.arange(len(seq_order)), vals)
    plt.xticks(np.arange(len(seq_order)), seq_order)
    plt.ylabel("best |α* - α|")
    plt.title("Sequence specificity (base-10)")
    plt.grid(True, axis='y', alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_invariance.png", dpi=160); plt.close()

    # 5) Prefix scalogram
    plt.figure(figsize=(6,4))
    plt.plot(df_scal["N"], df_scal["residual"], marker='o')
    plt.xlabel("prefix length N")
    plt.ylabel("α* - α")
    plt.title("Prefix scalogram at m=729 (k=6)")
    plt.grid(True, alpha=0.3)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_prefix_scalogram.png", dpi=160); plt.close()

    # 6) α* at best points for comparison (text plot)
    # The headline numbers are rendered as text into an axis-less figure.
    plt.figure(figsize=(6,4))
    txt = []
    txt.append(f"Digits used (F): N={N_DIGITS}")
    txt.append(f"DF + DR − 1/9 ≈ {complement_check:.3E}  <-- complement identity")
    txt.append(f"α used (A)  = {ALPHA_A}")
    txt.append(f"α used (B)  = {ALPHA_B}")
    txt.append(f"Best k (F, base-10) = {bestF['k_best']}  m={bestF['m_best']}")
    txt.append(f"|α*−α| ≈ {bestF['abs_error']:.3E}, sub-step ~ {bestF['substep_within_one_click']:.6f}")
    txt.append(f"MC density p ≈ {pval_density:.4f}, MC run-length p ≈ {pval_runlen:.4f}")
    plt.axis('off')
    plt.text(0.05, 0.95, "\n".join(txt), va='top', fontsize=10)
    plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_summary_text.png", dpi=160); plt.close()

    # ---------- Summary JSON ----------
    # One machine-readable record of the configuration, the headline results,
    # the per-section tables and the names of the files written above.
    summary = {
        "N_digits": N_DIGITS,
        "alpha_used_A": ALPHA_A,
        "alpha_used_B": ALPHA_B,
        "DF_plus_DR_minus_one_ninth": complement_check,
        "scan_F_base10": {
            "k_best": bestF["k_best"],
            "m_best": bestF["m_best"],
            "abs_error": bestF["abs_error"],
            "alpha_star": bestF["alpha_star"],
            "substep_within_one_click": bestF["substep_within_one_click"]
        },
        "sequence_specificity": seq_results,
        "baseB_headtohead": base_rows,
        "prefix_scalogram": scal_rows,
        "mc_density": {
            "n": MC_DENSITY_N,
            "p_value": pval_density,
            "median_error": float(np.median(mc_density_errors)),
            "p5": float(np.percentile(mc_density_errors, 5)),
            "p95": float(np.percentile(mc_density_errors, 95))
        },
        "mc_runlength": {
            "n": MC_RUNLEN_N,
            "p_value": pval_runlen,
            "median_error": float(np.median(mc_runlen_errors)),
            "p5": float(np.percentile(mc_runlen_errors, 5)),
            "p95": float(np.percentile(mc_runlen_errors, 95))
        },
        "transforms": transform_rows,
        "files": {
            "scan_tri_F": "scan_triadic_F_base10.csv",
            "sequence_specificity": "sequence_specificity.csv",
            "baseB_headtohead": "baseB_headtohead.csv",
            "prefix_scalogram": "prefix_scalogram.csv",
            "mc_density": "mc_density_errors.csv",
            "mc_runlength": "mc_runlength_errors.csv",
            "transforms": "transforms_results.csv",
            "plots": [
                "plot_scan_3k_base10.png",
                "plot_mc_density.png",
                "plot_mc_runlength.png",
                "plot_baseB_headtohead.png",
                "plot_invariance.png",
                "plot_prefix_scalogram.png",
                "plot_summary_text.png"
            ]
        }
    }
    with open(f"{OUTDIR}/summary.json","w") as f:
        json.dump(summary, f, indent=2)

    # Final human-readable recap of the headline numbers.
    print("\n=== REPORT ===")
    print(f"Digits used (F): N = {N_DIGITS}")
    print(f"DF + DR − 1/9  ≈ {complement_check:.3E}   <-- complement identity check")
    print(f"α (A) = {ALPHA_A} | α (B) = {ALPHA_B}")
    print(f"Best triadic (F, base-10): k={bestF['k_best']} (m={bestF['m_best']}) |Δ|≈{bestF['abs_error']:.3E}")
    print(f"Sub-step (within one click) ≈ {bestF['substep_within_one_click']:.6f}")
    print(f"MC density: n={MC_DENSITY_N}, p≈{pval_density:.4f}")
    print(f"MC run-length: n={MC_RUNLEN_N}, p≈{pval_runlen:.4f}")
    print(f"\nSaved CSVs & figures to: {OUTDIR}")

if __name__ == "__main__":
    main()


=== BINARY → DECIMAL EMERGENCE LAB (π-free, triadic) ===
Digits (F prefix) N = 1000
DF + DR − 1/9 ≈ 0.000E+00   <-- exact identity in theory

--- Triadic scan for F (base-10) ---
α (A) = 0.0072973525643
Best k (base-10) = 6  m = 729  |Δ| = 1.041E-08
Sub-step within one click = 0.001040
α* (at best m) = 0.007297362972973630
α (B) = 0.0072973525693
Best k vs B = 6 (|Δ|=1.040E-08)

--- Prefix scalogram (residual vs N) ---
N= 100: residual=1.041E-08, substep=0.001040
N= 200: residual=1.041E-08, substep=0.001040
N= 300: residual=1.041E-08, substep=0.001040
N= 400: residual=1.041E-08, substep=0.001040
N= 600: residual=1.041E-08, substep=0.001040
N= 800: residual=1.041E-08, substep=0.001040
N=1000: residual=1.041E-08, substep=0.001040
N=1500: residual=1.041E-08, substep=0.001040
N=2000: residual=1.041E-08, substep=0.001040

--- Monte Carlo: density-matched ---
  density MC 1000/8000
  density MC 2000/8000
  density MC 3000/8000
  density MC 4000/8000
  density MC 5000/8000
  density MC 6000/8