In [1]:
# ================================================================
#  BINARY → DECIMAL EMERGENCE: RIGOROUS STRESS TEST (Colab-ready)
#  - Verifies decimal-digit translation + exact 1/9 complement law
#  - Reproduces 729 (=3^6) extremum for α under a fixed rule
#  - Strong MC controls (density- and run-length-matched)
#  - Base-B head-to-head: B ∈ {7,8,9,10,12} with identical constraints
#  - Sequence specificity: Fibonacci vs complements/reverses vs Thue–Morse
#  - Small pre-registered transforms: sqrt, fourth-root, divide-by-3
#  - Clean CSVs/plots + JSON summary in /content/out
# ================================================================

import os, math, random, json
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ------------------------- CONFIG -------------------------------
# Output directory for every CSV, PNG and the JSON summary; created eagerly at import time.
OUTDIR = "/content/out"
os.makedirs(OUTDIR, exist_ok=True)

# Core settings
N_DIGITS = 1000           # length of the Fibonacci-word prefix whose 0/1 symbols are read as fractional digits
# Two reference values for α; every error in this script is measured against ALPHA_1,
# ALPHA_2 is only used for the extra error column and the sign-stability check.
# NOTE(review): these look like two different CODATA recommended values of the
# fine-structure constant — confirm which edition each one comes from.
ALPHA_1 = Decimal("0.0072973525643")  # primary reference α
ALPHA_2 = Decimal("0.0072973525693")  # alternative reference α (sign-stability check)
P_DEN = 3                 # fixed shift: the multiplier m is divided by 10^P_DEN (or B^P_DEN in the base-B test)
K_MIN, K_MAX = 0, 12      # inclusive range of k scanned in m = 3^k
BASES = [7, 8, 9, 10, 12] # bases compared in the base-B head-to-head (same k range, same P_DEN)
SEED = 137                # seed for both `random` and `numpy.random`
MC_DENSITY_SAMPLES   = 8000   # number of density-matched Monte Carlo controls
MC_RUNLENGTH_SAMPLES = 3000   # number of run-length-matched Monte Carlo controls

# Seed both RNGs: the density control draws from numpy.random, the run-length control from random.
random.seed(SEED)
np.random.seed(SEED)
getcontext().prec = 2000  # Decimal working precision (significant digits); comfortably above N_DIGITS

# --------------------- UTILITY: SEQUENCES -----------------------
def fib_word_prefix(N: int) -> str:
    """Return the first N symbols of the Fibonacci word 0100101001...

    The word is grown by the concatenation recurrence S(n+1) = S(n) + S(n-1),
    starting from S(0) = "0" and S(1) = "01", until it is at least N long,
    and is then truncated to exactly N characters.
    """
    older, newer = "0", "01"
    while len(newer) < N:
        # Each step appends the previous-but-one word to the latest word.
        older, newer = newer, newer + older
    return newer[:N]

def complement_bits(bits: str) -> str:
    """Swap every '0' with '1' and vice versa; any other character is kept as is."""
    swap = {"0": "1", "1": "0"}
    return "".join(swap.get(ch, ch) for ch in bits)

def reverse_bits(bits: str) -> str:
    """Return the string with its characters in reverse order."""
    return "".join(reversed(bits))

def thue_morse_prefix(N: int) -> str:
    """Return the first N symbols of the Thue–Morse word.

    Starts from "0" and repeatedly applies the morphism μ: 0→01, 1→10 until the
    word is at least N characters long, then truncates to exactly N.
    For N <= 1 no substitution is applied and the result is "0"[:N].
    """
    # translate() rewrites both symbols in one pass, so no placeholder
    # characters are needed to keep the two substitutions from interfering.
    morphism = str.maketrans({"0": "01", "1": "10"})
    word = "0"
    while len(word) < N:
        word = word.translate(morphism)
    return word[:N]

# ---------------- DECIMAL-DIGIT TRANSLATION (base B) ------------
def digits_to_decimal(bits: str, base: int) -> Decimal:
    """
    Read a 0/1 string as the fractional digits 0.b1 b2 b3 ... written in base `base`
    and return the value as a Decimal.

    Evaluated Horner-style from the last digit to the first, dividing by the base
    at each step, so no large power of the base is ever formed. Any character
    other than '1' counts as the digit 0. Precision follows the active Decimal context.
    """
    radix = Decimal(base)
    zero, unit = Decimal(0), Decimal(1)
    total = zero
    for symbol in bits[::-1]:
        digit = unit if symbol == '1' else zero
        total = (total + digit) / radix
    return total

# ----------------------- CORE OBJECTS ---------------------------
# The four 0/1 strings compared throughout: the Fibonacci prefix, its bitwise
# complement, and the reversals of both.
F_bits  = fib_word_prefix(N_DIGITS)
R_bits  = complement_bits(F_bits)         # "Rabbit" sequence: bitwise complement of F
F_rbits = reverse_bits(F_bits)
R_rbits = reverse_bits(R_bits)

# Base-10 readings: each 0/1 symbol becomes one decimal digit after the point.
DF  = digits_to_decimal(F_bits, 10)       # 0.0100101...
DR  = digits_to_decimal(R_bits, 10)       # complement-as-digits
one_ninth = Decimal(1) / Decimal(9)
# Because R is the digit-wise complement of F, DF + DR = 0.111...1 (N_DIGITS ones)
# = (1 - 10^-N_DIGITS) / 9, so delta_1_9 = -(10^-N_DIGITS) / 9 up to Decimal rounding.
# This holds for ANY 0/1 string and its complement, not only for the Fibonacci word.
delta_1_9 = (DF + DR) - one_ninth         # tends to 0 as N_DIGITS grows

# ---------------------- α* COMPUTATIONS -------------------------
def alpha_star_from(DD: Decimal, m: int, p: int) -> Decimal:
    """Scale DD by the fixed base-10 factor: α* = (m / 10^p) * DD."""
    denominator = Decimal(10) ** p
    factor = Decimal(m) / denominator
    return factor * DD

def alpha_star_baseB(DD_B: Decimal, m: int, p: int, B: int) -> Decimal:
    """Scale DD_B by the base-B analogue of the factor: α*_B = (m / B^p) * D_B."""
    denominator = Decimal(B) ** p
    factor = Decimal(m) / denominator
    return factor * DD_B

# -------------- SCAN m = 3^k (base-10, identical rule) ----------
# Scan m = 3^k for k in [K_MIN, K_MAX] and record α* = (m / 10^P_DEN) * DF together
# with its absolute distance to both reference values of α.
scan_rows = []
for k in range(K_MIN, K_MAX+1):
    m = 3**k
    # α* does not depend on the reference α, so it is computed once per k and
    # compared against both ALPHA_1 and ALPHA_2.
    a1 = alpha_star_from(DF, m, P_DEN)
    e1 = abs(a1 - ALPHA_1)
    e2 = abs(a1 - ALPHA_2)
    scan_rows.append((k, m, a1, e1, e2))
scan_df = pd.DataFrame(scan_rows, columns=["k","m","alpha_star","abs_err_to_ALPHA1","abs_err_to_ALPHA2"])
scan_df.to_csv(f"{OUTDIR}/scan_3k_base10.csv", index=False)

# Best rung = the k with the smallest error against ALPHA_1 (the errors are Decimal objects).
best_row = scan_df.sort_values("abs_err_to_ALPHA1").iloc[0]
best_k, best_err = int(best_row.k), Decimal(best_row.abs_err_to_ALPHA1)

# -------------- INVARIANCE: complement/reverse (base-10) --------
def best_k_for(bits: str):
    """Find the k in [K_MIN, K_MAX] whose α* = (3^k / 10^P_DEN) * D(bits) is closest to ALPHA_1.

    Returns a dict with the winning k, the multiplier 3^k, the absolute error
    (as float) and α* itself (as a string, to keep all its digits).
    On ties the smallest k wins.
    """
    value = digits_to_decimal(bits, 10)
    alpha_by_k = {
        k: alpha_star_from(value, 3**k, P_DEN) for k in range(K_MIN, K_MAX+1)
    }
    error_by_k = {k: abs(a - ALPHA_1) for k, a in alpha_by_k.items()}
    winner = min(error_by_k, key=error_by_k.get)
    return {
        "k_best": int(winner),
        "m_best": int(3**winner),
        "abs_error": float(error_by_k[winner]),
        "alpha_star": str(alpha_by_k[winner]),
    }

# Best rung for the Fibonacci prefix, its complement, and both reversals.
invariance = {
    label: best_k_for(sequence)
    for label, sequence in (
        ("F", F_bits),
        ("R", R_bits),
        ("Fᵣ", F_rbits),
        ("Rᵣ", R_rbits),
    )
}
# One CSV row per sequence: its label followed by the fields returned by best_k_for.
pd.DataFrame(
    [{"sequence": label, **stats} for label, stats in invariance.items()]
).to_csv(f"{OUTDIR}/invariance_results.csv", index=False)

# ----------------------- MONTE CARLO ----------------------------
# We evaluate the α* error at the specific rung k=6 (m=729) with fixed denominator 10^3
# Float64 setup for the Monte Carlo controls: everything here is evaluated at the
# single rung m = 3^6 with the fixed denominator 10^P_DEN.
m_star = 3**6
alpha1_float = float(ALPHA_1)
# Precompute weights for fast dot: value = sum_i bits[i] * (m / 10^(P_DEN + i))
# NOTE: in float64 these weights shrink by 10x per position, so only the leading
# positions affect the sum and the far tail underflows to 0.0.
i_idx = np.arange(1, N_DIGITS+1, dtype=np.float64)
w = (m_star) * np.power(10.0, -(P_DEN + i_idx))
# Turn the '0'/'1' characters into a uint8 array of 0/1 values.
F_arr = np.frombuffer(F_bits.encode('ascii'), dtype=np.uint8) - ord('0')
# Despite the name, this is the float α* for F (the weights already include m / 10^P_DEN), not D(F) itself.
DF_val_float = float((F_arr * w).sum())
# Reference error that every Monte Carlo control is compared against.
DF_err_float = abs(DF_val_float - alpha1_float)

# --- Density-matched controls: same number of ones ---
def mc_density_errors(n_samples: int, ones: int, wvec: np.ndarray):
    """Absolute α* errors for random 0/1 strings that contain exactly `ones` ones.

    Each sample picks `ones` distinct positions uniformly at random (numpy's
    global RNG), sums the weights at those positions, and records the distance
    to the module-level `alpha1_float`. Returns a float64 array of length n_samples.
    """
    n_positions = wvec.shape[0]
    errors = np.empty(n_samples, dtype=np.float64)
    for trial in range(n_samples):
        chosen = np.random.choice(n_positions, size=ones, replace=False)
        errors[trial] = abs(wvec[chosen].sum() - alpha1_float)
    return errors

# --- Run-length-matched controls: same run lengths, permuted ---
def run_lengths(bits: str):
    """Split `bits` into maximal runs of equal characters.

    Returns a tuple (lengths, first_char): `lengths` lists the run lengths in
    order of appearance and `first_char` is the character of the first run.
    An empty input yields ([], "") instead of raising IndexError.
    """
    from itertools import groupby  # local import keeps this block self-contained

    # groupby yields one group per maximal run of identical characters.
    lengths = [sum(1 for _ in run) for _, run in groupby(bits)]
    # Slicing (rather than indexing) makes the empty string return "" safely.
    return lengths, bits[:1]

def bits_from_runs(runlens, start_char, total_len):
    """Rebuild a 0/1 array of length `total_len` from alternating run lengths.

    The first run takes the value 1 if `start_char` is '1', otherwise 0, and
    each following run flips the bit. Runs that extend past `total_len` are
    truncated. If the runs cover fewer than `total_len` positions the remaining
    tail is 0 (the buffer is zero-initialised, so the result never contains
    uninitialised memory). Returns a uint8 numpy array.
    """
    out = np.zeros(total_len, dtype=np.uint8)
    bit = 1 if start_char == '1' else 0
    pos = 0
    for rl in runlens:
        if pos >= total_len:
            # Buffer already full; later runs cannot contribute.
            break
        out[pos:pos+rl] = bit  # slice assignment clips at the end of the array
        pos += rl
        bit ^= 1
    return out

# Run-length profile of the Fibonacci prefix (lengths in order, plus its first character)
# and its total number of ones; these parameterise the two Monte Carlo controls.
L_runs, start_c = run_lengths(F_bits)
ones_count = int(F_arr.sum())

def mc_runlength_errors(n_samples: int, runlens: list, start_char: str, wvec: np.ndarray):
    """Absolute α* errors for 0/1 strings built from a shuffled multiset of run lengths.

    A private copy of `runlens` is shuffled in place before every sample (using
    the `random` module's global RNG, so each shuffle starts from the previous
    order), turned back into a bit array of len(wvec) positions that starts with
    `start_char`, and weighted by `wvec`. The distance to the module-level
    `alpha1_float` is recorded. Returns a float64 array of length n_samples.
    """
    n_positions = len(wvec)
    shuffled = list(runlens)  # never mutate the caller's list
    errors = np.empty(n_samples, dtype=np.float64)
    for trial in range(n_samples):
        random.shuffle(shuffled)
        candidate = bits_from_runs(shuffled, start_char, n_positions)
        weighted_sum = float((candidate * wvec).sum())
        errors[trial] = abs(weighted_sum - alpha1_float)
    return errors

# Draw both control ensembles (density-matched first, then run-length-matched).
mc_density = mc_density_errors(n_samples=MC_DENSITY_SAMPLES, ones=ones_count, wvec=w)
mc_runlens = mc_runlength_errors(
    n_samples=MC_RUNLENGTH_SAMPLES, runlens=L_runs, start_char=start_c, wvec=w
)

# Empirical p-value: share of controls that land at least as close to α as the Fibonacci prefix.
p_density = float((mc_density <= DF_err_float).mean())
p_runlens = float((mc_runlens <= DF_err_float).mean())

# Persist the raw control errors, one column named "abs_error" per file.
pd.DataFrame(mc_density, columns=["abs_error"]).to_csv(f"{OUTDIR}/mc_density_errors.csv", index=False)
pd.DataFrame(mc_runlens, columns=["abs_error"]).to_csv(f"{OUTDIR}/mc_runlength_errors.csv", index=False)

# -------------------- BASE-B HEAD-TO-HEAD -----------------------
# Identical constraints: α*_B(k) = (3^k / B^P_DEN) * D_B(F); scan k∈[K_MIN,K_MAX]; record best error
# For every base B: read the Fibonacci prefix as base-B digits, scan k over the
# same range as the base-10 scan, and keep the rung closest to ALPHA_1.
base_rows = []
for B in BASES:
    D_BF = digits_to_decimal(F_bits, B)
    candidates = [
        (k, 3**k, alpha_star_baseB(D_BF, 3**k, P_DEN, B))
        for k in range(K_MIN, K_MAX+1)
    ]
    # min() keeps the first minimum, i.e. the smallest k on a tie.
    k_best, m_best, v_best = min(candidates, key=lambda c: abs(c[2] - ALPHA_1))
    e_best = abs(v_best - ALPHA_1)
    base_rows.append((B, int(k_best), int(m_best), float(e_best), str(v_best)))
base_df = pd.DataFrame(base_rows, columns=["base","best_k","best_m","best_abs_error","alpha_star"])
base_df.to_csv(f"{OUTDIR}/baseB_headtohead.csv", index=False)

# ----------------- SEQUENCE SPECIFICITY (base-10) ----------------
def best_error_for_sequence(bits: str):
    """Best 3^k rung for a 0/1 string read as base-10 digits.

    Scans k in [K_MIN, K_MAX], evaluates α* = (3^k / 10^P_DEN) * D(bits) and
    picks the k with the smallest |α* − ALPHA_1| (smallest k on ties).
    Returns a dict with k, 3^k, the error as float and α* as a string.
    """
    value = digits_to_decimal(bits, 10)

    def error_at(k):
        return abs(alpha_star_from(value, 3**k, P_DEN) - ALPHA_1)

    k_best = min(range(K_MIN, K_MAX+1), key=error_at)
    m_best = 3**k_best
    v_best = alpha_star_from(value, m_best, P_DEN)
    return {
        "k_best": int(k_best),
        "m_best": int(m_best),
        "abs_error": float(abs(v_best - ALPHA_1)),
        "alpha_star": str(v_best),
    }

# Thue–Morse prefix of the same length serves as an unrelated comparison sequence.
TM_bits = thue_morse_prefix(N_DIGITS)

seq_spec = {
    label: best_error_for_sequence(sequence)
    for label, sequence in (
        ("F", F_bits),
        ("R", R_bits),
        ("Fᵣ", F_rbits),
        ("Rᵣ", R_rbits),
        ("TM", TM_bits),
    )
}
# One CSV row per sequence: its label followed by the best-rung fields.
pd.DataFrame(
    [{"sequence": label, **stats} for label, stats in seq_spec.items()]
).to_csv(f"{OUTDIR}/sequence_specificity.csv", index=False)

# ------------- SMALL PRE-REGISTERED TRANSFORMS (base-10) --------
# No new knobs; apply to DF only, compare across k.
def best_error_for_value(DD: Decimal):
    """Best 3^k rung for an already-computed Decimal value.

    Same scan as for the sequences: k in [K_MIN, K_MAX], α* = (3^k / 10^P_DEN) * DD,
    minimising |α* − ALPHA_1| (smallest k on ties). Returns a dict with k, 3^k,
    the error as float and α* as a string.
    """
    rungs = ((k, 3**k) for k in range(K_MIN, K_MAX+1))
    scored = [
        (k, m, alpha_star_from(DD, m, P_DEN)) for k, m in rungs
    ]
    k_best, m_best, v_best = min(scored, key=lambda rung: abs(rung[2] - ALPHA_1))
    return {
        "k_best": int(k_best),
        "m_best": int(m_best),
        "abs_error": float(abs(v_best - ALPHA_1)),
        "alpha_star": str(v_best),
    }

def dec_sqrt(x: Decimal) -> Decimal:
    """Square root of a Decimal, computed at the active Decimal context's precision."""
    root = x.sqrt()
    return root

# Fixed set of transforms of DF; each is run through the same 3^k scan.
fourth_root_DF = dec_sqrt(dec_sqrt(DF))
transforms = {
    "DF"       : DF,
    "sqrt_DF"  : dec_sqrt(DF),
    "fourth_DF": fourth_root_DF,
    "DF_div3"  : DF / Decimal(3),
}
# One result row per transform: its name followed by the best-rung fields.
X_rows = [
    {"transform": label, **best_error_for_value(candidate)}
    for label, candidate in transforms.items()
]
pd.DataFrame(X_rows).to_csv(f"{OUTDIR}/transforms_results.csv", index=False)

# --------------- STABILITY AGAINST α UPDATES --------------------
# α* at the k=6 rung (m = 729) and the sign of its offset from each reference α.
# Note: an offset of exactly zero is reported as "negative".
alpha_star_729 = alpha_star_from(DF, 3**6, P_DEN)
sign1 = "positive" if (alpha_star_729 - ALPHA_1) > 0 else "negative"
sign2 = "positive" if (alpha_star_729 - ALPHA_2) > 0 else "negative"

# --------------------------- PLOTS -------------------------------
# Plot 1: absolute error against ALPHA_1 for every k in the base-10 scan,
# with the best rung highlighted by a marker.
plt.figure(figsize=(7,4))
plt.title("Error | (3^k / 10^3) · D(F) − α |  (base-10)")
plt.plot(scan_df["k"], scan_df["abs_err_to_ALPHA1"])
plt.scatter([best_k], [float(best_err)], s=40)
plt.xlabel("k in 3^k"); plt.ylabel("absolute error")
plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_scan_3k_base10.png", dpi=220); plt.close()

# Plot 2: histogram of the density-matched control errors; the dashed line
# marks the (float64) error of the Fibonacci prefix itself.
plt.figure(figsize=(7,4))
plt.title("Monte Carlo (density-matched): errors vs DF (k=6)")
plt.hist(mc_density, bins=40, edgecolor="black", alpha=0.7)
plt.axvline(DF_err_float, linestyle="--")
plt.xlabel("abs error"); plt.ylabel("count")
plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_mc_density.png", dpi=220); plt.close()

# Plot 3: same histogram for the run-length-matched controls.
plt.figure(figsize=(7,4))
plt.title("Monte Carlo (run-length-matched): errors vs DF (k=6)")
plt.hist(mc_runlens, bins=40, edgecolor="black", alpha=0.7)
plt.axvline(DF_err_float, linestyle="--")
plt.xlabel("abs error"); plt.ylabel("count")
plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_mc_runlength.png", dpi=220); plt.close()

# Plot 4: best achievable error per base from the base-B head-to-head table.
plt.figure(figsize=(7,4))
plt.title("Base-B head-to-head (best achievable error)")
plt.plot(base_df["base"], base_df["best_abs_error"], marker="o")
plt.xlabel("base B"); plt.ylabel("best abs error"); plt.grid(True, alpha=0.3)
plt.tight_layout(); plt.savefig(f"{OUTDIR}/plot_baseB_headtohead.png", dpi=220); plt.close()

# Plot 5: best absolute error per sequence (F, R and their reversals), one bar
# each, annotated with the winning k.
inv_tbl = pd.DataFrame(
    [{"sequence":k, **v} for k,v in invariance.items()]
).sort_values("sequence")
plt.figure(figsize=(7,4))
plt.title("Flip/Reverse invariance (best k and error) [base-10]")
plt.bar(inv_tbl["sequence"], inv_tbl["abs_error"])
# Categorical bars are drawn at x = 0, 1, 2, ... in the (sorted) row order, so the
# labels must be placed by row position. The DataFrame index still carries the
# pre-sort labels and would attach the k-annotations to the wrong bars.
for pos, (err, k_val) in enumerate(zip(inv_tbl["abs_error"], inv_tbl["k_best"])):
    plt.text(pos, err, f'k={k_val}', ha="center", va="bottom", fontsize=9)
plt.ylabel("best absolute error"); plt.tight_layout()
plt.savefig(f"{OUTDIR}/plot_invariance.png", dpi=220); plt.close()

# -------------------------- SUMMARY -----------------------------
# Machine-readable summary of every result above. Decimal values are stored as
# strings (full digits) or as 3-significant-digit scientific notation, since
# json cannot serialise Decimal directly.
summary = {
    "N_digits": N_DIGITS,
    "alpha_used_A": str(ALPHA_1),
    "alpha_used_B": str(ALPHA_2),
    "DF_plus_DR_minus_one_ninth": f"{delta_1_9:.3E}",
    "alpha_star_at_729": str(alpha_star_729),
    "abs_error_at_729_vs_A": f"{abs(alpha_star_729 - ALPHA_1):.3E}",
    "abs_error_at_729_vs_B": f"{abs(alpha_star_729 - ALPHA_2):.3E}",
    "sign_of(alpha*_729 - A)": sign1,
    "sign_of(alpha*_729 - B)": sign2,
    "scan_best_k_base10": best_k,
    "scan_best_error_base10": f"{best_err:.3E}",
    "invariance": invariance,
    # Monte Carlo blocks: sample count, empirical p-value, and the median /
    # 5th / 95th percentile of the control errors.
    "mc_density": {
        "n": MC_DENSITY_SAMPLES,
        "p_value": p_density,
        "median_error": float(np.median(mc_density)),
        "p5": float(np.percentile(mc_density, 5)),
        "p95": float(np.percentile(mc_density, 95)),
    },
    "mc_runlength": {
        "n": MC_RUNLENGTH_SAMPLES,
        "p_value": p_runlens,
        "median_error": float(np.median(mc_runlens)),
        "p5": float(np.percentile(mc_runlens, 5)),
        "p95": float(np.percentile(mc_runlens, 95)),
    },
    "baseB_headtohead": base_df.to_dict(orient="records"),
    "sequence_specificity": seq_spec,
    "transforms_results": X_rows,
    # File names (relative to OUTDIR) of the artefacts written by this script.
    "files": {
        "scan_3k": "scan_3k_base10.csv",
        "invariance": "invariance_results.csv",
        "mc_density": "mc_density_errors.csv",
        "mc_runlength": "mc_runlength_errors.csv",
        "baseB": "baseB_headtohead.csv",
        "sequence_specificity": "sequence_specificity.csv",
        "transforms": "transforms_results.csv",
        "plots": [
            "plot_scan_3k_base10.png",
            "plot_mc_density.png",
            "plot_mc_runlength.png",
            "plot_baseB_headtohead.png",
            "plot_invariance.png",
        ],
    }
}
# json.dump escapes non-ASCII keys (e.g. "Fᵣ") by default, so the file is plain ASCII.
with open(f"{OUTDIR}/summary.json", "w") as f:
    json.dump(summary, f, indent=2)

# ------------------------- REPORT OUT ---------------------------
# Human-readable recap of the key numbers and tables on stdout.
print("\n=== BINARY → DECIMAL EMERGENCE: RIGOROUS STRESS TEST ===")
print(f"Digits (F prefix): N={N_DIGITS}")
print(f"Check: DF + DR − 1/9 ≈ {delta_1_9:.2E}  (truncation→0 as N↑) <-- exact identity in theory")
print(f"α (A) = {ALPHA_1} | α (B) = {ALPHA_2}")
# `{-3}` below is an f-string expression that renders as "-3"; the exponent is
# hard-coded here and does not follow P_DEN.
print(f"α* at (m=729, 10^{-3}) = {alpha_star_729}")
print(f"|α* − α_A| ≈ {abs(alpha_star_729 - ALPHA_1):.3E} | |α* − α_B| ≈ {abs(alpha_star_729 - ALPHA_2):.3E}")
print(f"Scan best k (base-10) = {best_k} with |Δ| ≈ {best_err:.3E}")
print(f"MC (density): n={MC_DENSITY_SAMPLES}, p≈{p_density:.4f}, med≈{np.median(mc_density):.3E}")
print(f"MC (run-length): n={MC_RUNLENGTH_SAMPLES}, p≈{p_runlens:.4f}, med≈{np.median(mc_runlens):.3E}")
print("\nBase-B head-to-head (best errors):")
print(base_df.to_string(index=False))
print("\nSequence specificity (best k & errors):")
print(pd.DataFrame([{"sequence":k, **v} for k,v in seq_spec.items()]).to_string(index=False))
print("\nTransforms (pre-registered) on DF:")
print(pd.DataFrame(X_rows).to_string(index=False))
print(f"\nSaved CSVs/plots/summary to: {OUTDIR}")



=== BINARY → DECIMAL EMERGENCE: RIGOROUS STRESS TEST ===
Digits (F prefix): N=1000
Check: DF + DR − 1/9 ≈ -1.11E-1001  (truncation→0 as N↑) <-- exact identity in theory
α (A) = 0.0072973525643 | α (B) = 0.0072973525693
α* at (m=729, 10^-3) = 0.00729736297297362973629729736297297362973629729736297362972973629729736297362972973629729736297362972973629736297297362972973629736297297362973629729736297297362973629729736297297362973629729736297362972973629729736297362972973629729736297362972973629736297297362972973629736297297362973629729736297297362973629729736297297362973629729736297362972973629729736297362972973629736297297362972973629736297297362972973629736297297362973629729736297297362973629729736297297362973629729736297362972973629729736297362972973629736297297362972973629736297297362972973629736297297362973629729736297297362973629729736297297362973629729736297362972973629729736297362972973629736297297362972973629736297297362972973629736297297362973629729736297297362973629729736297362