In [1]:
# @title Next‑Gen Emergence Audit (bases 2..12, tier‑2 envelopes, MC nulls)
# @markdown This cell is self‑contained. Run once; outputs are written to `/content/out_ng`.

# ============ CONFIG ============
N_DIGITS        = 4000          # number of digits from each sequence (affects run time)
BASES_TO_SCAN   = list(range(2, 13))   # 2..12 inclusive
K_RANGE         = list(range(0, 13))   # scan k where m = 3^k
SEQ_LIST        = ["F", "R", "TM", "FIBBI"]   # Fibonacci word, Rabbit, Thue–Morse, phi-1-as-binary-digits
ALPHA_REF       = 0.0072973525693     # reference α value (unitless)
# Monte Carlo settings (F sequence, base=10) -- adjust if you want more power
MC_DEN_TRIALS   = 3000          # density-matched null trials
MC_RUN_TRIALS   = 1500          # run-length-shuffled null trials
RNG_SEED        = 7             # reproducibility
# Tier-2 dyadic micro-envelope search (multiplicative factors around each observed triadic α*)
DYADIC_c        = [1, 3]
DYADIC_ell      = list(range(8, 15))  # 8..14
DYADIC_SIGNS    = [-1, +1]
# Transforms to apply to observed α* (helps catch “negative / reciprocal / complement” patterns)
APPLY_TRANSFORMS = True
# =================================

import math, random, statistics, os, textwrap
from fractions import Fraction
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

random.seed(RNG_SEED)
np.random.seed(RNG_SEED)
os.makedirs("/content/out_ng", exist_ok=True)

# High precision arithmetic for small steps / near-equalities
getcontext().prec = 120

# ---------- Sequence generators ----------
def morphic(seed: str, rules: dict, N: int) -> str:
    s = seed
    while len(s) < N:
        s = "".join(rules[ch] for ch in s)
    return s[:N]

def seq_F(N: int) -> str:
    # Fibonacci word from morphism 0->01, 1->0 starting at '0'
    return morphic("0", {"0":"01","1":"0"}, N)

def seq_R(N: int) -> str:
    # Rabbit sequence (complement morphic), start at '1'
    return morphic("1", {"0":"1","1":"10"}, N)

def seq_TM(N: int) -> str:
    # Thue–Morse: parity of 1-bits in index
    # t(n) = 0 if popcount(n) even else 1; produce N symbols for n=0..N-1
    out = []
    for n in range(N):
        out.append("1" if (bin(n).count("1") & 1) else "0")
    return "".join(out)

def seq_FIBBI(N: int) -> str:
    # "phi-1 as binary string" interpreted as *digits* (not base-2 place value):
    # The intended string (as provided earlier): 0.100111100011011101111001101110010111111101001...
    # We use a fixed long prefix repeated as needed to reach N. This is deliberate:
    # it treats the given phi-1 digit stream as an aperiodic decimal-digit source.
    base_stream = ("100111100011011101111001101110010111111101001"
                   "111011011100111011110010111100111011110010111")  # seed chunk
    # If user has a bespoke longer chunk, insert it here or swap this function.
    s = (base_stream * ((N // len(base_stream)) + 2))[:N]
    return s

SEQ_FUNCS = {"F": seq_F, "R": seq_R, "TM": seq_TM, "FIBBI": seq_FIBBI}

# ---------- Decimal-digit evaluation in base B ----------
def digits_value_baseB(bits: str, B: int) -> Decimal:
    # Interpret bits (characters '0'/'1') as base‑B digits after the radix point:
    # value = sum_{i>=1} bit_i * B^{-i}. Use backward Horner for stability.
    DB = Decimal(B)
    v = Decimal(0)
    for ch in reversed(bits):
        v = (v + (1 if ch == "1" else 0)) / DB
    return v

# ---------- Triadic scan ----------
def triadic_scan(bits: str, B: int, alpha_ref: Decimal, k_list):
    D = digits_value_baseB(bits, B)
    results = []
    BB = Decimal(B)**3
    for k in k_list:
        m = Decimal(3)**k
        est = (m / BB) * D
        err = abs(est - alpha_ref)
        results.append((k, int(Decimal(3)**k), D, est, err))
    return results  # list of tuples

# ---------- Tier‑2 dyadic micro‑envelopes around an observed α* ----------
def dyadic_envelopes(alpha_star: Decimal):
    out = []
    for c in DYADIC_c:
        for ell in DYADIC_ell:
            for sgn in DYADIC_SIGNS:
                factor = Decimal(1) + Decimal(sgn) * (Decimal(c) / (Decimal(2)**ell))
                out.append((c, ell, sgn, factor, alpha_star * factor))
    return out

# ---------- Transforms (optional) ----------
def transforms(x: Decimal):
    # symmetric negative allowed, reciprocal and complement supported
    out = [("id", x)]
    if x != 0:
        out.append(("inv", Decimal(1)/x))
    out.append(("one_minus", Decimal(1)-x))
    out.append(("neg", -x))
    # include even scalings (doublings/halvings) to test “even‑tier” interactions
    for p in [1,2,3,4]:
        out.append((f"*2^{p}", x*(2**p)))
        out.append((f"/2^{p}", x/(2**p)))
    return out

# ---------- Run‑length shuffle null (preserve run lengths, permute order) ----------
def run_length_shuffle(bits: str) -> str:
    runs = []
    cur = bits[0]
    count = 1
    for ch in bits[1:]:
        if ch == cur:
            count += 1
        else:
            runs.append((cur, count))
            cur, count = ch, 1
    runs.append((cur, count))
    # permute run order but preserve (symbol,count) multiset and alternation via post‑processing
    random.shuffle(runs)
    # force alternation by merging adjacent same‑symbol runs after shuffle
    merged = []
    for sym, cnt in runs:
        if merged and merged[-1][0] == sym:
            merged[-1] = (sym, merged[-1][1] + cnt)
        else:
            merged.append((sym, cnt))
    # rebuild string
    out = []
    for sym, cnt in merged:
        out.append(sym*cnt)
    s = "".join(out)
    # trim or pad to original length by simple wrap or crop
    if len(s) < len(bits):
        s = (s * ((len(bits)//len(s))+2))[:len(bits)]
    else:
        s = s[:len(bits)]
    return s

# ---------- Density‑matched Bernoulli null ----------
def density_matched(bits: str) -> str:
    p = bits.count("1")/len(bits)
    return "".join("1" if random.random() < p else "0" for _ in range(len(bits)))

# ---------- Complement identity check on finite prefix ----------
def finite_sum_to_one_ninth(F_bits: str, R_bits: str):
    # Treat as decimal‑digit strings with denominator 10^N (Fractions to avoid fp issues)
    N = len(F_bits)
    numF = int(F_bits, 2)  # WRONG: bits are not binary place value. Build as base‑10 digits:
    # Build numerator directly: sum d_i * 10^{N-i}
    numF = 0
    for i, ch in enumerate(F_bits):
        numF = numF*10 + (1 if ch=="1" else 0)
    numR = 0
    for i, ch in enumerate(R_bits):
        numR = numR*10 + (1 if ch=="1" else 0)
    den = 10**N
    s = Fraction(numF + numR, den)
    return s, Fraction(1,9), s - Fraction(1,9)

# ---------- Main run ----------
alpha_ref = Decimal(str(ALPHA_REF))

print("Generating sequences...")
S = {name: SEQ_FUNCS[name](N_DIGITS) for name in SEQ_LIST}

# Baseline complement identity on finite prefix (F + R == 1/9)
print("\n=== BASELINE: Complement identity (finite N, exact) ===")
sum_FR, one_ninth, diff = finite_sum_to_one_ninth(S["F"], S["R"])
print("digits_used N          :", N_DIGITS)
print("DF + DR (finite)       :", f"{sum_FR.numerator}/{sum_FR.denominator}")
print("1/9 (limit)            :", "1/9")
print("finite - 1/9 (exact)   :", diff, " (~{:.3e})".format(float(diff)))

# Triadic scan across bases for each sequence
rows = []
best_rows = []
print("\nScanning triadic grid (bases 2..12)...")
for base in BASES_TO_SCAN:
    for seq in SEQ_LIST:
        res = triadic_scan(S[seq], base, alpha_ref, K_RANGE)
        for (k, m, D, est, err) in res:
            rows.append({
                "sequence": seq, "base": base, "k": k, "m": int(m),
                "D_value": str(D), "estimate": str(est), "abs_err": float(err)
            })
        # best per (seq,base)
        k_best, m_best, Dbest, est_best, err_best = min(res, key=lambda t:t[-1])
        best_rows.append({
            "sequence": seq, "base": base, "k_best": k_best,
            "m_best": int(m_best), "D_value": str(Dbest),
            "estimate": str(est_best), "error_best": float(err_best)
        })

df_all = pd.DataFrame(rows)
df_best = pd.DataFrame(best_rows).sort_values(["sequence","base"])
df_best.to_csv("/content/out_ng/triadic_best.csv", index=False)

print("\n=== Best triadic per sequence/base (k_best, |α*−α|) ===")
print(df_best[["sequence","base","k_best","error_best"]].to_string(index=False))

# Hold‑out: choose k on first half, score on second half (per base, per sequence)
print("\nComputing hold‑out validation (choose k on first half; score on second half)...")
hold_rows = []
n_half = N_DIGITS//2
for base in BASES_TO_SCAN:
    for seq in SEQ_LIST:
        D1 = digits_value_baseB(S[seq][:n_half], base)
        D2 = digits_value_baseB(S[seq][n_half:], base)
        BB = Decimal(base)**3
        # choose k on first half
        errs1 = []
        for k in K_RANGE:
            est1 = (Decimal(3)**k / BB) * D1
            err1 = abs(est1 - alpha_ref)
            errs1.append((k, err1))
        k_hat = min(errs1, key=lambda t:t[1])[0]
        # score k_hat on second half
        est2 = (Decimal(3)**k_hat / BB) * D2
        err2 = abs(est2 - alpha_ref)
        hold_rows.append({"base":base,"sequence":seq,"k_hat":k_hat,"holdout_abs_err":float(err2)})

df_hold = pd.DataFrame(hold_rows)
df_hold.to_csv("/content/out_ng/holdout_selection.csv", index=False)

print("\n=== Hold-out (k from first half; error on second half) ===")
print(df_hold.to_string(index=False))

# Tier‑2 dyadic micro‑envelopes around each observed best α*
print("\nSearching tier‑2 dyadic micro‑envelopes...")
env_rows = []
for r in best_rows:
    est = Decimal(r["estimate"])
    for (c, ell, sgn, factor, tweaked) in dyadic_envelopes(est):
        env_err = abs(tweaked - alpha_ref)
        env_rows.append({
            "sequence": r["sequence"], "base": r["base"],
            "k_best": r["k_best"], "c": c, "ell": ell, "sign": sgn,
            "factor": float(factor), "estimate": str(tweaked),
            "abs_err": float(env_err),
        })
df_env = pd.DataFrame(env_rows)
df_env.to_csv("/content/out_ng/tier2_envelopes.csv", index=False)

# Optional transforms of α* (negatives, reciprocal, complement, even scalings)
xform_rows = []
if APPLY_TRANSFORMS:
    print("\nApplying transforms to observed α* (neg/reciprocal/complement/even scalings)...")
    for r in best_rows:
        x = Decimal(r["estimate"])
        for name, y in transforms(x):
            xform_rows.append({
                "sequence": r["sequence"], "base": r["base"], "k_best": r["k_best"],
                "transform": name, "value": str(y), "abs_err": float(abs(y - alpha_ref))
            })
df_xf = pd.DataFrame(xform_rows)
df_xf.to_csv("/content/out_ng/transforms.csv", index=False)

# Monte Carlo nulls (F sequence, base 10)
print("\nMonte Carlo nulls (F sequence, base=10) ...")
bitsF = S["F"]

def best_err_for_bits(bits: str, base: int) -> float:
    res = triadic_scan(bits, base, alpha_ref, K_RANGE)
    return float(min(res, key=lambda t: t[-1])[-1])

obs_err_base10_F = df_best.query("sequence=='F' and base==10")["error_best"].iloc[0]

# Density-matched
den_errs = []
for _ in range(MC_DEN_TRIALS):
    b = density_matched(bitsF)
    den_errs.append(best_err_for_bits(b, 10))
# Run-length-shuffled
run_errs = []
for _ in range(MC_RUN_TRIALS):
    b = run_length_shuffle(bitsF)
    run_errs.append(best_err_for_bits(b, 10))

p_den = (np.sum(np.array(den_errs) <= obs_err_base10_F) + 1) / (MC_DEN_TRIALS + 1)
p_run = (np.sum(np.array(run_errs) <= obs_err_base10_F) + 1) / (MC_RUN_TRIALS + 1)

# Save MC histograms
plt.figure()
plt.hist(den_errs, bins=30)
plt.axvline(obs_err_base10_F, linestyle="--")
plt.xlabel("best |α* − α| under density null")
plt.ylabel("count")
plt.title("MC density null (F, base=10)")
plt.tight_layout()
plt.savefig("/content/out_ng/hist_mc_density_base10.png", dpi=130)
plt.close()

plt.figure()
plt.hist(run_errs, bins=30)
plt.axvline(obs_err_base10_F, linestyle="--")
plt.xlabel("best |α* − α| under run-length null")
plt.ylabel("count")
plt.title("MC run-length null (F, base=10)")
plt.tight_layout()
plt.savefig("/content/out_ng/hist_mc_runlen_base10.png", dpi=130)
plt.close()

# Bonferroni‑style multiplicity adjustment across all scanned bases (for F only)
# (We also compute unadjusted here; BH-FDR can be computed similarly.)
pvals_rows = []
for base in BASES_TO_SCAN:
    # observed error for F & base
    obs = df_best.query("sequence=='F' and base==@base")["error_best"].iloc[0]
    # reuse MC distribution computed at base=10 as a conservative proxy (documented caveat)
    p_den_b = (np.sum(np.array(den_errs) <= obs) + 1) / (MC_DEN_TRIALS + 1)
    p_run_b = (np.sum(np.array(run_errs) <= obs) + 1) / (MC_RUN_TRIALS + 1)
    pvals_rows.append({"base":base,"obs_abs_err":obs,"p_density":p_den_b,"p_runlen":p_run_b})
df_p = pd.DataFrame(pvals_rows)
m_tests = len(BASES_TO_SCAN)
df_p["p_density_bonf"] = np.minimum(1.0, df_p["p_density"]*m_tests)
df_p["p_runlen_bonf"]  = np.minimum(1.0, df_p["p_runlen"] *m_tests)
df_p.to_csv("/content/out_ng/mc_pvalues.csv", index=False)

# Base‑sensitivity plots (best error per base) for each sequence
for seq in SEQ_LIST:
    df = df_best[df_best.sequence==seq].copy()
    plt.figure()
    plt.scatter(df["base"], df["error_best"])
    for _, r in df.iterrows():
        plt.annotate(f"k={int(r.k_best)}", (r.base, r.error_best), textcoords="offset points", xytext=(4,4), fontsize=9)
    plt.xlabel("base B")
    plt.ylabel("best |α* − α|")
    plt.title(f"Best triadic error per base (sequence={seq})")
    plt.tight_layout()
    plt.savefig(f"/content/out_ng/plot_best_by_base_{seq}.png", dpi=140)
    plt.close()

# Compact summary
print("\n=== SUMMARY ===")
print(f"digits_used N                : {N_DIGITS}")
print(f"Observed best errors (min over k) by (sequence,base):")
print(df_best.sort_values(['sequence','error_best'])[["sequence","base","k_best","error_best"]].to_string(index=False))
print("\nHold-out errors (k from first half, score on second half):")
print(df_hold.sort_values(['sequence','base'])[["base","sequence","k_hat","holdout_abs_err"]].to_string(index=False))
print("\nMC p-values for F (base=10):")
print(f"  observed best error : {obs_err_base10_F:.3e}")
print(f"  p_density (unadj)   : {p_den:.4f}  over {MC_DEN_TRIALS} trials")
print(f"  p_runlen (unadj)    : {p_run:.4f}  over {MC_RUN_TRIALS} trials")
print("\nPer-base p-values for F (conservative reuse of base=10 nulls):")
print(df_p.to_string(index=False))

print("\nSaved outputs → /content/out_ng")
print("Files:")
for fn in sorted(os.listdir("/content/out_ng")):
    print(" -", fn)


Generating sequences...

=== BASELINE: Complement identity (finite N, exact) ===
digits_used N          : 4000
DF + DR (finite)       : 111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111