In [1]:
# --- Preregistered analysis: Fibonacci-decimal → alpha, with rigorous MC tests ---
# Everything in one cell for easy copy/paste.

import numpy as np, pandas as pd, math, random, itertools, time
from decimal import Decimal, getcontext

# ---------- precision / knobs ----------
# Number of leading symbols taken from each 0/1 sequence.
N_BITS = 2000
# Triadic exponents k tried for the multiplier m = 3^k.
K_RANGE = list(range(13))
# Monte Carlo sample sizes: density-matched null, then run-length-shuffled null.
N_TRIALS_DENS = 4000
N_TRIALS_RUN = 2000
# Seed for the random generator (reproducibility).
SEED = 12345
# When True, a candidate x is scored by min(|x-α|, |−x-α|).
ALLOW_NEGATIVE = True
# Directory that receives the CSV outputs.
SAVE_DIR = "/content/out_preregistered"

# ---------- constants ----------
# Every Decimal operation below is carried out with 6000 significant digits.
getcontext().prec = 6000
# Reference value of α that all candidates are compared against.
ALPHA_REF = Decimal("0.0072973525693")
# NOTE: math.pi is a binary float, so PI only carries float precision,
# not the 6000 digits of the Decimal context.
PI = Decimal(str(math.pi))
SQRT3 = Decimal("3").sqrt()
# Golden ratio φ = (1 + √5) / 2 and its fractional part φ − 1.
PHI = (1 + Decimal("5").sqrt()) / 2
PHI_MINUS1 = PHI - Decimal(1)

# ---------- helpers ----------
def fib_word(n):
    """Return the first *n* symbols of the Fibonacci word as a '0'/'1' string.

    The word is the fixed point of the morphism 0->01, 1->0.  It is grown
    here with the equivalent recurrence w_next = w + w_prev, starting from
    "0" (with "1" as the preceding word), until at least *n* symbols exist.
    """
    previous, word = "1", "0"
    while len(word) < n:
        previous, word = word, word + previous
    return word[:n]

def complement_bits(bits):
    """Return *bits* with every '0' turned into '1' and every other symbol into '0'."""
    flipped = []
    for symbol in bits:
        if symbol != "0":
            flipped.append("0")
        else:
            flipped.append("1")
    return "".join(flipped)

def thue_morse(n):
    """Return the first *n* symbols of the Thue–Morse sequence as a '0'/'1' string.

    Symbol i is the parity of the number of 1-bits of i.  The prefix is built
    by repeatedly appending the bitwise complement of what exists so far,
    which produces the same sequence.
    """
    flip = str.maketrans("01", "10")
    word = "0"
    while len(word) < n:
        word += word.translate(flip)
    return word[:n]

def phi_minus_one_bits(n):
    """Return the first *n* binary digits after the point of PHI_MINUS1.

    Uses the doubling method in Decimal arithmetic: double the remaining
    fraction, emit '1' and subtract 1 when it reaches 1, otherwise emit '0'.
    """
    fraction = PHI_MINUS1
    digits = []
    for _ in range(n):
        fraction = fraction * 2
        carries = fraction >= 1
        digits.append('1' if carries else '0')
        if carries:
            fraction -= 1
    return "".join(digits)

def dec_from_decimal_digits(bits):
    """Read *bits* as base-10 digits after the point: 0.b1 b2 b3 ...

    Returns the Decimal sum of 10^-i over every position i (1-based) whose
    symbol is '1'; any other symbol contributes nothing.
    """
    one = Decimal(1)
    ten = Decimal(10)
    total = Decimal(0)
    scale = one
    for digit in bits:
        # scale is 10^i for the current 1-based position i
        scale *= ten
        if digit != '1':
            continue
        total += one / scale
    return total

def triadic_scaled_value(D, k):
    """Return the triadic candidate α_k* = (3^k / 1000) * D.

    Evaluated as (3^k * D) / 1000 in Decimal arithmetic.
    """
    multiplier = Decimal(3) ** k
    scaled = multiplier * D
    return scaled / Decimal(1000)

def err_to_alpha(x):
    """Return the absolute distance between *x* and ALPHA_REF.

    When ALLOW_NEGATIVE is set, the sign of x is ignored by taking the
    smaller of |x - ALPHA_REF| and |-x - ALPHA_REF|.
    """
    direct = abs(x - ALPHA_REF)
    if not ALLOW_NEGATIVE:
        return direct
    mirrored = abs(-x - ALPHA_REF)
    return min(direct, mirrored)

def runs(bits):
    """Run-length encode *bits*.

    Returns a list of (symbol, length) tuples, one per maximal run of equal
    consecutive symbols, in order of appearance; [] for empty input.
    """
    if not bits:
        return []
    return [
        (symbol, sum(1 for _ in group))
        for symbol, group in itertools.groupby(bits)
    ]

def shuffle_runs_preserve_hist(bits, rng):
    """Return a '0'/'1' string with the run lengths of *bits* reshuffled.

    The lengths of the '0' runs and of the '1' runs are shuffled separately
    (zeros first, then ones) with ``rng.shuffle``.  The sequence is then
    rebuilt with alternating symbols, starting from the original first
    symbol, so each symbol keeps its multiset of run lengths and the total
    length is unchanged.

    Args:
        bits: sequence of '0'/'1' characters.
        rng: object exposing an in-place ``shuffle(list)`` method.

    Returns:
        The rebuilt sequence as a ``str``; ``""`` when *bits* is empty.
    """
    rs = runs(bits)
    if not rs:
        # Empty input has no runs, so there is no first symbol to index.
        return ""
    first_bit = rs[0][0]
    lengths0 = [length for (bit, length) in rs if bit == '0']
    lengths1 = [length for (bit, length) in rs if bit == '1']
    rng.shuffle(lengths0)
    rng.shuffle(lengths1)
    i0 = i1 = 0
    cur = first_bit
    out = []
    for _ in rs:
        if cur == '0':
            out.append('0' * lengths0[i0])
            i0 += 1
            cur = '1'
        else:
            out.append('1' * lengths1[i1])
            i1 += 1
            cur = '0'
    # The run lengths sum to len(bits), so no truncation is needed.
    return "".join(out)

def precompute_weights_decimal(n):
    """Return the list [10^-1, 10^-2, ..., 10^-n] as Decimal values.

    The denominators are produced by a running product of tens and each
    weight is 1 divided by that denominator.
    """
    one = Decimal(1)
    ten = Decimal(10)
    denominators = itertools.accumulate(
        itertools.repeat(ten, n), lambda product, factor: product * factor
    )
    return [one / denominator for denominator in denominators]

def dec_from_digits_with_weights(bits, weights):
    """Sum the entries of *weights* whose paired symbol in *bits* is '1'.

    Symbols and weights are paired positionally with ``zip``, so the shorter
    of the two decides how many positions are used.  Returns a Decimal.
    """
    selected = (weight for symbol, weight in zip(bits, weights) if symbol == '1')
    return sum(selected, Decimal(0))

def mc_density_errors(p, weights, k_range, n_trials, rng):
    """Best triadic error for random digit strings with '1'-density *p*.

    Each trial draws one Bernoulli(p) indicator per weight from
    ``rng.random``, sums the selected weights in float64, converts the sum to
    Decimal and keeps the smallest ``err_to_alpha`` over all k in *k_range*.

    Returns a 1-D numpy array holding one float error per trial.
    """
    # Float copies of the weights; the null draws are summed in float64.
    weights_f64 = np.array([float(w) for w in weights], dtype=np.float64)
    n_digits = len(weights_f64)
    best_errors = []
    for _ in range(n_trials):
        draw = rng.random(n_digits) < p
        d_float = np.dot(draw.astype(np.float64), weights_f64)
        d_value = Decimal(str(d_float))
        candidates = [err_to_alpha(triadic_scaled_value(d_value, k)) for k in k_range]
        best_errors.append(float(min(candidates)))
    return np.array(best_errors)

def mc_runlength_errors(template_bits, weights, k_range, n_trials, rng):
    """Best triadic error for run-length-shuffled copies of *template_bits*.

    Each trial reshuffles the runs of the template with
    ``shuffle_runs_preserve_hist``, evaluates the shuffled digits against
    *weights* in Decimal and keeps the smallest ``err_to_alpha`` over all k
    in *k_range*.

    Returns a 1-D numpy array holding one float error per trial.
    """
    best_errors = []
    for _ in range(n_trials):
        shuffled = shuffle_runs_preserve_hist(template_bits, rng)
        d_value = dec_from_digits_with_weights(shuffled, weights)
        candidates = [err_to_alpha(triadic_scaled_value(d_value, k)) for k in k_range]
        best_errors.append(float(min(candidates)))
    return np.array(best_errors)

# ---------- build sequences ----------
# Single generator, seeded once, shared by every Monte Carlo draw below.
rng = np.random.default_rng(SEED)

F_bits   = fib_word(N_BITS)                 # Fibonacci word
R_bits   = complement_bits(F_bits)          # Rabbit = bitwise complement in this setup
TM_bits  = thue_morse(N_BITS)               # Thue–Morse (control)
FIBBI_bits = phi_minus_one_bits(N_BITS)     # binary of (phi-1)

# sanity: F and R are complementary, so digit-wise DF+DR = 0.111...1 with
# N_BITS ones (no carry).  That is 1/9 up to a truncation term of about
# 10^-N_BITS / 9, hence a tolerance check rather than an equality check.
weights10 = precompute_weights_decimal(N_BITS)
DF  = dec_from_digits_with_weights(F_bits, weights10)
DR  = dec_from_digits_with_weights(R_bits, weights10)
one_ninth = Decimal(1)/Decimal(9)
assert abs((DF+DR) - one_ninth) < Decimal("1e-20"), "DF+DR should be 1/9"

# ---------- observed effects ----------
# For each sequence: its decimal value D and the triadic exponent k whose
# scaled value lands closest to the reference α.
row_obs = []
named_sequences = [("F",F_bits), ("R",R_bits), ("TM",TM_bits), ("FIBBI",FIBBI_bits)]
for name, bits in named_sequences:
    D = dec_from_digits_with_weights(bits, weights10)
    # (k, error) pairs over the whole exponent range
    rec = [(k, float(err_to_alpha(triadic_scaled_value(D, k)))) for k in K_RANGE]
    k_best, e_best = min(rec, key=lambda pair: pair[1])
    row_obs.append([name, k_best, e_best, float(D)])

obs_df = pd.DataFrame(
    row_obs,
    columns=["sequence","k_best","error_best","D_value"],
)
print("Observed best triadic errors (symmetric for negative values):\n", obs_df, "\n")

# ---------- Monte Carlo: density- and runlength-matched p-values for F only ----------
# Fraction of '1' symbols in F, used as the Bernoulli parameter of the density null.
p1 = F_bits.count('1') / len(F_bits)
# The density null is drawn before the run-length null, from the same generator.
errs_dens = mc_density_errors(p1, weights10, K_RANGE, N_TRIALS_DENS, rng)
errs_run = mc_runlength_errors(F_bits, weights10, K_RANGE, N_TRIALS_RUN, rng)

# Observed best error of F, and the share of null draws that are at least as small.
is_F = obs_df["sequence"] == "F"
e_obs_F = obs_df.loc[is_F, "error_best"].iloc[0]
pval_dens = np.mean(errs_dens <= e_obs_F)
pval_run = np.mean(errs_run <= e_obs_F)

print(f"F vs density-matched MC:   p ≈ {pval_dens:0.4g} (n={len(errs_dens)})")
print(f"F vs runlength-shuffled MC: p ≈ {pval_run:0.4g} (n={len(errs_run)})")

# ---------- Dyadic micro-envelopes around the k=6 estimate (c∈{1,3}, ℓ up to 14), symmetric negative handling ----------
def dyadic_micro_envelope_errors(D, k_center=6, ell_max=14):
    """Errors to α for dyadically perturbed multiples of the k_center estimate.

    With base = triadic_scaled_value(D, k_center), the candidates are
    c * base * (1 ± 2^-ell) for c in (1, 3) and ell from 9 to *ell_max*
    inclusive.

    Returns a 1-D numpy float array ordered by c, then ell, then sign
    (minus before plus).
    """
    base = triadic_scaled_value(D, k_center)
    one, two = Decimal(1), Decimal(2)
    grid = itertools.product((1, 3), range(9, ell_max + 1), (-1, +1))
    errors = []
    for c, ell, sgn in grid:
        wobble = one + (Decimal(sgn) / (two ** ell))
        candidate = Decimal(c) * base * wobble
        errors.append(float(err_to_alpha(candidate)))
    return np.array(errors)

# Observed envelope statistic for the Fibonacci word: the smallest error
# over all envelope candidates.
D_F = dec_from_digits_with_weights(F_bits, weights10)
env_errs_obs = dyadic_micro_envelope_errors(D_F)
e_env_obs = np.min(env_errs_obs)

# MC null for envelopes: reuse density-matched random D draws
def mc_env_density(n_trials):
    """Null distribution of the minimum envelope error under the density null.

    Each trial draws one Bernoulli(p1) indicator per digit from the
    module-level ``rng``, sums the selected float64 copies of ``weights10``,
    converts the sum to Decimal and records the smallest value returned by
    ``dyadic_micro_envelope_errors``.

    Args:
        n_trials: number of random draws.

    Returns:
        1-D numpy array with one minimum error per trial.
    """
    # Convert the Decimal weights to floats once; they do not change between trials.
    weights_f64 = np.array([float(w) for w in weights10])
    n_digits = len(weights10)
    e = []
    for _ in range(n_trials):
        vec = rng.random(n_digits) < p1
        Dfloat = float(np.dot(vec.astype(np.float64), weights_f64))
        D = Decimal(str(Dfloat))
        e.append(dyadic_micro_envelope_errors(D).min())
    return np.array(e)

# Envelope null uses half as many trials as the density null; the p-value is
# the share of null minima that are at least as small as the observed one.
env_mc = mc_env_density(N_TRIALS_DENS // 2)
pval_env = np.mean(env_mc <= e_env_obs)
print(f"Dyadic micro-envelope p-value (density null, symmetric negative): p ≈ {pval_env:0.4g}\n")

# ---------- Prefix invariance: δ/step vs N ----------
def prefix_ratio_series(bits, Ns, k=6):
    """Signed error-to-step ratio of the k-th triadic estimate for several prefixes.

    For every n in *Ns* the first n symbols of *bits* are read as decimal
    digits (Dn), scaled with ``triadic_scaled_value(Dn, k)`` to x, and the
    ratio delta / step is recorded, where delta is ``err_to_alpha(x)`` signed
    positive when x > ALPHA_REF and negative otherwise, and step = Dn / 1000.

    Args:
        bits: '0'/'1' sequence supporting slicing.
        Ns: iterable of prefix lengths.
        k: triadic exponent used for the estimate.

    Returns:
        DataFrame with columns "N" and "delta_over_step", one row per n.
    """
    sizes = list(Ns)
    # Build the weight table once for the longest prefix; shorter prefixes
    # reuse its leading entries instead of recomputing them per n.
    all_weights = precompute_weights_decimal(max(sizes)) if sizes else []
    out = []
    for n in sizes:
        # max(n, 0) keeps the table empty for non-positive n, as a freshly
        # computed table of that size would be.
        Dn = dec_from_digits_with_weights(bits[:n], all_weights[:max(n, 0)])
        x = triadic_scaled_value(Dn, k)
        delta = err_to_alpha(x) if (x > ALPHA_REF) else -err_to_alpha(x)
        step = Dn / Decimal(1000)   # one-click step
        out.append((n, float(delta/step)))
    return pd.DataFrame(out, columns=["N","delta_over_step"])

# Prefix lengths 100, 300, ..., 1900; the fitted slope of delta/step against N
# is printed as a check of how constant the ratio is.
N_list = [100 + 200 * i for i in range(10)]
rat_df = prefix_ratio_series(F_bits, N_list)
slope = np.polyfit(rat_df["N"], rat_df["delta_over_step"], deg=1)[0]
print("Prefix ratio (delta/step) ~ constant? slope ≈", slope)

# ---------- Base sensitivity (optional, fixed B set) ----------
def digits_to_baseB_value(bits, B):
    """Read *bits* as base-B fractional digits and return sum b_i * B^-i as a Decimal.

    Only positions whose symbol is '1' contribute; positions are 1-based.
    """
    one = Decimal(1)
    radix = Decimal(B)
    # B, B^2, B^3, ... produced lazily by a running product
    place_values = itertools.accumulate(
        itertools.repeat(radix), lambda product, factor: product * factor
    )
    contributions = (
        one / place
        for symbol, place in zip(bits, place_values)
        if symbol == '1'
    )
    return sum(contributions, Decimal(0))

def base_sweep(bits, B_list=(7,8,9,10,12), K=K_RANGE):
    """Best triadic error of *bits* when its digits are read in several bases.

    For every base B in *B_list* the digits are evaluated with
    ``digits_to_baseB_value`` and the smallest ``err_to_alpha`` over all
    exponents in *K* is kept.

    Returns a DataFrame with columns "base_B" and "best_error".
    """
    rows = []
    for B in B_list:
        value_in_base = digits_to_baseB_value(bits, B)
        candidates = [err_to_alpha(triadic_scaled_value(value_in_base, k)) for k in K]
        rows.append((B, float(min(candidates))))
    return pd.DataFrame(rows, columns=["base_B","best_error"])

# Run the base sweep on the Fibonacci word with the default bases and
# exponent range of base_sweep, then print the table.
base_df = base_sweep(F_bits)
print("\nBase sensitivity (F sequence):\n", base_df)

# ---------- Save tidy outputs ----------
import os
os.makedirs(SAVE_DIR, exist_ok=True)

# (frame, destination) pairs; each is written as CSV without the index column,
# in the order listed.
csv_outputs = [
    (obs_df, f"{SAVE_DIR}/observed_best_errors.csv"),
    (pd.DataFrame({"err_density":errs_dens}), f"{SAVE_DIR}/mc_density_errors.csv"),
    (pd.DataFrame({"err_runlen":errs_run}), f"{SAVE_DIR}/mc_runlength_errors.csv"),
    (pd.DataFrame({"env_mc_err":env_mc}), f"{SAVE_DIR}/mc_env_errors.csv"),
    (rat_df, f"{SAVE_DIR}/prefix_ratio.csv"),
    (base_df, f"{SAVE_DIR}/base_sensitivity.csv"),
]
for frame, destination in csv_outputs:
    frame.to_csv(destination, index=False)

print(f"\nSaved CSVs to {SAVE_DIR}")


Observed best triadic errors (symmetric for negative values):
   sequence  k_best    error_best   D_value
0        F       6  1.040367e-08  0.010010
1        R       4  8.918293e-04  0.101101
2       TM       6  7.289455e-04  0.011010
3    FIBBI       4  8.116465e-04  0.100111 

F vs density-matched MC:   p ≈ 0.00125 (n=4000)
F vs runlength-shuffled MC: p ≈ 0.026 (n=2000)
Dyadic micro-envelope p-value (density null, symmetric negative): p ≈ 0.0625

Prefix ratio (delta/step) ~ constant? slope ≈ 2.834480203664113e-22

Base sensitivity (F sequence):
    base_B    best_error
0       7  2.323415e-03
1       8  3.492946e-03
2       9  1.715146e-03
3      10  1.040367e-08
4      12  2.231903e-03

Saved CSVs to /content/out_preregistered
