In [1]:
# @title Binary Heartbeat → Triadic Step → Fission/Fusion → Coupling (Precision-Safe)
# @markdown **What changed vs prior run**
# @markdown - Keeps **Decimal** all the way; errors measured in **parts-per-trillion (ppt)** to avoid float collapse.
# @markdown - Declares just **k ∈ {0, −6}** (i.e., ×1 and ÷729) to avoid multiple-comparison fishing.
# @markdown - Rejects candidates whose scaled series is effectively **constant** in ppt units (no fake “weaving”).
# @markdown - Outputs: candidate metrics (train/test) + extras + coupling mod-1 tables.

import math, itertools, statistics
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext, ROUND_FLOOR

# ---------------- Parameters ----------------
# Sweep of prefix lengths: every integer from N_min to N_max inclusive.
N_min = 50
N_max = 400
N_step = 1
lengths = [*range(N_min, N_max + 1, N_step)]

# Prefix lengths up to split_N form the training part, the rest the holdout
# (70/30 split of the [N_min, N_max] span).
split_N = int(N_min + 0.7 * (N_max - N_min))

# Pre-declared triadic exponents: 3**0 (×1) and 3**-6 (÷729).
tri_k_vals = [0, -6]

# Decimal working precision in significant digits: the longest prefix length
# plus 120 digits of headroom.
getcontext().prec = N_max + 120

# Constants (Decimal)
def _decimal_pi() -> Decimal:
    """Return pi rounded to the active Decimal context precision.

    Uses the series from the "Recipes" section of the ``decimal`` module
    documentation, evaluated with two guard digits.
    """
    ctx = getcontext()
    ctx.prec += 2  # guard digits for the intermediate sums
    try:
        three = Decimal(3)
        lasts, t, s, n, na, d, da = 0, three, 3, 1, 0, 0, 24
        while s != lasts:
            lasts = s
            n, na = n + na, na + 8
            d, da = d + da, da + 32
            t = (t * n) / d
            s += t
    finally:
        ctx.prec -= 2
    return +s  # unary plus rounds to the restored precision


# Constants are evaluated in Decimal at the active context precision.  Building
# them from Python floats (math.pi, 5**0.5, ...) would cap them at roughly 16
# correct digits no matter how high the context precision is set.
alpha_inv = Decimal("137.035999")
alpha     = Decimal(1) / alpha_inv
pi        = _decimal_pi()
phi       = (Decimal(1) + Decimal(5).sqrt()) / 2
sqrt2     = Decimal(2).sqrt()
econst    = Decimal(1).exp()
# The next three are given as 20-decimal literals (not computed here).
gamma     = Decimal("0.57721566490153286060")
CatalanG  = Decimal("0.91596559417721901505")
zeta3     = Decimal("1.20205690315959428540")

TARGETS = {"alpha_inv": alpha_inv}

# Fixed, pre-declared couplings.  Each maps a Decimal s to a Decimal, or to
# None when the transform is undefined for that value (only "1/s" at s == 0).
TRANSFORMS = {
    "s":            lambda s: s,
    "1/s":          lambda s: (None if s == 0 else (Decimal(1) / s)),
    "s/alpha_inv":  lambda s: (s / alpha_inv),
    "s*alpha":      lambda s: (s * alpha),
    "s/pi":         lambda s: (s / pi),
    "s*pi":         lambda s: (s * pi),
    "s/phi":        lambda s: (s / phi),
    "s*phi":        lambda s: (s * phi),
}

# ---------------- Words ----------------
def fib_word(N: int) -> str:
    """Return the first N characters of the word grown from "0" by 0→01, 1→0.

    The substitution is applied to the whole string repeatedly until it is at
    least N characters long, then the result is truncated to N.
    """
    substitution = str.maketrans({"0": "01", "1": "0"})
    word = "0"
    while len(word) < N:
        # One simultaneous rewrite of every symbol.
        word = word.translate(substitution)
    return word[:N]

def rabbit_word(N: int) -> str:
    """Return the first N characters of the rabbit sequence (1, 10, 101, 10110, ...).

    The words obey S_n = S_{n-1} S_{n-2} with S_0 = "1", S_1 = "10".  The newer
    word must come first in the concatenation: only then is every S_n a prefix
    of S_{n+1}, so that the returned prefix does not depend on how many
    doubling steps were needed to reach length N.
    """
    a, b = "1", "10"
    while len(b) < N:
        # b + a (not a + b): keeps the already-built word as a stable prefix.
        a, b = b, b + a
    return b[:N]

def thue_morse(N: int) -> str:
    """Return N characters where character i is the parity of the 1-bits of i."""
    symbols = []
    for i in range(N):
        ones = bin(i).count("1")
        symbols.append("1" if ones % 2 else "0")
    return "".join(symbols)

# Build each binary word once at the longest length; shorter prefixes are
# sliced from these strings later on.
Nmax = N_max
F_bits, R_bits, TM_bits = fib_word(Nmax), rabbit_word(Nmax), thue_morse(Nmax)

# ---------------- Encodings (Decimal) ----------------
def bigit_decimal(bits: str) -> Decimal:
    """Read the bit string as decimal digits after "0." (empty string gives 0)."""
    if not bits:
        return Decimal(0)
    return Decimal("0." + bits)

def binary_fraction(bits: str) -> Decimal:
    """Read the bit string as a base-2 fraction: sum of 2**-(i+1) over "1" positions.

    Any character other than "1" contributes nothing.
    """
    total = Decimal(0)
    place_value = Decimal(1)
    for symbol in bits:
        # Halve first, so the leading character carries weight 1/2.
        place_value = place_value / 2
        if symbol == "1":
            total = total + place_value
    return total

def series_from(bits_full: str, encoder):
    """Encode the length-N prefix of bits_full for every N in the global `lengths`.

    Returns a list with one encoder result per entry of `lengths`, in order.
    """
    return [encoder(bits_full[:N]) for N in lengths]

def ratio_series(A_bits: str, B_bits: str, enc):
    """For every N in the global `lengths`, return enc(A[:N]) / enc(B[:N]).

    An entry is None where the encoded denominator equals zero.
    """
    ratios = []
    for N in lengths:
        numerator = enc(A_bits[:N])
        denominator = enc(B_bits[:N])
        ratios.append(numerator / denominator if denominator != 0 else None)
    return ratios

# Seed series keyed by (seed name, encoding name).  Seeds are the outer loop
# and encodings the inner loop, which fixes the dict's iteration order.
_ENCODERS = (
    ("decimal_bigit", bigit_decimal),
    ("binary_fraction", binary_fraction),
)
_SEED_BUILDERS = (
    ("F", lambda enc: series_from(F_bits, enc)),
    ("R_over_F", lambda enc: ratio_series(R_bits, F_bits, enc)),
    ("TM_over_F", lambda enc: ratio_series(TM_bits, F_bits, enc)),
)
SEEDS = {
    (seed_label, enc_label): build(enc)
    for seed_label, build in _SEED_BUILDERS
    for enc_label, enc in _ENCODERS
}

# ---------------- Gates (Decimal-safe) ----------------
def g_id(x):
    """Identity gate: return the argument unchanged (None stays None)."""
    return x
def g_inv(x):
    """Reciprocal gate: 1/x, or None when x is None or zero."""
    if x is None or x == 0:
        return None
    return Decimal(1) / x
def g_comp(x):
    """Complement gate: 1 - x, or None when x is None."""
    if x is None:
        return None
    return Decimal(1) - x
def g_x_over_1mx(x):
    """Gate x / (1 - x); None when x is None or when 1 - x is zero."""
    if x is None:
        return None
    complement = Decimal(1) - x
    if complement == 0:
        return None
    return x / complement
def g_1mx_over_x(x):
    """Gate (1 - x) / x; None when x is None or zero."""
    if x is None or x == 0:
        return None
    return (Decimal(1) - x) / x

# Gate name -> function.  These names are the values recorded in the
# "gate1"/"gate2" columns of the result tables.
GATES = {
    "id": g_id,
    "inv": g_inv,
    "comp": g_comp,
    "x/(1-x)": g_x_over_1mx,
    "(1-x)/x": g_1mx_over_x,
}

def apply_chain(series, g1, g2):
    """Apply gate g1 to every element of series, then gate g2 to every result.

    g1 and g2 are keys of GATES.  Returns a new list; the input is not modified.
    """
    staged = series
    for gate_name in (g1, g2):
        # Finish one whole pass before starting the next gate.
        staged = [GATES[gate_name](value) for value in staged]
    return staged

def tri_scale(series, k):
    """Multiply every non-None element of series by 3**k (None entries pass through)."""
    factor = Decimal(3) ** k
    scaled = []
    for value in series:
        scaled.append(value if value is None else value * factor)
    return scaled

# ---------------- Metrics (Decimal → ppt) ----------------
PPT = Decimal(10) ** 12  # parts per trillion
def rel_err_ppt(series, target: Decimal):
    """Signed relative error of each value against target, scaled to parts per trillion.

    Returns a list the same length as series; None entries stay None.
    """
    return [
        None if value is None else ((value - target) / target) * PPT
        for value in series
    ]

def split_train_test():
    """Partition positions of the global `lengths` by prefix length.

    Returns (idx_train, idx_test): indices whose length is <= split_N, and the
    remaining indices, each in ascending order.
    """
    idx_train, idx_test = [], []
    for position, N in enumerate(lengths):
        bucket = idx_train if N <= split_N else idx_test
        bucket.append(position)
    return idx_train, idx_test

IDX_T, IDX_H = split_train_test()

def dec_stats_abs(x_list):
    """Order statistics of |x| over the non-None entries of x_list.

    Returns (median, band, mad):
      median - median of the absolute values,
      band   - difference between the elements at sorted positions
               int(0.95*n)-1 and int(0.05*n)-1 (clamped to the valid range),
      mad    - median of the absolute deviations from `median`.
    Returns (None, None, None) when no usable entries exist.
    """
    magnitudes = sorted(abs(v) for v in x_list if v is not None)
    if not magnitudes:
        return (None, None, None)

    def _median(ordered):
        # Middle element, or the mean of the two middle elements for even sizes.
        mid, is_odd = divmod(len(ordered), 2)
        if is_odd:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2

    count = len(magnitudes)
    center = _median(magnitudes)

    low_pos = max(0, int(0.05 * count) - 1)
    high_pos = min(count - 1, int(0.95 * count) - 1)
    band = magnitudes[high_pos] - magnitudes[low_pos]

    deviations = sorted(abs(m - center) for m in magnitudes)
    return (center, band, _median(deviations))

def sign_change_rate(x_list):
    """Fraction of adjacent pairs (None entries removed) whose signs differ.

    Signs are -1, 0 or +1, so moving to or from an exact zero counts as a
    change.  Returns a Decimal, or None with fewer than two usable entries.
    """
    present = [v for v in x_list if v is not None]
    if len(present) < 2:
        return None

    def _sign(v):
        if v == 0:
            return 0
        return 1 if v > 0 else -1

    signs = [_sign(v) for v in present]
    flips = sum(1 for before, after in zip(signs, signs[1:]) if before != after)
    return Decimal(flips) / Decimal(len(signs) - 1)

def plateau_median_run_rounded(x_list, decimals=0):
    """Median length of runs of equal values after flooring to `decimals` places.

    Each non-None value is scaled by 10**decimals (skipped when decimals == 0),
    floored to an integer, and consecutive equal integers are grouped into
    runs.  Returns the element at index len(runs)//2 of the sorted run lengths
    as a Decimal (the upper middle for an even number of runs), or None when
    there are no usable values.
    """
    present = [v for v in x_list if v is not None]
    if not present:
        return None

    unit = Decimal(1)
    if decimals == 0:
        buckets = [int(v.quantize(unit, rounding=ROUND_FLOOR)) for v in present]
    else:
        scale = Decimal(10) ** decimals
        buckets = [int((v * scale).quantize(unit, rounding=ROUND_FLOOR)) for v in present]

    run_lengths = sorted(
        sum(1 for _ in group) for _, group in itertools.groupby(buckets)
    )
    return Decimal(run_lengths[len(run_lengths) // 2])

def stdev_ppt(x_list):
    """Sample standard deviation (ddof=1) of the non-None entries, as a float.

    The computation is carried out in Decimal and only the final result is
    converted to float.  Converting the inputs to float first would erase any
    spread smaller than double resolution relative to the values' magnitude,
    and could also report a small nonzero deviation for identical inputs
    because of summation round-off.

    Values are shifted by the first entry before averaging, so the arithmetic
    works at the scale of the spread rather than the scale of the values.

    Returns 0.0 when fewer than two usable entries exist.
    """
    vals = [v if isinstance(v, Decimal) else Decimal(v)
            for v in x_list if v is not None]
    n = len(vals)
    if n < 2:
        return 0.0
    pivot = vals[0]
    shifted = [v - pivot for v in vals]
    mean_shift = sum(shifted, Decimal(0)) / n
    sq_dev = sum(((d - mean_shift) ** 2 for d in shifted), Decimal(0))
    return float((sq_dev / (n - 1)).sqrt())

def ks_uniform_mod1(fracs):
    """One-sample Kolmogorov-Smirnov statistic of fracs against U[0,1].

    Returns (D, p) as Python floats, where p is the asymptotic series
    2 * sum_{k>=1} (-1)**(k-1) * exp(-2 k^2 n D^2), truncated once a term
    drops below 1e-10 (at most 99 terms) and clipped to [0, 1].
    Returns (nan, nan) for empty input.
    """
    sample = np.sort(np.asarray(fracs, dtype=float))
    n = len(sample)
    if n == 0:
        return (np.nan, np.nan)

    ranks = np.arange(n)
    # Largest gaps above and below the empirical CDF steps.
    gap_above = np.max((ranks + 1) / n - sample)
    gap_below = np.max(sample - ranks / n)
    D = max(gap_above, gap_below)

    series_sum = 0.0
    for k in range(1, 100):
        sign = 1 if k % 2 == 1 else -1
        term = sign * math.exp(-2 * (k * k) * n * (D * D))
        series_sum += term
        if abs(term) < 1e-10:
            break

    p = max(0.0, min(1.0, 2 * series_sum))
    return (float(D), float(p))

def frac_part_decimal(x: Decimal) -> float:
    """Return x - floor(x) as a float, or NaN when x is None.

    The subtraction is done in Decimal; only the result is converted to float.
    """
    if x is None:
        return np.nan
    return float(x - x.to_integral_value(rounding=ROUND_FLOOR))

# ---------------- Search over candidates ----------------
def _as_float(value):
    """Convert an optional Decimal statistic to float, mapping None to NaN."""
    return np.nan if value is None else float(value)


def _transform_all(tf, values):
    """Apply coupling transform tf to every value.

    Returns the list of transformed values, or None as soon as the transform
    yields None or raises an arithmetic error for any value.  Only
    ArithmeticError is caught (the decimal module's exceptions derive from
    it); any other exception is a programming error and is allowed to surface.
    """
    out = []
    for v in values:
        try:
            w = tf(v)
        except ArithmeticError:
            return None
        if w is None:
            return None
        out.append(w)
    return out


records_A = []  # Part A: one row per (seed, encoding, gate1, gate2, tri_k)
records_B = []  # Part B: one row per Part-A candidate and coupling transform

for (seed_name, enc_name), seq in SEEDS.items():

    # Every ordered pair of gates, including a gate paired with itself.
    for g1, g2 in itertools.product(GATES.keys(), repeat=2):
        y = apply_chain(seq, g1, g2)
        for k in tri_k_vals:
            yk = tri_scale(y, k)

            # A candidate is dropped entirely if any prefix length is undefined.
            if any(v is None for v in yk):
                continue

            # Part A: signed relative error vs alpha_inv in ppt, split by prefix length.
            e_ppt = rel_err_ppt(yk, alpha_inv)
            eT = [e_ppt[i] for i in IDX_T]
            eH = [e_ppt[i] for i in IDX_H]

            # Variation check.
            # NOTE(review): any strictly positive spread counts as "variable";
            # the captured sample output lists rows with a 95% band around
            # 1e-49 ppt among the variable candidates, so a tolerance may be
            # what was intended here - confirm.
            sdT = stdev_ppt(eT)
            sdH = stdev_ppt(eH)
            variable_train = sdT > 0.0
            variable_test = sdH > 0.0

            # Summaries of |error| plus sign-change and plateau diagnostics.
            med_abs_T, bw95_T, MAD_T = dec_stats_abs(eT)
            med_abs_H, bw95_H, MAD_H = dec_stats_abs(eH)
            scr_T = sign_change_rate(eT)
            scr_H = sign_change_rate(eH)
            plat_T = plateau_median_run_rounded(eT, decimals=0)
            plat_H = plateau_median_run_rounded(eH, decimals=0)

            recA = {
                "seed": seed_name, "encoding": enc_name, "gate1": g1, "gate2": g2, "tri_k": k,
                "train_median_abs_rel_err_ppt": _as_float(med_abs_T),
                "test_median_abs_rel_err_ppt":  _as_float(med_abs_H),
                "train_bw95_ppt": _as_float(bw95_T),
                "test_bw95_ppt":  _as_float(bw95_H),
                "train_MAD_ppt":  _as_float(MAD_T),
                "test_MAD_ppt":   _as_float(MAD_H),
                "train_sign_change_rate": _as_float(scr_T),
                "test_sign_change_rate":  _as_float(scr_H),
                "train_plateau_median_run_intppt": _as_float(plat_T),
                "test_plateau_median_run_intppt":  _as_float(plat_H),
                "train_series_std_ppt": sdT,
                "test_series_std_ppt":  sdH,
                "train_variable": variable_train,
                "test_variable":  variable_test,
            }
            records_A.append(recA)

            # Part B: coupling transforms, only if variable on train or test.
            if not (variable_train or variable_test):
                continue

            for tname, tf in TRANSFORMS.items():
                vals = _transform_all(tf, yk)
                if not vals:
                    continue

                # Fractional parts (mod 1), keeping finite values only.
                fracs = [fr for fr in map(frac_part_decimal, vals) if np.isfinite(fr)]
                if not fracs:
                    continue

                D, p = ks_uniform_mod1(fracs)

                # Mean distance to the nearest integer, and to 1/2, in mod-1 terms.
                f = np.asarray(fracs)
                mean_dist_int = float(np.mean(np.minimum(f, 1 - f)))
                mean_dist_half = float(np.mean(np.abs(f - 0.5)))

                records_B.append({
                    "seed": seed_name, "encoding": enc_name,
                    "gate1": g1, "gate2": g2, "tri_k": k,
                    "transform": tname,
                    "mod1_KS_D": D, "mod1_KS_p": p,
                    "mean_frac_dist_to_int":  mean_dist_int,
                    "mean_frac_dist_to_half": mean_dist_half,
                    "train_variable": variable_train,
                    "test_variable":  variable_test,
                })

# ---------------- Tables ----------------
# One DataFrame per record list collected by the search.
partA = pd.DataFrame.from_records(records_A)
partB = pd.DataFrame.from_records(records_B)

# Keep only candidates flagged as varying on BOTH the train and test parts.
_varies_on_both = partA["train_variable"] & partA["test_variable"]
partA_var = partA[_varies_on_both].copy()

def show_top_by(metric="train_median_abs_rel_err_ppt", k=12):
    """Print the k rows of partA_var with the smallest `metric`.

    Ties are broken by the test median absolute relative error.  Prints a
    notice and returns when partA_var is empty.
    """
    if partA_var.empty:
        print("\nNo variable candidates; adjust N range or encodings.")
        return

    id_cols = ["seed", "encoding", "gate1", "gate2", "tri_k"]
    stat_names = [
        "median_abs_rel_err_ppt",
        "bw95_ppt",
        "MAD_ppt",
        "sign_change_rate",
        "plateau_median_run_intppt",
    ]
    # train/test columns are interleaved per statistic.
    cols = id_cols + [
        f"{part}_{stat}" for stat in stat_names for part in ("train", "test")
    ]

    ranked = partA_var.sort_values([metric, "test_median_abs_rel_err_ppt"])
    top = ranked.head(k)[cols]
    print(f"\n=== Part A: top by {metric} (lower is better) — values in ppt ===")
    print(top.to_string(index=False))

show_top_by("train_median_abs_rel_err_ppt")

# Part B: show strongest mod-1 clustering among variable candidates
if partB.empty:
    print("\nPart B: no coupling rows were produced (no variable series).")
else:
    # Restrict Part B to candidates that survived the Part A filter
    # (inner join on the candidate key columns).
    keys = ["seed","encoding","gate1","gate2","tri_k"]
    var_keys = partA_var[keys].drop_duplicates()
    partB_var = partB.merge(var_keys, on=keys, how="inner")
    if partB_var.empty:
        print("\nPart B: no variable series survived to coupling analysis.")
    else:
        # First three rows per (encoding, seed) after sorting by mean distance
        # to an integer, then by KS p-value.
        ordered = partB_var.sort_values(["mean_frac_dist_to_int","mod1_KS_p"])
        best_mod1 = ordered.groupby(["encoding","seed"], as_index=False).head(3)
        report_cols = ["encoding","seed","gate1","gate2","tri_k","transform",
                       "mean_frac_dist_to_int","mean_frac_dist_to_half",
                       "mod1_KS_D","mod1_KS_p"]
        report = best_mod1[report_cols].sort_values(
            ["encoding","seed","mean_frac_dist_to_int"])
        print("\n=== Part B: strongest mod-1 clustering (variable series only) ===")
        print(report.to_string(index=False))

# ---------------- Save ----------------
# Write the three tables as CSV without the index column, then list the paths.
# NOTE(review): the /content/ prefix is hard-coded; presumably a hosted
# notebook working directory - confirm before running elsewhere.
_outputs = [
    (partA,     "/content/phase_alpha_inv_candidates_ppt.csv"),
    (partA_var, "/content/phase_alpha_inv_candidates_ppt_VARIABLE.csv"),
    (partB,     "/content/coupling_mod1_scores_VARIABLE.csv"),
]
for _frame, _path in _outputs:
    _frame.to_csv(_path, index=False)

print("\nSaved:")
for _frame, _path in _outputs:
    print(f" - {_path}")



=== Part A: top by train_median_abs_rel_err_ppt (lower is better) — values in ppt ===
     seed        encoding   gate1   gate2  tri_k  train_median_abs_rel_err_ppt  test_median_abs_rel_err_ppt  train_bw95_ppt  test_bw95_ppt  train_MAD_ppt  test_MAD_ppt  train_sign_change_rate  test_sign_change_rate  train_plateau_median_run_intppt  test_plateau_median_run_intppt
        F   decimal_bigit      id     inv      0                  2.710010e+11                 2.710010e+11    7.289924e-49  7.355534e-288  7.289990e-162  0.000000e+00                     0.0                    0.0                            246.0                           105.0
        F   decimal_bigit     inv      id      0                  2.710010e+11                 2.710010e+11    7.289924e-49  7.355534e-288  7.289990e-162  0.000000e+00                     0.0                    0.0                            246.0                           105.0
        F   decimal_bigit      id (1-x)/x      0                  2.78298