In [1]:
# ======================================================================
# Bigit Rails: Pre-registered, reproducible test harness (single cell)
# ======================================================================
# What this cell does (frozen grammar/search space):
#   • Words: {Fib, Thue–Morse, Period-Doubling, Rudin–Shapiro, 1/phi-bits}
#   • Ns: {12, 27, 81, 243}
#   • Scaling: {OFF, ON} where ON multiplies BIGIT by 729/1000
#   • Ops: Single {S1..S4} and Two-step {T1..T16} exactly per your SOP
#   • Targets: {sqrt(3)/2, 1/sqrt(3), pi/(2*sqrt(3)), alpha_inv = 137.035999084}
#   • Metrics: abs error to the three 0–1 targets; ppm to alpha_inv
#   • Sanity: T14–T16 return-to-start; domain guardrails (div/0 etc)
#   • Plateau: hit at two adjacent N (12→27, 27→81, 81→243) under 1e-6
#   • Null-gap: compare each word’s error vs Rudin–Shapiro baseline
#
# Notes:
#   - BIGIT is the decimal number 0.<first N digits> where digits ∈ {0,1}
#   - 1/phi bits are generated from the binary fractional expansion of 1/phi
#   - All arithmetic uses Decimal (high precision) for stability
#   - No randomness; fully deterministic and reproducible
# ======================================================================

from decimal import Decimal, getcontext, ROUND_HALF_EVEN
from math import isfinite
import itertools, statistics
import pandas as pd

# -------------------------
# Precision / constants
# -------------------------
getcontext().prec = 200  # plenty for 243-digit BIGIT arithmetic and ops
getcontext().rounding = ROUND_HALF_EVEN

D = Decimal  # alias

# Fixed numerical targets (from your SOP)
SQRT3 = D(3).sqrt()
T_SQRT3_OVER_2   =  SQRT3 / D(2)                       # √3/2
T_ONE_OVER_SQRT3 =  D(1) / SQRT3                       # 1/√3

# π to high precision (string literal; enough for our purposes)
PI = D("3.14159265358979323846264338327950288419716939937510")
T_PI_OVER_2SQRT3 = PI / (D(2)*SQRT3)                   # π/(2√3)

ALPHA_INV = D("137.035999084")                         # per your manual

# Tolerances (pre-registered)
EPS_SCOUT   = D("1e-3")
EPS_SERIOUS = D("1e-6")
ALPHA_PPM_SERIOUS = D("50")    # shown but we report raw ppm as well

# -------------------------
# Utility helpers
# -------------------------
def to_decimal_bigit(digits):
    """digits: list/str of '0'/'1' -> Decimal('0.<digits>')"""
    return D("0." + "".join(digits))

def apply_scaling(x, amplify=False):
    """Optional 729/1000 scaling (OFF by default)."""
    return (x * D(729) / D(1000)) if amplify else x

def safe_div(a,b):
    if b == 0:
        return None
    return a / b

def ppm(v, ref):
    return (abs(v - ref) / ref) * D(1_000_000)

def zone_label(op_code, x_start, x_out):
    """Lightweight zone hint (for sanity logs only; not used for stats)."""
    try:
        if op_code.startswith("S"):
            k = int(op_code[1:])
            if k == 1:  # 1 - x
                return "near 1" if D("0.5") < x_out < D("1.5") else "off-zone"
            if k == 2:  # 1 / x
                return ">1" if x_out > 1 else "off-zone"
            if k == 3:  # x/(1-x)
                return "small +" if x_out > 0 and x_out < 2 else "off-zone"
            if k == 4:  # -x
                return "small -" if x_out < 0 and abs(x_out) < 1 else "off-zone"
        # Two-steps are varied; just return 'ok' unless NaN/None
        return "ok" if x_out is not None else "NA"
    except Exception:
        return "NA"

# -------------------------
# Word generators (first N digits)
# -------------------------
def fib_word(n):
    a, b = "0", "01"  # standard morphism start
    s = "0"
    while len(s) < n:
        s = s.replace("0","X").replace("1","Y")
        s = s.replace("X","01").replace("Y","0")  # 0→01, 1→0
    return list(s[:n])

def thue_morse(n):
    # bit parity of popcount(i)
    out = []
    for i in range(n):
        out.append('1' if bin(i).count('1') % 2 == 1 else '0')
    return out

def period_doubling(n):
    s = "0"
    # morphism: 0->01, 1->00
    while len(s) < n:
        s = s.replace("0","X").replace("1","Y")
        s = s.replace("X","01").replace("Y","00")
    return list(s[:n])

def rudin_shapiro(n):
    # digit = 1 if count of overlapping "11" pairs in binary(i) is odd
    out = []
    for i in range(n):
        b = bin(i)[2:]
        cnt = sum(1 for j in range(len(b)-1) if b[j]=='1' and b[j+1]=='1')
        out.append('1' if cnt % 2 == 1 else '0')
    return out

def invphi_bits(n):
    # Binary fractional bits of 1/phi, then treat those bits as decimal digits
    # phi = (1+sqrt(5))/2
    phi = (D(1) + D(5).sqrt()) / D(2)
    x = D(1) / phi
    bits = []
    for _ in range(n):
        x = x * 2
        if x >= 1:
            bits.append('1')
            x = x - 1
        else:
            bits.append('0')
    return bits

WORD_BUILDERS = {
    "Fib": fib_word,
    "TM": thue_morse,
    "PD": period_doubling,
    "RS": rudin_shapiro,
    "InvPhiBits": invphi_bits,
}

# -------------------------
# Rails (Ops): S1..S4; T1..T16
# -------------------------
def S1(x):  # 1 - x
    return D(1) - x

def S2(x):  # 1 / x
    return safe_div(D(1), x)

def S3(x):  # x / (1 - x)
    return safe_div(x, (D(1) - x))

def S4(x):  # -x
    return -x

S = { "S1": S1, "S2": S2, "S3": S3, "S4": S4 }

# Two-step codes (ordered)
T = {
    "T1":  ("S2","S1"),
    "T2":  ("S1","S2"),
    "T3":  ("S1","S3"),
    "T4":  ("S3","S2"),
    "T5":  ("S3","S1"),
    "T6":  ("S4","S1"),
    "T7":  ("S1","S4"),
    "T8":  ("S4","S3"),
    "T9":  ("S3","S4"),
    "T10": ("S2","S4"),
    "T11": ("S4","S2"),
    "T12": ("S2","S3"),
    "T13": ("S3","S3"),
    "T14": ("S2","S2"),  # returns to start
    "T15": ("S1","S1"),  # returns to start
    "T16": ("S4","S4"),  # returns to start
}

def apply_two_step(x, code):
    sA, sB = T[code]
    # guardrails handled by S2/S3 returning None on invalid input
    y = S[sA](x)
    if y is None: return None
    z = S[sB](y)
    return z

# -------------------------
# Runner
# -------------------------
WORDS = list(WORD_BUILDERS.keys())
NS = [12, 27, 81, 243]
SCALE_OPTIONS = [False, True]  # 729/1000 OFF vs ON

S_CODES = list(S.keys())
T_CODES = list(T.keys())

def evaluate_all():
    records = []

    # Pre-compute RS errors per (N, amp, op) for null-gap
    rs_cache = {}

    for N, amp in itertools.product(NS, SCALE_OPTIONS):
        # build RS once per N
        rs_digits = WORD_BUILDERS["RS"](N)
        x_rs = apply_scaling(to_decimal_bigit(rs_digits), amp)
        # Single
        for op in S_CODES:
            val = S[op](x_rs)
            rs_cache[("RS","S",op,N,amp)] = val
        # Two-step
        for op in T_CODES:
            val = apply_two_step(x_rs, op)
            rs_cache[("RS","T",op,N,amp)] = val

    for word, N, amp in itertools.product(WORDS, NS, SCALE_OPTIONS):
        digits = WORD_BUILDERS[word](N)
        x0 = apply_scaling(to_decimal_bigit(digits), amp)

        # Single-step ops
        for op in S_CODES:
            val = S[op](x0)
            zlab = zone_label(op, x0, val) if val is not None else "NA"
            rec = base_record(word, N, amp, "S", op, digits, x0, val, zlab)
            records.append(rec)

        # Two-step ops
        for op in T_CODES:
            val = apply_two_step(x0, op)
            zlab = zone_label(op, x0, val) if val is not None else "NA"
            rec = base_record(word, N, amp, "T", op, digits, x0, val, zlab)
            records.append(rec)

    df = pd.DataFrame.from_records(records)

    # Sanity checks (T14–T16 return to start)
    sanity = df[(df['op_kind']=="T") & (df['op_code'].isin(["T14","T15","T16"]))]
    sanity['return_error'] = (sanity['val'] - sanity['x0']).abs()
    sanity_summary = (sanity.groupby(['word','N','amp','op_code'])['return_error']
                      .max().reset_index().sort_values('return_error'))

    # Error metrics to targets
    df['err_abs_sqrt3_over_2']   = (df['val'] - float(T_SQRT3_OVER_2)).abs()
    df['err_abs_inv_sqrt3']      = (df['val'] - float(T_ONE_OVER_SQRT3)).abs()
    df['err_abs_pi_over_2sqrt3'] = (df['val'] - float(T_PI_OVER_2SQRT3)).abs()

    # α inverse ppm (value must be positive/finite to be meaningful)
    df['alpha_ppm'] = (df['val'] - float(ALPHA_INV)).abs() / float(ALPHA_INV) * 1_000_000

    # Hits under serious threshold (0–1 targets) and alpha ppm serious
    df['hit_sqrt3_over_2']   = df['err_abs_sqrt3_over_2']   <= float(EPS_SERIOUS)
    df['hit_inv_sqrt3']      = df['err_abs_inv_sqrt3']      <= float(EPS_SERIOUS)
    df['hit_pi_over_2sqrt3'] = df['err_abs_pi_over_2sqrt3'] <= float(EPS_SERIOUS)
    df['hit_alpha_ppm']      = df['alpha_ppm']              <= float(ALPHA_PPM_SERIOUS)

    # Plateau detection: hit at two adjacent Ns for each (word, amp, op, target)
    def plateau_hits(target_flag):
        rows = []
        for (word, amp, op_kind, op_code), sub in df.groupby(['word','amp','op_kind','op_code']):
            # sub sorted by N
            sub = sub.sort_values('N')
            flags = list(sub[target_flag])
            Ns = list(sub['N'])
            adj = any(flags[i] and flags[i+1] for i in range(len(flags)-1))
            rows.append((word, amp, op_kind, op_code, target_flag, adj))
        return pd.DataFrame(rows, columns=['word','amp','op_kind','op_code','target','plateau_adjacent'])

    plateau_df = pd.concat([
        plateau_hits('hit_sqrt3_over_2'),
        plateau_hits('hit_inv_sqrt3'),
        plateau_hits('hit_pi_over_2sqrt3'),
        plateau_hits('hit_alpha_ppm'),
    ], ignore_index=True)

    # Null-gap vs RS: for each row, subtract RS error at same (N, amp, op)
    def error_cols():
        return ['err_abs_sqrt3_over_2','err_abs_inv_sqrt3','err_abs_pi_over_2sqrt3','alpha_ppm']
    df['err_vs_RS_sqrt3_over_2'] = None
    df['err_vs_RS_inv_sqrt3'] = None
    df['err_vs_RS_pi_over_2sqrt3'] = None
    df['ppm_vs_RS_alpha'] = None

    for idx, row in df.iterrows():
        key = ( "RS", row['op_kind'], row['op_code'], row['N'], row['amp'] )
        rs_val = rs_cache.get(key, None)
        if rs_val is None:
            continue
        # Build a tiny Series-like container for RS errors
        rs_e_s   = abs(rs_val - T_SQRT3_OVER_2)
        rs_e_i   = abs(rs_val - T_ONE_OVER_SQRT3)
        rs_e_p   = abs(rs_val - T_PI_OVER_2SQRT3)
        rs_ppm   = ppm(rs_val, ALPHA_INV)
        # Delta (word minus RS)
        if row['val'] is not None:
            df.at[idx, 'err_vs_RS_sqrt3_over_2'] = float(abs(row['val'] - float(T_SQRT3_OVER_2)) - float(rs_e_s))
            df.at[idx, 'err_vs_RS_inv_sqrt3']    = float(abs(row['val'] - float(T_ONE_OVER_SQRT3)) - float(rs_e_i))
            df.at[idx, 'err_vs_RS_pi_over_2sqrt3']= float(abs(row['val'] - float(T_PI_OVER_2SQRT3)) - float(rs_e_p))
            df.at[idx, 'ppm_vs_RS_alpha']        = float(abs(row['val'] - float(ALPHA_INV))/float(ALPHA_INV)*1e6 - float(rs_ppm))

    # Summaries:
    top_near_one = (df[['word','N','amp','op_kind','op_code',
                        'err_abs_sqrt3_over_2','err_abs_inv_sqrt3','err_abs_pi_over_2sqrt3']]
                    .melt(id_vars=['word','N','amp','op_kind','op_code'],
                          var_name='target', value_name='abs_error')
                    .sort_values('abs_error').head(25))

    top_alpha = df.sort_values('alpha_ppm').head(25)

    # Null-gap summary: mean(delta) by word vs RS (negative is better than RS)
    nullgap = (df.groupby(['word','op_kind','op_code','amp'])
               [['err_vs_RS_sqrt3_over_2','err_vs_RS_inv_sqrt3','err_vs_RS_pi_over_2sqrt3','ppm_vs_RS_alpha']]
               .mean().reset_index().sort_values(['word','op_kind','op_code']))

    # Output
    print("\n=== Sanity: T14–T16 return-to-start (max absolute error) ===")
    print(sanity_summary.to_string(index=False))

    print("\n=== Top 25 near-one constant matches (abs error) ===")
    print(top_near_one.to_string(index=False))

    print("\n=== Top 25 α⁻¹ matches (ppm) ===")
    print(top_alpha[['word','N','amp','op_kind','op_code','val','alpha_ppm']].to_string(index=False))

    print("\n=== Plateau hits across adjacent N (True = at least one adjacent pair) ===")
    print(plateau_df.sort_values(['target','word','op_kind','op_code']).to_string(index=False))

    print("\n=== Null-gap vs Rudin–Shapiro (mean Δ; negative = better than RS) ===")
    print(nullgap.to_string(index=False))

    return df, plateau_df, nullgap

def base_record(word, N, amp, op_kind, op_code, digits, x0, val, zlab):
    # targets may be undefined if val is None
    rec = {
        'word': word,
        'N': N,
        'amp': amp,
        'op_kind': op_kind,
        'op_code': op_code,
        'digits': ''.join(digits),
        'x0': float(x0),
        'val': float(val) if val is not None else float('nan'),
        'zone': zlab,
    }
    return rec

# Run
df, plateau_df, nullgap = evaluate_all()

# Save CSVs for convenience (download from Colab "Files" pane if desired)
df.to_csv("bigit_rails_full_results.csv", index=False)
plateau_df.to_csv("bigit_plateaus.csv", index=False)
nullgap.to_csv("bigit_nullgap_vs_RS.csv", index=False)
print("\nSaved: bigit_rails_full_results.csv, bigit_plateaus.csv, bigit_nullgap_vs_RS.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sanity['return_error'] = (sanity['val'] - sanity['x0']).abs()



=== Sanity: T14–T16 return-to-start (max absolute error) ===
      word   N   amp op_code  return_error
       Fib  12 False     T14           0.0
       Fib  12 False     T15           0.0
       Fib  12 False     T16           0.0
       Fib  12  True     T14           0.0
       Fib  12  True     T15           0.0
       Fib  12  True     T16           0.0
       Fib  27 False     T14           0.0
       Fib  27 False     T15           0.0
       Fib  27 False     T16           0.0
       Fib  27  True     T14           0.0
       Fib  27  True     T15           0.0
       Fib  27  True     T16           0.0
       Fib  81 False     T14           0.0
       Fib  81 False     T15           0.0
       Fib  81 False     T16           0.0
       Fib  81  True     T14           0.0
       Fib  81  True     T15           0.0
       Fib  81  True     T16           0.0
       Fib 243 False     T14           0.0
       Fib 243 False     T15           0.0
       Fib 243 False     T16       