In [1]:
# === PRIME FAMILY φ-PHASE TEST (A″): φ, φ+Fibit, φ+α (no heavy assumptions) ===
import os, math, json, csv, numpy as np
import matplotlib.pyplot as plt
from numpy.random import default_rng

rng = default_rng(137)
plt.rcParams["figure.dpi"] = 120

BASE_DIR = "phi_family_test_A2"
os.makedirs(BASE_DIR, exist_ok=True)
os.makedirs(f"{BASE_DIR}/plots", exist_ok=True)
os.makedirs(f"{BASE_DIR}/artifacts", exist_ok=True)

# Windows and caps (keep fast)
WINDOWS = [(10**6, 10**7), (10**7, 5*10**7)]
CAP_PRIMES = 250_000

# === Constants
phi = (1 + 5**0.5) / 2
alpha = 1/137.035999084  # fine-structure (CODATAish)
SQ2 = 2**0.5
E   = math.e
PI2 = math.pi/2

# --- Build Fibit: Fibonacci word -> decimal 0.0100101001001... (N digits)
def fib_word_digits(N=4096):
    a,b = "0","01"
    s = a
    while len(s) < N:
        a,b = b, a+b
        s = a
    return s[:N]
def fibit_value(num_digits=4096):
    s = fib_word_digits(num_digits)
    # interpret "0.xxx..." decimal; we only need its value modulo ~1e-6
    # compute as sum_{k>=1} s[k-1]*10^{-k}
    pw = 10.0**(-np.arange(1, len(s)+1, dtype=float))
    bits = np.fromiter((1 if ch=='1' else 0 for ch in s), dtype=float, count=len(s))
    return float(np.dot(bits, pw))

Fibit = fibit_value(4096)  # high precision decimal projection of the Fibonacci word

# τ candidates (pre-registered; no cherry-picking)
def neighbor_grid(center, span=0.005, step=0.001):
    xs = np.arange(center-span, center+span+1e-12, step)
    return [float(round(x,12)) for x in xs]

taus = sorted(set(
    [phi, phi**2, 1.0/phi] +
    neighbor_grid(phi, 0.005, 0.001) +                      # around φ
    [phi + Fibit, phi + alpha] +                            # your targets
    neighbor_grid(phi + Fibit, 0.005, 0.001) +              # around φ+Fibit
    neighbor_grid(phi + alpha, 0.005, 0.001) +              # around φ+α
    [3.0, 9.0, 27.0, 729.0, SQ2, E, PI2]                    # gears + classics
))

# === Primes & families
def primes_in_window(lo, hi, cap=None):
    from sympy import primerange
    arr = np.fromiter(primerange(int(lo), int(hi)), dtype=np.int64)
    if cap and arr.size > cap:
        idx = rng.choice(arr.size, size=cap, replace=False)
        arr = arr[idx]
    return np.sort(arr)

def family_pairs(primes, d):
    pset = set(primes.tolist())
    out = [(p, p+d) for p in primes if (p+d) in pset]
    return np.array(out, dtype=np.int64)

# Sequences per family:
#  - bases: starting p in each pair
#  - gaps_raw: differences between successive bases (no normalization)
FAMS = [("twin",2), ("cousin",4), ("sexy",6)]

# === Lenses (low assumption)
# Given a positive sequence x -> phases = frac( τ * f(x) )
def lens_identity(x):      return x
def lens_recip(x):         return 1.0/np.maximum(x, 1e-12)
def lens_log(x):           return np.log(np.maximum(x, 1+1e-12))
def lens_recip_log(x):     return 1.0/np.log(np.maximum(x, 1+1e-12))
LENSES = [("id", lens_identity), ("inv", lens_recip), ("log", lens_log), ("invlog", lens_recip_log)]

# === Stats
def emd_hist_01(phases, bins=200):
    x = np.asarray(phases, dtype=float) % 1.0
    h, edges = np.histogram(x, bins=bins, range=(0,1), density=True)
    centers = (edges[:-1] + edges[1:]) / 2
    cdf = np.cumsum(h)
    if cdf[-1]==0: return 0.0
    cdf /= cdf[-1]
    cdf_u = centers
    return float(np.trapezoid(np.abs(cdf - cdf_u), centers))

def kuiper_uniform01(x, m_mc=800):
    x = np.sort((np.asarray(x, dtype=float) % 1.0))
    n = x.size
    if n==0: return 0.0, 1.0
    grid = np.arange(1, n+1)/n
    Dp = np.max(grid - x)
    Dm = np.max(x - (np.arange(0, n)/n))
    V  = float(Dp + Dm)
    u  = np.sort(rng.random((m_mc, n)), axis=1)
    Dp0 = np.max(grid - u, axis=1)
    u2  = np.sort(rng.random((m_mc, n)), axis=1)
    Dm0 = np.max(u2 - (np.arange(0, n)/n), axis=1)
    V0  = Dp0 + Dm0
    p   = (np.sum(V0 >= V) + 1) / (m_mc + 1)
    return V, float(p)

def phases_from(seq, tau, lens_fn):
    v = np.asarray(seq, dtype=float)
    v = v[np.isfinite(v) & (v>0)]
    if v.size==0: return np.array([])
    y = lens_fn(v)
    return (tau * y) % 1.0

# === Run
summary = []   # lo,hi,family,series,lens,tau,n,emd,kuiper_p,rank_as_max,rank_as_min
series_used = ["bases","gaps_raw"]

for (lo,hi) in WINDOWS:
    primes = primes_in_window(lo, hi, cap=CAP_PRIMES)
    # Build families
    fam_data = {}
    for name,d in FAMS:
        pairs = family_pairs(primes, d)
        bases = pairs[:,0] if pairs.size else np.array([],dtype=np.int64)
        gaps_raw = np.diff(np.unique(bases)) if bases.size>1 else np.array([],dtype=float)
        fam_data[name] = {"bases": bases, "gaps_raw": gaps_raw}

    # Analyze
    for fam in FAMS:
        name = fam[0]
        for series in series_used:
            seq = fam_data[name][series]
            if seq.size == 0:
                for lens_name,_ in LENSES:
                    for tau in taus:
                        summary.append([lo,hi,name,series,lens_name,tau,0,0.0,1.0,False,False])
                continue

            for lens_name, lens_fn in LENSES:
                # compute all τ stats
                stats = []
                for tau in taus:
                    ph = phases_from(seq, tau, lens_fn)
                    emd = emd_hist_01(ph, bins=160)
                    _, p = kuiper_uniform01(ph, m_mc=800)
                    stats.append((tau, emd, p))
                # ranks
                emds = np.array([s[1] for s in stats])
                max_rank_order = np.argsort(-emds)  # largest -> rank 1
                min_rank_order = np.argsort(emds)   # smallest -> rank 1
                rank_as_max = np.empty(len(stats), dtype=int); rank_as_min = np.empty(len(stats), dtype=int)
                rank_as_max[max_rank_order] = np.arange(1, len(stats)+1)
                rank_as_min[min_rank_order] = np.arange(1, len(stats)+1)

                for i,(tau, emd, p) in enumerate(stats):
                    summary.append([lo,hi,name,series,lens_name,float(tau),int(seq.size),float(emd),float(p),
                                    int(rank_as_max[i]), int(rank_as_min[i])])

                # quick plot around φ neighborhood for visual sanity
                nb_mask = [(abs(t - phi) <= 0.005) for t in taus]
                if any(nb_mask):
                    t_nb = np.array([t for t,m in zip(taus,nb_mask) if m])
                    e_nb = np.array([s[1] for s,m in zip(stats,nb_mask) if m])
                    plt.figure()
                    idx = np.argsort(t_nb); plt.plot(t_nb[idx], e_nb[idx], marker='o', linewidth=1)
                    plt.axvline(phi, linestyle='--')
                    plt.title(f"{name}/{series}/{lens_name} — EMD vs τ near φ [{lo},{hi})  n={seq.size}")
                    plt.xlabel("τ"); plt.ylabel("EMD to uniform")
                    plt.tight_layout()
                    plt.savefig(f"{BASE_DIR}/plots/{name}_{series}_{lens_name}_phi_neighborhood_{lo}_{hi}.png"); plt.close()

# Save
with open(f"{BASE_DIR}/artifacts/summary.csv","w") as f:
    w=csv.writer(f)
    w.writerow(["lo","hi","family","series","lens","tau","n","emd","kuiper_p","rank_as_max","rank_as_min"])
    w.writerows(summary)

# Tiny report focusing on φ, φ+Fibit, φ+α across lenses/series
def pick_rows(tau_target):
    return [r for r in summary if abs(r[5]-tau_target) < 1e-12]

report = {
  "phi": phi,
  "phi_plus_fibit": phi + Fibit,
  "phi_plus_alpha": phi + alpha,
  "windows": WINDOWS,
  "highlights": {
    "phi": pick_rows(phi)[:50],               # truncated view; CSV has all
    "phi_plus_fibit": pick_rows(phi + Fibit)[:50],
    "phi_plus_alpha": pick_rows(phi + alpha)[:50],
  }
}
with open(f"{BASE_DIR}/artifacts/report.json","w") as f:
    json.dump(report, f, indent=2)

print(json.dumps({
  "phi": phi,
  "phi_plus_fibit": phi + Fibit,
  "phi_plus_alpha": phi + alpha,
  "summary_rows": len(summary),
  "csv": f"{BASE_DIR}/artifacts/summary.csv",
  "report": f"{BASE_DIR}/artifacts/report.json",
  "plots_dir": f"{BASE_DIR}/plots"
}, indent=2))


{
  "phi": 1.618033988749895,
  "phi_plus_fibit": 1.628043998849995,
  "phi_plus_alpha": 1.6253313413191788,
  "summary_rows": 2160,
  "csv": "phi_family_test_A2/artifacts/summary.csv",
  "report": "phi_family_test_A2/artifacts/report.json",
  "plots_dir": "phi_family_test_A2/plots"
}
