# Fine & Golden Autoscan v20
*Colab‑ready notebook – last updated 2025-07-14*

---
## Purpose
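Run a full delta analysis and clustering workflow on your latest **plateaus** Excel files (`plateaus_raw_v<VERSION>.xlsx` and `plateaus_ops_v<VERSION>.xlsx`) without any hard‑coded paths or environment‑specific tricks. Set `DATA_DIR` and `VERSION` in the configuration cell, choose **Run all**, and the notebook does the rest.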


In [None]:

# ================= CONFIGURATION ==========================
# Every user-tunable setting lives in this cell; later cells only read these names.
VERSION   = 20    # numeric tag that matches your filenames (plateaus_raw_v<VERSION>.xlsx / plateaus_ops_v<VERSION>.xlsx)
# 🔑 POINT THIS AT THE FOLDER THAT CONTAINS YOUR Excel files
# Examples:
#   '/content/drive/MyDrive/fine_golden'    (if using Google Drive)
#   '/content'                              (if you upload directly in Colab)
# NOTE: the value is wrapped in pathlib.Path, so an empty string means
# "the current working directory".
DATA_DIR = ''      # <-- EDIT ME
# Distance metric for clustering. Only 'euclidean' and 'manhattan' are handed
# to DBSCAN; any other value makes the clustering cell use MeanShift instead,
# and MeanShift is not given the metric at all.
DIST_METRIC = 'euclidean'     # 'euclidean' or 'manhattan' → DBSCAN; anything else → MeanShift
BANDWIDTH   = 0.001           # DBSCAN `eps` or MeanShift `bandwidth`, depending on the branch taken
# ==========================================================


In [None]:

# ─── OPTIONAL: Mount Google Drive ─────────────────────────
# Skip this cell if Drive is already mounted or you upload files directly.
try:
    # Only the imports are guarded: a missing `google.colab` package is the
    # signal that the notebook is running outside Colab.
    import google.colab
    from google.colab import drive
except ModuleNotFoundError:
    print('Not running in Colab – skipping Drive mount.')
else:
    # Kept outside the `try` so that an error raised while mounting surfaces
    # as itself instead of being reported as "not in Colab".
    drive.mount('/content/drive')
    print('✔ Google Drive mounted')

# ─── OPTIONAL: Manual file upload (fallback) ──────────────
# If you prefer to upload the Excel files directly, uncomment:
# from google.colab import files; files.upload()


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift
from pathlib import Path


In [None]:

# ─── CONSTANTS ────────────────────────────────────────────
# Inverse fine-structure constant 1/α, CODATA 2018 recommended value.
# NOTE(review): the CODATA 2022 adjustment lists 137.035999177 – update here
# if the analysis should track the newest recommendation.
ALPHA_INV = 137.035999084
PHI       = (1 + 5**0.5) / 2   # golden ratio
SQRT2     = 2**0.5
EULER_E   = np.e


def delta(x, ref=ALPHA_INV):
    """Return the absolute fractional difference ``|x - ref| / |ref|``.

    Only arithmetic and ``abs`` are used, so ``x`` may be a scalar, a NumPy
    array or a pandas Series; the result has the same shape as ``x``.

    Args:
        x: Value(s) to compare against the reference.
        ref: Non-zero reference value. Defaults to ``ALPHA_INV``.

    Returns:
        A non-negative fraction (0 means ``x == ref``).
    """
    # Divide by |ref| so that a negative reference cannot flip the sign of a
    # quantity that is documented as an absolute difference.
    return abs(x - ref) / abs(ref)


In [None]:

def load_plateau_dfs(data_dir: str, version: int):
    """Load the raw and ops plateau workbooks for one version.

    Looks for ``plateaus_raw_v<version>.xlsx`` and
    ``plateaus_ops_v<version>.xlsx`` inside ``data_dir``.

    Args:
        data_dir: Folder that holds the Excel files.
        version: Numeric tag embedded in the file names.

    Returns:
        ``(raw_df, ops_df)`` as DataFrames, or ``(None, None)`` when at least
        one of the two files does not exist. A warning is printed for every
        missing file.
    """
    folder = Path(data_dir)
    raw_path = folder / f'plateaus_raw_v{version}.xlsx'
    ops_path = folder / f'plateaus_ops_v{version}.xlsx'

    # Collect all missing files first so the user sees the complete list in a
    # single run instead of discovering them one at a time.
    missing = [p for p in (raw_path, ops_path) if not p.exists()]
    if missing:
        for p in missing:
            print(f'⚠ File not found: {p}.\n'
                  '  → Check DATA_DIR, VERSION, or upload the file.')
        return None, None

    return pd.read_excel(raw_path), pd.read_excel(ops_path)

# Read both workbooks using the folder and version chosen in the config cell.
raw_df, ops_df = load_plateau_dfs(data_dir=DATA_DIR, version=VERSION)
if raw_df is None:
    # The loader hands back (None, None) when a workbook is missing, so
    # checking one of the two is enough to stop the run here.
    raise SystemExit('❌ Required files missing – fix & re‑run this cell.')
print(f'✔ Loaded raw {raw_df.shape} and ops {ops_df.shape}')


In [None]:

# ─── DELTA ANALYSIS ───────────────────────────────────────
NUM_COL = 'slice'  # update if your numeric column is named differently
if NUM_COL not in ops_df.columns:
    # List what is available so the right name can be picked without
    # inspecting the workbook by hand.
    raise KeyError(f'Column {NUM_COL!r} not found in ops_df. '
                   f'Available columns: {list(ops_df.columns)}')

# delta() is plain arithmetic plus abs(), so it can be applied to the whole
# column in one vectorised call instead of row by row.
ops_df['delta_alpha'] = delta(ops_df[NUM_COL], ALPHA_INV)
ops_df.head()


In [None]:

# ─── CLUSTERING ───────────────────────────────────────────
# Fail early with a readable message: the scikit-learn estimators used below
# reject NaN input with a ValueError of their own.
if ops_df[NUM_COL].isna().any():
    raise ValueError(f'Column {NUM_COL!r} contains missing values; '
                     'drop or fill them before clustering.')

# The estimators expect a 2-D (n_samples, n_features) array, so the single
# numeric column is reshaped into one feature column.
vals = ops_df[NUM_COL].to_numpy().reshape(-1, 1)

if DIST_METRIC in {'euclidean', 'manhattan'}:
    # DBSCAN marks points that belong to no cluster with the label -1.
    model = DBSCAN(eps=BANDWIDTH, metric=DIST_METRIC, min_samples=2).fit(vals)
else:
    # MeanShift is not given DIST_METRIC, so say so instead of silently
    # ignoring the configured value.
    print(f'⚠ DIST_METRIC={DIST_METRIC!r} is not one of the DBSCAN metrics '
          'used here; falling back to MeanShift, which ignores DIST_METRIC.')
    model = MeanShift(bandwidth=BANDWIDTH).fit(vals)

ops_df['cluster'] = model.labels_
print(ops_df['cluster'].value_counts())


In [None]:

# Scatter of the numeric column against the row index, coloured by cluster label.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(ops_df.index, ops_df[NUM_COL], c=ops_df['cluster'])
ax.set_title('Clustered slices')
ax.set_xlabel('Index')
ax.set_ylabel(NUM_COL)
plt.show()


In [None]:

# Write the annotated ops table (with the columns added above) into DATA_DIR.
out_path = Path(DATA_DIR).joinpath(f'ops_with_clusters_v{VERSION}.csv')
ops_df.to_csv(out_path, index=False)
print(f'✔ Results saved to {out_path}')
