<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/New_Gold__Orchestrator_20251207_1630.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PIP

In [1]:
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter
!pip install mlxtend

Collecting python-gedcom
  Downloading python_gedcom-1.0.0-py2.py3-none-any.whl.metadata (15 kB)
Downloading python_gedcom-1.0.0-py2.py3-none-any.whl (35 kB)
Installing collected packages: python-gedcom
Successfully installed python-gedcom-1.0.0
Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9


# TEST Cell

In [10]:
# -*- coding: iso-8859-15 -*-
# ====== CUT START [1/1] CELL 3 - Ancestor Register (Trees View; .shtml + SSI nav repair) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.02.02-CELL3-COL1=FIRST-ANCESTOR + LOCKIN + ENRICHED-EXCLUDE1)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography comes ONLY from /partials/dna_tree_styles.css.
# - Authority source for "First Ancestor" is LOCKED-IN via server module:
#     /partials/yates_authority_first_ancestor_map.py  -> AUTH_COUPLE_KEY_MAP
# - Exclusion:
#     Drop all couples older than: Francis Yates (1541-1588) & Jane Tichborne (1548-1580)
#     using the SAME logic as your working Cell 3 (explicit prefix match + anchor trim).
#
# Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15

print("[CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15")

# ---------- Imports ----------
import os, re, socket, posixpath, traceback, importlib.util
from datetime import datetime, timedelta
import pandas as pd
import html as _html
from ftplib import FTP_TLS
from string import Template as _T

DOWNLOADS_BLOCK = ""

# ---------- Display Policy ----------
SUPPRESS_ID_COLUMN = True
SUPPRESS_EMBEDDED_IDS_IN_TEXT = True

LINEAGE_SPOUSE_SEP = " & "
LINEAGE_COUPLE_SEP = " ~ "

ALPHA_BY_FIRST_ANCESTOR_FATHER = True
ALPHA_TIEBREAK_MOTHER_SURNAME  = True

# ---------- Enriched exclusion prefix (formatted lineage) ----------
# This is the exact prefix to strip from the formatted lineage display.
# It intentionally ends with a trailing "~" to remove the separator too.
ENRICHED_EXCLUDE_PREFIX = (
    "John Yates (1430-) & Still Searching ~ "
    "William Yates (1389-1440) & Still Searching ~ "
    "William Yates (1420-) & Still Searching ~ "
    "Edmund Yates (1445-1472) & Margaret Cornell ~ "
    "Richard Yates (1440-1498) & Joan Ashendon (1445-1499) ~ "
    "John Yates (1471-1544) & Alice Hyde (1498-1523) ~ "
    "Thomas Yates (1509-1565) & Elizabeth Fauconer (-1562) ~"
)

# ---------- Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_PORT", "21")
    os.environ.setdefault("FTP_DIR", "")

FTP_DIR = os.environ.get("FTP_DIR", "").strip().strip("/")

# ---------- Config / Paths ----------
INPUT_CSV = "final_combined_df_with_value_labels.csv"

EXPORT_BASENAME = "yates_ancestor_register"
LOCAL_CSV   = EXPORT_BASENAME + ".csv"
LOCAL_XLSX  = EXPORT_BASENAME + ".xlsx"
REMOTE_CSV  = posixpath.join("partials", LOCAL_CSV)
REMOTE_XLSX = posixpath.join("partials", LOCAL_XLSX)

OUTPUT_NAME = "just-trees.shtml"
REMOTE_HTML = posixpath.join("partials", OUTPUT_NAME)

DNA_CSS_HREF     = "/partials/dna_tree_styles.css"
DNA_CSS_VERSION  = "v2025-11-23-g3"
UNIFIED_CSS_HREF = "/partials/partials_unified.css"
UNIFIED_CSS_VER  = "v2026-02-01-unified-blue-refactor1"

HEAD_LINK = (
    '<link rel="stylesheet" type="text/css" href="%s?%s" />\n'
    '<link rel="stylesheet" type="text/css" href="%s?%s" />'
) % (DNA_CSS_HREF, DNA_CSS_VERSION, UNIFIED_CSS_HREF, UNIFIED_CSS_VER)

TABLE_WIDTH_PX = 5550

# ---------- Authority LOCK-IN module (server) ----------
AUTH_MODULE_REMOTE_DIR = "partials"
AUTH_MODULE_BASENAME   = "yates_authority_first_ancestor_map.py"
AUTH_MODULE_REMOTE     = posixpath.join(AUTH_MODULE_REMOTE_DIR, AUTH_MODULE_BASENAME)
AUTH_MODULE_LOCAL      = "yates_authority_first_ancestor_map.server.py"

# ---------- Load CSV (robust) ----------
df = None
_last_err = None
for enc in ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1"):
    try:
        df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False, encoding=enc)
        break
    except Exception as e:
        _last_err = e
        df = None
if df is None:
    raise SystemExit("[ERROR] Unable to read CSV: %s (%r)" % (INPUT_CSV, _last_err))
print("[OK] Loaded CSV: %s rows=%d, cols=%d" % (INPUT_CSV, len(df), len(df.columns)))

if "haplogroup" not in df.columns:
    df["haplogroup"] = ""
else:
    df["haplogroup"] = df["haplogroup"].fillna("")

# ---------- Resolver: Column B (masked) -> Column C (unmasked) ----------
A_IDX = 0
B_IDX = 1
C_IDX = 2

def _norm_code(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = t.replace("\u00a0", " ")
    t = re.sub(r"\s{2,}", " ", t)
    return t.lower()

LOCAL_RESOLVER = "match_to_unmasked.csv"
if not os.path.exists(LOCAL_RESOLVER) and os.path.exists("/content/partials/match_to_unmasked.csv"):
    LOCAL_RESOLVER = "/content/partials/match_to_unmasked.csv"

def _pull_file_from_server(remote_dir, basename, local_out):
    try:
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", "21")))
            ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass
            if FTP_DIR:
                for p in [p for p in FTP_DIR.split("/") if p]:
                    try:
                        ftps.cwd(p)
                    except Exception:
                        try:
                            ftps.mkd(p)
                        except Exception:
                            pass
                        ftps.cwd(p)
            try:
                ftps.cwd(remote_dir)
            except Exception:
                pass
            with open(local_out, "wb") as f:
                ftps.retrbinary("RETR %s" % basename, f.write)
        return True
    except Exception as e:
        print("[WARN] Server pull failed for %s/%s: %s" % (remote_dir, basename, e))
        return False

def _pull_resolver_if_needed(local_path):
    if os.path.exists(local_path):
        print("Using resolver:", os.path.abspath(local_path))
        return local_path
    print("Resolver not found locally; attempting server pull ...")
    ok = _pull_file_from_server("partials", "match_to_unmasked.csv", "match_to_unmasked.csv")
    if ok:
        print("[OK] Pulled resolver from server -> match_to_unmasked.csv")
        return "match_to_unmasked.csv"
    return local_path

LOCAL_RESOLVER = _pull_resolver_if_needed(LOCAL_RESOLVER)

def _read_csv_anyenc(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    dfx = None
    for enc in encs:
        try:
            dfx = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            dfx = None
    if dfx is None:
        raise RuntimeError("Unable to read CSV %s: %s" % (path, last))
    return dfx

def _load_resolver_to_map(path):
    if not os.path.exists(path):
        return {}
    last = None
    m = None
    for enc in ("utf-8-sig", "iso-8859-15", "utf-8", "cp1252", "latin1"):
        try:
            m = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as e:
            last = e
            m = None
    if m is None:
        print("[WARN] Resolver not loaded:", last)
        return {}
    cols = {c.lower(): c for c in m.columns}
    if "code" not in cols or "unmasked" not in cols:
        print("[WARN] Resolver missing 'code'/'unmasked' cols; skipping map.")
        return {}
    m = m[[cols["code"], cols["unmasked"]]].copy()
    m["__key__"] = m[cols["code"]].map(_norm_code)
    m["__val__"] = m[cols["unmasked"]].astype(str)
    m = m.drop_duplicates(subset="__key__", keep="first")
    return dict(zip(m["__key__"], m["__val__"]))

resolver_map = _load_resolver_to_map(LOCAL_RESOLVER)

if df.shape[1] < 3:
    raise ValueError("Main df must have at least 3 columns: A(ID#), B(match to), C(unmasked).")

masked_raw = df.iloc[:, B_IDX].astype(str)
masked_key = masked_raw.map(_norm_code)
resolved   = masked_key.map(resolver_map)
df.iloc[:, C_IDX] = resolved.fillna("")

print(
    "[OK] Column B -> C mapping: %d / %d  unmatched: %d"
    % (int(resolved.notna().sum()), len(df), len(df) - int(resolved.notna().sum()))
)

# ---------- Lineage formatting helpers ----------
ID_TOKEN_RE = re.compile(r"\bI\d+\b", re.I)
COUPLE_SEP_RE = re.compile(r"\s*(?:->|&rarr;|;|>|/{2,}|\|{2,}|~{2,})\s*", re.I)
SPOUSE_SPLIT_RE = re.compile(r"\s*(?:&| and | AND |\+)\s*", re.I)

def _scrub_side_keep_name_years(side_text):
    s = str(side_text or "").strip()
    if not s:
        return ""

    m = re.match(r"^(I\d+)~([^~]+?)~(\d{4}\s*-\s*(?:\d{4})?)$", s, flags=re.I)
    if m:
        name = re.sub(r"\s{2,}", " ", (m.group(2) or "").strip())
        yrs = re.sub(r"\s+", "", (m.group(3) or "").strip())
        return ("%s (%s)" % (name, yrs)) if name and yrs else (name or "")

    m = re.match(r"^(I\d+)~([^~]+?)(?:~([^~]+?))?$", s, flags=re.I)
    if m:
        name = re.sub(r"\s{2,}", " ", (m.group(2) or "").strip())
        tail = re.sub(r"\s{2,}", " ", (m.group(3) or "").strip())
        if tail and re.search(r"\d{4}", tail):
            tail = re.sub(r"\s+", "", tail)
            return ("%s (%s)" % (name, tail)) if name else ""
        if tail and name:
            return ("%s %s" % (name, tail)).strip()
        return (name or tail or "").strip()

    m = re.match(r"^(I\d+)\s+(.*)$", s, flags=re.I)
    if m:
        rest = (m.group(2) or "").strip()
        yrs = ""
        m2 = re.search(r"(\b\d{4}\s*-\s*(?:\d{4})?\b)\s*$", rest)
        if m2:
            yrs = re.sub(r"\s+", "", m2.group(1))
            rest = rest[:m2.start()].strip()
        name = re.sub(r"\s{2,}", " ", rest).strip()
        return ("%s (%s)" % (name, yrs)) if name and yrs else (name or "")

    s = ID_TOKEN_RE.sub("", s).replace("~", " ")
    s = re.sub(r"\s{2,}", " ", s).strip()
    return s

# ---------- Enriched exclusion (drop very old lead-in couples) ----------
def _norm_couple_for_match(s: str) -> str:
    return re.sub(r"\s{2,}", " ", str(s or "")).strip().lower()

_EXCLUDE_COUPLES = [
    c.strip()
    for c in re.split(r"\s*~\s*", (ENRICHED_EXCLUDE_PREFIX or "").strip().strip("~"))
    if c and c.strip()
]

def _strip_paren_years_anywhere(s: str) -> str:
    return re.sub(r"\([^)]*\)", "", str(s or "")).strip()

def _is_anchor_couple(couple_text: str) -> bool:
    t = _strip_paren_years_anywhere(couple_text).lower()
    return ("francis yates" in t) and ("jane tichborne" in t)

def _apply_enriched_exclusion(joined: str) -> str:
    s = str(joined or "").strip()
    if not s:
        return s

    couples = [c.strip() for c in s.split(LINEAGE_COUPLE_SEP) if c and c.strip()]
    if not couples:
        return s

    # (A) Exact prefix removal by couple list match (robust to spacing)
    if _EXCLUDE_COUPLES and len(couples) >= len(_EXCLUDE_COUPLES):
        ok = True
        for i in range(len(_EXCLUDE_COUPLES)):
            if _norm_couple_for_match(couples[i]) != _norm_couple_for_match(_EXCLUDE_COUPLES[i]):
                ok = False
                break
        if ok:
            couples = couples[len(_EXCLUDE_COUPLES):]

    # (B) Anchor trim if prefix did not match (or if data varies)
    if couples:
        for i, c in enumerate(couples):
            if _is_anchor_couple(c):
                couples = couples[i:]
                break

    return LINEAGE_COUPLE_SEP.join([c for c in couples if c]).strip()

def _format_lineage_cell(text):
    s = str(text or "").strip()
    if not s:
        return s
    couples = [t.strip() for t in COUPLE_SEP_RE.split(s) if t and t.strip()]
    if not couples:
        couples = [s]
    out_couples = []
    for c in couples:
        parts = [p.strip() for p in SPOUSE_SPLIT_RE.split(c, maxsplit=1) if p and p.strip()]
        if len(parts) == 2:
            father = _scrub_side_keep_name_years(parts[0]).strip()
            mother = _scrub_side_keep_name_years(parts[1]).strip()
            couple = (father + LINEAGE_SPOUSE_SEP + mother).strip()
        else:
            couple = _scrub_side_keep_name_years(c).strip()
        couple = re.sub(r"\s{2,}", " ", couple).strip()
        out_couples.append(couple)
    joined = (LINEAGE_COUPLE_SEP.join([c for c in out_couples if c])).strip()
    joined = re.sub(r"\s{2,}", " ", joined).strip()

    # Apply enriched early-ancestor truncation (Francis Yates & Jane Tichborne focus)
    joined = _apply_enriched_exclusion(joined)

    return joined

def _maybe_format_lineage_columns(df_in):
    if not SUPPRESS_EMBEDDED_IDS_IN_TEXT:
        return df_in
    df_out = df_in.copy()
    pat = re.compile(r"(ancestral|lineage|tree|path|ancestor|line)", re.I)
    cols = [c for c in df_out.columns if pat.search(str(c or "")) and str(c).strip().lower() != 'first ancestor']
    if not cols:
        for c in df_out.columns:
            try:
                ser = df_out[c].astype(str)
            except Exception:
                continue
            if ser.str.contains(r"\bI\d+~", regex=True, na=False).any() or ser.str.contains(r"\bI\d+\b", regex=True, na=False).any():
                if str(c).strip().lower() != 'first ancestor':
                    cols.append(c)
    cols = list(dict.fromkeys(cols))
    if not cols:
        return df_out
    for c in cols:
        try:
            df_out[c] = df_out[c].astype(str).map(_format_lineage_cell)
        except Exception:
            pass
    print("[OK] Lineage formatting applied to columns:", ", ".join([str(c) for c in cols]))
    return df_out

def _strip_years(name_text):
    s = str(name_text or "").strip()
    if not s:
        return ""
    return re.sub(r"\s*\([^)]*\)\s*$", "", s).strip()

def _first_last_tokens(person_text):
    s = _strip_years(person_text)
    parts = s.split()
    if not parts:
        return ("", "")
    return (parts[0], parts[-1])

def _first_ancestor_sort_key(lineage_text):
    s = str(lineage_text or "").strip()
    if not s:
        return ("", "", "")
    first_couple = s.split(LINEAGE_COUPLE_SEP, 1)[0].strip()
    father = ""
    mother = ""
    if LINEAGE_SPOUSE_SEP in first_couple:
        father, mother = [p.strip() for p in first_couple.split(LINEAGE_SPOUSE_SEP, 1)]
    else:
        father = first_couple.strip()
    f_given, f_surname = _first_last_tokens(father)
    m_surname = ""
    if ALPHA_TIEBREAK_MOTHER_SURNAME:
        _m_given, m_surname = _first_last_tokens(mother)
    return (f_surname.lower(), f_given.lower(), m_surname.lower())

# ---------- Authority LOCK-IN load ----------
def _download_authority_module_if_needed() -> str:
    for p in ("yates_authority_first_ancestor_map.py", AUTH_MODULE_LOCAL):
        if os.path.exists(p):
            print("[OK] Using local authority module:", os.path.abspath(p))
            return p

    if not all(os.environ.get(k) for k in ("FTP_HOST", "FTP_USER", "FTP_PASS")):
        raise RuntimeError("Missing FTP creds; cannot download authority module %s" % AUTH_MODULE_REMOTE)

    ok = _pull_file_from_server(AUTH_MODULE_REMOTE_DIR, AUTH_MODULE_BASENAME, AUTH_MODULE_LOCAL)
    if not ok:
        raise RuntimeError("Authority module not found on server: /%s" % AUTH_MODULE_REMOTE)
    print("[OK] Pulled authority module from server -> %s" % AUTH_MODULE_LOCAL)
    return AUTH_MODULE_LOCAL

def _import_authority_map(module_path: str) -> dict:
    spec = importlib.util.spec_from_file_location("yates_authority_first_ancestor_map", module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError("Unable to import authority module from %s" % module_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)  # type: ignore
    m = getattr(mod, "AUTH_COUPLE_KEY_MAP", None)
    if not isinstance(m, dict) or not m:
        raise RuntimeError("AUTH_COUPLE_KEY_MAP missing/empty in %s" % module_path)
    print("[OK] Authority map loaded from module: %d pairs" % (len(m)//2))
    return m

AUTH_COUPLE_KEY_MAP = _import_authority_map(_download_authority_module_if_needed())

# ---------- Authority key helpers ----------
def _is_unknown_like(name_text):
    s = str(name_text or "").strip().lower()
    s = re.sub(r"\([^)]*\)", "", s).strip()
    s = re.sub(r"\s{2,}", " ", s)
    if not s:
        return True
    if s in ("unknown", "none", "noneunknownname"):
        return True
    if "unknown name" in s:
        return True
    if s.replace(" ", "") in ("noneunknownname", "unknownname"):
        return True
    return False

def _canon_lastfirst(name_text):
    # Canonicalize to LastFirst token (no punctuation), matching authority convention.
    if _is_unknown_like(name_text):
        return "unknown"
    s = str(name_text or "").strip()
    s = re.sub(r"\([^)]*\)", "", s).strip()
    toks = re.findall(r"[A-Za-z0-9]+", s.lower())
    if not toks:
        return ""
    if len(toks) == 1:
        return re.sub(r"[^a-z0-9]+", "", toks[0])
    last = toks[-1]
    first = "".join(toks[:-1])
    return re.sub(r"[^a-z0-9]+", "", last + first)

def _couple_display_and_ids_from_raw_token(raw_token: str):
    # raw_token is ONE couple from the raw lineage column (still has IDs).
    raw = str(raw_token or "").strip()
    if not raw:
        return ("", "", "")
    parts = [p.strip() for p in SPOUSE_SPLIT_RE.split(raw, maxsplit=1) if p and p.strip()]
    father_raw = parts[0] if len(parts) >= 1 else ""
    mother_raw = parts[1] if len(parts) >= 2 else ""

    f_id = ""
    m_id = ""
    mf = re.search(r"\b(I\d+)\b", father_raw, flags=re.I)
    if mf:
        f_id = mf.group(1).upper()
    mm = re.search(r"\b(I\d+)\b", mother_raw, flags=re.I)
    if mm:
        m_id = mm.group(1).upper()

    f_disp = _scrub_side_keep_name_years(father_raw).strip()
    m_disp = _scrub_side_keep_name_years(mother_raw).strip()

    if f_disp and m_disp:
        disp = (f_disp + LINEAGE_SPOUSE_SEP + m_disp).strip()
    else:
        disp = (f_disp or m_disp or _scrub_side_keep_name_years(raw)).strip()

    disp = re.sub(r"\s{2,}", " ", disp).strip()
    return (disp, f_id, m_id)

def _apply_enriched_exclusion_to_couples(couple_disps):
    # Returns the trimmed display-couple list, using the SAME exclusion logic as the formatted lineage.
    joined = LINEAGE_COUPLE_SEP.join([c for c in (couple_disps or []) if c and str(c).strip()])
    joined2 = _apply_enriched_exclusion(joined)
    if not joined2:
        return []
    return [c.strip() for c in joined2.split(LINEAGE_COUPLE_SEP) if c and c.strip()]

def _first_ancestor_authority_value_from_raw_lineage(raw_lineage_text: str) -> str:
    # CRITICAL: choose the FIRST COUPLE AFTER exclusion, not the oldest.
    s = str(raw_lineage_text or "").strip()
    if not s:
        return ""

    raw_tokens = [t.strip() for t in COUPLE_SEP_RE.split(s) if t and t.strip()]
    if not raw_tokens:
        raw_tokens = [s]

    # Build untrimmed display list and ids list aligned to raw_tokens
    disp_untrim = []
    ids_untrim  = []
    for tok in raw_tokens:
        disp, f_id, m_id = _couple_display_and_ids_from_raw_token(tok)
        disp_untrim.append(disp)
        ids_untrim.append((f_id, m_id))

    # Trim by display using the proven Cell 3 logic (prefix+anchor)
    disp_trim = _apply_enriched_exclusion_to_couples(disp_untrim)

    # Determine which raw token corresponds to the first trimmed couple
    f_id = ""
    m_id = ""
    f_disp = ""
    m_disp = ""
    if disp_trim:
        target = _norm_couple_for_match(disp_trim[0])
        start_at = None
        for i, d in enumerate(disp_untrim):
            if _norm_couple_for_match(d) == target:
                start_at = i
                break
        if start_at is None:
            start_at = 0
        # Use the ids for that couple (best possible)
        if 0 <= start_at < len(ids_untrim):
            f_id, m_id = ids_untrim[start_at]
        # Also capture the display couple for fallback tokenization
        if 0 <= start_at < len(disp_untrim):
            first_couple_disp = disp_untrim[start_at]
        else:
            first_couple_disp = disp_trim[0]
    else:
        # nothing remained after trimming; fallback to original first couple
        first_couple_disp = disp_untrim[0] if disp_untrim else ""

    # Primary: authority lookup by IDs
    if f_id and m_id:
        k = AUTH_COUPLE_KEY_MAP.get((f_id.upper(), m_id.upper()), "")
        if k:
            return re.sub(r"\s+", "", str(k))

    # Fallback: synchronize to authority convention using LastFirst
    if first_couple_disp and (LINEAGE_SPOUSE_SEP in first_couple_disp):
        a, b = [p.strip() for p in first_couple_disp.split(LINEAGE_SPOUSE_SEP, 1)]
        return _canon_lastfirst(a) + "&" + _canon_lastfirst(b)
    if first_couple_disp:
        return _canon_lastfirst(first_couple_disp)
    return ""

# ---------- Vitals ----------
VITALS_CSV = "dna_vitals.csv"
LAST_UPDATED_TEXT  = ""
AUTOSOMAL_MATCHES  = ""

def _friendly_ts_from_utc(raw):
    s = str(raw or "").strip()
    if not s:
        return "(unknown)"
    s = s.replace("UTC", "").replace("utc", "").strip()
    fmts = ["%Y-%m-%d %H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M", "%Y-%m-%dT%H:%M:%S"]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(s, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    dt_est = dt_utc - timedelta(hours=5)
    months = ["January","February","March","April","May","June","July","August","September","October","November","December"]
    month_name = months[dt_est.month - 1]
    h24  = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12  = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (month_name, dt_est.day, dt_est.year, h12, dt_est.minute, ampm)

def _format_num_with_commas(raw_val):
    s_digits = re.sub(r"[^0-9\-]", "", str(raw_val or ""))
    if not s_digits:
        return ""
    try:
        return "{:,}".format(int(s_digits))
    except Exception:
        return s_digits

def _load_vitals(path):
    global LAST_UPDATED_TEXT, AUTOSOMAL_MATCHES
    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; header will be blank for vitals.")
        return
    vdf = None
    for enc in ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1"):
        try:
            vdf = pd.read_csv(path, dtype=str, encoding=enc, keep_default_na=False)
            break
        except Exception:
            vdf = None
    if vdf is None:
        print("[WARN] Unable to read dna_vitals.csv")
        return
    flat = [str(cell) for row in vdf.astype(str).values.tolist() for cell in row]
    autosomal_raw = None
    last_text = None
    for cell in flat:
        if autosomal_raw is None and "Records tagged and filtered by NPFX" in cell:
            m = re.search(r"(\d[\d,]*)", cell)
            if m:
                autosomal_raw = m.group(1)
        if last_text is None and "LAST_UPDATED_TEXT" in cell:
            m = re.search(r"LAST_UPDATED_TEXT\s*:\s*(.+)", cell)
            if m:
                last_text = m.group(1).strip()
    if last_text is not None:
        LAST_UPDATED_TEXT = _friendly_ts_from_utc(last_text)
    AUTOSOMAL_MATCHES = _format_num_with_commas(autosomal_raw)

_load_vitals(VITALS_CSV)

updated_str = 'Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_TEXT or "")
_updated_parts = [updated_str]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
_updated_parts.append('Showing: <span id="showing-count"></span>')
UPDATED_BLOCK = '<div class="updated centerline">' + ' &nbsp;|&nbsp; '.join(_updated_parts) + '</div>'

NAV_BLOCK = '<div id="nav-slot"><!--#include virtual="/partials/nav_block.shtml" --></div>'

CONTROLS_BLOCK = (
    '<div class="controls centerline" style="margin:6px 0 10px 0;">'
    '<input type="text" id="search-box" class="search" size="28" value="" placeholder="Search&amp;hellip;" />'
    "</div>"
)

# ---------- Display DF ----------
display_df = df.copy()

# drop ID#
if SUPPRESS_ID_COLUMN and display_df.shape[1] >= 1:
    display_df = display_df.drop(columns=[display_df.columns[A_IDX]], errors="ignore")

# Identify lineage/path column BEFORE formatting (so IDs still exist for authority lookup)
lineage_cols_raw = [c for c in display_df.columns if re.search(r"(ancestral|lineage|tree|path|ancestor|line)", str(c or ""), re.I)]
AUTH_LINEAGE_COL = lineage_cols_raw[0] if lineage_cols_raw else None

# Compute authority first-ancestor values from the RAW lineage column,
# but pick the FIRST COUPLE AFTER EXCLUSION (not the oldest).
if AUTH_LINEAGE_COL:
    fa_values = display_df[AUTH_LINEAGE_COL].astype(str).map(_first_ancestor_authority_value_from_raw_lineage)
else:
    fa_values = pd.Series([""] * len(display_df))

# REPLACE COLUMN 1 with First Ancestor values and rename header
if display_df.shape[1] >= 1:
    first_col_name = display_df.columns[0]
    display_df[first_col_name] = fa_values.astype(str).map(lambda x: re.sub(r"\s+", "", x).replace('&', '&#38;'))
    display_df = display_df.rename(columns={first_col_name: "First Ancestor"})
    print("[OK] Column 1 replaced with First Ancestor (authority, after exclusion).")
else:
    print("[WARN] display_df has no columns to replace.")

# Remove any other "First Ancestor" columns to avoid duplication (keep the first one)
fa_cols = [c for c in display_df.columns if str(c) == "First Ancestor"]
if len(fa_cols) > 1:
    keep_first = fa_cols[0]
    drop_rest = fa_cols[1:]
    display_df = display_df.drop(columns=drop_rest, errors="ignore")
    print("[OK] Dropped duplicate First Ancestor columns:", ", ".join(drop_rest))

# Now apply lineage formatting (removes embedded IDs AND applies enriched exclusion)
display_df = _maybe_format_lineage_columns(display_df)

# Alpha sort by first couple (kept)
if ALPHA_BY_FIRST_ANCESTOR_FATHER:
    lineage_cols = [c for c in display_df.columns if re.search(r"(ancestral|lineage|tree|path|ancestor|line)", str(c or ""), re.I)]
    sort_col = lineage_cols[0] if lineage_cols else None
    if sort_col:
        sort_keys = display_df[sort_col].astype(str).map(_first_ancestor_sort_key)
        display_df["__sort_surname__"]    = [k[0] for k in sort_keys]
        display_df["__sort_given__"]      = [k[1] for k in sort_keys]
        display_df["__sort_momsurname__"] = [k[2] for k in sort_keys]

        by_cols = ["__sort_surname__", "__sort_given__"]
        if ALPHA_TIEBREAK_MOTHER_SURNAME:
            by_cols.append("__sort_momsurname__")

        display_df = display_df.sort_values(by=by_cols, ascending=[True]*len(by_cols), kind="mergesort").reset_index(drop=True)
        display_df = display_df.drop(columns=["__sort_surname__", "__sort_given__", "__sort_momsurname__"], errors="ignore")
        print("[OK] Alpha sort applied by first couple:", sort_col, "| keys=", ",".join(by_cols))

# ---------- HTML table ----------
visible_cols = [c for c in display_df.columns if c]

table_html = display_df.to_html(
    index=False,
    columns=visible_cols,
    escape=False,
    border=1,
    classes="dataframe sortable"
)

if 'id="refactor-table"' not in table_html:
    table_html = re.sub(r"<table([^>]*)>", r'<table\1 id="refactor-table">', table_html, count=1)

if 'class="dataframe sortable"' not in table_html and "sortable" not in table_html:
    table_html = table_html.replace('class="dataframe"', 'class="dataframe sortable"', 1)

table_html = table_html.replace("<tbody>\n<tr>", "<tbody>\n<tr id=\"first-row\">", 1)

SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div id="top-scroll" class="scroll-sync-top">'
    '<div class="scroll-sync-top-inner" style="width:%dpx;"></div>'
    '</div>'
    '<div id="bottom-scroll" class="table-scroll">%s</div>'
    '</div>'
) % (TABLE_WIDTH_PX, table_html)

LATE_STYLE = r"""
<style type="text/css">
#nav-slot, #nav-slot nav, #nav-slot .oldnav, #nav-slot .navbar{
  display:block !important;
  visibility:visible !important;
  opacity:1 !important;
}
table.sortable thead{ display:table-header-group !important; visibility:visible !important; }
table.sortable thead th{ display:table-cell !important; visibility:visible !important; }
</style>
"""

JS_NAV_REPAIR = r"""
<script type="text/javascript">
//<![CDATA[
(function(){
  function hasNavContainer(el){
    if(!el) return false;
    var n = el.querySelector('nav.oldnav, nav.navbar, .oldnav, .navbar');
    return !!n;
  }
  function wrapFirstUL(el){
    if(!el) return false;
    var ul = el.querySelector('ul');
    if(!ul) return false;
    var nav = document.createElement('nav');
    nav.className = 'oldnav';
    nav.appendChild(ul);
    while(el.firstChild){ el.removeChild(el.firstChild); }
    el.appendChild(nav);
    return true;
  }
  function looksLikeSSICommentOnly(el){
    if(!el) return true;
    var txt = (el.textContent || '').replace(/\s+/g,'').toLowerCase();
    if(!txt) return true;
    if(txt.indexOf('<!--#include') >= 0) return true;
    return false;
  }
  function injectRemoteNav(el){
    try{
      var xhr = new XMLHttpRequest();
      xhr.open('GET', '/partials/nav_block.shtml', true);
      xhr.onreadystatechange = function(){
        if(xhr.readyState === 4){
          if(xhr.status >= 200 && xhr.status < 300){
            el.innerHTML = xhr.responseText;
            if(!hasNavContainer(el)){
              wrapFirstUL(el);
            }
          }
        }
      };
      xhr.send(null);
    }catch(e){}
  }

  function repairNav(){
    var slot = document.getElementById('nav-slot');
    if(!slot) return;

    if(looksLikeSSICommentOnly(slot)){
      injectRemoteNav(slot);
      return;
    }
    if(!hasNavContainer(slot)){
      wrapFirstUL(slot);
      return;
    }
  }

  if(document.readyState === 'loading'){
    document.addEventListener('DOMContentLoaded', repairNav, false);
  } else {
    repairNav();
  }
})();
//]]>
</script>
"""

page_tpl = _T(r"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Ancestor Register (Trees View)</title>
$HEAD_LINK
$LATE_STYLE
<style type="text/css">
/* Sticky second column (index 2) for Trees table */
#refactor-table th:nth-child(2),
#refactor-table td:nth-child(2){
  position:sticky;
  left:0;
  z-index:6;
  background:#ffffff;
}
#refactor-table th:nth-child(2){
  z-index:7;
}
</style>
</head>
<body id="top">
<div class="wrap">
  <h1 class="centerline">Ancestor Register (Trees View)</h1>
  $DOWNLOADS_BLOCK
  $UPDATED_BLOCK
  $NAV_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>

<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '').replace(/\s+/g,' ').trim().toLowerCase();
  }
  function sortTable(tbl, colIndex, dir, keyColIndex){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;
    var rows = [].slice.call(tb.rows || []);
    var asc  = (dir === 'asc');
    var kIdx = (typeof keyColIndex === 'number') ? keyColIndex : colIndex;
    rows.sort(function(a,b){
      var A = textOf(a.cells[kIdx]), B = textOf(b.cells[kIdx]);
      var nA = parseFloat(A.replace(/[^0-9.\-]/g,'')),
          nB = parseFloat(B.replace(/[^0-9.\-]/g,''));
      if(!isNaN(nA) && !isNaN(nB)){ return asc ? (nA-nB) : (nB-nA); }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });
    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++) frag.appendChild(rows[i]);
    tb.appendChild(frag);
    updateShowing();
  }
  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;
    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;
    for(var i=0;i<ths.length;i++)(function(idx){
      var th = ths[idx];
      var dir = 'asc';
      th.addEventListener('click', function(){
        dir = (dir === 'asc') ? 'desc' : 'asc';
        var hdr = (th.textContent || th.innerText || '');
        hdr = hdr.replace(/\s+\(asc\)|\s+\(desc\)/,'').replace(/\s+/g,' ').trim().toLowerCase();
        // Golden rule sync: sorting the lineage column uses First Ancestor (authority) as the key.
        // Column 1 is 'First Ancestor' (index 0) in this build.
        var keyColIndex = null;
        if(hdr === 'yates dna ancestral line'){
          keyColIndex = 0;
        }

        for (var j = 0; j < ths.length; j++){
          ths[j].innerHTML = ths[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');
        }
        th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');
        sortTable(tbl, idx, dir, (keyColIndex === null ? undefined : keyColIndex));
      }, false);
    })(i);
  }
  function formatWithCommas(n){
    try{
      var x = parseInt(String(n||'').replace(/[^0-9\-]/g,''),10);
      if(isNaN(x)) return '';
      return x.toLocaleString('en-US');
    }catch(e){ return String(n||''); }
  }
  function visibleRowCount(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return 0;
    var rows = tbl.tBodies[0].rows, n = 0;
    for(var i=0;i<rows.length;i++){
      if(rows[i].style.display !== 'none') n++;
    }
    return n;
  }
  function updateShowing(){
    var el = document.getElementById('showing-count');
    if(!el) return;
    el.textContent = formatWithCommas(visibleRowCount());
  }
  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\+/g,' ')) : '';
  }
  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;
    var tb = tbl.tBodies[0];
    var rows = [].slice.call(tb.rows || []);
    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\s+/g,' ').toLowerCase();
    }
    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
      updateShowing();
    }
    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }
    box.addEventListener('input', onInput, false);
    box.addEventListener('search', onInput, false);
    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }
  function bindBackToTop(){
    var btn = document.getElementById('back-to-top');
    if(!btn) return;
    function toggle(){ btn.style.display = (window.scrollY > 200 ? 'block' : 'none'); }
    toggle();
    window.addEventListener('scroll', toggle, {passive:true});
    btn.addEventListener('click', function(){
      try{
        window.scrollTo({top:0, behavior:'smooth'});
      } catch(e){
        window.scrollTo(0,0);
      }
    }, false);
  }
  function bindSyncedScrollbars(){
    var topScroll    = document.getElementById('top-scroll');
    var bottomScroll = document.getElementById('bottom-scroll');
    if(!(topScroll && bottomScroll)) return;
    var syncing = false;
    topScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      bottomScroll.scrollLeft = topScroll.scrollLeft;
      syncing = false;
    }, false);
    bottomScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      topScroll.scrollLeft = bottomScroll.scrollLeft;
      syncing = false;
    }, false);
  }
  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindBackToTop();
    bindSearch();
    bindSyncedScrollbars();
    updateShowing();
  });
})();
//]]>
</script>

$JS_NAV_REPAIR
</body>
</html>
""")

final_html = page_tpl.safe_substitute(
    HEAD_LINK=HEAD_LINK,
    LATE_STYLE=LATE_STYLE,
    JS_NAV_REPAIR=JS_NAV_REPAIR,
    DOWNLOADS_BLOCK=DOWNLOADS_BLOCK,
    UPDATED_BLOCK=UPDATED_BLOCK,
    NAV_BLOCK=NAV_BLOCK,
    CONTROLS_BLOCK=CONTROLS_BLOCK,
    SCROLL_WRAPPER=SCROLL_WRAPPER,
)

# ---------- Exports ----------
export_df = display_df.copy()
export_df.to_csv(LOCAL_CSV, index=False, encoding="iso-8859-15")
try:
    export_df.to_excel(LOCAL_XLSX, index=False)
except Exception:
    from pandas import ExcelWriter
    with ExcelWriter(LOCAL_XLSX) as _w:
        export_df.to_excel(_w, index=False)
print("[OK] Wrote exports:", os.path.abspath(LOCAL_CSV), "and", os.path.abspath(LOCAL_XLSX))

# ---------- Save page locally ----------
try:
    with open(OUTPUT_NAME, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(final_html)
    print("[OK] Saved locally:", os.path.abspath(OUTPUT_NAME))
except Exception as e:
    print("[ERROR] Save failed:", e)
    traceback.print_exc()

# ---------- Upload to /partials ----------
def _ftps_ensure_dir(ftps, path):
    if not path:
        return
    for seg in [p for p in path.split("/") if p]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)

ftp_host = os.environ.get("FTP_HOST")
ftp_user = os.environ.get("FTP_USER")
ftp_pass = os.environ.get("FTP_PASS")
ftp_port = int(os.environ.get("FTP_PORT", "21") or "21")

if ftp_host and ftp_user and ftp_pass:
    print("[INFO] Attempting FTP upload ...")
    try:
        socket.setdefaulttimeout(30)
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(ftp_host, ftp_port)
            ftps.login(ftp_user, ftp_pass)
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass

            _ftps_ensure_dir(ftps, FTP_DIR)
            _ftps_ensure_dir(ftps, "partials")

            with open(OUTPUT_NAME, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_HTML), fh)
            print("[OK] Uploaded HTML -> /partials/%s" % os.path.basename(REMOTE_HTML))

            with open(LOCAL_CSV, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_CSV), fh)
            with open(LOCAL_XLSX, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_XLSX), fh)
            print("[OK] Uploaded exports -> /partials/ (%s, %s)" % (LOCAL_CSV, LOCAL_XLSX))

            print("\n--- Open URLs ---")
            print("Trees page:       https://yates.one-name.net/partials/just-trees.shtml")
            print("CSV export:       https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_CSV))
            print("Excel export:     https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_XLSX))
    except Exception as e:
        print("[ERROR] FTP session failed:", e)
        traceback.print_exc()
else:
    print("[INFO] Skipping FTP upload (missing credentials).")

print("\n--- Cell 3 Complete (Column 1 replaced with First Ancestor from /partials/yates_authority_first_ancestor_map.py; enriched prefix exclusion applied BEFORE choosing first couple) ---")
# ====== CUT STOP  [1/1] CELL 3 ==================================================================


[CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15
[OK] Loaded CSV: final_combined_df_with_value_labels.csv rows=93, cols=6
Using resolver: /content/match_to_unmasked.csv
[OK] Column B -> C mapping: 93 / 93  unmatched: 0
[OK] Using local authority module: /content/yates_authority_first_ancestor_map.py
[OK] Authority map loaded from module: 25 pairs
[OK] Column 1 replaced with First Ancestor (authority, after exclusion).
[OK] Lineage formatting applied to columns: Yates DNA Ancestral Line
[OK] Alpha sort applied by first couple: First Ancestor | keys= __sort_surname__,__sort_given__,__sort_momsurname__
[OK] Wrote exports: /content/yates_ancestor_register.csv and /content/yates_ancestor_register.xlsx
[OK] Saved locally: /content/just-trees.shtml
[INFO] Attempting FTP upload ...
[OK] Uploaded HTML -> /partials/just-trees.shtml
[OK] Uploaded exports -> /partials/ (yates_ancestor_regis

# Cell 0

In [6]:
# ====== CUT START [1/1] CELL 0 - Orchestrator (Authority Scripts + Latest GEDCOM) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.01.31-UNIFIED)
# - Complete & runnable Colab cell: one contiguous block, no fragments.
# - Source ASCII-only; any file writes must use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - Authority:
#     * Colab work dir: /content
#     * Scripts pulled from: /partials/colab_cells/*.py  (server)
#     * Latest GEDCOM pulled from: /tng/gedcom/*.ged     (server) if no local GEDCOM exists
# - Deterministic audit banner:
#   [CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2026.01.31-UNIFIED | Encoding=ISO-8859-15
# - Execution order (after pulls): cell1.py, cell2.py, cell2b.py, cell2c.py, cell2k.py, cell3.py
# - IMPORTANT: Scripts are executed via "python3 -u script.py" (NOT exec/compile) to avoid
#   multiprocessing pickling failures under ProcessPoolExecutor.
# ====================================================================

print("[CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2026.01.31-UNIFIED | Encoding=ISO-8859-15")

DECLARED_LINES = 999  # audit-only; not enforced
print("[AUDIT] DECLARED_LINES=%d" % DECLARED_LINES)

import os
import socket
import traceback
import hashlib
import glob
import subprocess
import sys
from datetime import datetime
from ftplib import FTP_TLS, all_errors

# ---------- 0) Env / secrets ----------

def _get_env(k, default=""):
    try:
        from google.colab import userdata  # type: ignore
        v = userdata.get(k)
        return v if v is not None else os.environ.get(k, default)
    except Exception:
        return os.environ.get(k, default)

FTP_HOST = (_get_env("FTP_HOST", "") or "").strip()
FTP_USER = (_get_env("FTP_USER", "") or "").strip()
FTP_PASS = _get_env("FTP_PASS", "") or ""
FTP_PORT = int(_get_env("FTP_PORT", "21") or "21")
FTP_DIR  = (_get_env("FTP_DIR", "") or "").strip().strip("/")

def _mask(s, keep=3):
    s = "" if s is None else str(s)
    if not s:
        return "(empty)"
    return (s[:keep] + "***" + s[-keep:]) if len(s) > keep * 2 else s[0:1] + "***"

print(
    "[ENV] HOST=%s  USER=%s  PASS=%s  PORT=%d  DIR=%s"
    % (_mask(FTP_HOST), _mask(FTP_USER, 2), "***", FTP_PORT, ("/" + FTP_DIR) if FTP_DIR else "(root)")
)

if not (FTP_HOST and FTP_USER and FTP_PASS):
    raise SystemExit("[FATAL] Missing FTP_HOST/FTP_USER/FTP_PASS; cannot run orchestrator.")

# ---------- 1) FTPS helpers ----------

FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
PASSIVE_MODE = True

def _ftps_connect():
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    ftps.connect(FTP_HOST, FTP_PORT)
    ftps.auth()  # Explicit FTPS
    ftps.login(FTP_USER, FTP_PASS)
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(PASSIVE_MODE)
    except Exception:
        pass
    if FTP_DIR:
        for seg in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.cwd(seg)
            except all_errors:
                try:
                    ftps.mkd(seg)
                except all_errors:
                    pass
                ftps.cwd(seg)
    return ftps

def _sha256_of_file(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            h.update(chunk)
    return h.hexdigest()[:16]

def _safe_nlst(ftps):
    try:
        return ftps.nlst()
    except Exception:
        return []

# ---------- 2) Pull authority scripts from /partials/colab_cells ----------

SCRIPT_REMOTE_DIR = "/partials/colab_cells"
SCRIPT_NAMES = ["cell1.py", "cell2.py", "cell2b.py", "cell2c.py", "cell2k.py", "cell3.py"]

def pull_authority_scripts():
    print("[STEP] Pulling authority scripts from server ...")
    pulled = 0
    try:
        ftps = _ftps_connect()
        try:
            pwd0 = ftps.pwd()
        except Exception:
            pwd0 = "(unknown)"
        print("[OK] Connected via FTPS (explicit AUTH TLS).")
        print("[INFO] Initial PWD on server: %s" % pwd0)

        # Navigate to script dir
        try:
            try:
                ftps.cwd("/")
            except Exception:
                pass
            for seg in [p for p in SCRIPT_REMOTE_DIR.split("/") if p]:
                ftps.cwd(seg)
        except Exception as e:
            raise RuntimeError("Unable to cwd to %s (%s)" % (SCRIPT_REMOTE_DIR, e))

        print("[INFO] Using remote dir for scripts: %s" % SCRIPT_REMOTE_DIR)

        listing = _safe_nlst(ftps)
        if listing:
            print("[INFO] Remote listing sample (first 25): %s" % ", ".join(listing[:25]))
        else:
            print("[WARN] Remote listing is empty/unavailable for %s" % SCRIPT_REMOTE_DIR)

        os.chdir("/content")
        for name in SCRIPT_NAMES:
            try:
                local_path = os.path.join("/content", name)
                with open(local_path, "wb") as f:
                    ftps.retrbinary("RETR " + name, f.write)
                sz = os.path.getsize(local_path)
                sh = _sha256_of_file(local_path)
                print("[PULL] %s -> %s  size=%d  sha256=%s" % (name, local_path, sz, sh))
                pulled += 1
            except Exception as e:
                print("[MISS] Could not pull %s: %s" % (name, e))

        try:
            ftps.quit()
        except Exception:
            pass
        print("[OK] Pulled %d script(s) from authority shelf." % pulled)
    except Exception as e:
        print("[ERROR] Script pull failed:", e)
        traceback.print_exc()
    return pulled

# ---------- 3) GEDCOM selection: prefer local, else pull latest ----------

GEDCOM_REMOTE_DIR = "/tng/gedcom"
LOCAL_GED_DIR = "/content"

def _pick_local_gedcom_if_present():
    """
    Prefer any *.ged already in /content.
    Deterministic: newest mtime, tie-breaker lexicographic.
    """
    os.chdir(LOCAL_GED_DIR)
    geds = glob.glob("*.ged")
    if not geds:
        return None
    def _key(p):
        try:
            return (float(os.path.getmtime(p)), str(p))
        except Exception:
            return (0.0, str(p))
    geds.sort(key=_key, reverse=True)
    chosen = geds[0]
    try:
        ts = datetime.fromtimestamp(os.path.getmtime(chosen)).isoformat(sep=" ", timespec="seconds")
    except Exception:
        ts = "unknown"
    print("[INFO] Local GEDCOM present in /content. Using: %s (mtime=%s)" % (chosen, ts))
    return os.path.join(LOCAL_GED_DIR, chosen)

def _choose_latest_gedcom(ftps, names):
    """
    Pick the newest *.ged using MDTM if available; fallback = last alphabetically.
    """
    ged_files = [n for n in names if n.lower().endswith(".ged")]
    if not ged_files:
        return None

    latest_name = None
    latest_ts = None

    for nm in ged_files:
        ts = None
        try:
            resp = ftps.sendcmd("MDTM " + nm)  # '213 YYYYMMDDhhmmss'
            parts = resp.strip().split()
            if len(parts) == 2 and parts[0] == "213":
                ts = parts[1]
        except Exception:
            ts = None
        if ts is None:
            ts = "00000000000000" + nm
        if latest_ts is None or ts > latest_ts:
            latest_ts = ts
            latest_name = nm
    return latest_name

def pull_latest_gedcom_if_needed():
    """
    If a GEDCOM already exists locally, do NOT pull from server.
    Otherwise pull latest from /tng/gedcom and ensure exactly one *.ged in /content.
    """
    local = _pick_local_gedcom_if_present()
    if local:
        print("[STEP] Skipping server GEDCOM pull (local GEDCOM already present).")
        print("[INFO] Cell 1 will see local GEDCOM: %s" % local)
        return

    print("\n[STEP] Pulling latest GEDCOM from %s ..." % GEDCOM_REMOTE_DIR)
    try:
        ftps = _ftps_connect()
        try:
            ftps.cwd("/")
        except Exception:
            pass
        for seg in [p for p in GEDCOM_REMOTE_DIR.split("/") if p]:
            ftps.cwd(seg)

        names = _safe_nlst(ftps)
        if not names:
            print("[WARN] No files listed in %s" % GEDCOM_REMOTE_DIR)
            try:
                ftps.quit()
            except Exception:
                pass
            return

        latest = _choose_latest_gedcom(ftps, names)
        if not latest:
            print("[WARN] No .ged files found in %s" % GEDCOM_REMOTE_DIR)
            try:
                ftps.quit()
            except Exception:
                pass
            return

        print("[INFO] Latest GEDCOM on server: %s" % latest)

        # Clean any old local GEDCOMs so Cell 1 cannot accidentally pick the wrong one
        try:
            for fname in os.listdir(LOCAL_GED_DIR):
                if fname.lower().endswith(".ged"):
                    try:
                        os.remove(os.path.join(LOCAL_GED_DIR, fname))
                        print("[CLEAN] Removed old local GEDCOM:", fname)
                    except Exception as e:
                        print("[WARN] Could not remove %s: %s" % (fname, e))
        except Exception as e:
            print("[WARN] Could not scan local GED dir:", e)

        local_path = os.path.join(LOCAL_GED_DIR, latest)
        with open(local_path, "wb") as f:
            ftps.retrbinary("RETR " + latest, f.write)
        try:
            ftps.quit()
        except Exception:
            pass

        sz = os.path.getsize(local_path)
        print("[OK] Pulled GEDCOM -> %s  size=%d bytes" % (local_path, sz))
        print("[INFO] Cell 1 will now see exactly one *.ged in /content.")
    except Exception:
        print("[WARN] GEDCOM pull failed; Cell 1 will use any existing local *.ged instead.")
        traceback.print_exc()

# ---------- 4) Run scripts in order (subprocess; fixes ProcessPool pickling) ----------

def run_script(path):
    print("\n[RUN] %s" % path)
    if not os.path.exists(path):
        print("[SKIP] %s not found in /content." % path)
        return

    # Use the same Python interpreter, unbuffered output.
    cmd = [sys.executable, "-u", path]

    try:
        # Stream stdout+stderr together so logs appear in-order in Colab.
        p = subprocess.Popen(
            cmd,
            cwd="/content",
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=os.environ.copy(),
        )

        assert p.stdout is not None
        for line in p.stdout:
            # Print already has newline; avoid double spacing.
            print(line.rstrip("\n"))

        rc = p.wait()
        if rc == 0:
            print("[DONE] %s" % path)
        else:
            print("[ERROR] Script failed (exit=%d): %s" % (rc, path))
    except Exception as e:
        print("[ERROR] Exception while running %s: %s" % (path, e))
        traceback.print_exc()

def main():
    os.chdir("/content")

    pulled = pull_authority_scripts()
    if pulled == 0:
        print("[FATAL] No authority scripts pulled; aborting.")
        return

    pull_latest_gedcom_if_needed()

    script_order = ["cell1.py", "cell2.py", "cell2b.py", "cell2c.py", "cell2k.py", "cell3.py"]
    print("\n[STEP] Running scripts in order: %s" % ", ".join(script_order))
    for s in script_order:
        run_script(os.path.join("/content", s))

main()

print("\n--- Cell 0 That's all folks, Orchestrator complete (authority scripts pulled, GEDCOM local-preferred, then Cell1/2/2b/2c/2k/3 executed) ---")
# ====== CUT STOP  [1/1] CELL 0 - Orchestrator (Authority Scripts + Latest GEDCOM) ======


[CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2026.01.31-UNIFIED | Encoding=ISO-8859-15
[AUDIT] DECLARED_LINES=999
[ENV] HOST=ftp***net  USER=ad***et  PASS=***  PORT=21  DIR=(root)
[STEP] Pulling authority scripts from server ...
[OK] Connected via FTPS (explicit AUTH TLS).
[INFO] Initial PWD on server: /
[INFO] Using remote dir for scripts: /partials/colab_cells
[INFO] Remote listing sample (first 25): cell2.py, cell2b.py, cell2b_NetworkAuthority.py, cell2k.py, cell2d.py, ., cell2c.py, .., cell0_netupdate.py, cell3.py, cell1.py, cell0.py
[PULL] cell1.py -> /content/cell1.py  size=24143  sha256=2a92b754edb79cad
[PULL] cell2.py -> /content/cell2.py  size=34275  sha256=8df41f3e925278c7
[PULL] cell2b.py -> /content/cell2b.py  size=51109  sha256=990c463e7a0fe0c0
[PULL] cell2c.py -> /content/cell2c.py  size=33026  sha256=22fd337bd39e7abf
[PULL] cell2k.py -> /content/cell2k.py  size=45269  sha256=77a8738001871c6b
[PULL] cell3.py -> /content/cell3.py  size=40678  sha256=8e

# Cell 1

In [3]:
# ====== CUT START [1/1] CELL 1 - GEDCOM -> CSV + HTML + Upload + FIRST ANCESTOR PAIRS CSV (Sortable) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.01.31-UNIFIED-BASELINE)
# - Complete and runnable Colab cell, one contiguous block (no fragments).
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography/layout/colors via /partials/partials_unified.css (single baseline).
# - Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell1_FTPS_Explicit | Version=2026.01.31-UNIFIED-BASELINE | Encoding=ISO-8859-15
# =========================================================================================================

import os, re, glob, logging, socket, traceback
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
from tqdm import tqdm
from ftplib import FTP_TLS, all_errors
from string import Template

CELL_NAME = "Cell1_FTPS_Explicit"
VERSION   = "2026.01.31-UNIFIED-BASELINE"

print("[CONFIRM] Golden Rules active | Cell=%s | Version=%s | Encoding=ISO-8859-15" % (CELL_NAME, VERSION))

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(CELL_NAME)

def _now_est_string():
    try:
        from zoneinfo import ZoneInfo
        tz = ZoneInfo("America/New_York")
        now = datetime.now(tz)
    except Exception:
        now = datetime.now()
    month_name = now.strftime("%B")
    day = now.day
    year = now.year
    hour_24 = now.hour
    minute = now.minute
    ampm = "AM" if hour_24 < 12 else "PM"
    hour_12 = hour_24 % 12
    if hour_12 == 0:
        hour_12 = 12
    return "%s %d, %d %d:%02d %s" % (month_name, day, year, hour_12, minute, ampm)

def _get_env(k, default=""):
    try:
        from google.colab import userdata  # type: ignore
        v = userdata.get(k)
        return v if v is not None else os.environ.get(k, default)
    except Exception:
        return os.environ.get(k, default)

FTP_HOST = (_get_env("FTP_HOST","") or "").strip()
FTP_USER = (_get_env("FTP_USER","") or "").strip()
FTP_PASS = _get_env("FTP_PASS","") or ""
FTP_PORT = int(_get_env("FTP_PORT","21") or "21")
FTP_DIR  = (_get_env("FTP_DIR","") or "").strip().strip("/")
PASSIVE_MODE = True

def _mask(s, keep=3):
    s = "" if s is None else str(s)
    if not s:
        return "(empty)"
    return (s[:keep] + "***" + s[-keep:]) if len(s) > keep * 2 else s[0:1] + "***"

print("[ENV] HOST=%s  USER=%s  PASS=%s  PORT=%d  DIR=%s" %
      (_mask(FTP_HOST), _mask(FTP_USER, 2), "***", FTP_PORT, ("/" + FTP_DIR) if FTP_DIR else "(root)"))

def _ftps_connect():
    if not (FTP_HOST and FTP_USER and FTP_PASS):
        raise RuntimeError("Missing FTP_HOST/FTP_USER/FTP_PASS.")
    socket.setdefaulttimeout(30)
    ftps = FTP_TLS(timeout=30)
    ftps.connect(FTP_HOST, FTP_PORT)
    ftps.auth()
    ftps.login(FTP_USER, FTP_PASS)
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(PASSIVE_MODE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            ftps.cwd(p)
    return ftps

def _ftps_ensure_dir(ftps, path):
    if not path:
        return
    for p in [p for p in path.split("/") if p]:
        try:
            ftps.cwd(p)
        except all_errors:
            try:
                ftps.mkd(p)
            except all_errors:
                pass
            ftps.cwd(p)

def _ftps_upload(ftps, local_path, remote_name):
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR " + remote_name, fh)
    print("[OK] Uploaded: %s -> %s/%s" % (local_path, ftps.pwd().rstrip("/"), remote_name))

REMOTE_DIR        = "partials"
CSV_OUT_LOCAL     = "final_combined_df_with_value_labels.csv"
HTML_OUT_LOCAL    = "cell1_work_table.htm"
ABS_CSV_URL       = "/%s/%s" % (REMOTE_DIR, os.path.basename(CSV_OUT_LOCAL))
ABS_HOME_URL      = "/index.htm"
VITALS_CSV_PATH        = "dna_vitals.csv"
AUTOSOMAL_COUNT_TXT    = "autosomal_count.txt"

# First ancestor pairs CSV (sortable)
FIRST_ANCESTOR_PAIRS_LOCAL = "first_ancestor_pairs.csv"
ABS_FIRST_ANCESTOR_PAIRS_URL = "/%s/%s" % (REMOTE_DIR, os.path.basename(FIRST_ANCESTOR_PAIRS_LOCAL))

def _pick_local_gedcom_if_present():
    geds = glob.glob("*.ged")
    if not geds:
        return ""
    def _key(p):
        try:
            return (float(os.path.getmtime(p)), str(p))
        except Exception:
            return (0.0, str(p))
    geds.sort(key=_key, reverse=True)
    chosen = geds[0]
    try:
        ts = datetime.fromtimestamp(os.path.getmtime(chosen)).isoformat(sep=" ", timespec="seconds")
    except Exception:
        ts = "unknown"
    print("[INFO] Local GEDCOM present in content. Selected newest: %s (mtime=%s)" % (chosen, ts))
    return chosen

def _ensure_gedcom_available():
    chosen = _pick_local_gedcom_if_present()
    if chosen:
        return chosen
    if "download_latest_gedcom_from_tng" in globals() and callable(globals()["download_latest_gedcom_from_tng"]):
        print("[INFO] No local GEDCOM found. Calling existing download_latest_gedcom_from_tng() ...")
        try:
            globals()["download_latest_gedcom_from_tng"]()
        except Exception as e:
            print("[WARN] download_latest_gedcom_from_tng() failed:", e)
    return _pick_local_gedcom_if_present()

visited_pairs = set()
generation_table = []

class GedcomDataset:
    def __init__(self, gen_person):
        self.gen_person = gen_person
        self.extractable_detail = {}

    def add_extractable_detail(self, key, value):
        self.extractable_detail[key] = value

    def get_gen_person(self):
        return self.gen_person.strip("@")

    def get_extractable_NPFX(self):
        return self.extractable_detail.get("NPFX", "") or ""

    def get_extractable_cm(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        if "&" in v:
            cm = v.split("&")[0].strip()
        elif "**" in v:
            cm = v.split("**")[0].strip()
        else:
            cm = v.strip()
        try:
            int(cm)
            return cm
        except Exception:
            return ""

    def get_extractable_sort(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        if "&" in v:
            s = v.split("&")[1]
            return (s.split("**")[0] if "**" in s else s).strip()
        return ""

    def get_extractable_YDNA(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        return v.split("**")[1].strip() if "**" in v else ""

class Gedcom:
    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []
        self.filter_pool = []
        self.total_records = 0
        self.npfx_count = 0
        self.ydna_count = 0
        self.autosomal_count = 0
        self.after_manual_filter_total = 0

    def parse_gedcom(self):
        with open(self.file_name, "r", encoding="utf-8-sig") as f:
            lines = f.readlines()

        current = None
        npfx_count = 0
        ydna_count = 0
        total = 0

        for line in lines:
            parts = line.strip().split(" ", 2)
            if not parts or not parts[0].isdigit():
                continue
            level = int(parts[0])
            tag = parts[1]
            value = parts[2] if len(parts) > 2 else None

            if level == 0 and tag.startswith("@") and tag.endswith("@") and value == "INDI":
                total += 1
                current = GedcomDataset(tag)
                self.gedcom_datasets.append(current)
            elif current is not None:
                if level == 2 and tag == "NPFX":
                    npfx_count += 1
                    current.add_extractable_detail(tag, value)
                    if value and "**" in value:
                        ydna_count += 1

        autosomal = npfx_count - ydna_count

        self.total_records = total
        self.npfx_count = npfx_count
        self.ydna_count = ydna_count
        self.autosomal_count = autosomal

        print("GEDCOM contained %d total records" % total)
        print("Records tagged and filtered by NPFX: %d" % npfx_count)
        print("Records with YDNA information: %d" % ydna_count)
        print("Autosomal matches (NPFX minus YDNA): %d" % autosomal)

        for ds in self.gedcom_datasets:
            if ds.get_extractable_NPFX():
                self.filter_pool.append(ds)

        try:
            df_filter = pd.read_excel("filtered_ids.xlsx")
            manual_ids = set(str(x) for x in df_filter["ID"])
            self.filter_pool = [d for d in self.filter_pool if d.get_gen_person() in manual_ids]
            print("After manual filter, total records: %d" % len(self.filter_pool))
        except FileNotFoundError:
            logger.warning("filtered_ids.xlsx not found. Skipping second-level manual filter.")

        self.after_manual_filter_total = len(self.filter_pool)
        return autosomal

def _chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def _extract_display_name_from_indi_block(txt):
    if not txt:
        return "Unknown Name"
    m = re.search(r"(^|\n)1 NAME ([^\n\r]+)", "\n" + txt)
    if not m:
        return "Unknown Name"
    name_line = (m.group(2) or "").strip()
    if "/" not in name_line:
        return name_line.strip() or "Unknown Name"
    parts = name_line.split("/", 2)
    given = (parts[0] or "").strip()
    sur = (parts[1] or "").strip()
    out = (given + " " + sur).strip()
    out = re.sub(r"\s+", " ", out)
    return out if out else "Unknown Name"

def _extract_year_from_date_line(date_line):
    years = re.findall(r"(\d{4})", str(date_line or ""))
    return years[-1] if years else ""

def _extract_birth_death_years_from_indi_block(txt):
    if not txt:
        return ""
    b_year = ""
    d_year = ""
    m = re.search(r"(^|\n)1 BIRT\b.*?(?:\n2 DATE ([^\n\r]+))?", "\n" + txt, flags=re.S)
    if m:
        b_year = _extract_year_from_date_line(m.group(2) or "")
    m2 = re.search(r"(^|\n)1 DEAT\b.*?(?:\n2 DATE ([^\n\r]+))?", "\n" + txt, flags=re.S)
    if m2:
        d_year = _extract_year_from_date_line(m2.group(2) or "")
    if not b_year and not d_year:
        return ""
    return "%s-%s" % (b_year, d_year)

def _find_parents(individual_id, generation, parents_map):
    global visited_pairs, generation_table
    if individual_id not in parents_map:
        return
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return
    pair = (father_id, mother_id)
    if pair not in visited_pairs:
        visited_pairs.add(pair)
        generation_table.append((generation, pair))
    if father_id:
        _find_parents(father_id, generation + 1, parents_map)
    if mother_id:
        _find_parents(mother_id, generation + 1, parents_map)

def _find_distant(individual_id, parents_map, path=None):
    if path is None:
        path = []
    path.append(individual_id)
    if individual_id not in parents_map:
        return [path]
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return [path]
    paths = []
    if father_id:
        paths.extend(_find_distant(father_id, parents_map, path[:]))
    if mother_id:
        paths.extend(_find_distant(mother_id, parents_map, path[:]))
    return paths if paths else [path]

def _filter_lineage(winning_ids, gen_table, display_name_map, years_map):
    matching = []
    for generation, pair in gen_table:
        id1, id2 = pair
        if id1 in winning_ids or id2 in winning_ids:
            matching.append((generation, pair))
    matching.sort(key=lambda x: x[0])

    lines = []
    for _, pair in matching:
        pid1, pid2 = pair
        n1 = (display_name_map.get(pid1, "Unknown Name") or "Unknown Name").replace("~", " ").strip()
        n2 = (display_name_map.get(pid2, "Unknown Name") or "Unknown Name").replace("~", " ").strip()
        y1 = (years_map.get(pid1, "") or "").replace("~", " ").strip()
        y2 = (years_map.get(pid2, "") or "").replace("~", " ").strip()
        t1 = "%s~%s~%s" % (pid1, n1, y1)
        t2 = "%s~%s~%s" % (pid2, n2, y2)
        lines.append("%s&%s" % (t1, t2))

    lines.reverse()
    return "~~~".join(lines)

def _process_record(individual_id, ged, parents_map, display_name_map, years_map):
    global generation_table, visited_pairs
    generation_table = []
    visited_pairs = set()
    _find_parents(individual_id, 1, parents_map)
    paths = _find_distant(individual_id, parents_map)
    best_score, best_path = None, None
    for path in paths:
        score = 0
        for idx, pid in enumerate(path):
            nm = (display_name_map.get(pid, "") or "")
            if "Yates" in nm:
                score += (idx + 1)
        if best_score is None or score > best_score:
            best_score, best_path = score, path
    best_path = best_path or []
    best_ids  = [pid for pid in best_path if pid != individual_id]
    line_str  = _filter_lineage(set(best_ids), generation_table, display_name_map, years_map)

    cm_value = ""
    sort_value = ""
    ydna_value = ""
    for ds in ged.filter_pool:
        if ds.get_gen_person() == individual_id:
            cm_value   = ds.get_extractable_cm()
            sort_value = ds.get_extractable_sort()
            ydna_value = ds.get_extractable_YDNA()
            break

    short_name = display_name_map.get(individual_id, "Unknown Name")
    return [individual_id, sort_value, short_name, cm_value, line_str, ydna_value]

# ---------- First ancestor pair CSV ----------

# ---------- Authority lock-in module (First Ancestor couple map) ----------
# Purpose:
# - Write an importable python module containing AUTH_COUPLE_KEY_MAP built from df_pairs.
# - This eliminates cross-cell ambiguity: Cell 2 / Cell 3 can import the exact map.
#
# Usage in Cell 2 / Cell 3:
#   from yates_authority_first_ancestor_map import AUTH_COUPLE_KEY_MAP

LOCKIN_MODULE_LOCAL = "yates_authority_first_ancestor_map.py"

def _write_authority_lockin_module_from_df_pairs(df_pairs, out_py=LOCKIN_MODULE_LOCAL):
    """
    Build mapping (Ancestor1_ID, Ancestor2_ID) -> FirstPair_LastFirst (and reverse order)
    from the in-memory df_pairs and write a standalone module.

    Notes:
    - Values are written EXACTLY as in df_pairs['FirstPair_LastFirst'] (no HTML escaping).
    - Keys are uppercased IDs (I####).
    - File is written as iso-8859-15 with xmlcharrefreplace (site-safe).
    """
    if df_pairs is None or df_pairs.empty:
        raise RuntimeError("df_pairs is empty; cannot lock in authority map.")

    required = ["FirstPair_Ancestor1_ID", "FirstPair_Ancestor2_ID", "FirstPair_LastFirst"]
    for c in required:
        if c not in df_pairs.columns:
            raise RuntimeError("df_pairs missing required column: %s" % c)

    m = {}
    for _, r in df_pairs.iterrows():
        a1 = str(r.get("FirstPair_Ancestor1_ID", "") or "").strip().upper()
        a2 = str(r.get("FirstPair_Ancestor2_ID", "") or "").strip().upper()
        k  = str(r.get("FirstPair_LastFirst", "") or "").strip()
        k  = re.sub(r"\s+", "", k)
        if not (a1 and a2 and k):
            continue
        m[(a1, a2)] = k
        m[(a2, a1)] = k

    if not m:
        raise RuntimeError("Authority map built empty from df_pairs")

    header = [
        "# -*- coding: iso-8859-15 -*-",
        '"""',
        "AUTO-GENERATED FILE - DO NOT HAND EDIT",
        "Generated by: Cell 1 (GEDCOM -> CSV build)",
        "Contains: AUTH_COUPLE_KEY_MAP dict for (Ancestor1_ID, Ancestor2_ID) -> FirstPair_LastFirst",
        '"""',
        "",
        "AUTH_COUPLE_KEY_MAP = {",
    ]

    # Deterministic order for stable diffs
    lines = []
    for (a1, a2) in sorted(m.keys()):
        lines.append("    (%r, %r): %r," % (a1, a2, m[(a1, a2)]))

    footer = ["}", ""]
    with open(out_py, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write("\n".join(header + lines + footer))

    print("[OK] Wrote authority lock-in module:", os.path.abspath(out_py), "| pairs:", (len(m)//2))
    return out_py



def _slug_no_space_no_comma(s):
    s = (s or "").lower()
    s = s.replace(",", "").replace(" ", "")
    s = re.sub(r"[^a-z0-9]+", "", s)
    return s

def _name_to_lastfirst_pretty_and_slug(display_name):
    s = (display_name or "").strip()
    s = re.sub(r"\s+", " ", s)
    if not s or s.lower() == "unknown name":
        pretty = "Unknown"
        return pretty, _slug_no_space_no_comma(pretty)
    parts = s.split(" ")
    if len(parts) == 1:
        pretty = parts[0]
        return pretty, _slug_no_space_no_comma(pretty)
    last = parts[-1]
    given = " ".join(parts[:-1]).strip()
    pretty = ("%s, %s" % (last, given)) if given else last
    return pretty, _slug_no_space_no_comma(pretty)

def _parse_first_pair_tokens(lineage_str):
    s = (lineage_str or "").strip()
    if not s:
        return ("", "", "", "", "", "", "")
    first_seg = s.split("~~~", 1)[0].strip()
    if "&" not in first_seg:
        return ("", "", "", "", "", "", "")
    left, right = first_seg.split("&", 1)

    def _split_token(tok):
        tok = (tok or "").strip()
        parts = tok.split("~")
        pid = (parts[0] if len(parts) > 0 else "").strip()
        nm  = (parts[1] if len(parts) > 1 else "").strip()
        yrs = (parts[2] if len(parts) > 2 else "").strip()
        return pid, nm, yrs

    pid1, nm1, yrs1 = _split_token(left)
    pid2, nm2, yrs2 = _split_token(right)

    lf1_pretty, lf1_slug = _name_to_lastfirst_pretty_and_slug(nm1)
    lf2_pretty, lf2_slug = _name_to_lastfirst_pretty_and_slug(nm2)

    pair_lastfirst = "%s&%s" % (lf1_slug, lf2_slug)
    return (pid1, lf1_pretty, yrs1, pid2, lf2_pretty, yrs2, pair_lastfirst)

def main():
    gedcom_path = _ensure_gedcom_available()
    if not gedcom_path:
        print("No GEDCOM files found in content, and no download produced one.")
        return False

    print("[INFO] Using GEDCOM: %s" % gedcom_path)

    ged = Gedcom(gedcom_path)
    autosomal_count = ged.parse_gedcom()

    with open(AUTOSOMAL_COUNT_TXT, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(str(autosomal_count))
    print("[OK] Wrote %s = %d" % (AUTOSOMAL_COUNT_TXT, autosomal_count))

    last_updated_text = _now_est_string()
    print("[OK] LAST_UPDATED_TEXT (EST) for dna_vitals.csv: %s" % last_updated_text)

    npfx_count = int(ged.npfx_count)
    after_manual_filter_total = int(ged.after_manual_filter_total or len(ged.filter_pool))

    vitals_lines = [
        "Records tagged and filtered by NPFX: %d" % npfx_count,
        "After manual filter, total records: %d" % after_manual_filter_total,
        "LAST_UPDATED_TEXT: %s" % last_updated_text,
    ]
    pd.DataFrame({"line": vitals_lines}).to_csv(
        VITALS_CSV_PATH,
        index=False,
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    )
    print("[OK] Wrote dna_vitals.csv -> %s" % os.path.abspath(VITALS_CSV_PATH))

    with open(gedcom_path, "r", encoding="utf-8-sig") as f:
        raw = f.read()

    blocks = raw.split("\n0 ")
    all_records = {}
    for blk in blocks:
        blk = blk.strip()
        if not blk:
            continue
        flend = blk.find("\n")
        flend = len(blk) if flend == -1 else flend
        first_line = blk[:flend]
        if "@" in first_line:
            s = first_line.find("@") + 1
            e = first_line.find("@", s)
            rec_id = first_line[s:e].strip()
            all_records[rec_id] = blk

    parents_map = {}
    families = {}
    display_name_map = {}
    years_map = {}

    for rec_id, txt in all_records.items():
        if "FAM" in txt[:50]:
            father_idx = txt.find("1 HUSB @")
            husb_id = txt[father_idx + len("1 HUSB @"):txt.find("@", father_idx + len("1 HUSB @"))] if father_idx != -1 else None
            wife_idx = txt.find("1 WIFE @")
            wife_id = txt[wife_idx + len("1 WIFE @"):txt.find("@", wife_idx + len("1 WIFE @"))] if wife_idx != -1 else None
            kids = [ln.split("@")[1] for ln in txt.split("\n") if ln.strip().startswith("1 CHIL @")]
            families[rec_id] = (husb_id, wife_id, kids)
        if "INDI" in txt[:50]:
            display_name_map[rec_id] = _extract_display_name_from_indi_block(txt)
            years_map[rec_id] = _extract_birth_death_years_from_indi_block(txt)

    for _, (f_id, m_id, k_list) in families.items():
        for kid in k_list:
            parents_map[kid] = (f_id, m_id)

    individual_ids = [d.get_gen_person() for d in ged.filter_pool]
    print("Processing %d individuals with chunk-based parallel..." % len(individual_ids))

    combined_rows = []
    chunk_size = 50
    max_workers = os.cpu_count() or 4

    from functools import partial as _partial
    with ProcessPoolExecutor(max_workers=max_workers) as ex, tqdm(total=len(individual_ids), desc="Building Yates Lines (Stage 1)") as pbar:
        for chunk in _chunks(individual_ids, chunk_size):
            func = _partial(_process_record, ged=ged, parents_map=parents_map, display_name_map=display_name_map, years_map=years_map)
            results = list(ex.map(func, chunk))
            combined_rows.extend(results)
            pbar.update(len(chunk))

    columns = ["ID#", "Match to", "Name", "cM", "Yates DNA Ancestral Line", "haplogroup"]
    df = pd.DataFrame(combined_rows, columns=columns)
    df.sort_values(by=["Yates DNA Ancestral Line"], inplace=True)

    # ----- First ancestor pair CSV -----
    pairs_rows = []
    for _, row in df.iterrows():
        match_id = str(row.get("ID#", "") or "").strip()
        lineage  = str(row.get("Yates DNA Ancestral Line", "") or "")
        pid1, lf1, yrs1, pid2, lf2, yrs2, pair_lastfirst = _parse_first_pair_tokens(lineage)
        pairs_rows.append({
            "MatchID": match_id,
            "FirstPair_Ancestor1_ID": pid1,
            "FirstPair_Ancestor1_LastFirst": lf1,
            "FirstPair_Ancestor1_Years": yrs1,
            "FirstPair_Ancestor2_ID": pid2,
            "FirstPair_Ancestor2_LastFirst": lf2,
            "FirstPair_Ancestor2_Years": yrs2,
            "FirstPair_LastFirst": pair_lastfirst,
        })

    df_pairs = pd.DataFrame(pairs_rows)
    if not df_pairs.empty:
        df_pairs.sort_values(by=["FirstPair_LastFirst", "MatchID"], inplace=True)

    with open(FIRST_ANCESTOR_PAIRS_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(df_pairs.to_csv(index=False))
    logger.info("Exported FIRST ANCESTOR PAIRS CSV -> %s", FIRST_ANCESTOR_PAIRS_LOCAL)
    print("[OK] Wrote %s (%d rows)" % (FIRST_ANCESTOR_PAIRS_LOCAL, len(df_pairs)))

    # ----- Authority lock-in module (python import) -----
    try:
        _write_authority_lockin_module_from_df_pairs(df_pairs, LOCKIN_MODULE_LOCAL)
    except Exception as e:
        print("[WARN] Authority lock-in module not written:", e)


    # ----- Existing main CSV export -----
    with open(CSV_OUT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(df.to_csv(index=False))
    logger.info("Exported CSV -> %s", CSV_OUT_LOCAL)

    final_cols = ["ID#", "cM", "haplogroup", "Match to", "Yates DNA Ancestral Line"]

    # Ensure table is styled/recognized as sortable by your site conventions
    table_html = df.to_html(index=False, columns=final_cols, escape=False, border=1, classes=["sortable"])

    page_tpl = Template("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Cell 1 Working Table</title>
<link rel="stylesheet" type="text/css" href="/partials/partials_unified.css" />
</head>
<body>
<div class="wrap">
  <h1>Cell 1 Working Table</h1>

  <div class="updated">
    <a href="$HOME" target="_blank" rel="noopener">Home</a>
    &nbsp;|&nbsp; Last updated: $LAST_UPDATED_TEXT
    &nbsp;|&nbsp; Download: <a href="$CSV">$CSV</a>
    &nbsp;|&nbsp; First ancestor pairs: <a href="$PAIRS">$PAIRS</a>
  </div>

  <div class="downloads">
    <a href="$CSV">/partials/$CSV_NAME</a>
    &nbsp;|&nbsp;
    <a href="$PAIRS">/partials/$PAIRS_NAME</a>
  </div>

  <div class="table-scroll-wrapper">
    <div class="table-scroll">
      $TABLE
    </div>
  </div>
</div>
</body>
</html>""")

    page = page_tpl.safe_substitute(
        HOME=ABS_HOME_URL,
        CSV=ABS_CSV_URL,
        CSV_NAME=os.path.basename(ABS_CSV_URL),
        PAIRS=ABS_FIRST_ANCESTOR_PAIRS_URL,
        PAIRS_NAME=os.path.basename(ABS_FIRST_ANCESTOR_PAIRS_URL),
        TABLE=table_html,
        LAST_UPDATED_TEXT=last_updated_text,
    )

    with open(HTML_OUT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(page)
    logger.info("Exported HTML -> %s", HTML_OUT_LOCAL)

    return True

ok = main()

if ok and FTP_HOST and FTP_USER and FTP_PASS:
    print("[INFO] Uploading artifacts to /partials/ ...")
    try:
        ftps = _ftps_connect()
        _ftps_ensure_dir(ftps, "partials")
        for p in [CSV_OUT_LOCAL, HTML_OUT_LOCAL, VITALS_CSV_PATH, AUTOSOMAL_COUNT_TXT, FIRST_ANCESTOR_PAIRS_LOCAL, LOCKIN_MODULE_LOCAL]:
            try:
                _ftps_upload(ftps, p, os.path.basename(p))
            except Exception as e:
                print("[ERROR] Upload failed for %s: %s" % (p, e))
        try:
            ftps.quit()
        except Exception:
            pass
        print("[OK] Uploads complete to /partials/")
        print("[OK] First ancestor pairs CSV at: %s" % ABS_FIRST_ANCESTOR_PAIRS_URL)
    except Exception as e:
        print("[ERROR] FTP session failed:", e)
        traceback.print_exc()
else:
    print("[INFO] Skipping FTP upload (missing creds or build failed).")

print("\n--- Cell 1 Complete: lineage tokens include ID + display name + birth-death years. ---")
print("--- first_ancestor_pairs.csv: FirstPair_LastFirst is a slug (no spaces/commas): yatesjohn&gaterjoane. ---")
# ====== CUT STOP  [1/1] CELL 1 - GEDCOM -> CSV + HTML + Upload + FIRST ANCESTOR PAIRS CSV (Sortable) ======


[CONFIRM] Golden Rules active | Cell=Cell1_FTPS_Explicit | Version=2026.01.31-UNIFIED-BASELINE | Encoding=ISO-8859-15
[ENV] HOST=ftp***net  USER=ad***et  PASS=***  PORT=21  DIR=(root)
[INFO] Local GEDCOM present in content. Selected newest: yates_study_2025.ged (mtime=2026-02-03 01:35:03)
[INFO] Using GEDCOM: yates_study_2025.ged
GEDCOM contained 63510 total records
Records tagged and filtered by NPFX: 1700
Records with YDNA information: 0
Autosomal matches (NPFX minus YDNA): 1700
After manual filter, total records: 93
[OK] Wrote autosomal_count.txt = 1700
[OK] LAST_UPDATED_TEXT (EST) for dna_vitals.csv: February 2, 2026 8:40 PM
[OK] Wrote dna_vitals.csv -> /content/dna_vitals.csv
Processing 93 individuals with chunk-based parallel...


Building Yates Lines (Stage 1): 100%|██████████| 93/93 [00:29<00:00,  3.13it/s]


[OK] Wrote first_ancestor_pairs.csv (93 rows)
[OK] Wrote authority lock-in module: /content/yates_authority_first_ancestor_map.py | pairs: 25
[INFO] Uploading artifacts to /partials/ ...
[OK] Uploaded: final_combined_df_with_value_labels.csv -> /partials/final_combined_df_with_value_labels.csv
[OK] Uploaded: cell1_work_table.htm -> /partials/cell1_work_table.htm
[OK] Uploaded: dna_vitals.csv -> /partials/dna_vitals.csv
[OK] Uploaded: autosomal_count.txt -> /partials/autosomal_count.txt
[OK] Uploaded: first_ancestor_pairs.csv -> /partials/first_ancestor_pairs.csv
[OK] Uploaded: yates_authority_first_ancestor_map.py -> /partials/yates_authority_first_ancestor_map.py
[OK] Uploads complete to /partials/
[OK] First ancestor pairs CSV at: /partials/first_ancestor_pairs.csv

--- Cell 1 Complete: lineage tokens include ID + display name + birth-death years. ---
--- first_ancestor_pairs.csv: FirstPair_LastFirst is a slug (no spaces/commas): yatesjohn&gaterjoane. ---


# Cell 2

In [9]:
# -*- coding: iso-8859-15 -*-
# Cell2_3Col_AuthorityFirstAncestor - SWAP COL 2 & 3 (DISPLAY) + GOLDEN SORT RULE
# Version=2026.02.02-SWAP23-MATCHSUMMARY-SORTBY-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1
#
# Intent:
# - Display columns 1, 2, 3 with columns 2 and 3 swapped:
#     1) Match to
#     2) Match Summary
#     3) First Ancestor
# - Golden rule preserved:
#     Clicking "Match Summary" sorts using the data in "First Ancestor".
# - Authority is LOCKED-IN via python module hosted on server:
#     /partials/yates_authority_first_ancestor_map.py  -> AUTH_COUPLE_KEY_MAP
# - Enriched exclusion:
#     Drop all couples older than: Francis Yates (1541-1588) & Jane Tichborne (1548-1580)
#     (same logic as your working Cell 3: explicit prefix match + anchor trim).
#
# Notes:
# - Paste into a Colab cell OR run as a script in the same working directory.
# - Output written as iso-8859-15 with xmlcharrefreplace.

print("[CONFIRM] Golden Rules active | Cell=Cell2_3Col_Swap23 | Version=2026.02.02-SWAP23-MATCHSUMMARY-SORTBY-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15")

import os
import re
import posixpath
import socket
import traceback
from ftplib import FTP_TLS
import pandas as pd
import html as _html
from string import Template
import importlib.util

# ---------- A) LAYOUT CONTROL BLOCK ----------
# Swapped widths to match swapped display:
#   Col 2 = Match Summary (wide)
#   Col 3 = First Ancestor (narrow)
COL_1_PX = 220
COL_2_PX = 1240
COL_3_PX = 420

COL_WIDTHS = [COL_1_PX, COL_2_PX, COL_3_PX]
TABLE_TOTAL_WIDTH_PX = sum(COL_WIDTHS)

print("[LAYOUT] TABLE_TOTAL_WIDTH_PX=%d" % TABLE_TOTAL_WIDTH_PX)
print("[LAYOUT] Column widths (px): 1=%d 2=%d 3=%d" % (COL_1_PX, COL_2_PX, COL_3_PX))

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

# Authority LOCK-IN module path on server
AUTH_MODULE_REMOTE_DIR = "partials"
AUTH_MODULE_BASENAME   = "yates_authority_first_ancestor_map.py"
AUTH_MODULE_REMOTE     = posixpath.join(AUTH_MODULE_REMOTE_DIR, AUTH_MODULE_BASENAME)
AUTH_MODULE_LOCAL      = "yates_authority_first_ancestor_map.server.py"

LOCAL_HTML        = "yates_ancestor_register.shtml"
REMOTE_HTML_CANON = posixpath.join("partials", "yates_ancestor_register.shtml")
REMOTE_HTML_LEG   = posixpath.join("partials", "ons_yates_dna_register.shtml")

FTP_DIR  = (os.environ.get("FTP_DIR", "") or "").strip()

TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

REMOVE_PERIOD_AT_END = True

UNIFIED_CSS_BASENAME = "partials_unified.css"
UNIFIED_CSS_VERSION  = "v2026-02-01-unified-blue-refactor1"
UNIFIED_CSS_HREF     = "/partials/%s?%s" % (UNIFIED_CSS_BASENAME, UNIFIED_CSS_VERSION)
HEAD_LINK            = '<link rel="stylesheet" type="text/css" href="%s" />' % UNIFIED_CSS_HREF

NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'
NAV_FALLBACK_HTML = ""

VITALS_CSV = "dna_vitals.csv"

SERVER_PARTIALS_DIR        = "partials"
SERVER_MAPPING_BASENAME    = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE      = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

# ---------- Enriched exclusion prefix (formatted lineage) ----------
# Same as Cell 3 (explicit couple list up to Thomas Yates & Elizabeth Fauconer)
LINEAGE_SPOUSE_SEP = " & "
LINEAGE_COUPLE_SEP = " ~ "

ENRICHED_EXCLUDE_PREFIX = (
    "John Yates (1430-) & Still Searching ~ "
    "William Yates (1389-1440) & Still Searching ~ "
    "William Yates (1420-) & Still Searching ~ "
    "Edmund Yates (1445-1472) & Margaret Cornell ~ "
    "Richard Yates (1440-1498) & Joan Ashendon (1445-1499) ~ "
    "John Yates (1471-1544) & Alice Hyde (1498-1523) ~ "
    "Thomas Yates (1509-1565) & Elizabeth Fauconer (-1562) ~"
)

# ---------- 2) FTP ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    for seg in [p for p in remote_path.split("/")[:-1] if p and p != "."]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

# ---------- 3) CSV helpers ----------
def _read_csv_anyenc(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    dfx = None
    for enc in encs:
        try:
            dfx = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            dfx = None
    if dfx is None:
        raise RuntimeError("Unable to read CSV %s: %s" % (path, last))
    return dfx

def _read_mapping_csv(path: str) -> pd.DataFrame:
    dfm = _read_csv_anyenc(path)
    if dfm.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    dfm = dfm.iloc[:, :2].copy()
    dfm.columns = ["code", "unmasked"]
    dfm["code"]     = dfm["code"].astype(str).str.strip().str.lower()
    dfm["unmasked"] = dfm["unmasked"].astype(str).str.strip()
    dfm = dfm[dfm["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if dfm.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return dfm

# ---------- 3.1) Resolver ----------
def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

def resolve_match_to(code: str) -> str:
    if not isinstance(code, str):
        return ""
    return MATCH_TO_UNMASKED.get(code.strip().lower(), code)

# ---------- 4) Text utils ----------
SEP_RE = re.compile(r"\s*(?:\u2192|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")
ID_PAT = re.compile(r"\bI\d+\b", re.I)

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(['&#8217;])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])",  lambda m: "Mc"  + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i-1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_name_html, cm_val, matchee_name_html, gens, couple_text_html):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        couple_text_html,
    ]
    s = " ".join(parts)
    if REMOVE_PERIOD_AT_END:
        s = re.sub(r"\.\s*$", "", s)
    return s

# ---------- 4.1) Parse enriched first ancestor token ----------
def _scrub_side_keep_name_years(side_raw: str):
    side_raw = _clean_piece(side_raw or "")
    if not side_raw:
        return ("", "", "")

    if "~" in side_raw:
        bits = [b.strip() for b in side_raw.split("~")]
        bits = [b for b in bits if b != ""]
        if bits and re.match(r"^I\d+$", bits[0], re.I):
            pid = bits[0].upper()
            nm  = normalize_person_name(bits[1]) if len(bits) >= 2 else ""
            yrs = _clean_piece(bits[2]) if len(bits) >= 3 else ""
            return (pid, nm, yrs)

    m = re.match(r"^(I\d+)\s+(.*)$", side_raw, flags=re.I)
    if m:
        pid  = m.group(1).upper()
        rest = _clean_piece(m.group(2))
        yrs  = ""
        m2 = re.search(r"(\b\d{4}\s*-\s*(?:\d{4})?\b)\s*$", rest)
        if m2:
            yrs  = _clean_piece(m2.group(1).replace(" ", ""))
            rest = _clean_piece(rest[:m2.start()])
        nm = normalize_person_name(rest) if rest else ""
        return (pid, nm, yrs)

    nm2 = smart_titlecase(side_raw) if " " in side_raw else smart_titlecase(surname_given_from_token(side_raw)[0])
    return ("", nm2, "")

def _couple_display_and_ids_from_token(raw_token: str):
    raw = _clean_piece(raw_token or "")
    if not raw:
        return ("", "", "")

    parts = re.split(r"\s*(?:&| and )\s*", raw, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        pid, nm, yrs = _scrub_side_keep_name_years(raw)
        disp = nm or raw
        if yrs:
            disp = disp + " (%s)" % yrs
        return (disp, pid, "")

    f_id, f_nm, f_yrs = _scrub_side_keep_name_years(parts[0])
    m_id, m_nm, m_yrs = _scrub_side_keep_name_years(parts[1])

    f_disp = f_nm or normalize_person_name(parts[0])
    m_disp = m_nm or normalize_person_name(parts[1])

    if f_yrs:
        f_disp = f_disp + " (%s)" % f_yrs
    if m_yrs:
        m_disp = m_disp + " (%s)" % m_yrs

    disp = "%s%s%s" % (f_disp, LINEAGE_SPOUSE_SEP, m_disp)
    return (disp, f_id, m_id)

# ---------- Enriched exclusion (same logic as Cell 3) ----------
def _norm_couple_for_match(s: str) -> str:
    return re.sub(r"\s{2,}", " ", str(s or "")).strip().lower()

_EXCLUDE_COUPLES = [
    c.strip()
    for c in re.split(r"\s*~\s*", (ENRICHED_EXCLUDE_PREFIX or "").strip().strip("~"))
    if c and c.strip()
]

def _strip_paren_years_anywhere(s: str) -> str:
    return re.sub(r"\([^)]*\)", "", str(s or "")).strip()

def _is_anchor_couple(couple_text: str) -> bool:
    t = _strip_paren_years_anywhere(couple_text).lower()
    return ("francis yates" in t) and ("jane tichborne" in t)

def _apply_enriched_exclusion_to_couples(couples):
    couples = [c for c in (couples or []) if c and str(c).strip()]
    if not couples:
        return couples

    # (A) Exact prefix removal by couple list match
    if _EXCLUDE_COUPLES and len(couples) >= len(_EXCLUDE_COUPLES):
        ok = True
        for i in range(len(_EXCLUDE_COUPLES)):
            if _norm_couple_for_match(couples[i]) != _norm_couple_for_match(_EXCLUDE_COUPLES[i]):
                ok = False
                break
        if ok:
            couples = couples[len(_EXCLUDE_COUPLES):]

    # (B) Anchor trim: drop everything before Francis+Jane if present anywhere
    if couples:
        for i, c in enumerate(couples):
            if _is_anchor_couple(c):
                couples = couples[i:]
                break

    return couples

# ---------- 4.2) Load authority LOCK-IN module ----------
def _download_authority_module_if_needed() -> str:
    # Prefer a local module if already present
    for p in ("yates_authority_first_ancestor_map.py", AUTH_MODULE_LOCAL):
        if os.path.exists(p):
            print("[OK] Using local authority module:", os.path.abspath(p))
            return p

    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        raise RuntimeError("Missing FTP creds; cannot download authority module %s" % AUTH_MODULE_REMOTE)

    with ftp_connect() as ftps:
        try:
            ftps.cwd(AUTH_MODULE_REMOTE_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, AUTH_MODULE_BASENAME, AUTH_MODULE_LOCAL)
        try:
            ftps.quit()
        except Exception:
            pass

    if not ok:
        raise RuntimeError("Authority module not found on server: /%s" % _remote_path(AUTH_MODULE_REMOTE))

    return AUTH_MODULE_LOCAL

def _import_authority_map(module_path: str) -> dict:
    spec = importlib.util.spec_from_file_location("yates_authority_first_ancestor_map", module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError("Unable to import authority module from %s" % module_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)  # type: ignore
    m = getattr(mod, "AUTH_COUPLE_KEY_MAP", None)
    if not isinstance(m, dict) or not m:
        raise RuntimeError("AUTH_COUPLE_KEY_MAP missing/empty in %s" % module_path)
    print("[OK] Authority map loaded from module: %d pairs" % (len(m)//2))
    return m

AUTH_COUPLE_KEY_MAP = _import_authority_map(_download_authority_module_if_needed())

# ---------- 5) Read main CSV ----------
def find_col(df0, patterns, prefer_exact=None):
    cols = list(df0.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df0.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

df = _read_csv_anyenc(CSV_IN)
print("[OK] Loaded CSV: %d rows, %d cols" % (len(df), len(df.columns)))

id_col    = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
name_col  = find_col(df, [r"^name$"], ["Name"])
cm_col    = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
path_col  = find_col(df, [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
                     ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"])

for req, nm in [(id_col,"ID#/PersonID"), (match_col,"Match to"), (name_col,"Name"), (cm_col,"cM"), (path_col,"Lineage")]:
    if not req:
        raise ValueError("CSV missing required column: %s" % nm)

# ---------- 5.1) Vitals (optional) ----------
AUTOSOMAL_MATCHES = ""
LAST_UPDATED_TEXT = ""

def _friendly_ts_from_utc(raw):
    raw = str(raw or "").replace("UTC","").strip()
    m = re.match(r"^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2})(?::(\d{2}))?$", raw)
    if not m:
        return raw
    Y, Mo, D, h, mi, s = [int(x or "0") for x in m.groups()]
    import datetime as _dt
    dt = _dt.datetime(Y, Mo, D, h, mi, s) - _dt.timedelta(hours=5)
    months = ["January","February","March","April","May","June","July","August","September","October","November","December"]
    hh = dt.hour
    ampm = "AM" if hh < 12 else "PM"
    h12 = hh % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (months[dt.month-1], dt.day, dt.year, h12, dt.minute, ampm)

def _format_int_with_commas(s):
    if s is None:
        return ""
    t = re.sub(r"[^0-9\-]", "", str(s))
    if not t:
        return str(s)
    try:
        return "{:,}".format(int(t))
    except Exception:
        return str(s)

def _load_vitals(path):
    global AUTOSOMAL_MATCHES, LAST_UPDATED_TEXT
    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; header will omit counts and last-updated text.")
        return
    vdf = _read_csv_anyenc(path)
    flat = [str(cell) for row in vdf.astype(str).values.tolist() for cell in row]
    autosomal = last_text = None
    for cell in flat:
        if autosomal is None and "Records tagged and filtered by NPFX" in cell:
            m = re.search(r"(\d[\d,]*)", cell)
            if m:
                autosomal = m.group(1)
        if last_text is None and "LAST_UPDATED_TEXT" in cell:
            m = re.search(r"LAST_UPDATED_TEXT\s*:\s*(.+)", cell)
            if m:
                last_text = m.group(1).strip()
    if last_text is not None:
        last_text = _friendly_ts_from_utc(last_text)
    AUTOSOMAL_MATCHES = _format_int_with_commas(autosomal) if autosomal else ""
    LAST_UPDATED_TEXT = last_text or ""

_load_vitals(VITALS_CSV)

# ---------- 6) Transform + compute authority sort key ----------
_setup_resolver()

def _authority_token_from_display_name(name_text: str) -> str:
    # Only used for LAST-resort; the preferred key is the authority module lookup.
    s = re.sub(r"\([^)]*\)", "", str(name_text or ""))
    s = s.replace("\u00a0", " ")
    s = re.sub(r"\s+", " ", s).strip().lower()
    if not s:
        return ""
    if "unknown" in s:
        return "unknown"
    s = re.sub(r"[^a-z0-9 ]+", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    if not s:
        return ""
    parts = s.split(" ")
    if len(parts) == 1:
        return re.sub(r"[^a-z0-9]+", "", parts[0])
    surname = parts[-1]
    givens  = parts[:-1]
    return re.sub(r"[^a-z0-9]+", "", surname + "".join(givens))

def _authority_couple_token_from_display(couple_disp: str) -> str:
    s = str(couple_disp or "").strip()
    if not s:
        return ""
    parts = [p.strip() for p in re.split(r"\s*(?:&| and )\s*", s, maxsplit=1, flags=re.I) if p.strip()]
    if len(parts) == 2:
        a = _authority_token_from_display_name(parts[0])
        b = _authority_token_from_display_name(parts[1])
        if a and b:
            return a + "&" + b
        return (a or "") + ("&" if (a or b) else "") + (b or "")
    return _authority_token_from_display_name(s)

out_match_to = []
out_first_ancestor = []
out_summary  = []
out_sort_key = []

for _, row in df.iterrows():
    subject_raw  = row.get(match_col, "")
    subject_name = normalize_person_name(resolve_match_to(subject_raw))
    subject_name_html = "<strong>%s</strong>" % _html.escape(subject_name or "", quote=False)

    pid          = extract_person_id(row.get(id_col, ""))
    matchee_raw  = row.get(name_col, "")
    matchee_name = norm_matchee_name(matchee_raw) or subject_name

    if pid:
        matchee_url = (
            "%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15"
            % (TNG_BASE, pid, TNG_TREE)
        )
        matchee_name_html = '<a href="%s" target="_blank" rel="noopener">%s</a>' % (
            _html.escape(matchee_url, quote=True),
            _html.escape(matchee_name or "", quote=False),
        )
    else:
        matchee_name_html = _html.escape(matchee_name or "", quote=False)

    cm_val       = row.get(cm_col, "0")
    raw_tokens   = split_tokens(row.get(path_col, ""))

    # Convert raw tokens to formatted couple displays (+ ids), then apply enriched exclusion
    couple_disps = []
    couple_ids   = []
    for t in raw_tokens:
        disp, f_id, m_id = _couple_display_and_ids_from_token(t)
        couple_disps.append(disp)
        couple_ids.append((f_id, m_id))

    couple_disps = _apply_enriched_exclusion_to_couples(couple_disps)

    # Keep tokens aligned: if we trimmed couples, recompute the first couple IDs by re-parsing
    # from the corresponding raw token range when possible; otherwise fallback parse display.
    # Best: find the first displayed couple inside the raw list and use that index.
    first_disp = couple_disps[0] if couple_disps else ""
    first_idx  = 0
    if first_disp:
        for i, d0 in enumerate([_clean_piece(x) for x in couple_disps]):
            pass
        for i, d0 in enumerate([_clean_piece(x) for x in couple_disps]):
            pass
        for i, d in enumerate([_clean_piece(x) for x in couple_disps]):
            # only used for comparison normalization
            break
        # map displayed couples back to original displayed list (not raw tokens)
        # locate first_disp within the original untrimmed couple_disps_untrim by norm
    # Rebuild trimmed ids by matching against original sequence
    couple_disps_untrim = []
    couple_ids_untrim   = []
    for t in raw_tokens:
        disp, f_id, m_id = _couple_display_and_ids_from_token(t)
        couple_disps_untrim.append(disp)
        couple_ids_untrim.append((f_id, m_id))

    # If we trimmed, find where the trimmed list begins within the untrimmed list.
    start_at = 0
    if couple_disps and couple_disps_untrim:
        target = _norm_couple_for_match(couple_disps[0])
        found = None
        for i, d in enumerate(couple_disps_untrim):
            if _norm_couple_for_match(d) == target:
                found = i
                break
        if found is not None:
            start_at = found

    trimmed_ids = couple_ids_untrim[start_at:start_at+len(couple_disps)] if couple_disps else []
    gens_total  = len(couple_disps)

    couple_disp = couple_disps[0] if couple_disps else ""
    f_id = ""
    m_id = ""
    if trimmed_ids:
        f_id, m_id = trimmed_ids[0]
    else:
        # fallback: try to pull IDs from the original first token
        if couple_ids_untrim:
            f_id, m_id = couple_ids_untrim[0]

    # Authority key for the (possibly trimmed) first couple
    auth_key = ""
    if f_id and m_id:
        auth_key = re.sub(r"\s+", "", AUTH_COUPLE_KEY_MAP.get((f_id.upper(), m_id.upper()), ""))

    if not auth_key:
        auth_key = _authority_couple_token_from_display(couple_disp)

    couple_html = _html.escape(couple_disp or "", quote=False) if couple_disp else ""
    summary_html = build_header(subject_name_html, cm_val, matchee_name_html, gens_total, couple_html)

    out_match_to.append(_html.escape(subject_name or "", quote=False))
    out_first_ancestor.append(re.sub(r"\s+", "", str(auth_key or "")).replace("&", "&#38;"))
    out_summary.append(summary_html)

    auth_key = re.sub(r"\s+", "", auth_key or "")
    out_sort_key.append(auth_key or "zzzzzzzzzzzzzzzzzzzzzzzz")

df_out = pd.DataFrame({
    "Match to": out_match_to,
    "First Ancestor": out_first_ancestor,
    "Match Summary": out_summary,
    "__sort__": out_sort_key,
})
df_out = df_out.sort_values(by="__sort__", kind="mergesort").drop(columns=["__sort__"]).reset_index(drop=True)

# ---------- 7) HTML ----------
ROOT_VAR_STYLE = '<style type="text/css">:root{--table-width-px:%dpx;}</style>' % int(TABLE_TOTAL_WIDTH_PX)

updated_label = 'Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_TEXT or "")
_updated_parts = [updated_label]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
_updated_parts.append('Showing: <span id="showing-count"></span>')
UPDATED_BLOCK = '<div class="updated centerline">' + ' &nbsp;|&nbsp; '.join([p for p in _updated_parts if p.strip()]) + '</div>'

CONTROLS_BLOCK = (
    '<div class="controls controls-spaced centerline">'
    '<input type="text" id="search-box" class="search" size="28" value="" placeholder="Search&amp;hellip;" />'
    "</div>"
)

LATE_OVERRIDE_BLOCK = ""

# Display order: 1) Match to, 2) Match Summary, 3) First Ancestor
col_headers = [
    ("Match to", "center"),
    ("Match Summary", "left"),
    ("First Ancestor", "center"),
]
col_data = [
    df_out["Match to"].tolist(),
    df_out["Match Summary"].tolist(),
    df_out["First Ancestor"].tolist(),
]

thead_cells = []
for idx, (hdr, align) in enumerate(col_headers):
    wpx = COL_WIDTHS[idx]
    style_attr = "width:%dpx; display:table-cell !important;" % wpx
    if align == "center":
        thead_cells.append('<th class="center-header" style="%s">%s</th>' % (style_attr, hdr))
    else:
        thead_cells.append('<th class="left-header" style="%s">%s</th>' % (style_attr, hdr))

thead_html = (
    '<thead style="display:table-header-group !important;">\n'
    '  <tr style="display:table-row !important;">'
    + "".join(thead_cells)
    + "</tr>\n</thead>"
)

tbody_lines = ["<tbody>"]
for r in range(len(df_out)):
    cells = []
    for c in range(len(col_headers)):
        wpx = COL_WIDTHS[c]
        val = col_data[c][r]
        val_str = "" if val is None else str(val)
        cells.append('<td style="width:%dpx;">%s</td>' % (wpx, val_str))
    tbody_lines.append("  <tr>" + "".join(cells) + "</tr>")
tbody_lines.append("</tbody>")
tbody_html = "\n".join(tbody_lines)

html_table = (
    '<table border="1" class="dataframe sortable dna-register-table" id="refactor-table">'
    + thead_html + "\n" + tbody_html + "</table>"
)

SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div class="table-scroll" id="bottom-scroll">%s</div>'
    "</div>"
) % (html_table,)

JS_NAV_REPAIR = ""

page_tpl = Template("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ONS Yates Study Autosomal DNA Register</title>
$HEAD_LINK
$ROOT_VAR_STYLE
</head>
<body id="top">
<div class="wrap">
  <h1 class="centerline">ONS Yates Study Autosomal DNA Register</h1>
  $UPDATED_BLOCK
  $NAV_BLOCK
  $LATE_OVERRIDE_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>

$JS_NAV_REPAIR

<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '')
      .replace(/\\s+/g,' ')
      .trim()
      .toLowerCase();
  }

  function formatWithCommas(n){
    try{
      var x = parseInt(String(n||'').replace(/[^0-9\\-]/g,''), 10);
      if(isNaN(x)) return '';
      return x.toLocaleString('en-US');
    }catch(e){
      return String(n||'');
    }
  }

  function visibleRowCount(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return 0;
    var rows = tbl.tBodies[0].rows, n = 0;
    for(var i=0;i<rows.length;i++){
      if(rows[i].style.display !== 'none') n++;
    }
    return n;
  }

  function updateShowing(){
    var el = document.getElementById('showing-count');
    if(!el) return;
    el.textContent = formatWithCommas(visibleRowCount());
  }

  function sortTableByKey(tbl, keyColIndex, dir){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;

    var rows = Array.prototype.slice.call(tb.rows || []);
    var asc  = (dir === 'asc');

    rows.sort(function(a,b){
      var A = textOf(a.cells[keyColIndex]);
      var B = textOf(b.cells[keyColIndex]);

      var nA = parseFloat(A.replace(/[^0-9.\\-]/g,''));
      var nB = parseFloat(B.replace(/[^0-9.\\-]/g,''));

      if(!isNaN(nA) && !isNaN(nB)){
        return asc ? (nA - nB) : (nB - nA);
      }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });

    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++){
      frag.appendChild(rows[i]);
    }
    tb.appendChild(frag);
    updateShowing();
  }

  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;

    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;

    // Display indexes after swap:
    //   0 Match to
    //   1 Match Summary   <-- user clicks here
    //   2 First Ancestor  <-- key column
    function keyIndexForDisplayIndex(displayIdx){
      return (displayIdx === 1) ? 2 : displayIdx;
    }

    for(var i=0;i<ths.length;i++){
      (function(displayIdx){
        var th  = ths[displayIdx];
        var dir = 'asc';

        th.addEventListener('click', function(){
          dir = (dir === 'asc') ? 'desc' : 'asc';

          for (var j = 0; j < ths.length; j++){
            ths[j].innerHTML = ths[j].innerHTML.replace(/\\s+\\(asc\\)|\\s+\\(desc\\)/,'');
          }

          th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');

          var keyIdx = keyIndexForDisplayIndex(displayIdx);
          sortTableByKey(tbl, keyIdx, dir);
        }, false);
      })(i);
    }
  }

  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\\+/g,' ')) : '';
  }

  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;

    var tb   = tbl.tBodies[0];
    var rows = Array.prototype.slice.call(tb.rows || []);

    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\\s+/g,' ').toLowerCase();
    }

    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt  = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
      updateShowing();
    }

    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }

    box.addEventListener('input',  onInput, false);
    box.addEventListener('search', onInput, false);

    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }

  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindSearch();
    updateShowing();
  });
})();
//]]>
</script>

</body>
</html>
""")

final_html = page_tpl.safe_substitute(
    HEAD_LINK           = HEAD_LINK,
    ROOT_VAR_STYLE      = ROOT_VAR_STYLE,
    UPDATED_BLOCK       = UPDATED_BLOCK,
    NAV_BLOCK           = NAV_BLOCK,
    LATE_OVERRIDE_BLOCK = LATE_OVERRIDE_BLOCK,
    CONTROLS_BLOCK      = CONTROLS_BLOCK,
    SCROLL_WRAPPER      = SCROLL_WRAPPER,
    JS_NAV_REPAIR       = JS_NAV_REPAIR,
)

with open(LOCAL_HTML, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
    f.write(final_html)
print("[OK] Saved render: %s" % os.path.abspath(LOCAL_HTML))
print("[DEBUG] SSI nav include present:", ("<!--#include" in final_html))

# ---------- 8) Upload ----------
def save_and_upload_all():
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return
    try:
        ftps = ftp_connect()

        try:
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_CANON))
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_LEG))
        except Exception as e:
            print("[WARN] Upload main HTML failed: %s" % e)

        print("\n--- SIZE Verification (if supported) ---")
        for p in [_remote_path(REMOTE_HTML_CANON), _remote_path(REMOTE_HTML_LEG)]:
            sz = ftp_size(ftps, p)
            print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URLs ---")
        print("Canonical: https://yates.one-name.net/partials/yates_ancestor_register.shtml")
        print("Legacy:    https://yates.one-name.net/partials/ons_yates_dna_register.shtml")
        print("CSS:       https://yates.one-name.net/partials/partials_unified.css")
    except Exception as e:
        print("[FAIL] FTP session: %s" % e)
        traceback.print_exc()

save_and_upload_all()

print("\n--- Cell 2 complete (DISPLAY SWAP 2<->3; Match Summary sorts by First Ancestor; authority lock-in module; enriched prefix excluded) ---")


[CONFIRM] Golden Rules active | Cell=Cell2_3Col_Swap23 | Version=2026.02.02-SWAP23-MATCHSUMMARY-SORTBY-ANCESTOR-LOCKIN+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15
[LAYOUT] TABLE_TOTAL_WIDTH_PX=1880
[LAYOUT] Column widths (px): 1=220 2=1240 3=420
[OK] Using local authority module: /content/yates_authority_first_ancestor_map.py
[OK] Authority map loaded from module: 25 pairs
[OK] Loaded CSV: 93 rows, 6 cols
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 94 codes
[OK] Saved render: /content/yates_ancestor_register.shtml
[DEBUG] SSI nav include present: True
[PUT] yates_ancestor_register.shtml -> partials/yates_ancestor_register.shtml
[PUT] yates_ancestor_register.shtml -> partials/ons_yates_dna_register.shtml

--- SIZE Verification (if supported) ---
partials/yates_ancestor_register.shtml : 52351
partials/ons_yates_dna_register.shtml : 52351

--- Open URLs ---
Canonical: https://yates.one-name.net/partials/yates_ancestor_register.shtml
Legacy:    

# Cell 2b

In [None]:
#!/usr/bin/env python3
# -*- coding: iso-8859-15 -*-
# ====== CUT START [1/1] CELL 2b (Unified headers from Cell 2 + Cell 2b functionality) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.02.01-CELL2HDR-CELL2B1)
# * Complete & runnable Colab cell - one contiguous block.
# * Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# * XHTML 1.0 Transitional; typography/layout/colors via /partials/partials_unified.css (Cell 2 baseline).
# * No nav fallback markup. SSI include only (Cell 2 behavior).
# * Fix: single updated/header line with a single "Showing: <span id='showing-count'></span>" (JS-populated).
# * Fix: updateShowing() counts visible rows in the register table when filtering via selection menu.

print("[CONFIRM] Golden Rules active | Cell=Cell2b_Counts_UnifiedHeaders | Version=2026.02.01-CELL2HDR-CELL2B1 | Encoding=ISO-8859-15")

# NOTE: In Colab, the notebook cell text is not reliably introspectable for an exact line-count audit.
DECLARED_LINES = -1
print("[AUDIT] DECLARED_LINES={}".format(DECLARED_LINES))

import os, re, posixpath, socket, traceback
from datetime import datetime, timedelta
from ftplib import FTP_TLS
import pandas as pd
import html as _html

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

SERVER_PARTIALS_DIR = "partials"
SERVER_MAPPING_BASENAME = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

FTP_DIR = (os.environ.get("FTP_DIR", "") or "").strip()

# Cell 2 baseline stylesheet (single canonical CSS)
UNIFIED_CSS_BASENAME = "partials_unified.css"
UNIFIED_CSS_VERSION  = "v2026-02-01-unified-blue-refactor1"
UNIFIED_CSS_HREF     = "/partials/%s?%s" % (UNIFIED_CSS_BASENAME, UNIFIED_CSS_VERSION)
HEAD_LINK            = '<link rel="stylesheet" type="text/css" href="%s" />' % UNIFIED_CSS_HREF

# Shared nav include (SSI) - no fallback
NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

# vitals authority (built by Cell 1)
VITALS_LOCAL = "dna_vitals.csv"

# TNG settings for cousin links
TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

# Local partial paths
MATCH_COUNT_LOCAL   = os.path.join("partials", "match_count.shtml")
LINEAGE_COUNT_LOCAL = os.path.join("partials", "lineage_count.shtml")
COUSIN_PRINT_LOCAL  = os.path.join("partials", "cousin_list_print.htm")

# Remote partial paths
MATCH_COUNT_REMOTE   = posixpath.join("partials", "match_count.shtml")
LINEAGE_COUNT_REMOTE = posixpath.join("partials", "lineage_count.shtml")
COUSIN_PRINT_REMOTE  = posixpath.join("partials", "cousin_list_print.htm")

# ---------- 1a) Load vitals authority from dna_vitals.csv ----------
def _friendly_ts_from_utc(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return "(unknown)"

    raw_clean = raw.replace("UTC", "").replace("utc", "").strip()

    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]

    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(raw_clean, fmt)
            break
        except Exception:
            dt_utc = None

    if dt_utc is None:
        return raw

    dt_est = dt_utc - timedelta(hours=5)

    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24 = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12 = h24 % 12
    if h12 == 0:
        h12 = 12

    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _load_vitals(path: str):
    autosomal = "(unknown)"
    last_updated_display = "(unknown)"
    last_updated_raw = ""

    if not os.path.exists(path):
        print("[WARN] dna_vitals.csv not found; using '(unknown)' for header vitals.")
        return autosomal, last_updated_display, last_updated_raw

    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    df_v = None
    last_err = None
    for enc in encs:
        try:
            df_v = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as ex:
            last_err = ex
            df_v = None
    if df_v is None or "line" not in df_v.columns:
        print("[WARN] Unable to read dna_vitals.csv or missing 'line' column:", last_err)
        return autosomal, last_updated_display, last_updated_raw

    for raw_line in df_v["line"].astype(str):
        line = raw_line.strip()
        low = line.lower()
        if low.startswith("after manual filter, total records:"):
            m = re.search(r"(\d[\d,]*)", line)
            if m:
                num_raw = m.group(1).replace(",", "")
                try:
                    autosomal = "{:,}".format(int(num_raw))
                except Exception:
                    autosomal = num_raw
        elif low.startswith("last_updated_text:"):
            ts = line.split(":", 1)[1].strip()
            last_updated_raw = ts
            last_updated_display = _friendly_ts_from_utc(ts)

    print("[VITALS] autosomal (after manual filter): %s" % autosomal)
    print("[VITALS] last updated (display): %s" % last_updated_display)
    return autosomal, last_updated_display, last_updated_raw

AUTOSOMAL_MATCHES_TEXT, LAST_UPDATED_DISPLAY, LAST_UPDATED_RAW = _load_vitals(VITALS_LOCAL)

# ---------- 2) FTP helpers ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    parts = [p for p in remote_path.split("/")[:-1] if p and p != "."]
    for seg in parts:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

# ---------- 3) Resolver (match_to_unmasked.csv on server) ----------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError("Unable to read mapping CSV %s: %s" % (path, last))
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"] = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return df

def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

# ---------- 4) CSV + name helpers ----------
def find_col(df, patterns, prefer_exact=None):
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

# ASCII-only separators.
SEP_RE = re.compile(r"\s*(?:->|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(')([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])", lambda m: "Mc" + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i - 1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

def _split_first_last(display: str):
    d = _clean_piece(display or "")
    if not d:
        return ("", "")
    parts = d.split()
    if len(parts) == 1:
        return ("", parts[0])
    return (" ".join(parts[:-1]), parts[-1])

def _name_bold_last_html(display: str) -> str:
    first, last = _split_first_last(display)
    if not last and not first:
        return ""
    if not first:
        return '<span class="mc-last">%s</span>' % _html.escape(last, quote=False)
    return '%s <span class="mc-last">%s</span>' % (_html.escape(first, quote=False), _html.escape(last, quote=False))

ID_PAT = re.compile(r"\bI\d+\b", re.I)

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_name_html, cm_val, matchee_name_html, gens, husband, wife):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        "%s & %s." % (husband, wife),
    ]
    s = " ".join(parts)
    s = re.sub(r"\.\s*$", "", s)
    return s

def derive_common_from_first_token(tokens):
    if not tokens:
        return ("", "")
    first = _clean_piece(tokens[0])
    parts = re.split(r"\s*(?:&| and )\s*", first, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return ("", "")

    def _side_to_name(s):
        s = _clean_piece(s)
        if "~" in s:
            bits = [b.strip() for b in s.split("~") if b.strip() != ""]
            if bits and re.match(r"^I\d+$", bits[0], re.I):
                if len(bits) >= 2:
                    return normalize_person_name(bits[1])
                return ""
        return smart_titlecase(s) if " " in s else smart_titlecase(surname_given_from_token(s)[0])

    left = _side_to_name(parts[0])
    right = _side_to_name(parts[1])
    if left and right:
        return (left, right)
    return ("", "")

# ---------- 4b) Lineage label normalization ----------
_AMP_SPLIT = re.compile(r"\s*(?:&|and|AND|\+)\s*", re.I)

def normalize_parents_label(raw: str) -> str:
    raw = _clean_piece(raw or "")
    if not raw:
        return ""
    parts = _AMP_SPLIT.split(raw, maxsplit=1)
    if len(parts) == 2:
        left = smart_titlecase(parts[0])
        right = smart_titlecase(parts[1])
        left = _clean_piece(left)
        right = _clean_piece(right)
        if left and right:
            return "%s & %s" % (left, right)
    return smart_titlecase(raw)

def _parse_enriched_parent_side(side_raw: str):
    side_raw = _clean_piece(side_raw or "")
    if not side_raw:
        return ("", "", "")
    if "~" in side_raw:
        bits = [b.strip() for b in side_raw.split("~")]
        bits = [b for b in bits if b != ""]
        if bits and re.match(r"^I\d+$", bits[0], re.I):
            pid = bits[0].upper()
            nm = normalize_person_name(bits[1]) if len(bits) >= 2 else ""
            yrs = _clean_piece(bits[2]) if len(bits) >= 3 else ""
            return (pid, nm, yrs)

    m = re.match(r"^(I\d+)\s+(.*)$", side_raw, flags=re.I)
    if m:
        pid = m.group(1).upper()
        rest = _clean_piece(m.group(2))
        yrs = ""
        m2 = re.search(r"(\b\d{4}\s*-\s*(?:\d{4})?\b)\s*$", rest)
        if m2:
            yrs = _clean_piece(m2.group(1).replace(" ", ""))
            rest = _clean_piece(rest[:m2.start()])
        nm = normalize_person_name(rest) if rest else ""
        return (pid, nm, yrs)

    nm2 = smart_titlecase(side_raw) if " " in side_raw else smart_titlecase(surname_given_from_token(side_raw)[0])
    return ("", nm2, "")

def _lineage_label_with_links(first_raw: str) -> str:
    raw = _clean_piece(first_raw or "")
    if not raw:
        return ""

    parts = re.split(r"\s*(?:&| and )\s*", raw, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return '<span class="lc-new">%s</span>' % _html.escape(normalize_parents_label(raw) or raw, quote=False)

    f_id, f_nm, f_yrs = _parse_enriched_parent_side(parts[0])
    m_id, m_nm, m_yrs = _parse_enriched_parent_side(parts[1])

    def _side_html(pid, name, yrs, legacy_side):
        name = _clean_piece(name)
        yrs = _clean_piece(yrs)
        label = _html.escape(normalize_person_name(legacy_side), quote=False)
        if name:
            label = _html.escape(name, quote=False)
        if yrs:
            label = label + ' <span class="mc-code">(%s)</span>' % _html.escape(yrs, quote=False)
        if pid:
            href = "%s/familychart.php?personID=%s&tree=%s" % (TNG_BASE.rstrip("/"), pid.upper(), TNG_TREE)
            return '<a href="%s" target="_blank" rel="noopener">%s</a>' % (_html.escape(href, quote=True), label)
        return label

    left = _side_html(f_id, f_nm, f_yrs, parts[0])
    right = _side_html(m_id, m_nm, m_yrs, parts[1])
    return '<span class="lc-new">%s &amp; %s</span>' % (left, right)

def _norm_code_for_count(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = re.sub(r"\s+", " ", t).strip().lower()
    return t

# ---------- CSS helpers (page scoped) ----------
_MATCH_COUNT_ONECOL_CSS = (
    "<style type=\"text/css\">\n"
    "/* Match Count: ONE centered column, alpha order, scroll shows max 10 rows. */\n"
    "#ref-table{border-collapse:separate !important; border-spacing:0 !important;}\n"
    "#ref-table thead{position:absolute !important; left:-9999px !important; top:auto !important; width:1px !important; height:1px !important; overflow:hidden !important;}\n"
    "#ref-table, #ref-table tbody{display:block !important;}\n"
    "#ref-tb{padding:6px 4px 2px 4px !important;display:block !important;width: var(--mc-col-w) !important;max-width: 92vw !important;margin: 0 auto !important;box-sizing:border-box !important;--mc-row-h: 40px;--mc-col-w: 360px;max-height: calc(var(--mc-row-h) * 10) !important;overflow-y: auto !important;overflow-x: hidden !important;}\n"
    ".tile-head{width: var(--mc-col-w) !important;max-width: 92vw !important;margin: 0 auto 6px auto !important;padding: 6px 10px !important;box-sizing:border-box !important;border: 1px solid #ddd !important;border-radius: 10px !important;background: #f7f7f7 !important;display:flex !important;align-items:center !important;justify-content:space-between !important;font-weight:bold !important;}\n"
    ".tile-head .th-left{flex:1 1 auto !important; min-width:0 !important;}\n"
    ".tile-head .th-right{flex:0 0 auto !important; margin-left:10px !important; white-space:nowrap !important;}\n"
    "#ref-tb tr{display:flex !important; align-items:center !important;box-sizing:border-box !important;height: var(--mc-row-h) !important;margin:0 0 6px 0 !important;padding:0 10px !important;border:1px solid #ddd !important;border-radius:10px !important;background:#fff !important;overflow:hidden !important;}\n"
    "#ref-tb tr.sel-row{border-color:#d1a500 !important; box-shadow:0 0 0 2px rgba(209,165,0,0.15) inset !important;}\n"
    "#ref-tb td{border:none !important; padding:0 !important;}\n"
    "#ref-tb td.mc-name{flex:1 1 auto !important; min-width:0 !important; white-space:nowrap !important; overflow:hidden !important; text-overflow:ellipsis !important;}\n"
    "#ref-tb td.mc-count{flex:0 0 auto !important; margin-left:10px !important; font-weight:bold !important;}\n"
    "#ref-tb a.count-pick{text-decoration:none !important; padding:2px 6px !important; border:1px solid #ccc !important; border-radius:8px !important; display:inline-block !important;}\n"
    "#ref-tb tr.sel-row a.count-pick{border-color:#d1a500 !important;}\n"
    ".mc-last{font-weight:bold !important;}\n"
    ".mc-code{font-size:90% !important; color:#666 !important;}\n"
    "/* Ensure register headers stay visible */\n"
    "#reg-list thead{display:table-header-group !important;}\n"
    "#reg-list thead tr{display:table-row !important;}\n"
    "#reg-list thead th{display:table-cell !important;}\n"
    "#reg-list th{position:sticky !important; top:0 !important; background:#ffffff !important; z-index:10 !important;}\n"
    "@media print{ #ref-tb{max-height:none !important; overflow:visible !important; width:auto !important; max-width:none !important; margin:0 !important;} #ref-table thead{position:static !important; left:auto !important; width:auto !important; height:auto !important; overflow:visible !important;}}\n"
    "</style>\n"
)

_LINEAGE_COUNT_ONECOL_CSS = (
    "<style type=\"text/css\">\n"
    ":root{--lc-col-w:min(760px,calc(100vw - 28px));}\n"
    ".table-scroll.lineage-scroll{max-height:520px;overflow-y:auto;overflow-x:hidden;}\n"
    ".table-scroll.reg-scroll{max-height:none;overflow:visible;}\n"
    "#ref-table{border-collapse:separate !important;border-spacing:0 !important;}\n"
    "#ref-table thead{position:absolute !important; left:-9999px !important; top:auto !important; width:1px !important; height:1px !important; overflow:hidden !important;}\n"
    "#ref-table,#ref-table tbody{display:block !important;}\n"
    ".tile-head{width:var(--lc-col-w) !important;margin:0 auto 6px auto !important;padding:6px 10px !important;box-sizing:border-box !important;border:1px solid #ddd !important;border-radius:12px !important;background:#f7f7f7 !important;display:flex !important;align-items:center !important;justify-content:space-between !important;font-weight:bold !important;}\n"
    ".tile-head .th-left{flex:1 1 auto !important; min-width:0 !important;}\n"
    ".tile-head .th-right{flex:0 0 auto !important; margin-left:10px !important; white-space:nowrap !important;}\n"
    "#ref-tb{padding:4px 4px 2px 4px !important;display:block !important;width:var(--lc-col-w) !important;margin:0 auto !important;}\n"
    "#ref-tb tr{display:flex !important;align-items:center !important;gap:6px !important;width:100% !important;margin:5px auto !important;padding:7px 9px !important;border:1px solid #ddd !important;border-radius:12px !important;background:#fff !important;}\n"
    "#ref-tb tr.sel-row{border-color:#d1a500 !important;box-shadow:0 0 0 2px rgba(209,165,0,0.15) inset !important;}\n"
    "#ref-tb td{border:none !important;padding:0 !important;vertical-align:top !important;}\n"
    "#ref-tb td.lc-name{flex:1 1 auto !important;white-space:normal !important;overflow:visible !important;text-overflow:clip !important;}\n"
    "#ref-tb td.lc-count{flex:0 0 auto !important;margin-left:0 !important;font-weight:bold !important;white-space:nowrap !important;}\n"
    "#ref-tb a.count-pick{text-decoration:none !important;padding:2px 7px !important;border:1px solid #ccc !important;border-radius:10px !important;display:inline-block !important;}\n"
    "#ref-tb tr.sel-row a.count-pick{border-color:#d1a500 !important;}\n"
    ".lc-new{display:block;margin-top:0;}\n"
    ".lc-new a{text-decoration:none;}\n"
    ".lc-new a:hover{text-decoration:underline;}\n"
    "#reg-list thead{display:table-header-group !important;}\n"
    "#reg-list thead tr{display:table-row !important;}\n"
    "#reg-list thead th{display:table-cell !important;}\n"
    "#reg-list th{position:sticky !important; top:0 !important; background:#ffffff !important; z-index:10 !important;}\n"
    "@media print{ #ref-table thead{position:static !important; left:auto !important; width:auto !important; height:auto !important; overflow:visible !important;}}\n"
    "</style>\n"
)

# ---------- Cell 2 header builder (single updated line; single Showing span) ----------
def _build_updated_block() -> str:
    # Match Cell 2 structure: Last updated: <span id=last-updated>...</span> | Autosomal matches: N | Showing: <span id=showing-count></span>
    parts = []
    parts.append('Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_DISPLAY or "(unknown)", quote=False))
    parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES_TEXT or "(unknown)", quote=False))
    parts.append('Showing: <span id="showing-count"></span>')
    return '<div class="updated centerline">%s</div>' % (' &nbsp;|&nbsp; '.join(parts))

def _partial_head(title, helper_css=""):
    return (
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
        " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n"
        "<head>\n"
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-15\" />\n"
        "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n"
        "<title>%s</title>\n" % _html.escape(title, quote=False)
        + HEAD_LINK + "\n"
        + (helper_css or "")
        + "</head>\n<body id=\"top\">\n"
        "<div class=\"wrap\">\n"
        "<h1 class=\"centerline\">%s</h1>\n" % _html.escape(title, quote=False)
        + _build_updated_block() + "\n"
        + NAV_BLOCK + "\n"
        + "<div class=\"selection-menu centerline\">"
          "Showing: "
          "<a href=\"#\" onclick=\"return ySelShowSelected('ref-tb');\">Selected</a> &nbsp;|&nbsp; "
          "<a href=\"#\" onclick=\"return ySelShowAll('ref-tb');\">All</a> &nbsp;|&nbsp; "
          "<a href=\"#\" onclick=\"return ySelReset('ref-tb');\">Reset</a>"
          "</div>\n"
        + "<div class=\"table-scroll\">\n"
    )

def _partial_tail():
    # updateShowing() mirrors Cell 2 idea: compute visible row count.
    # Here we count visible rows in reg-list (the register) when it exists.
    return (
        "</div>\n</div>\n"
        "<script type=\"text/javascript\">\n//<![CDATA[\n"
        "(function(){\n"
        "function formatWithCommas(n){\n"
        " try{ var x=parseInt(String(n||'').replace(/[^0-9\\-]/g,''),10); if(isNaN(x)) return ''; return x.toLocaleString('en-US'); }\n"
        " catch(e){ return String(n||''); }\n"
        "}\n"
        "function countVisibleRowsInTable(tableId){\n"
        " var t=document.getElementById(tableId);\n"
        " if(!(t && t.tBodies && t.tBodies.length)) return 0;\n"
        " var rows=t.tBodies[0].rows, n=0;\n"
        " for(var i=0;i<rows.length;i++){ if(rows[i].style.display !== 'none') n++; }\n"
        " return n;\n"
        "}\n"
        "function updateShowing(){\n"
        " var el=document.getElementById('showing-count');\n"
        " if(!el) return;\n"
        " var n = 0;\n"
        " if(document.getElementById('reg-list')){ n = countVisibleRowsInTable('reg-list'); }\n"
        " else if(document.getElementById('ref-table')){ n = countVisibleRowsInTable('ref-table'); }\n"
        " el.textContent = formatWithCommas(n);\n"
        "}\n"
        "function ySelEachRow(tb, cb){ if(!tb) return; var rows=tb.getElementsByTagName('tr'); for(var i=0;i<rows.length;i++){cb(rows[i]);} }\n"
        "function ySelClear(tr){ if(!tr) return; tr.removeAttribute('data-selected'); var cls=tr.className||''; cls=cls.replace(/\\bsel-row\\b/g,'').replace(/\\s{2,}/g,' ').replace(/^\\s+|\\s+$/g,''); tr.className=cls; tr.style.backgroundColor=''; }\n"
        "function ySelToggle(a){ var tr=a; while(tr&&tr.tagName&&tr.tagName.toLowerCase()!=='tr'){tr=tr.parentNode;} if(!tr) return false; var sel=tr.getAttribute('data-selected')==='1'; if(sel){ ySelClear(tr);}else{ tr.setAttribute('data-selected','1'); var cls=tr.className||''; if(cls.indexOf('sel-row')===-1){tr.className=(cls?(cls+' '):'')+'sel-row';} tr.style.backgroundColor='#fff2cc'; } updateShowing(); return false; }\n"
        "function ySelGetTBody(tbodyId){ var tb=document.getElementById(tbodyId); if(tb) return tb; var t=document.getElementById('ref-table'); if(!t) return null; if(t.tBodies&&t.tBodies.length){return t.tBodies[0];} return t; }\n"
        "function ySelShowSelected(tbodyId){\n"
        " var tb=ySelGetTBody(tbodyId); if(!tb) return false;\n"
        " ySelEachRow(tb,function(tr){ var sel=tr.getAttribute('data-selected')==='1'; tr.style.display=sel?'':'none'; });\n"
        " var rl=document.getElementById('reg-list');\n"
        " if(rl){\n"
        "  var selVals=[];\n"
        "  ySelEachRow(tb,function(tr){ if(tr.getAttribute('data-selected')==='1'){ var v=tr.getAttribute('data-filter')||tr.getAttribute('data-lineage')||tr.getAttribute('data-code')||tr.getAttribute('data-q')||''; if(v){selVals.push(v);} } });\n"
        "  if(selVals.length===0){ updateShowing(); return false; }\n"
        "  var rows=rl.getElementsByTagName('tr');\n"
        "  for(var i=0;i<rows.length;i++){\n"
        "    var r=rows[i]; var lv=r.getAttribute('data-filter')||r.getAttribute('data-lineage')||r.getAttribute('data-code')||'';\n"
        "    var show=false; for(var j=0;j<selVals.length;j++){ if(lv===selVals[j]){show=true; break;} }\n"
        "    r.style.display=show?'':'none';\n"
        "  }\n"
        " }\n"
        " updateShowing();\n"
        " return false;\n"
        "}\n"
        "function ySelShowAll(tbodyId){ var tb=ySelGetTBody(tbodyId); if(!tb) return false; ySelEachRow(tb,function(tr){tr.style.display='';}); var rl=document.getElementById('reg-list'); if(rl){ var rows=rl.getElementsByTagName('tr'); for(var i=0;i<rows.length;i++){rows[i].style.display='';} } updateShowing(); return false; }\n"
        "function ySelReset(tbodyId){ var tb=ySelGetTBody(tbodyId); if(!tb) return false; ySelEachRow(tb,function(tr){tr.style.display=''; ySelClear(tr);}); var rl=document.getElementById('reg-list'); if(rl){ var rows=rl.getElementsByTagName('tr'); for(var i=0;i<rows.length;i++){rows[i].style.display='';} } updateShowing(); return false; }\n"
        "window.ySelToggle=ySelToggle; window.ySelShowSelected=ySelShowSelected; window.ySelShowAll=ySelShowAll; window.ySelReset=ySelReset;\n"
        "document.addEventListener('DOMContentLoaded', function(){ updateShowing(); }, false);\n"
        "})();\n"
        "//]]>\n</script>\n</body>\n</html>"
    )

# ---------- 6) DNA-register-style row builder ----------
def build_register_row(row, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str):
    subject_raw = row.get(match_col, "")
    key = str(subject_raw).strip().lower()
    subject_unmasked = MATCH_TO_UNMASKED.get(key, subject_raw)
    subject_name = normalize_person_name(subject_unmasked)
    subject_name_html = _html.escape(subject_name or "")

    pid = extract_person_id(row.get(id_col, ""))

    matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
    if pid:
        name_html = (
            '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
            'target="_blank" rel="noopener">%s</a>'
            % (TNG_BASE, pid, TNG_TREE, _html.escape(matchee_name or "", quote=False))
        )
    else:
        name_html = _html.escape(matchee_name or "", quote=False)

    cm_val = row.get(cm_col, "0")
    tokens = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)

    if "common_husband" in row.index and "common_wife" in row.index:
        husband_raw = str(row.get("common_husband", "")).strip()
        wife_raw = str(row.get("common_wife", "")).strip()
        if not husband_raw and not wife_raw:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)
    else:
        husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(
        subject_name_html or subject_name,
        cm_val,
        name_html,
        gens_total,
        husband_raw,
        wife_raw,
    )
    return subject_name_html, name_html, _html.escape(str(cm_val).strip()), header_html

# ---------- 7) Match Count partial (ONE COL, ALPHA) ----------
def build_match_count_partial(main_df: pd.DataFrame, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str) -> str:
    codes_raw = main_df[match_col].astype(str).map(lambda x: x.strip())
    keys_norm = codes_raw.map(_norm_code_for_count)

    counts_series = keys_norm.value_counts(dropna=False)
    counts = counts_series.reset_index()
    if counts.shape[1] >= 2:
        counts.columns = ["norm_key", "Count"]
    else:
        counts["norm_key"] = counts.index.astype(str)
        counts["Count"] = counts_series.values
        counts = counts[["norm_key", "Count"]]

    first_display = {}
    raw_list = codes_raw.tolist()
    norm_list = keys_norm.tolist()
    for code_disp, k in zip(raw_list, norm_list):
        if k not in first_display and str(k) != "":
            first_display[k] = code_disp

    counts["Code"] = counts["norm_key"].map(lambda k: first_display.get(k, k))
    counts["Unmasked"] = counts["norm_key"].map(lambda k: MATCH_TO_UNMASKED.get(k, ""))

    disp_names = []
    sort_alpha = []
    for _, r in counts.iterrows():
        code = str(r.get("Code", "") or "").strip()
        unm = str(r.get("Unmasked", "") or "").strip()
        label = (unm or code).strip()
        disp = normalize_person_name(label)
        disp_names.append(disp)
        sort_alpha.append((disp or label).lower())

    counts["Disp"] = disp_names
    counts["SortAlpha"] = sort_alpha
    counts = counts.sort_values(by=["SortAlpha", "Count"], ascending=[True, False], kind="mergesort").reset_index(drop=True)

    total_participants = int(len(counts))
    page_title = "Network participants (matches): %d" % total_participants

    html = []
    html.append(_partial_head(page_title, helper_css=_MATCH_COUNT_ONECOL_CSS))

    html.append('<div class="tile-head"><span class="th-left">Match to</span><span class="th-right">Count</span></div>')

    html.append('<table id="ref-table" class="sortable" border="1"><thead><tr>')
    html.append('<th>Match to</th><th>Count</th>')
    html.append("</tr></thead><tbody id=\"ref-tb\">")

    for _, r in counts.iterrows():
        code = str(r.get("Code", "") or "").strip()
        cnt = int(str(r.get("Count", "0")).strip() or "0")
        norm_key = _norm_code_for_count(code)

        unm = (str(r.get("Unmasked", "") or "")).strip()
        label = (unm or code).strip()

        disp = str(r.get("Disp", "") or "").strip()
        name_part = _name_bold_last_html(disp) if disp else _html.escape(label, quote=False)

        code_part = ""
        if code:
            code_part = ' <span class="mc-code">(%s)</span>' % _html.escape(code, quote=False)
        disp_html = name_part + code_part

        tr = (
            "<tr data-q=\"%s\" data-count=\"%d\" data-code=\"%s\" data-filter=\"%s\">"
            "<td class=\"mc-name\">%s</td>"
            "<td class=\"mc-count\">"
            "<a href=\"#\" class=\"count-pick\" onclick=\"return ySelToggle(this);\" title=\"Toggle select\">%d</a>"
            "</td></tr>"
            % (
                _html.escape(label, quote=True),
                cnt,
                _html.escape(norm_key, quote=True),
                _html.escape(norm_key, quote=True),
                disp_html,
                cnt,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")

    html.append('<h2 class="centerline">DNA Register rows for selected participant(s)</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead style="display:table-header-group !important;"><tr style="display:table-row !important;">'
        '<th style="display:table-cell !important;">Match to</th>'
        '<th style="display:table-cell !important;">Name</th>'
        '<th style="display:table-cell !important;">cM</th>'
        '<th style="display:table-cell !important;">Match Summary</th>'
        '</tr></thead><tbody>'
    )

    for _, row in main_df.iterrows():
        code_raw = str(row.get(match_col, "")).strip()
        if not code_raw:
            continue
        norm_key = _norm_code_for_count(code_raw)

        match_to_html, name_html, cm_html, header_html = build_register_row(row, id_col, match_col, name_col, cm_col, path_col)

        tr = (
            "<tr data-code=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "</tr>"
            % (
                _html.escape(norm_key, quote=True),
                _html.escape(norm_key, quote=True),
                match_to_html,
                name_html,
                cm_html,
                header_html,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")
    html.append(_partial_tail())
    return "".join(html)

# ---------- 8) Lineage Count partial ----------
def build_lineage_count_partial(main_df: pd.DataFrame, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str) -> str:
    first_series = (
        main_df.get("First Ancestor", pd.Series(dtype=str))
        .astype(str)
        .map(lambda x: x.strip())
    )
    vc = first_series[first_series != ""].value_counts(dropna=False)

    lin_df = vc.reset_index()
    if lin_df.shape[1] >= 2:
        lin_df.columns = ["First Ancestor", "Count"]
    else:
        lin_df["First Ancestor"] = lin_df.index.astype(str)
        lin_df["Count"] = vc.values
        lin_df = lin_df[["First Ancestor", "Count"]]

    vis_labels = []
    sort_alpha = []
    for _, r in lin_df.iterrows():
        raw = str(r.get("First Ancestor", "")).strip()
        vis = normalize_parents_label(raw) or raw
        vis_labels.append(vis)
        sort_alpha.append(vis.lower())
    lin_df["VisLabel"] = vis_labels
    lin_df["SortAlpha"] = sort_alpha

    lin_df = lin_df.sort_values(["Count", "First Ancestor"], ascending=[False, True], kind="mergesort").reset_index(drop=True)

    html = []
    html.append(_partial_head("Lineage Count", helper_css=_LINEAGE_COUNT_ONECOL_CSS))
    html[-1] = html[-1].replace('<div class="table-scroll">', '<div class="table-scroll lineage-scroll">', 1)

    html.append('<div class="tile-head"><span class="th-left">First Ancestor</span><span class="th-right">Count</span></div>')

    html.append('<table id="ref-table" class="sortable" border="1"><thead><tr>')
    html.append('<th>First Ancestor</th><th>Count</th>')
    html.append("</tr></thead><tbody id=\"ref-tb\">")

    for _, r in lin_df.iterrows():
        first_raw = str(r.get("First Ancestor", "")).strip()
        cnt = int(str(r.get("Count", "0")).strip() or "0")
        lineage_key = first_raw

        parents_html = _lineage_label_with_links(first_raw)

        tr = (
            "<tr data-q=\"%s\" data-count=\"%d\" data-lineage=\"%s\" data-filter=\"%s\">"
            "<td class=\"lc-name\">%s</td>"
            "<td class=\"lc-count\">"
            "<a href=\"#\" class=\"count-pick\" onclick=\"return ySelToggle(this);\" title=\"Toggle select\">%d</a>"
            "</td>"
            "</tr>"
            % (
                _html.escape(first_raw, quote=True),
                cnt,
                _html.escape(lineage_key, quote=True),
                _html.escape(lineage_key, quote=True),
                parents_html,
                cnt,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")

    html.append("</div>")
    html.append('<div class="table-scroll reg-scroll">\n')

    html.append('<h2 class="centerline">DNA Register rows for selected lineage(s)</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead style="display:table-header-group !important;"><tr style="display:table-row !important;">'
        '<th style="display:table-cell !important;">Match to</th>'
        '<th style="display:table-cell !important;">Name</th>'
        '<th style="display:table-cell !important;">cM</th>'
        '<th style="display:table-cell !important;">Match Summary</th>'
        '</tr></thead><tbody>'
    )

    for _, row in main_df.iterrows():
        first = str(row.get("First Ancestor", "")).strip()
        if not first:
            continue

        match_to_html, name_html, cm_html, header_html = build_register_row(row, id_col, match_col, name_col, cm_col, path_col)

        tr = (
            "<tr data-lineage=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "</tr>"
            % (
                _html.escape(first, quote=True),
                _html.escape(first, quote=True),
                match_to_html,
                name_html,
                cm_html,
                header_html,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")
    html.append(_partial_tail())
    return "".join(html)

# ---------- 9) Cousin printable partial ----------
def build_cousin_print_partial(main_df: pd.DataFrame, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str) -> str:
    rows = []
    for _, row in main_df.iterrows():
        subject_raw = row.get(match_col, "")
        subject_name = normalize_person_name(MATCH_TO_UNMASKED.get(str(subject_raw).strip().lower(), subject_raw))
        subject_name_html = "<strong>%s</strong>" % subject_name if subject_name else ""

        pid = extract_person_id(row.get(id_col, ""))

        matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
        if pid:
            matchee_name_html = (
                '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
                'target="_blank" rel="noopener">%s</a>'
                % (TNG_BASE, pid, TNG_TREE, matchee_name)
            )
        else:
            matchee_name_html = matchee_name

        cm_val = row.get(cm_col, "0")
        tokens = split_tokens(row.get(path_col, ""))
        gens_total = len(tokens)

        if "common_husband" in main_df.columns and "common_wife" in main_df.columns:
            husband_raw = str(row.get("common_husband", "")).strip()
            wife_raw = str(row.get("common_wife", "")).strip()
            if not husband_raw and not wife_raw:
                husband_raw, wife_raw = derive_common_from_first_token(tokens)
        else:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)

        header_html = build_header(subject_name_html or subject_name, cm_val, matchee_name_html, gens_total, husband_raw, wife_raw)
        rows.append(header_html)

    rows_sorted = sorted(rows)

    html_rows = [
        '<table border="1" id="refactor-table" class="sortable"><thead><tr><th>Match Summary</th></tr></thead><tbody>'
    ]
    for v in rows_sorted:
        html_rows.append("<tr><td>%s</td></tr>" % v)
    html_rows.append("</tbody></table>")

    cousin_html = (
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
        "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\"><head>"
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-15\" />"
        "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />"
        "<title>Cousin List (Printable)</title>"
        + HEAD_LINK +
        "</head><body onload=\"window.print();\">"
        "<div class=\"wrap\">"
        "<h1 class=\"centerline\">Cousin List (Printable)</h1>"
        "<div class=\"table-scroll\">%s</div>"
        "</div></body></html>"
        % "".join(html_rows)
    )
    return cousin_html

# ---------- 10) Main driver ----------
def main():
    encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
    last_err = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(CSV_IN, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as ex:
            last_err = ex
            df = None
    if df is None:
        raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, last_err))

    print("[OK] Loaded CSV for counts: %d rows, %d cols" % (len(df), len(df.columns)))

    id_col = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
    match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
    name_col = find_col(df, [r"^name$"], ["Name"])
    cm_col = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
    path_col = find_col(df, [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"], ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"])

    if not match_col:
        raise ValueError("CSV missing 'Match to' column (try headings like 'Match to' or 'Match').")
    if not path_col:
        raise ValueError("CSV missing lineage/path column for First Ancestor.")
    if not name_col:
        raise ValueError("CSV missing 'Name' column.")
    if not cm_col:
        raise ValueError("CSV missing 'cM' column.")
    if not id_col:
        raise ValueError("CSV missing an ID#/PersonID column.")

    first_ancestors = []
    for _, row in df.iterrows():
        tokens = split_tokens(row.get(path_col, ""))
        first_ancestors.append(_clean_piece(tokens[0]) if tokens else "")
    df["First Ancestor"] = first_ancestors

    _setup_resolver()
    os.makedirs("partials", exist_ok=True)

    mc_html = build_match_count_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(MATCH_COUNT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(mc_html)
    print("[OK] Wrote partial:", os.path.abspath(MATCH_COUNT_LOCAL))

    lc_html = build_lineage_count_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(LINEAGE_COUNT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(lc_html)
    print("[OK] Wrote partial:", os.path.abspath(LINEAGE_COUNT_LOCAL))

    cousin_html = build_cousin_print_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(COUSIN_PRINT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(cousin_html)
    print("[OK] Wrote partial:", os.path.abspath(COUSIN_PRINT_LOCAL))

    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return

    try:
        ftps = ftp_connect()
        try:
            ftp_upload_overwrite(ftps, MATCH_COUNT_LOCAL, _remote_path(MATCH_COUNT_REMOTE))
            ftp_upload_overwrite(ftps, LINEAGE_COUNT_LOCAL, _remote_path(LINEAGE_COUNT_REMOTE))
            ftp_upload_overwrite(ftps, COUSIN_PRINT_LOCAL, _remote_path(COUSIN_PRINT_REMOTE))
        except Exception as e:
            print("[WARN] Upload partials failed:", e)

        print("\n--- SIZE Verification (if supported) ---")
        for p in [_remote_path(MATCH_COUNT_REMOTE), _remote_path(LINEAGE_COUNT_REMOTE), _remote_path(COUSIN_PRINT_REMOTE)]:
            sz = ftp_size(ftps, p)
            print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URLs ---")
        print("Match Count:      https://yates.one-name.net/partials/match_count.shtml")
        print("Lineage Count:    https://yates.one-name.net/partials/lineage_count.shtml")
        print("Cousin Printable: https://yates.one-name.net/partials/cousin_list_print.htm")
    except Exception as e:
        print("[FAIL] FTP session:", e)
        traceback.print_exc()

if __name__ == "__main__":
    main()
# ====== CUT STOP [1/1] CELL 2b (Unified headers from Cell 2 + Cell 2b functionality) ======


[CONFIRM] Golden Rules active | Cell=Cell2b_Counts_UnifiedHeaders | Version=2026.02.01-CELL2HDR-CELL2B1 | Encoding=ISO-8859-15
[AUDIT] DECLARED_LINES=-1
[VITALS] autosomal (after manual filter): 226
[VITALS] last updated (display): February 1, 2026 8:30 AM
[OK] Loaded CSV for counts: 226 rows, 6 cols
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 94 codes
[OK] Wrote partial: /content/partials/match_count.shtml
[OK] Wrote partial: /content/partials/lineage_count.shtml
[OK] Wrote partial: /content/partials/cousin_list_print.htm
[PUT] partials/match_count.shtml -> partials/match_count.shtml
[PUT] partials/lineage_count.shtml -> partials/lineage_count.shtml
[PUT] partials/cousin_list_print.htm -> partials/cousin_list_print.htm

--- SIZE Verification (if supported) ---
partials/match_count.shtml : 160850
partials/lineage_count.shtml : 210518
partials/cousin_list_print.htm : 81504

--- Open URLs ---
Match Count:      https://yates.one-name.net/par

# CELL 2b_NetworkAuthority

In [None]:
# ====== CUT START [1/1] CELL 2b_NetworkAuthority - Write dna_network_first_ancestors.txt ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.12.06-G1)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; all file writes use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - Reads the same master CSV used by Cell 2b and derives a de-duplicated
#   list of "first ancestors" from the lineage/ancestral-line column.
# - Writes that list as /content/dna_network_first_ancestors.txt, one per line.
# - This file is then consumed by Cell 2d (SaveNetworkAuthority) and Cell 2c
#   (Match Specific Produced DNA Network).
# - Deterministic audit banner:
#   [CONFIRM] Golden Rules active | Cell=Cell2b_NetworkAuthority | Version=2025.12.06 | Encoding=ISO-8859-15 | DECLARED_LINES=160

DECLARED_LINES = 160
print(
    "[CONFIRM] Golden Rules active | "
    "Cell=Cell2b_NetworkAuthority | "
    "Version=2025.12.06 | "
    "Encoding=ISO-8859-15 | "
    "DECLARED_LINES=%d" % DECLARED_LINES
)

import os
import re
import traceback

import pandas as pd

# ---------- 1) Config ----------

# This should match the master CSV name used by Cell 2b.
MASTER_CSV = "final_combined_df_with_value_labels.csv"

# Authority file that Cell 2d expects and that Cell 2c already uses.
AUTHORITY_PATH = "dna_network_first_ancestors.txt"

# ---------- 2) Helpers ----------

def _clean_piece(text):
    \"\"\"Normalize whitespace and tildes inside a lineage token.\"\"\"
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

# Same separator logic we used in Cell 2c: split a lineage path into tokens.
SEP_RE = re.compile(
    r"\s*(?:"
    r"\u2192"          # unicode right arrow
    r"|&rarr;"         # HTML right arrow
    r"|;|>|,"
    r"|~{2,}"          # multiple tildes
    r"|/{2,}"          # double slash
    r"|\|{2,}"         # double pipe
    r")\s*"
)

def split_tokens(s):
    \"\"\"Split a lineage path string into tokens using SEP_RE.\"\"\"
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def find_col(df, patterns, prefer_exact=None):
    \"\"\"
    Find a column in df using regex patterns and optional preferred names.
    Returns the column name or None.
    \"\"\"
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    # Preferred exact names first
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    # Otherwise use regex patterns
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

# ---------- 3) Load master CSV ----------

df = None
_last_err = None
for enc in ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1"):
    try:
        df = pd.read_csv(MASTER_CSV, dtype=str, keep_default_na=False, encoding=enc)
        break
    except Exception as e:
        _last_err = e
        df = None

if df is None:
    raise SystemExit(
        "[ERROR] Unable to read CSV: %s (%r)" % (MASTER_CSV, _last_err)
    )

print(
    "[OK] Loaded master CSV for network authority: %d rows, %d cols"
    % (len(df), len(df.columns))
)

# ---------- 4) Identify lineage / ancestral-line column ----------

line_col = find_col(
    df,
    patterns=[
        r"(yates\s*dna\s*ancestral\s*line)",
        r"(ancestral\s*line)",
        r"(lineage)"
    ],
    prefer_exact=[
        "Yates DNA Ancestral Line",
        "Ancestral Line",
        "Lineage",
    ],
)

if not line_col:
    raise SystemExit(
        "[ERROR] Cannot find lineage/ancestral-line column in master CSV."
    )

print("[OK] Using lineage column:", repr(line_col))

# ---------- 5) Derive first ancestors and de-duplicate ----------

first_ancestors = []

for _, row in df.iterrows():
    tokens = split_tokens(row.get(line_col, ""))
    first = _clean_piece(tokens[0]) if tokens else ""
    if first:
        first_ancestors.append(first)

total_raw = len(first_ancestors)
unique_list = []
seen = set()

# Preserve original encounter order, but discard duplicates and empties.
for anc in first_ancestors:
    key = anc.strip()
    if not key:
        continue
    if key in seen:
        continue
    seen.add(key)
    unique_list.append(key)

print("[INFO] Total first-ancestor tokens collected: %d" % total_raw)
print("[INFO] Unique non-empty first ancestors:    %d" % len(unique_list))

# Optional: show a short preview in the notebook
for idx, anc in enumerate(unique_list[:25], start=1):
    print("  %2d. %s" % (idx, anc))

if not unique_list:
    print("[WARN] No non-empty first ancestors found. Authority file will NOT be written.")
else:
    # ---------- 6) Write dna_network_first_ancestors.txt ----------
    try:
        # Ensure directory exists (AUTHORITY_PATH is just a filename in /content).
        os.makedirs(os.path.dirname(AUTHORITY_PATH) or ".", exist_ok=True)
        with open(
            AUTHORITY_PATH,
            "w",
            encoding="iso-8859-15",
            errors="xmlcharrefreplace",
        ) as f:
            for anc in unique_list:
                f.write(anc.strip() + "\n")
        print("[OK] Wrote authority file:", os.path.abspath(AUTHORITY_PATH))
        print("[OK] Lines written:", len(unique_list))
    except Exception as e:
        print("[ERROR] Failed to write authority file:", e)
        traceback.print_exc()

print("\n--- Cell2b_NetworkAuthority complete (authority file ready for Cell 2d) ---")
# ====== CUT STOP [1/1] CELL 2b_NetworkAuthority - Write dna_network_first_ancestors.txt ======


[CONFIRM] Golden Rules active | Cell=Cell2b_NetworkAuthority | Version=2025.12.06 | Encoding=ISO-8859-15 | DECLARED_LINES=160
[OK] Loaded master CSV for network authority: 93 rows, 6 cols
[OK] Using lineage column: 'Yates DNA Ancestral Line'
[INFO] Total first-ancestor tokens collected: 93
[INFO] Unique non-empty first ancestors:    26
   1. RobinsonWallaceAl&JonesSallieMcL
   2. UnknownName&FordVeatriceM
   3. YatesFrancis&TichborneJane
   4. YatesJohn&BarfieldElizabeth
   5. YatesJohn&GaterJoane
   6. YatesThomas&SearchingStill
   7. YatesThomas&SmithMary
   8. YatesUriah&OakesSarah
   9. YatesWilliam&BoothAnamariaE
  10. YatesWilliam&EdwardsElizabeth
  11. YatesWilliam&HouseHannah
  12. YatesWilliam&NeedhamMary
  13. YatesWilliam&ParkerSally
  14. YatesWilliam&PikeEsther
  15. YatesWilliam&RidyardAnn
  16. YatesWilliam&SaltPhoebe
  17. YatesWilliam&ThornburyAnne
  18. YatesWilliamBa&BullockMartha
  19. YatesWilliamCh&McManusEmilyMill
  20. YatesWilliamJo&HolsteadSarahJane
  21. Yate

# Cell 2c

In [None]:
# ====== CUT START [1/1] CELL 2c - Match Specific Produced DNA Network ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.12.06-G4)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; all file writes use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography/layout via /partials/dna_tree_styles.css (linked only).
# - Deterministic audit banner:
#   [CONFIRM] Golden Rules active | Cell=Cell2c_DNANetwork | Version=2025.12.06-G4 | Encoding=ISO-8859-15
# - Enforce ISO-8859-15 printable chars on writes.

print("[CONFIRM] Golden Rules active | Cell=Cell2c_DNANetwork | Version=2025.12.06-G4 | Encoding=ISO-8859-15")

import os
import re
import posixpath
import socket
import traceback
from datetime import datetime, timedelta
from ftplib import FTP_TLS

import pandas as pd
import html as _html

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore

    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

SERVER_PARTIALS_DIR = "partials"
SERVER_MAPPING_BASENAME = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

FTP_DIR = (os.environ.get("FTP_DIR", "") or "").strip()

STYLESHEET_BASENAME = "dna_tree_styles.css"
CSS_VERSION = "v2025-11-12-max"
STYLESHEET_HREF = "/partials/%s?%s" % (STYLESHEET_BASENAME, CSS_VERSION)
HEAD_LINK = '<link rel="stylesheet" type="text/css" href="%s" />' % STYLESHEET_HREF

NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

VITALS_LOCAL = "dna_vitals.csv"

TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

DNA_NETWORK_LOCAL = os.path.join("partials", "dna_network.shtml")
DNA_NETWORK_REMOTE = posixpath.join("partials", "dna_network.shtml")

# Authority list is maintained on the server at /dna/network_first_ancestors.txt
DNA_NETWORK_AUTH_REMOTE = "dna/network_first_ancestors.txt"
DNA_NETWORK_AUTH_LOCAL = "dna_network_first_ancestors.txt"

# ---------- 1a) Load vitals ----------
def _friendly_ts_from_utc(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return "(unknown)"
    raw_clean = raw.replace("UTC", "").replace("utc", "").strip()
    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(raw_clean, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    dt_est = dt_utc - timedelta(hours=5)
    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24 = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12 = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _load_vitals(path: str):
    autosomal = "(unknown)"
    last_updated_display = "(unknown)"
    if not os.path.exists(path):
        print("[WARN] dna_vitals.csv not found; using '(unknown)' for header vitals.")
        return autosomal, last_updated_display
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    df_v = None
    last_err = None
    for enc in encs:
        try:
            df_v = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as ex:
            last_err = ex
            df_v = None
    if df_v is None or "line" not in df_v.columns:
        print("[WARN] Unable to read dna_vitals.csv or missing 'line' column:", last_err)
        return autosomal, last_updated_display
    for raw_line in df_v["line"].astype(str):
        line = raw_line.strip()
        low = line.lower()
        if low.startswith("after manual filter, total records:"):
            m = re.search(r"(\d[\d,]*)", line)
            if m:
                num_raw = m.group(1).replace(",", "")
                try:
                    autosomal = "{:,}".format(int(num_raw))
                except Exception:
                    autosomal = num_raw
        elif low.startswith("last_updated_text:"):
            ts = line.split(":", 1)[1].strip()
            last_updated_display = _friendly_ts_from_utc(ts)
    print("[VITALS] autosomal (after manual filter): %s" % autosomal)
    print("[VITALS] last updated (display): %s" % last_updated_display)
    return autosomal, last_updated_display

AUTOSOMAL_MATCHES_TEXT, LAST_UPDATED_DISPLAY = _load_vitals(VITALS_LOCAL)

# ---------- 2) FTP helpers ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    parts = [p for p in remote_path.split("/")[:-1] if p and p != "."]
    for seg in parts:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

def sync_authority_from_server():
    """
    Pull /dna/network_first_ancestors.txt from the server (if it exists)
    into DNA_NETWORK_AUTH_LOCAL.
    """
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[INFO] FTP creds missing; authority sync skipped.")
        return
    try:
        ftps = ftp_connect()
        remote = _remote_path(DNA_NETWORK_AUTH_REMOTE)
        ok = ftp_download_if_exists(ftps, remote, DNA_NETWORK_AUTH_LOCAL)
        try:
            ftps.quit()
        except Exception:
            pass
        if ok:
            print("[INFO] Authority file synced from server.")
    except Exception as e:
        print("[WARN] Authority sync failed:", e)
        traceback.print_exc()

# ---------- 3) Resolver ----------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError("Unable to read mapping CSV %s: %s" % (path, last))
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"] = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return df

def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = False
        try:
            local_name = SERVER_MAPPING_LOCAL_CACHE
            with open(local_name, "wb") as f:
                ftps.retrbinary("RETR %s" % SERVER_MAPPING_BASENAME, f.write)
            print("[PULL] %s -> %s" % (SERVER_MAPPING_BASENAME, os.path.abspath(local_name)))
            ok = True
        except Exception as e:
            try:
                if os.path.exists(SERVER_MAPPING_LOCAL_CACHE):
                    os.remove(SERVER_MAPPING_LOCAL_CACHE)
            except Exception:
                pass
            print("[MISS] %s (%s)" % (SERVER_MAPPING_BASENAME, e))
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

# ---------- 4) Helpers ----------
def find_col(df, patterns, prefer_exact=None):
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

# ASCII-only separators.
SEP_RE = re.compile(r"\s*(?:->|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {
    "de","del","della","der","van","von","da","dos","das","di","la","le","du","of",
}

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(['&#8217;])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    if "-" in token:
        token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])", lambda m: "Mc" + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i - 1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

ID_PAT = re.compile(r"\bI\d+\b", re.I)

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

def derive_common_from_first_token(tokens):
    if not tokens:
        return ("", "")
    first = _clean_piece(tokens[0])
    parts = re.split(r"\s*(?:&| and )\s*", first, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return ("", "")
    def _norm(s):
        return smart_titlecase(s) if " " in s else smart_titlecase(surname_given_from_token(s)[0])
    return (_norm(parts[0]), _norm(parts[1]))

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_name_html, cm_val, matchee_name_html, gens, husband, wife):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        "%s & %s." % (husband, wife),
    ]
    s = " ".join(parts)
    s = re.sub(r"\.\s*$", "", s)
    return s

# ---------- 5) HTML shell ----------
def _partial_head(title):
    return (
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
        " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n"
        + "%s\n" % HEAD_LINK
        + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-15\" />\n"
        + "<title>%s</title>\n" % _html.escape(title)
        + "</head>\n<body>\n<div class=\"wrap\">\n"
        + "<h1 class=\"centerline\">%s</h1>\n" % _html.escape(title)
        + "<div class=\"updated centerline\">"
        "Last updated: %s &nbsp;|&nbsp; "
        "Showing: %s"
        "</div>\n" % (
            _html.escape(LAST_UPDATED_DISPLAY),
            _html.escape(AUTOSOMAL_MATCHES_TEXT),
        )
        + NAV_BLOCK
        + "\n"
        + "<div class=\"selection-menu centerline\">"
        "Showing: "
        "<a href=\"#\" onclick=\"return ySelShowSelected('ref-tb');\">Selected</a> &nbsp;|&nbsp; "
        "<a href=\"#\" onclick=\"return ySelShowAll('ref-tb');\">All</a> &nbsp;|&nbsp; "
        "<a href=\"#\" onclick=\"return ySelReset('ref-tb');\">Reset</a>"
        "</div>\n"
        + "<div class=\"table-scroll\">\n"
    )

def _partial_tail():
    return (
        "</div>\n</div>\n"
        "<script type=\"text/javascript\">\n//<![CDATA[\n"
        "(function(){"
        "function ySelEachRow(tb, cb){"
        " if(!tb) return;"
        " var rows=tb.getElementsByTagName('tr');"
        " for(var i=0;i<rows.length;i++){cb(rows[i]);}"
        "}"
        "function ySelClear(tr){"
        " if(!tr) return;"
        " tr.removeAttribute('data-selected');"
        " var cls=tr.className||'';"
        " cls=cls.replace(/\\bsel-row\\b/g,'').replace(/\\s{2,}/g,' ').replace(/^\\s+|\\s+$/g,'');"
        " tr.className=cls;"
        " tr.style.backgroundColor='';"
        "}"
        "function ySelToggle(a){"
        " var tr=a;"
        " while(tr&&tr.tagName&&tr.tagName.toLowerCase()!=='tr'){tr=tr.parentNode;}"
        " if(!tr) return false;"
        " var sel=tr.getAttribute('data-selected')==='1';"
        " if(sel){"
        "  ySelClear(tr);"
        " }else{"
        "  tr.setAttribute('data-selected','1');"
        "  var cls=tr.className||'';"
        "  if(cls.indexOf('sel-row')===-1){tr.className=(cls?(cls+' '):'')+'sel-row';}"
        "  tr.style.backgroundColor='#fff2cc';"
        " }"
        " return false;"
        "}"
        "function ySelGetTBody(tbodyId){"
        " var tb=document.getElementById(tbodyId);"
        " return tb || null;"
        "}"
        "function ySelShowSelected(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){"
        "  var sel=tr.getAttribute('data-selected')==='1';"
        "  tr.style.display=sel?'':'none';"
        " });"
        " return false;"
        "}"
        "function ySelShowAll(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){tr.style.display='';});"
        " return false;"
        "}"
        "function ySelReset(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){tr.style.display=''; ySelClear(tr);});"
        " return false;"
        "}"
        "window.ySelToggle=ySelToggle;"
        "window.ySelShowSelected=ySelShowSelected;"
        "window.ySelShowAll=ySelShowAll;"
        "window.ySelReset=ySelReset;"
        "})();"
        "(function(){"
        " function collectFirstAncestors(){"
        "  var rows=document.querySelectorAll('#ref-tb tr');"
        "  var seen=Object.create(null);"
        "  var out=[];"
        "  for(var i=0;i<rows.length;i++){"
        "    var cells=rows[i].getElementsByTagName('td');"
        "    if(!cells.length) continue;"
        "    var txt=(cells[0].textContent||cells[0].innerText||'').replace(/\\s+/g,' ').trim();"
        "    if(!txt) continue;"
        "    if(!seen[txt]){seen[txt]=true; out.push(txt);}"
        "  }"
        "  return out;"
        " }"
        " function setStatus(msg,isError){"
        "  var span=document.getElementById('save-network-status');"
        "  if(!span) return;"
        "  span.textContent=msg;"
        "  span.style.color=isError?'#990000':'#006600';"
        " }"
        " function onClickSaveNetwork(){"
        "  var ancestors=collectFirstAncestors();"
        "  if(!ancestors.length){"
        "    setStatus('No ancestors found to save.',true);"
        "    return;"
        "  }"
        "  setStatus('Saving '+ancestors.length+' ancestors...',false);"
        "  fetch('/dna/save_network.php',{"
        "    method:'POST',"
        "    headers:{'Content-Type':'application/json'},"
        "    body:JSON.stringify({ancestors:ancestors})"
        "  }).then(function(resp){"
        "    if(!resp.ok){throw new Error('HTTP '+resp.status);}"
        "    return resp.json();"
        "  }).then(function(data){"
        "    if(data&&data.status==='ok'){"
        "      var n=(typeof data.saved==='number')?data.saved:ancestors.length;"
        "      setStatus('Saved '+n+' ancestors to network authority.',false);"
        "    }else{"
        "      setStatus('Unexpected response while saving network.',true);"
        "    }"
        "  }).catch(function(err){"
        "    setStatus('Save failed: '+err,true);"
        "  });"
        " }"
        " function init(){"
        "  var btn=document.getElementById('save-network-btn');"
        "  if(!btn) return;"
        "  btn.addEventListener('click',onClickSaveNetwork,false);"
        " }"
        " if(document.readyState==='loading'){"
        "  document.addEventListener('DOMContentLoaded',init,false);"
        " }else{"
        "  init();"
        " }"
        "})();"
        "\n//]]>\n</script>\n</body>\n</html>"
    )

# ---------- 6) DNA-register-style row builder ----------
def build_register_row(
    row,
    id_col: str,
    match_col: str,
    name_col: str,
    cm_col: str,
    path_col: str,
):
    subject_raw = row.get(match_col, "")
    key = str(subject_raw).strip().lower()
    subject_unmasked = MATCH_TO_UNMASKED.get(key, subject_raw)
    subject_name = normalize_person_name(subject_unmasked)
    subject_name_html = _html.escape(subject_name or "")

    pid = extract_person_id(row.get(id_col, ""))

    matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
    if pid:
        matchee_html = (
            '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
            'target="_blank" rel="noopener">%s</a>'
            % (TNG_BASE, pid, TNG_TREE, _html.escape(matchee_name or "", quote=False))
        )
    else:
        matchee_html = _html.escape(matchee_name or "", quote=False)

    cm_val = row.get(cm_col, "0")
    tokens = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)

    if "common_husband" in row.index and "common_wife" in row.index:
        husband_raw = str(row.get("common_husband", "")).strip()
        wife_raw = str(row.get("common_wife", "")).strip()
        if not husband_raw and not wife_raw:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)
    else:
        husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(
        subject_name_html or subject_name,
        cm_val,
        matchee_html,
        gens_total,
        husband_raw,
        wife_raw,
    )

    return subject_name_html, matchee_html, _html.escape(str(cm_val).strip()), header_html

# ---------- 7) Network authority + page builder ----------
def _load_network_authority(path: str):
    if not os.path.exists(path):
        print("[INFO] dna_network_first_ancestors.txt not found; using all First Ancestors.")
        return []
    vals = []
    with open(path, "r", encoding="iso-8859-15", errors="xmlcharrefreplace") as fh:
        for line in fh:
            t = line.strip()
            if t:
                vals.append(t)
    if not vals:
        print("[INFO] dna_network_first_ancestors.txt is empty; using all First Ancestors.")
        return []
    print("[INFO] Loaded %d authority First Ancestors from %s" % (len(vals), path))
    if vals:
        preview = vals[:10]
        print("[INFO] First 10 authority entries:")
        for i, fa in enumerate(preview, 1):
            print("  %2d. %s" % (i, fa))
    return vals

def build_network_partial(
    main_df: pd.DataFrame,
    id_col: str,
    match_col: str,
    name_col: str,
    cm_col: str,
    path_col: str,
) -> str:
    # Build First Ancestor (raw, not normalized) and full lineage
    first_ancestors = []
    full_lineages = []
    for _, row in main_df.iterrows():
        path_raw = str(row.get(path_col, "") or "")
        tokens = split_tokens(path_raw)
        first_ancestors.append(_clean_piece(tokens[0]) if tokens else "")
        full_lineages.append(_clean_piece(path_raw))
    df = main_df.copy()
    df["First Ancestor"] = first_ancestors
    df["Full Lineage"] = full_lineages

    # Apply authority filter if present
    auth_vals = _load_network_authority(DNA_NETWORK_AUTH_LOCAL)
    if auth_vals:
        auth_set = set(auth_vals)
        before = len(df)
        df = df[df["First Ancestor"].isin(auth_set)].copy()
        after = len(df)
        print("[INFO] Filtered DNA network rows by authority list: %d -> %d" % (before, after))
        if after == 0:
            print("[WARN] Authority filter eliminated all rows; falling back to full dataset.")
            df = main_df.copy()
            df["First Ancestor"] = first_ancestors
            df["Full Lineage"] = full_lineages
    else:
        print("[INFO] No authority filter applied; DNA network uses all rows.")

    # Summary counts for top table (deduplicated First Ancestors)
    first_series = df["First Ancestor"].astype(str).map(lambda x: x.strip())
    vc = first_series[first_series != ""].value_counts(dropna=False)

    lin_df = vc.reset_index()
    if lin_df.shape[1] >= 2:
        lin_df.columns = ["First Ancestor", "Count"]
    else:
        lin_df["First Ancestor"] = lin_df.index.astype(str)
        lin_df["Count"] = vc.values
        lin_df = lin_df[["First Ancestor", "Count"]]

    lin_df = lin_df.sort_values(
        ["Count", "First Ancestor"],
        ascending=[False, True],
        kind="mergesort",
    ).reset_index(drop=True)

    html = []
    title = "Match Specific Produced DNA Network"
    html.append(_partial_head(title))

    # 7a) First Ancestor summary table (top, deduplicated)
    html.append('<h2 class="centerline">Match Specific Produced DNA Network</h2>')
    html.append(
        '<table id="ref-table" class="sortable" border="1">'
        '<thead><tr>'
        '<th style="width:80%">First Ancestor</th>'
        '<th style="width:20%">Showing</th>'
        '</tr></thead><tbody id="ref-tb">'
    )
    for _, r in lin_df.iterrows():
        first = str(r.get("First Ancestor", "")).strip()
        cnt = int(str(r.get("Count", "0")).strip() or "0")
        tr = (
            "<tr data-lineage=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td class=\"count\">%d</td>"
            "</tr>"
            % (
                _html.escape(first, quote=True),
                _html.escape(first, quote=True),
                _html.escape(first),
                cnt,
            )
        )
        html.append(tr)
    html.append("</tbody></table>")

    # 7a.1) Button + status line to push current First Ancestors to /dna/save_network.php
    html.append(
        '<div class="centerline" style="margin:10px 0 15px 0;">'
        '<button id="save-network-btn" type="button">Update DNA Network Authority</button>'
        '<span id="save-network-status" style="margin-left:8px; font-size:90%;"></span>'
        '</div>'
    )

    # 7b) DNA Register rows table (below)
    html.append('<h2 class="centerline">DNA Register rows for produced DNA network</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead><tr>'
        '<th>Match to</th>'
        '<th>First Ancestor</th>'
        '<th>cM</th>'
        '<th>Full Lineage</th>'
        '<th>Match Summary</th>'
        '</tr></thead><tbody>'
    )
    for _, row in df.iterrows():
        match_to_html, name_html, cm_html, header_html = build_register_row(
            row, id_col, match_col, name_col, cm_col, path_col
        )
        first = _html.escape(str(row.get("First Ancestor", "")).strip())
        lineage = _html.escape(str(row.get("Full Lineage", "")).strip())

        tr = (
            "<tr>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "</tr>"
            % (match_to_html, first, cm_html, lineage, header_html)
        )
        html.append(tr)
    html.append("</tbody></table>")

    html.append(_partial_tail())
    return "".join(html)

# ---------- 8) Main driver ----------
def main():
    encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
    last_err = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(CSV_IN, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as ex:
            last_err = ex
            df = None
    if df is None:
        raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, last_err))

    print("[OK] Loaded CSV for DNA Network: %d rows, %d cols" % (len(df), len(df.columns)))

    id_col = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
    match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
    name_col = find_col(df, [r"^name$"], ["Name"])
    cm_col = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
    path_col = find_col(
        df,
        [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
        ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"],
    )

    if not match_col:
        raise ValueError("CSV missing 'Match to' column.")
    if not path_col:
        raise ValueError("CSV missing lineage/path column.")
    if not name_col:
        raise ValueError("CSV missing 'Name' column.")
    if not cm_col:
        raise ValueError("CSV missing 'cM' column.")
    if not id_col:
        raise ValueError("CSV missing an ID#/PersonID column.")

    # Sync the authority list from the server (dna/network_first_ancestors.txt)
    sync_authority_from_server()

    _setup_resolver()
    os.makedirs("partials", exist_ok=True)

    network_html = build_network_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(
        DNA_NETWORK_LOCAL,
        "w",
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    ) as f:
        f.write(network_html)
    print("[OK] Wrote DNA Network partial:", os.path.abspath(DNA_NETWORK_LOCAL))

    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; upload of dna_network.shtml skipped.")
        return

    try:
        ftps = ftp_connect()
        try:
            ftp_upload_overwrite(ftps, DNA_NETWORK_LOCAL, _remote_path(DNA_NETWORK_REMOTE))
        except Exception as e:
            print("[WARN] Upload dna_network.shtml failed:", e)

        print("\n--- SIZE Verification (if supported) ---")
        p = _remote_path(DNA_NETWORK_REMOTE)
        sz = ftp_size(ftps, p)
        print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URL ---")
        print("DNA Network: https://yates.one-name.net/partials/dna_network.shtml")
    except Exception as e:
        print("[FAIL] FTP session for dna_network.shtml:", e)
        traceback.print_exc()

if __name__ == "__main__":
    main()
# ====== CUT STOP [1/1] CELL 2c - Match Specific Produced DNA Network ======


[CONFIRM] Golden Rules active | Cell=Cell2c_DNANetwork | Version=2025.12.06-G4 | Encoding=ISO-8859-15
[VITALS] autosomal (after manual filter): 226
[VITALS] last updated (display): February 1, 2026 8:30 AM
[OK] Loaded CSV for DNA Network: 226 rows, 6 cols
[PULL] dna/network_first_ancestors.txt -> /content/dna_network_first_ancestors.txt
[INFO] Authority file synced from server.
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 94 codes
[INFO] Loaded 1 authority First Ancestors from dna_network_first_ancestors.txt
[INFO] First 10 authority entries:
   1. YatesWilliam&ThornburyAnne
[INFO] Filtered DNA network rows by authority list: 226 -> 0
[WARN] Authority filter eliminated all rows; falling back to full dataset.
[OK] Wrote DNA Network partial: /content/partials/dna_network.shtml
[PUT] partials/dna_network.shtml -> partials/dna_network.shtml

--- SIZE Verification (if supported) ---
partials/dna_network.shtml : 287673

--- Open URL ---
DNA Netw

# Cell 2d

In [None]:
# ====== CUT START [1/1] CELL 2d - Check Network Authority (Server Reader) ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.12.06-G2)
# - Complete & runnable Colab cell, one contiguous block.
# - ASCII-only source; any writes use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - Deterministic audit banner + DECLARED_LINES.
# - Purpose:
#     * Read the current authority list from:
#         https://yates.one-name.net/dna/network_first_ancestors.txt
#     * De-duplicate it and print a clean, ordered list.
# - This cell does NOT write or POST anything; the browser button does that.
# ============================================================================

import re
import requests

DECLARED_LINES = 80
print("[CONFIRM] Golden Rules active | Cell=Cell2d_CheckNetworkAuthority "
      "| Version=2025.12.06-G2 | Encoding=ISO-8859-15 | DECLARED_LINES=%d"
      % DECLARED_LINES)

AUTH_URL = "https://yates.one-name.net/dna/network_first_ancestors.txt"

print("[INFO] Fetching authority from:", AUTH_URL)

try:
    resp = requests.get(AUTH_URL, timeout=20)
    print("[INFO] HTTP status:", resp.status_code)
    if resp.status_code != 200:
        raise SystemExit("[ERROR] Could not fetch authority file.")
    raw_text = resp.text
except Exception as e:
    raise SystemExit("[ERROR] Request failed: %s" % e)

lines = []
for ln in raw_text.splitlines():
    s = ln.strip()
    if not s:
        continue
    if s.startswith("#"):
        continue
    lines.append(s)

seen = {}
unique = []
for s in lines:
    if s not in seen:
        seen[s] = True
        unique.append(s)

print("[INFO] Total lines (including comments/blank):", len(raw_text.splitlines()))
print("[INFO] Unique First Ancestors:", len(unique))

for idx, val in enumerate(unique, 1):
    print(" %2d. %s" % (idx, val))

print("\n--- Cell2d_CheckNetworkAuthority complete ---")
# ====== CUT STOP [1/1] CELL 2d - Check Network Authority ======================


# cell 2k

In [10]:
#!/usr/bin/env python3
# -*- coding: iso-8859-15 -*-
# ====== CUT START [1/1] CELL 2k - Unified DNA Network View (FLOW tiles; header-safe; nav-safe; dynamic Showing) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.02.01-CELL2K-HDRSAFE1)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; all file writes use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional.
# - Uses canonical /partials/partials_unified.css for baseline; adds tiny scoped helper CSS + header-safety CSS.
# - SSI nav include inserted once (no duplication). Optional JS nav repair is NOT used here.
# - Header placement matches Cell 3 pattern: H1 -> UPDATED block -> NAV -> controls -> table.
# - Header "Showing:" is dynamic (selection/filter aware) and reflects visible register rows excluding "No"/excluded.
# - Deterministic audit banner:
#   [CONFIRM] Golden Rules active | Cell=Cell2k_DNANetworkUnified | Version=2026.02.01-CELL2K-HDRSAFE1 | Encoding=ISO-8859-15

print("[CONFIRM] Golden Rules active | Cell=Cell2k_DNANetworkUnified | Version=2026.02.01-CELL2K-HDRSAFE1 | Encoding=ISO-8859-15")

# NOTE: In Colab, the notebook cell text is not reliably introspectable for an exact line-count audit.
DECLARED_LINES = -1
print("[AUDIT] DECLARED_LINES={}".format(DECLARED_LINES))

import os
import re
import posixpath
import socket
import traceback
from datetime import datetime, timedelta
from ftplib import FTP_TLS

import pandas as pd
import html as _html

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

SERVER_PARTIALS_DIR = "partials"
SERVER_MAPPING_BASENAME = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

FTP_DIR = (os.environ.get("FTP_DIR", "") or "").strip().strip("/")

# Baseline stylesheet (same as Cell 3 / the fixed Cell 2c)
UNIFIED_CSS_BASENAME = "partials_unified.css"
UNIFIED_CSS_VERSION  = "v2026-02-01-unified-blue-refactor1"
UNIFIED_CSS_HREF     = "/partials/%s?%s" % (UNIFIED_CSS_BASENAME, UNIFIED_CSS_VERSION)
HEAD_LINK            = '<link rel="stylesheet" type="text/css" href="%s" />' % UNIFIED_CSS_HREF

# SSI navigation include (inserted once)
NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

VITALS_LOCAL = "dna_vitals.csv"

TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

DNA_NETWORK_LOCAL  = os.path.join("partials", "dna_network.shtml")
DNA_NETWORK_REMOTE = posixpath.join("partials", "dna_network.shtml")

REGISTER_CSV_LOCAL  = "dna_network_register.csv"
REGISTER_CSV_REMOTE = "dna/dna_network_register.csv"

# ---------- 1a) Vitals ----------
def _friendly_ts_from_utc(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return "(unknown)"
    raw_clean = raw.replace("UTC", "").replace("utc", "").strip()
    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(raw_clean, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    # Site convention: EST = UTC-5 (no DST)
    dt_est = dt_utc - timedelta(hours=5)
    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24 = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12 = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _load_vitals(path: str):
    autosomal = "(unknown)"
    last_updated_display = "(unknown)"
    if not os.path.exists(path):
        print("[WARN] dna_vitals.csv not found; using '(unknown)' for header vitals.")
        return autosomal, last_updated_display
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    df_v = None
    last_err = None
    for enc in encs:
        try:
            df_v = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as ex:
            last_err = ex
            df_v = None
    if df_v is None or "line" not in df_v.columns:
        print("[WARN] Unable to read dna_vitals.csv or missing 'line' column:", last_err)
        return autosomal, last_updated_display
    for raw_line in df_v["line"].astype(str):
        line = raw_line.strip()
        low = line.lower()
        if low.startswith("after manual filter, total records:"):
            m = re.search(r"(\d[\d,]*)", line)
            if m:
                num_raw = m.group(1).replace(",", "")
                try:
                    autosomal = "{:,}".format(int(num_raw))
                except Exception:
                    autosomal = num_raw
        elif low.startswith("last_updated_text:"):
            ts = line.split(":", 1)[1].strip()
            last_updated_display = _friendly_ts_from_utc(ts)
    print("[VITALS] autosomal (after manual filter): %s" % autosomal)
    print("[VITALS] last updated (display): %s" % last_updated_display)
    return autosomal, last_updated_display

AUTOSOMAL_MATCHES_TEXT, LAST_UPDATED_DISPLAY = _load_vitals(VITALS_LOCAL)

# ---------- 2) FTP helpers ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    parts = [p for p in remote_path.split("/")[:-1] if p and p != "."]
    for seg in parts:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

# ---------- 3) Resolver ----------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError("Unable to read mapping CSV %s: %s" % (path, last))
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"] = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return df

def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

# ---------- 4) Name + token helpers ----------
def find_col(df, patterns, prefer_exact=None):
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

SEP_RE = re.compile(r"\s*(?:->|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")
ID_PAT = re.compile(r"\bI\d+\b", re.I)

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}
_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = token.lower()
    token = re.sub(
        r"(^|\b)([a-z])(['&#8217;])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token,
    )
    if "-" in token:
        token = "-".join([w[:1].upper() + w[1:] for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])", lambda m: "Mc" + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token[0].upper() + token[1:]

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i - 1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

def format_name_bold_last(display_name: str) -> str:
    s = _clean_piece(display_name or "")
    if not s:
        return ""
    parts = [p for p in s.split(" ") if p]
    if len(parts) == 1:
        return '<span class="dn-last">%s</span>' % _html.escape(parts[0])
    first = " ".join(parts[:-1])
    last = parts[-1]
    return (
        '<span class="dn-first">%s</span> <span class="dn-last">%s</span>'
        % (_html.escape(first), _html.escape(last))
    )

def _last_first_keys(display_name: str):
    s = _clean_piece(display_name or "")
    if not s:
        return ("", "")
    parts = [p for p in s.split(" ") if p]
    if len(parts) == 1:
        return (parts[0].lower(), "")
    last = parts[-1].lower()
    first = parts[0].lower()
    return (last, first)

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

def derive_common_from_first_token(tokens):
    if not tokens:
        return ("", "")
    first = _clean_piece(tokens[0])
    parts = re.split(r"\s*(?:&| and )\s*", first, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return ("", "")
    def _norm(x):
        return smart_titlecase(x) if " " in x else smart_titlecase(surname_given_from_token(x)[0])
    return (_norm(parts[0]), _norm(parts[1]))

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_display_html, cm_val, matchee_name_html, gens, husband, wife):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_display_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        "%s & %s." % (husband, wife),
    ]
    s = " ".join(parts)
    s = re.sub(r"\.\s*$", "", s)
    return s

# ---------- 5) HTML shell (header placement + header safety) ----------
def _partial_head(title: str) -> str:
    # Header safety learned from Cell 3: force THEAD/TH visible even if upstream CSS hides them.
    late_style = (
        '<style type="text/css">\n'
        '/* Header/Nav safety. */\n'
        '#nav-slot, #nav-slot nav, #nav-slot .oldnav, #nav-slot .navbar{display:block !important; visibility:visible !important; opacity:1 !important;}\n'
        'table.sortable thead{display:table-header-group !important; visibility:visible !important;}\n'
        'table.sortable thead th{display:table-cell !important; visibility:visible !important;}\n'
        '/* Participants: flow tiles; show Name (bold last) + row-count only. */\n'
        'table.dn-participants thead{display:none !important;}\n'
        'table.dn-participants tbody tr{display:inline-block;vertical-align:top;margin:4px 10px 4px 0;padding:4px 8px;border:1px solid #ddd;border-radius:4px;background:#fff;white-space:nowrap;width:auto;max-width:360px;}\n'
        'table.dn-participants tbody td{display:inline-block;padding:0 6px 0 0;vertical-align:baseline;}\n'
        'table.dn-participants tbody td:nth-child(1){display:none !important;}\n'
        'table.dn-participants tbody td:nth-child(2){min-width:0;max-width:280px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;}\n'
        'table.dn-participants tbody td:nth-child(3){display:none !important;}\n'
        'table.dn-participants tbody td:nth-child(4){min-width:18px;text-align:right;padding-right:0;}\n'
        'table.dn-participants tbody td:nth-child(2) .dn-last{font-weight:700;}\n'
        'table.dn-participants tbody td:nth-child(2) .dn-first{font-weight:400;}\n'
        '/* Sticky left column for register table. */\n'
        '#reg-list{border-collapse:collapse;}\n'
        '#reg-list th, #reg-list td{white-space:nowrap;}\n'
        '#reg-list thead th{position:sticky; top:0; z-index:3;}\n'
        '#reg-list th:first-child, #reg-list td:first-child{position:sticky; left:0; z-index:2; background:#ffffff;}\n'
        '</style>\n'
    )

    # Updated block (match Cell 3 / Cell 2c conventions)
    parts = []
    parts.append('Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_DISPLAY))
    if AUTOSOMAL_MATCHES_TEXT and AUTOSOMAL_MATCHES_TEXT != "(unknown)":
        parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES_TEXT))
    parts.append('Showing: <span id="showing-count"></span>')
    updated_block = '<div class="updated centerline">' + ' &nbsp;|&nbsp; '.join(parts) + '</div>'

    # SSI include inside a slot (matches Cell 3 pattern; no JS repair here)
    nav_slot = '<div id="nav-slot">%s</div>' % NAV_BLOCK

    menu = (
        '<div class="selection-menu centerline">'
        'View: '
        '<a href="#" onclick="return dnShowSelected();">Selected</a> &nbsp;|&nbsp; '
        '<a href="#" onclick="return dnShowAll();">All</a> &nbsp;|&nbsp; '
        '<a href="#" onclick="return dnReset();">Reset</a>'
        '</div>'
    )

    return (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml" lang="en">\n'
        '<head>\n'
        '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />\n'
        '<meta name="viewport" content="width=device-width, initial-scale=1.0" />\n'
        '<title>%s</title>\n'
        '%s\n'
        '%s'
        '</head>\n'
        '<body>\n'
        '<div class="wrap">\n'
        '<h1 class="centerline">%s</h1>\n'
        '%s\n'
        '%s\n'
        '%s\n'
        '<div class="table-scroll">\n'
        % (_html.escape(title), HEAD_LINK, late_style, _html.escape(title), updated_block, nav_slot, menu)
    )

def _partial_tail() -> str:
    return (
        "</div>\n</div>\n"
        "<script type=\"text/javascript\">\n//<![CDATA[\n"
        "(function(){\n"
        "  var selectedMatches={};\n"
        "  var viewMode='all';\n"
        "  function hasSelection(){for(var k in selectedMatches){if(selectedMatches.hasOwnProperty(k)){return true;}}return false;}\n"
        "  function setRowSelected(tr, sel){if(!tr) return; tr.setAttribute('data-selected', sel ? '1' : '0'); tr.style.backgroundColor = sel ? '#fff2cc' : '';}\n"
        "  function setParticipantsVisible(show){var wrap=document.getElementById('participants-wrapper'); if(!wrap) return; wrap.style.display = show ? '' : 'none';}\n"
        "  function setFirstAncVisible(show){var wrap=document.getElementById('first-anc-wrapper'); if(!wrap) return; wrap.style.display = show ? '' : 'none';}\n"
        "  function formatWithCommas(n){try{var x=parseInt(String(n||'').replace(/[^0-9\\-]/g,''),10); if(isNaN(x)) return ''; return x.toLocaleString('en-US');}catch(e){return String(n||'');}}\n"
        "  function updateShowingCount(n){var el=document.getElementById('showing-count'); if(!el) return; el.textContent=formatWithCommas(n);}\n"
        "  function updateSummaryFromVisible(){\n"
        "    var rows=document.querySelectorAll('#reg-tb tr');\n"
        "    var participants={}; var firstAncestors={}; var visibleLines=0;\n"
        "    for(var i=0;i<rows.length;i++){\n"
        "      var r=rows[i];\n"
        "      if(r.style.display==='none') continue;\n"
        "      if(r.getAttribute('data-excluded')==='1') continue;\n"
        "      var incCell=r.querySelector('.dn-include-cell');\n"
        "      if(incCell){var inc=(incCell.textContent||''); var incNorm=inc.toLowerCase().replace(/\\s+/g,''); if(incNorm!=='yes') continue;}\n"
        "      visibleLines++;\n"
        "      var mk=r.getAttribute('data-match')||'';\n"
        "      var fa=r.getAttribute('data-first')||'';\n"
        "      if(mk){participants[mk]=true;} if(fa){firstAncestors[fa]=true;}\n"
        "    }\n"
        "    var pCount=0, faCount=0, k;\n"
        "    for(k in participants){if(participants.hasOwnProperty(k)){pCount++;}}\n"
        "    for(k in firstAncestors){if(firstAncestors.hasOwnProperty(k)){faCount++;}}\n"
        "    var selCount=0; for(k in selectedMatches){if(selectedMatches.hasOwnProperty(k)){selCount++;}}\n"
        "    var elSel=document.getElementById('dn-sum-selected');\n"
        "    var elP=document.getElementById('dn-sum-part');\n"
        "    var elL=document.getElementById('dn-sum-lines');\n"
        "    var elFA=document.getElementById('dn-sum-fa');\n"
        "    if(elSel){elSel.textContent='Selected Participant: '+selCount;}\n"
        "    if(elP){elP.textContent='Network Participants: '+pCount;}\n"
        "    if(elL){elL.textContent='Network Ancestral lines: '+visibleLines;}\n"
        "    if(elFA){elFA.textContent='Network First Ancestors: '+faCount;}\n"
        "    updateShowingCount(visibleLines);\n"
        "  }\n"
        "  function updateRegister(){\n"
        "    var regRows=document.querySelectorAll('#reg-tb tr');\n"
        "    if(viewMode!=='selected' || !hasSelection()){\n"
        "      for(var i=0;i<regRows.length;i++){regRows[i].style.display='';}\n"
        "    }else{\n"
        "      var focusFirst={};\n"
        "      for(var i2=0;i2<regRows.length;i2++){\n"
        "        var r2=regRows[i2];\n"
        "        var mk=r2.getAttribute('data-match')||'';\n"
        "        var fa=r2.getAttribute('data-first')||'';\n"
        "        if(mk && fa && selectedMatches[mk]){focusFirst[fa]=true;}\n"
        "      }\n"
        "      for(var i3=0;i3<regRows.length;i3++){\n"
        "        var r3=regRows[i3];\n"
        "        var fa2=r3.getAttribute('data-first')||'';\n"
        "        var show=!!focusFirst[fa2];\n"
        "        r3.style.display = show ? '' : 'none';\n"
        "      }\n"
        "    }\n"
        "    updateSummaryFromVisible();\n"
        "  }\n"
        "  function dnToggleMatchRow(tr){\n"
        "    if(!tr) return false;\n"
        "    var key=tr.getAttribute('data-match')||'';\n"
        "    if(!key) return false;\n"
        "    if(selectedMatches[key]){delete selectedMatches[key]; setRowSelected(tr,false);}else{selectedMatches[key]=true; setRowSelected(tr,true);}\n"
        "    updateRegister(); return false;\n"
        "  }\n"
        "  function wireMatchRows(){\n"
        "    var rows=document.querySelectorAll('#match-tb tr');\n"
        "    for(var i=0;i<rows.length;i++){\n"
        "      (function(r){r.onclick=function(e){dnToggleMatchRow(r); if(e && e.preventDefault){e.preventDefault();}};})(rows[i]);\n"
        "    }\n"
        "  }\n"
        "  function dnToggleFirstAnc(){var wrap=document.getElementById('first-anc-wrapper'); if(!wrap) return false; var hidden=(wrap.style.display==='none'); setFirstAncVisible(hidden); return false;}\n"
        "  function dnShowSelected(){viewMode='selected'; updateRegister(); setParticipantsVisible(false); setFirstAncVisible(false); return false;}\n"
        "  function dnShowAll(){viewMode='all'; updateRegister(); setParticipantsVisible(true); setFirstAncVisible(true); return false;}\n"
        "  function dnReset(){\n"
        "    selectedMatches={}; viewMode='all';\n"
        "    var rows=document.querySelectorAll('#match-tb tr');\n"
        "    for(var i=0;i<rows.length;i++){setRowSelected(rows[i],false);}\n"
        "    var rrows=document.querySelectorAll('#reg-tb tr');\n"
        "    for(var j=0;j<rrows.length;j++){\n"
        "      rrows[j].style.display=''; rrows[j].setAttribute('data-excluded','0'); rrows[j].style.opacity='';\n"
        "      var c=rrows[j].querySelector('.dn-include-cell'); if(c){c.textContent='Yes';}\n"
        "    }\n"
        "    updateSummaryFromVisible(); setParticipantsVisible(true); setFirstAncVisible(true); return false;\n"
        "  }\n"
        "  function toggleExcludeRow(tr){\n"
        "    if(!tr) return;\n"
        "    var cur=(tr.getAttribute('data-excluded')==='1');\n"
        "    var next=!cur;\n"
        "    tr.setAttribute('data-excluded', next ? '1' : '0');\n"
        "    tr.style.opacity = next ? '0.45' : '';\n"
        "    var cell=tr.querySelector('.dn-include-cell'); if(cell){cell.textContent = next ? 'No' : 'Yes';}\n"
        "    updateSummaryFromVisible();\n"
        "  }\n"
        "  function wireRegisterRows(){\n"
        "    var rows=document.querySelectorAll('#reg-tb tr');\n"
        "    for(var i=0;i<rows.length;i++){\n"
        "      (function(r){\n"
        "        var cell=r.querySelector('.dn-include-cell'); if(!cell) return;\n"
        "        cell.onclick=function(e){toggleExcludeRow(r); if(e && e.preventDefault){e.preventDefault();} return false;};\n"
        "      })(rows[i]);\n"
        "    }\n"
        "  }\n"
        "  function dnDownloadCurrentCSV(){\n"
        "    var rows=document.querySelectorAll('#reg-tb tr');\n"
        "    var out=[];\n"
        "    function esc(v){\n"
        "      if(v==null){v='';}\n"
        "      v=String(v);\n"
        "      if(v.indexOf('\"')>=0||v.indexOf(',')>=0||v.indexOf('\\n')>=0||v.indexOf('\\r')>=0){v='\"'+v.replace(/\"/g,'\"\"')+'\"';}\n"
        "      return v;\n"
        "    }\n"
        "    out.push('Match to,First Ancestor,Include in proof,Match Summary,Full Lineage');\n"
        "    for(var i=0;i<rows.length;i++){\n"
        "      var r=rows[i];\n"
        "      if(r.style.display==='none') continue;\n"
        "      if(r.getAttribute('data-excluded')==='1') continue;\n"
        "      var tds=r.getElementsByTagName('td');\n"
        "      if(tds.length<5) continue;\n"
        "      var inc=(tds[2].textContent||'');\n"
        "      var incNorm=inc.toLowerCase().replace(/\\s+/g,'');\n"
        "      if(incNorm!=='yes') continue;\n"
        "      var match_to=(tds[0].textContent||'').trim();\n"
        "      var first=(tds[1].textContent||'').trim();\n"
        "      var summary=(tds[3].textContent||'').trim();\n"
        "      var lineage=(tds[4].textContent||'').trim();\n"
        "      out.push([esc(match_to),esc(first),esc(inc),esc(summary),esc(lineage)].join(','));\n"
        "    }\n"
        "    var csv=out.join('\\r\\n');\n"
        "    var blob=new Blob([csv],{type:'text/csv'});\n"
        "    var url=URL.createObjectURL(blob);\n"
        "    var a=document.createElement('a'); a.href=url; a.download='dna_network_register_live.csv';\n"
        "    document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url);\n"
        "    return false;\n"
        "  }\n"
        "  window.dnShowSelected=dnShowSelected;\n"
        "  window.dnShowAll=dnShowAll;\n"
        "  window.dnReset=dnReset;\n"
        "  window.dnToggleFirstAnc=dnToggleFirstAnc;\n"
        "  window.dnDownloadCurrentCSV=dnDownloadCurrentCSV;\n"
        "  function init(){wireMatchRows(); wireRegisterRows(); setParticipantsVisible(true); setFirstAncVisible(true); updateRegister();}\n"
        "  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',init,false);}else{init();}\n"
        "})();\n"
        "//]]>\n</script>\n</body>\n</html>"
    )

# ---------- 6) Row builder ----------
def build_register_row(row, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str):
    subject_raw = str(row.get(match_col, "") or "")
    subject_code = subject_raw.strip()
    key = subject_code.lower()
    subject_unmasked = MATCH_TO_UNMASKED.get(key, subject_code)

    subject_display = normalize_person_name(subject_unmasked or subject_code)
    subject_display_html = _html.escape(subject_display or "")

    match_to_html = subject_display_html

    pid = extract_person_id(row.get(id_col, ""))

    base_matchee = norm_matchee_name(row.get(name_col, "")) or subject_display
    matchee_name = normalize_person_name(base_matchee)
    if pid:
        matchee_html = (
            '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
            'target="_blank" rel="noopener">%s</a>'
            % (TNG_BASE, pid, TNG_TREE, _html.escape(matchee_name or "", quote=False))
        )
    else:
        matchee_html = _html.escape(matchee_name or "", quote=False)

    cm_val = row.get(cm_col, "0")
    tokens = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)

    husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(
        subject_display_html or subject_display,
        cm_val,
        matchee_html,
        gens_total,
        husband_raw,
        wife_raw,
    )

    return match_to_html, header_html, key

# ---------- 7) Unified page + CSV builder ----------
def build_network_partial(main_df: pd.DataFrame, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str):
    first_ancestors = []
    full_lineages = []
    for _, row in main_df.iterrows():
        path_raw = str(row.get(path_col, "") or "")
        tokens = split_tokens(path_raw)
        first_ancestors.append(_clean_piece(tokens[0]) if tokens else "")
        full_lineages.append(_clean_piece(path_raw))
    df = main_df.copy()
    df["First Ancestor"] = first_ancestors
    df["Full Lineage"] = full_lineages

    first_series = df["First Ancestor"].astype(str).map(lambda x: x.strip())
    vc = first_series[first_series != ""].value_counts(dropna=False)

    lin_df = vc.reset_index()
    if lin_df.shape[1] >= 2:
        lin_df.columns = ["First Ancestor", "Count"]
    else:
        lin_df["First Ancestor"] = lin_df.index.astype(str)
        lin_df["Count"] = vc.values
        lin_df = lin_df[["First Ancestor", "Count"]]

    lin_df = lin_df.sort_values(["Count", "First Ancestor"], ascending=[False, True], kind="mergesort").reset_index(drop=True)

    # Participants: derive + normalize + sort by last name; keep code in data (hidden in UI)
    part_df = df[[match_col]].copy()
    part_df["match_key"] = part_df[match_col].astype(str).str.strip().str.lower()
    part_df["match_code_raw"] = part_df[match_col].astype(str).str.strip()

    unmasked_series = part_df["match_key"].map(lambda k: MATCH_TO_UNMASKED.get(k, ""))
    part_df["match_label"] = unmasked_series
    mask_empty = part_df["match_label"] == ""
    part_df.loc[mask_empty, "match_label"] = part_df.loc[mask_empty, "match_code_raw"]
    part_df["match_label"] = part_df["match_label"].map(lambda x: normalize_person_name(x) if str(x).strip() else "")
    part_df = part_df[part_df["match_key"] != ""]

    if part_df.empty:
        p_counts = pd.DataFrame(columns=["match_key", "match_code_raw", "match_label", "rows"])
    else:
        grp = part_df.groupby("match_key")
        rows_series = grp.size().rename("rows")
        code_series = grp["match_code_raw"].first()
        label_series = grp["match_label"].first()
        p_counts = pd.concat([code_series, label_series, rows_series], axis=1).reset_index()
        p_counts.columns = ["match_key", "match_code_raw", "match_label", "rows"]

        p_counts["sort_last"] = p_counts["match_label"].map(lambda x: _last_first_keys(x)[0])
        p_counts["sort_first"] = p_counts["match_label"].map(lambda x: _last_first_keys(x)[1])

        p_counts = p_counts.sort_values(
            ["sort_last", "sort_first", "match_label", "match_key"],
            ascending=[True, True, True, True],
            kind="mergesort",
        ).reset_index(drop=True)

    full_participants = int(p_counts.shape[0])
    full_lines = int(first_series[first_series != ""].shape[0])
    full_first_anc = int(len(vc.index))

    html = []
    title = "Match Specific Produced DNA Network"
    html.append(_partial_head(title))

    html.append('<h2 class="centerline">Match Specific Produced DNA Network</h2>')
    html.append(
        '<p class="centerline">This unified view shows participants (matches), First Ancestors '
        'produced by the current DNA Network, and the detailed DNA Register rows below. '
        'Click tiles in the participants section to focus on a subset; use the Selected / All / Reset '
        'links above to change the view. In the register, you can mark individual lines as '
        'excluded from the proof.</p>'
    )

    # Participants: header includes total count; tiles show Name (bold last) + row-count number only
    html.append('<div id="participants-wrapper">')
    html.append('<h3>Network participants (matches): %d</h3>' % full_participants)
    if p_counts.empty:
        html.append('<p><em>No participants could be derived from the produced DNA network.</em></p>')
    else:
        html.append('<table id="participants-table" class="dn-participants sortable" border="1">')
        html.append('<tbody id="match-tb">')
        for _, r in p_counts.iterrows():
            mkey = str(r.get("match_key", "")).strip().lower()
            code_raw = str(r.get("match_code_raw", "")).strip()
            label = str(r.get("match_label", "")).strip()
            rows_count = int(str(r.get("rows", "0")).strip() or "0")

            label_html = format_name_bold_last(label)

            tr = (
                '<tr data-match="%s">'
                '<td></td>'
                '<td>%s</td>'
                '<td>%s</td>'
                '<td class="count">%d</td>'
                '</tr>'
                % (
                    _html.escape(mkey, quote=True),
                    label_html,
                    _html.escape(code_raw),
                    rows_count,
                )
            )
            html.append(tr)
        html.append("</tbody></table>")
    html.append("</div>")  # participants-wrapper

    # First Ancestor table + toggle
    html.append('<h3>First Ancestors produced by this DNA Network</h3>')
    html.append(
        '<div class="centerline" style="margin-bottom:4px;">'
        '<a href="#" onclick="return dnToggleFirstAnc();">Hide / show First Ancestors table</a>'
        '</div>'
    )
    html.append('<div id="first-anc-wrapper">')
    if lin_df.empty:
        html.append('<p><em>No First Ancestors could be derived from the produced DNA network.</em></p>')
    else:
        html.append(
            '<table id="first-anc-table" class="sortable" border="1">'
            '<thead><tr>'
            '<th style="width:80%">First Ancestor</th>'
            '<th style="width:20%">Showing</th>'
            '</tr></thead><tbody id="anc-tb">'
        )
        for _, r in lin_df.iterrows():
            first = str(r.get("First Ancestor", "")).strip()
            cnt = int(str(r.get("Count", "0")).strip() or "0")
            tr = (
                '<tr data-first="%s">'
                '<td>%s</td>'
                '<td class="count">%d</td>'
                '</tr>'
                % (
                    _html.escape(first, quote=True),
                    _html.escape(first),
                    cnt,
                )
            )
            html.append(tr)
        html.append("</tbody></table>")
    html.append("</div>")  # first-anc-wrapper

    # Summary block + CSV link
    html.append(
        '<table id="dn-summary" class="summary-block" border="0" style="margin:10px auto 6px auto;">'
        '<tr>'
        '<td><strong>Current proof scope:</strong></td>'
        '<td>'
        '<span id="dn-sum-selected">Selected Participant: 0</span> &nbsp;|&nbsp; '
        '<span id="dn-sum-part">Network Participants: 0</span> &nbsp;|&nbsp; '
        '<span id="dn-sum-lines">Network Ancestral lines: 0</span> &nbsp;|&nbsp; '
        '<span id="dn-sum-fa">Network First Ancestors: 0</span> &nbsp;|&nbsp; '
        '<a href="#" onclick="return dnDownloadCurrentCSV();">Download register CSV</a>'
        '</td>'
        '</tr>'
        '</table>'
    )

    # IMPORTANT: '%%' needed because this string is used with % formatting below
    html.append(
        '<div class="centerline" style="margin:2px 0 10px 0; font-size:90%%;">'
        'Full study scope (all network lines): '
        'Participants: %d  |  Ancestral lines: %d  |  First Ancestors: %d'
        '</div>' % (full_participants, full_lines, full_first_anc)
    )

    # Register rows + CSV rows
    html.append('<h2 class="centerline">DNA Register rows for produced DNA Network</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead><tr>'
        '<th>Match to</th>'
        '<th>First Ancestor</th>'
        '<th>Include in proof</th>'
        '<th>Match Summary</th>'
        '<th>Full Lineage</th>'
        '</tr></thead><tbody id="reg-tb">'
    )

    csv_rows = []
    tag_re = re.compile(r"<[^>]+>")

    for _, row in df.iterrows():
        subject_html, header_html, mkey = build_register_row(row, id_col, match_col, name_col, cm_col, path_col)

        first_raw = str(row.get("First Ancestor", "")).strip()
        first_esc = _html.escape(first_raw)
        lineage_raw = str(row.get("Full Lineage", "")).strip()
        lineage_esc = _html.escape(lineage_raw)

        tr = (
            '<tr data-match="%s" data-first="%s" data-excluded="0">'
            '<td>%s</td>'
            '<td>%s</td>'
            '<td class="dn-include-cell">Yes</td>'
            '<td>%s</td>'
            '<td>%s</td>'
            '</tr>'
            % (
                _html.escape(mkey, quote=True),
                _html.escape(first_raw, quote=True),
                subject_html,
                first_esc,
                header_html,
                lineage_esc,
            )
        )
        html.append(tr)

        summary_plain = tag_re.sub("", header_html)
        csv_rows.append({
            "Match code": mkey,
            "First Ancestor": first_raw,
            "Include in proof": "Yes",
            "Match Summary": summary_plain,
            "Full Lineage": lineage_raw,
        })

    html.append("</tbody></table>")
    html.append(_partial_tail())
    return "".join(html), csv_rows

# ---------- 8) Main driver ----------
def main():
    encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
    last_err = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(CSV_IN, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as ex:
            last_err = ex
            df = None
    if df is None:
        raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, last_err))

    print("[OK] Loaded CSV for unified DNA Network: %d rows, %d cols" % (len(df), len(df.columns)))

    id_col = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
    match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
    name_col = find_col(df, [r"^name$"], ["Name"])
    cm_col = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
    path_col = find_col(
        df,
        [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
        ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"],
    )

    if not match_col:
        raise ValueError("CSV missing 'Match to' column.")
    if not path_col:
        raise ValueError("CSV missing lineage/path column.")
    if not name_col:
        raise ValueError("CSV missing 'Name' column.")
    if not cm_col:
        raise ValueError("CSV missing 'cM' column.")
    if not id_col:
        raise ValueError("CSV missing an ID#/PersonID column.")

    _setup_resolver()
    os.makedirs("partials", exist_ok=True)

    network_html, csv_rows = build_network_partial(df, id_col, match_col, name_col, cm_col, path_col)

    with open(DNA_NETWORK_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(network_html)
    print("[OK] Wrote unified DNA Network partial:", os.path.abspath(DNA_NETWORK_LOCAL))

    if csv_rows:
        reg_df = pd.DataFrame(csv_rows, columns=[
            "Match code",
            "First Ancestor",
            "Include in proof",
            "Match Summary",
            "Full Lineage",
        ])
        reg_df.to_csv(
            REGISTER_CSV_LOCAL,
            index=False,
            encoding="iso-8859-15",
            errors="xmlcharrefreplace",
        )
        print("[OK] Wrote register CSV:", os.path.abspath(REGISTER_CSV_LOCAL))
    else:
        print("[WARN] No register rows; CSV not written.")

    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; upload of dna_network.shtml and CSV skipped.")
        return

    try:
        ftps = ftp_connect()
        try:
            ftp_upload_overwrite(ftps, DNA_NETWORK_LOCAL, _remote_path(DNA_NETWORK_REMOTE))
        except Exception as e:
            print("[WARN] Upload dna_network.shtml failed:", e)

        if csv_rows:
            try:
                ftp_upload_overwrite(ftps, REGISTER_CSV_LOCAL, _remote_path(REGISTER_CSV_REMOTE))
            except Exception as e:
                print("[WARN] Upload register CSV failed:", e)

        print("\n--- SIZE Verification (if supported) ---")
        p_html = _remote_path(DNA_NETWORK_REMOTE)
        sz_html = ftp_size(ftps, p_html)
        print("%s : %s" % (p_html, sz_html if sz_html is not None else "(SIZE unsupported)"))
        if csv_rows:
            p_csv = _remote_path(REGISTER_CSV_REMOTE)
            sz_csv = ftp_size(ftps, p_csv)
            print("%s : %s" % (p_csv, sz_csv if sz_csv is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URL ---")
        print("DNA Network (unified): https://yates.one-name.net/partials/dna_network.shtml")
        if csv_rows:
            print("Register CSV: https://yates.one-name.net/dna/dna_network_register.csv")
    except Exception as e:
        print("[FAIL] FTP session for dna_network.shtml / register CSV:", e)
        traceback.print_exc()

if __name__ == "__main__":
    main()
# ====== CUT STOP [1/1] CELL 2k - Unified DNA Network View (FLOW tiles; header-safe; nav-safe; dynamic Showing) ======


[CONFIRM] Golden Rules active | Cell=Cell2k_DNANetworkUnified | Version=2026.02.01-CELL2K-HDRSAFE1 | Encoding=ISO-8859-15
[AUDIT] DECLARED_LINES=-1
[VITALS] autosomal (after manual filter): 226
[VITALS] last updated (display): February 1, 2026 8:32 PM
[OK] Loaded CSV for unified DNA Network: 226 rows, 6 cols
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 94 codes
[OK] Wrote unified DNA Network partial: /content/partials/dna_network.shtml
[OK] Wrote register CSV: /content/dna_network_register.csv
[PUT] partials/dna_network.shtml -> partials/dna_network.shtml
[PUT] dna_network_register.csv -> dna/dna_network_register.csv

--- SIZE Verification (if supported) ---
partials/dna_network.shtml : 329932
dna/dna_network_register.csv : 209699

--- Open URL ---
DNA Network (unified): https://yates.one-name.net/partials/dna_network.shtml
Register CSV: https://yates.one-name.net/dna/dna_network_register.csv


# Cell 3

In [21]:
# -*- coding: iso-8859-15 -*-
# ====== CUT START [1/1] CELL 3 - Ancestor Register (Trees View; .shtml + SSI nav repair) ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.02.02-CELL3-COL1=FIRST-ANCESTOR + ENRICHED-EXCLUDE1)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography comes ONLY from /partials/dna_tree_styles.css.
# - Authority source for "First Ancestor" (same as Cell 2):
#     /partials/first_ancestor_pairs.csv (downloaded via FTP) provides mapping:
#       (FirstPair_Ancestor1_ID, FirstPair_Ancestor2_ID) -> FirstPair_LastFirst
# - Change in this build:
#     (1) Column 1 is REPLACED with "First Ancestor" values (authority key).
#     (2) The extra trailing "First Ancestor" column is removed to avoid duplication.
#     (3) UPDATED EXCLUSION: drop the enriched early-ancestor prefix up to Frances Yates & Jane Tichborne.
#
# Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-EXCLFIX2+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15

print("[CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-EXCLFIX2+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15")

# ---------- Imports ----------
import os, re, socket, posixpath, traceback
from datetime import datetime, timedelta
import pandas as pd
import html as _html
from ftplib import FTP_TLS
from string import Template as _T

DOWNLOADS_BLOCK = ""

# ---------- Display Policy ----------
SUPPRESS_ID_COLUMN = True
SUPPRESS_EMBEDDED_IDS_IN_TEXT = True

LINEAGE_SPOUSE_SEP = " & "
LINEAGE_COUPLE_SEP = " ~ "

ALPHA_BY_FIRST_ANCESTOR_FATHER = True
ALPHA_TIEBREAK_MOTHER_SURNAME  = True

# ---------- Enriched exclusion prefix (formatted lineage) ----------
# This is the exact prefix to strip from the formatted lineage display.
# It intentionally ends with a trailing "~" to remove the separator too.
ENRICHED_EXCLUDE_PREFIX = (
    "John Yates (1430-) & Still Searching ~ "
    "William Yates (1389-1440) & Still Searching ~ "
    "William Yates (1420-) & Still Searching ~ "
    "Edmund Yates (1445-1472) & Margaret Cornell ~ "
    "Richard Yates (1440-1498) & Joan Ashendon (1445-1499) ~ "
    "John Yates (1471-1544) & Alice Hyde (1498-1523) ~ "
    "Thomas Yates (1509-1565) & Elizabeth Fauconer (-1562) ~"
)

# ---------- Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_PORT", "21")
    os.environ.setdefault("FTP_DIR", "")

FTP_DIR = os.environ.get("FTP_DIR", "").strip().strip("/")

# ---------- Config / Paths ----------
INPUT_CSV = "final_combined_df_with_value_labels.csv"

EXPORT_BASENAME = "yates_ancestor_register"
LOCAL_CSV   = EXPORT_BASENAME + ".csv"
LOCAL_XLSX  = EXPORT_BASENAME + ".xlsx"
REMOTE_CSV  = posixpath.join("partials", LOCAL_CSV)
REMOTE_XLSX = posixpath.join("partials", LOCAL_XLSX)

OUTPUT_NAME = "just-trees.shtml"
REMOTE_HTML = posixpath.join("partials", OUTPUT_NAME)

DNA_CSS_HREF     = "/partials/dna_tree_styles.css"
DNA_CSS_VERSION  = "v2025-11-23-g3"
UNIFIED_CSS_HREF = "/partials/partials_unified.css"
UNIFIED_CSS_VER  = "v2026-02-01-unified-blue-refactor1"

HEAD_LINK = (
    '<link rel="stylesheet" type="text/css" href="%s?%s" />\n'
    '<link rel="stylesheet" type="text/css" href="%s?%s" />'
) % (DNA_CSS_HREF, DNA_CSS_VERSION, UNIFIED_CSS_HREF, UNIFIED_CSS_VER)

TABLE_WIDTH_PX = 5550

# ---------- Authority file (same as Cell 2) ----------
AUTH_REMOTE_DIR   = "partials"
AUTH_BASENAME     = "first_ancestor_pairs.csv"
AUTH_LOCAL_CACHE  = "first_ancestor_pairs.server.csv"

# ---------- Load CSV (robust) ----------
df = None
_last_err = None
for enc in ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1"):
    try:
        df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False, encoding=enc)
        break
    except Exception as e:
        _last_err = e
        df = None
if df is None:
    raise SystemExit("[ERROR] Unable to read CSV: %s (%r)" % (INPUT_CSV, _last_err))
print("[OK] Loaded CSV: %s rows=%d, cols=%d" % (INPUT_CSV, len(df), len(df.columns)))

if "haplogroup" not in df.columns:
    df["haplogroup"] = ""
else:
    df["haplogroup"] = df["haplogroup"].fillna("")

# ---------- Resolver: Column B (masked) -> Column C (unmasked) ----------
A_IDX = 0
B_IDX = 1
C_IDX = 2

def _norm_code(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = t.replace("\u00a0", " ")
    t = re.sub(r"\s{2,}", " ", t)
    return t.lower()

LOCAL_RESOLVER = "match_to_unmasked.csv"
if not os.path.exists(LOCAL_RESOLVER) and os.path.exists("/content/partials/match_to_unmasked.csv"):
    LOCAL_RESOLVER = "/content/partials/match_to_unmasked.csv"

def _pull_file_from_server(remote_dir, basename, local_out):
    try:
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", "21")))
            ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass
            if FTP_DIR:
                for p in [p for p in FTP_DIR.split("/") if p]:
                    try:
                        ftps.cwd(p)
                    except Exception:
                        try:
                            ftps.mkd(p)
                        except Exception:
                            pass
                        ftps.cwd(p)
            try:
                ftps.cwd(remote_dir)
            except Exception:
                pass
            with open(local_out, "wb") as f:
                ftps.retrbinary("RETR %s" % basename, f.write)
        return True
    except Exception as e:
        print("[WARN] Server pull failed for %s/%s: %s" % (remote_dir, basename, e))
        return False

def _pull_resolver_if_needed(local_path):
    if os.path.exists(local_path):
        print("Using resolver:", os.path.abspath(local_path))
        return local_path
    print("Resolver not found locally; attempting server pull ...")
    ok = _pull_file_from_server("partials", "match_to_unmasked.csv", "match_to_unmasked.csv")
    if ok:
        print("[OK] Pulled resolver from server -> match_to_unmasked.csv")
        return "match_to_unmasked.csv"
    return local_path

LOCAL_RESOLVER = _pull_resolver_if_needed(LOCAL_RESOLVER)

def _read_csv_anyenc(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    dfx = None
    for enc in encs:
        try:
            dfx = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            dfx = None
    if dfx is None:
        raise RuntimeError("Unable to read CSV %s: %s" % (path, last))
    return dfx

def _load_resolver_to_map(path):
    if not os.path.exists(path):
        return {}
    last = None
    m = None
    for enc in ("utf-8-sig", "iso-8859-15", "utf-8", "cp1252", "latin1"):
        try:
            m = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as e:
            last = e
            m = None
    if m is None:
        print("[WARN] Resolver not loaded:", last)
        return {}
    cols = {c.lower(): c for c in m.columns}
    if "code" not in cols or "unmasked" not in cols:
        print("[WARN] Resolver missing 'code'/'unmasked' cols; skipping map.")
        return {}
    m = m[[cols["code"], cols["unmasked"]]].copy()
    m["__key__"] = m[cols["code"]].map(_norm_code)
    m["__val__"] = m[cols["unmasked"]].astype(str)
    m = m.drop_duplicates(subset="__key__", keep="first")
    return dict(zip(m["__key__"], m["__val__"]))

resolver_map = _load_resolver_to_map(LOCAL_RESOLVER)

if df.shape[1] < 3:
    raise ValueError("Main df must have at least 3 columns: A(ID#), B(match to), C(unmasked).")

masked_raw = df.iloc[:, B_IDX].astype(str)
masked_key = masked_raw.map(_norm_code)
resolved   = masked_key.map(resolver_map)
df.iloc[:, C_IDX] = resolved.fillna("")

print(
    "[OK] Column B -> C mapping: %d / %d  unmatched: %d"
    % (int(resolved.notna().sum()), len(df), len(df) - int(resolved.notna().sum()))
)

# ---------- Lineage formatting helpers ----------
ID_TOKEN_RE = re.compile(r"\bI\d+\b", re.I)
COUPLE_SEP_RE = re.compile(r"\s*(?:->|&rarr;|;|>|/{2,}|\|{2,}|~{2,})\s*", re.I)
SPOUSE_SPLIT_RE = re.compile(r"\s*(?:&| and | AND |\+)\s*", re.I)

def _scrub_side_keep_name_years(side_text):
    s = str(side_text or "").strip()
    if not s:
        return ""

    m = re.match(r"^(I\d+)~([^~]+?)~(\d{4}\s*-\s*(?:\d{4})?)$", s, flags=re.I)
    if m:
        name = re.sub(r"\s{2,}", " ", (m.group(2) or "").strip())
        yrs = re.sub(r"\s+", "", (m.group(3) or "").strip())
        return ("%s (%s)" % (name, yrs)) if name and yrs else (name or "")

    m = re.match(r"^(I\d+)~([^~]+?)(?:~([^~]+?))?$", s, flags=re.I)
    if m:
        name = re.sub(r"\s{2,}", " ", (m.group(2) or "").strip())
        tail = re.sub(r"\s{2,}", " ", (m.group(3) or "").strip())
        if tail and re.search(r"\d{4}", tail):
            tail = re.sub(r"\s+", "", tail)
            return ("%s (%s)" % (name, tail)) if name else ""
        if tail and name:
            return ("%s %s" % (name, tail)).strip()
        return (name or tail or "").strip()

    m = re.match(r"^(I\d+)\s+(.*)$", s, flags=re.I)
    if m:
        rest = (m.group(2) or "").strip()
        yrs = ""
        m2 = re.search(r"(\b\d{4}\s*-\s*(?:\d{4})?\b)\s*$", rest)
        if m2:
            yrs = re.sub(r"\s+", "", m2.group(1))
            rest = rest[:m2.start()].strip()
        name = re.sub(r"\s{2,}", " ", rest).strip()
        return ("%s (%s)" % (name, yrs)) if name and yrs else (name or "")

    s = ID_TOKEN_RE.sub("", s).replace("~", " ")
    s = re.sub(r"\s{2,}", " ", s).strip()
    return s


# ---------- Enriched exclusion (drop very old lead-in couples) ----------
# Goal: keep the displayed lineage focused starting at:
#   Francis Yates (1541-1588) & Jane Tichborne (1548-1580)
#
# We support two compatible mechanisms:
#   (A) Exact prefix removal using the explicit ENRICHED_EXCLUDE_PREFIX couples.
#   (B) Anchor-based trimming: if the anchor couple is present, drop everything before it.

def _norm_couple_for_match(s: str) -> str:
    # lower + collapse whitespace; keep punctuation so we can match the authority text precisely
    return re.sub(r"\s{2,}", " ", str(s or "")).strip().lower()

# Build couple list from the explicit prefix string (safe if user updates prefix later)
_EXCLUDE_COUPLES = [
    c.strip()
    for c in re.split(r"\s*~\s*", (ENRICHED_EXCLUDE_PREFIX or "").strip().strip("~"))
    if c and c.strip()
]

def _strip_paren_years_anywhere(s: str) -> str:
    return re.sub(r"\([^)]*\)", "", str(s or "")).strip()

def _is_anchor_couple(couple_text: str) -> bool:
    t = _strip_paren_years_anywhere(couple_text).lower()
    return ("francis yates" in t) and ("jane tichborne" in t)

def _apply_enriched_exclusion(joined: str) -> str:
    s = str(joined or "").strip()
    if not s:
        return s

    couples = [c.strip() for c in s.split(LINEAGE_COUPLE_SEP) if c and c.strip()]
    if not couples:
        return s

    # (A) Exact prefix removal by couple list match (robust to spacing)
    if _EXCLUDE_COUPLES and len(couples) >= len(_EXCLUDE_COUPLES):
        ok = True
        for i in range(len(_EXCLUDE_COUPLES)):
            if _norm_couple_for_match(couples[i]) != _norm_couple_for_match(_EXCLUDE_COUPLES[i]):
                ok = False
                break
        if ok:
            couples = couples[len(_EXCLUDE_COUPLES):]

    # (B) Anchor trim if prefix did not match (or if data varies)
    if couples:
        for i, c in enumerate(couples):
            if _is_anchor_couple(c):
                couples = couples[i:]
                break

    return LINEAGE_COUPLE_SEP.join([c for c in couples if c]).strip()

def _format_lineage_cell(text):
    s = str(text or "").strip()
    if not s:
        return s
    couples = [t.strip() for t in COUPLE_SEP_RE.split(s) if t and t.strip()]
    if not couples:
        couples = [s]
    out_couples = []
    for c in couples:
        parts = [p.strip() for p in SPOUSE_SPLIT_RE.split(c, maxsplit=1) if p and p.strip()]
        if len(parts) == 2:
            father = _scrub_side_keep_name_years(parts[0]).strip()
            mother = _scrub_side_keep_name_years(parts[1]).strip()
            couple = (father + LINEAGE_SPOUSE_SEP + mother).strip()
        else:
            couple = _scrub_side_keep_name_years(c).strip()
        couple = re.sub(r"\s{2,}", " ", couple).strip()
        out_couples.append(couple)
    joined = (LINEAGE_COUPLE_SEP.join([c for c in out_couples if c])).strip()
    joined = re.sub(r"\s{2,}", " ", joined).strip()

    # Apply enriched early-ancestor truncation (Frances Yates & Jane Tichborne focus)
    joined = _apply_enriched_exclusion(joined)

    return joined


def _maybe_format_lineage_columns(df_in):
    if not SUPPRESS_EMBEDDED_IDS_IN_TEXT:
        return df_in
    df_out = df_in.copy()
    pat = re.compile(r"(ancestral|lineage|tree|path|ancestor|line)", re.I)
    cols = [c for c in df_out.columns if pat.search(str(c or "")) and str(c).strip().lower() != 'first ancestor']
    if not cols:
        for c in df_out.columns:
            try:
                ser = df_out[c].astype(str)
            except Exception:
                continue
            if ser.str.contains(r"\bI\d+~", regex=True, na=False).any() or ser.str.contains(r"\bI\d+\b", regex=True, na=False).any():
                if str(c).strip().lower() != 'first ancestor':
                    cols.append(c)
    cols = list(dict.fromkeys(cols))
    if not cols:
        return df_out
    for c in cols:
        try:
            df_out[c] = df_out[c].astype(str).map(_format_lineage_cell)
        except Exception:
            pass
    print("[OK] Lineage formatting applied to columns:", ", ".join([str(c) for c in cols]))
    return df_out


def _strip_years(name_text):
    s = str(name_text or "").strip()
    if not s:
        return ""
    return re.sub(r"\s*\([^)]*\)\s*$", "", s).strip()

def _first_last_tokens(person_text):
    s = _strip_years(person_text)
    parts = s.split()
    if not parts:
        return ("", "")
    return (parts[0], parts[-1])

def _first_ancestor_sort_key(lineage_text):
    s = str(lineage_text or "").strip()
    if not s:
        return ("", "", "")
    first_couple = s.split(LINEAGE_COUPLE_SEP, 1)[0].strip()
    father = ""
    mother = ""
    if LINEAGE_SPOUSE_SEP in first_couple:
        father, mother = [p.strip() for p in first_couple.split(LINEAGE_SPOUSE_SEP, 1)]
    else:
        father = first_couple.strip()
    f_given, f_surname = _first_last_tokens(father)
    m_surname = ""
    if ALPHA_TIEBREAK_MOTHER_SURNAME:
        _m_given, m_surname = _first_last_tokens(mother)
    return (f_surname.lower(), f_given.lower(), m_surname.lower())

# ---------- Authority load + first-ancestor value ----------
def _find_col(df0, patterns, prefer_exact=None):
    cols = list(df0.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df0.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

def load_authority_map() -> dict:
    local_path = AUTH_LOCAL_CACHE if os.path.exists(AUTH_LOCAL_CACHE) else AUTH_BASENAME
    if not os.path.exists(local_path):
        print("Authority not found locally; attempting server pull ...")
        ok = _pull_file_from_server("partials", AUTH_BASENAME, AUTH_LOCAL_CACHE)
        if not ok:
            raise RuntimeError("Authority file missing: expected /partials/%s" % AUTH_BASENAME)
        local_path = AUTH_LOCAL_CACHE
        print("[OK] Pulled authority from server -> %s" % local_path)
    else:
        print("Using authority:", os.path.abspath(local_path))

    adf = _read_csv_anyenc(local_path)
    a1_col  = _find_col(adf, [r"ancestor1.*_id$", r"firstpair_ancestor1_id$"], ["FirstPair_Ancestor1_ID"])
    a2_col  = _find_col(adf, [r"ancestor2.*_id$", r"firstpair_ancestor2_id$"], ["FirstPair_Ancestor2_ID"])
    key_col = _find_col(adf, [r"firstpair_lastfirst$"], ["FirstPair_LastFirst"])
    if not (a1_col and a2_col and key_col):
        raise RuntimeError("Authority CSV missing required columns. Need Ancestor1_ID, Ancestor2_ID, FirstPair_LastFirst.")
    m = {}
    for _, r in adf.iterrows():
        a1 = str(r.get(a1_col, "")).strip()
        a2 = str(r.get(a2_col, "")).strip()
        k  = re.sub(r"\s+", "", str(r.get(key_col, "")))
        if not (a1 and a2 and k):
            continue
        m[(a1.upper(), a2.upper())] = k
        m[(a2.upper(), a1.upper())] = k
    if not m:
        raise RuntimeError("Authority map built empty from %s" % local_path)
    print("[OK] Authority map ready: %d pairs" % (len(m)//2))
    return m

AUTH_COUPLE_KEY_MAP = load_authority_map()

def _canon_side(txt):
    t = re.sub(r"\([^)]*\)", "", str(txt or ""))
    t = re.sub(r"[^A-Za-z0-9]+", "", t).lower()
    return t


def _is_unknown_like(name_text):
    s = str(name_text or "").strip().lower()
    s = re.sub(r"\([^)]*\)", "", s).strip()
    s = re.sub(r"\s{2,}", " ", s)
    if not s:
        return True
    if s in ("unknown", "none", "noneunknownname"):
        return True
    if "unknown name" in s:
        return True
    if s.replace(" ", "") in ("noneunknownname", "unknownname"):
        return True
    return False

def _canon_lastfirst(name_text):
    # Canonicalize a person name into a LastFirst key (no punctuation),
    # matching the authority file convention (FirstPair_LastFirst).
    if _is_unknown_like(name_text):
        return "unknown"
    s = str(name_text or "").strip()
    s = re.sub(r"\([^)]*\)", "", s).strip()
    toks = re.findall(r"[A-Za-z0-9]+", s.lower())
    if not toks:
        return ""
    if len(toks) == 1:
        return re.sub(r"[^a-z0-9]+", "", toks[0])
    last = toks[-1]
    first = "".join(toks[:-1])
    return re.sub(r"[^a-z0-9]+", "", last + first)

def _extract_first_couple_ids(raw_lineage_text: str):
    s = str(raw_lineage_text or "").strip()
    if not s:
        return ("", "", "", "")
    couples = [t.strip() for t in COUPLE_SEP_RE.split(s) if t and t.strip()]
    first = couples[0] if couples else s
    parts = [p.strip() for p in SPOUSE_SPLIT_RE.split(first, maxsplit=1) if p and p.strip()]
    father_raw = parts[0] if len(parts) >= 1 else ""
    mother_raw = parts[1] if len(parts) >= 2 else ""
    f_id = ""
    m_id = ""
    mf = re.search(r"\b(I\d+)\b", father_raw, flags=re.I)
    if mf:
        f_id = mf.group(1).upper()
    mm = re.search(r"\b(I\d+)\b", mother_raw, flags=re.I)
    if mm:
        m_id = mm.group(1).upper()
    f_disp = _scrub_side_keep_name_years(father_raw).strip()
    m_disp = _scrub_side_keep_name_years(mother_raw).strip()
    return (f_id, m_id, f_disp, m_disp)

def _first_ancestor_authority_value(raw_lineage_text: str) -> str:
    f_id, m_id, f_disp, m_disp = _extract_first_couple_ids(raw_lineage_text)

    # Primary: authority lookup by IDs
    if f_id and m_id:
        k = AUTH_COUPLE_KEY_MAP.get((f_id, m_id), "")
        if k:
            return re.sub(r"\s+", "", k)

    # Fallback: LastFirst canonical key, synchronized with authority convention
    if f_disp and m_disp:
        return _canon_lastfirst(f_disp) + "&" + _canon_lastfirst(m_disp)
    if f_disp:
        return _canon_lastfirst(f_disp)
    return ""

# ---------- Vitals ----------
VITALS_CSV = "dna_vitals.csv"
LAST_UPDATED_TEXT  = ""
AUTOSOMAL_MATCHES  = ""

def _friendly_ts_from_utc(raw):
    s = str(raw or "").strip()
    if not s:
        return "(unknown)"
    s = s.replace("UTC", "").replace("utc", "").strip()
    fmts = ["%Y-%m-%d %H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M", "%Y-%m-%dT%H:%M:%S"]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(s, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    dt_est = dt_utc - timedelta(hours=5)
    months = ["January","February","March","April","May","June","July","August","September","October","November","December"]
    month_name = months[dt_est.month - 1]
    h24  = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12  = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (month_name, dt_est.day, dt_est.year, h12, dt_est.minute, ampm)

def _format_num_with_commas(raw_val):
    s_digits = re.sub(r"[^0-9\-]", "", str(raw_val or ""))
    if not s_digits:
        return ""
    try:
        return "{:,}".format(int(s_digits))
    except Exception:
        return s_digits

def _load_vitals(path):
    global LAST_UPDATED_TEXT, AUTOSOMAL_MATCHES
    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; header will be blank for vitals.")
        return
    vdf = None
    for enc in ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1"):
        try:
            vdf = pd.read_csv(path, dtype=str, encoding=enc, keep_default_na=False)
            break
        except Exception:
            vdf = None
    if vdf is None:
        print("[WARN] Unable to read dna_vitals.csv")
        return
    flat = [str(cell) for row in vdf.astype(str).values.tolist() for cell in row]
    autosomal_raw = None
    last_text = None
    for cell in flat:
        if autosomal_raw is None and "Records tagged and filtered by NPFX" in cell:
            m = re.search(r"(\d[\d,]*)", cell)
            if m:
                autosomal_raw = m.group(1)
        if last_text is None and "LAST_UPDATED_TEXT" in cell:
            m = re.search(r"LAST_UPDATED_TEXT\s*:\s*(.+)", cell)
            if m:
                last_text = m.group(1).strip()
    if last_text is not None:
        LAST_UPDATED_TEXT = _friendly_ts_from_utc(last_text)
    AUTOSOMAL_MATCHES = _format_num_with_commas(autosomal_raw)

_load_vitals(VITALS_CSV)

updated_str = 'Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_TEXT or "")
_updated_parts = [updated_str]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
_updated_parts.append('Showing: <span id="showing-count"></span>')
UPDATED_BLOCK = '<div class="updated centerline">' + ' &nbsp;|&nbsp; '.join(_updated_parts) + '</div>'

NAV_BLOCK = '<div id="nav-slot"><!--#include virtual="/partials/nav_block.shtml" --></div>'

CONTROLS_BLOCK = (
    '<div class="controls centerline" style="margin:6px 0 10px 0;">'
    '<input type="text" id="search-box" class="search" size="28" value="" placeholder="Search&amp;hellip;" />'
    "</div>"
)

# ---------- Display DF ----------
display_df = df.copy()

# drop ID#
if SUPPRESS_ID_COLUMN and display_df.shape[1] >= 1:
    display_df = display_df.drop(columns=[display_df.columns[A_IDX]], errors="ignore")

# Identify lineage/path column BEFORE formatting (so IDs still exist for authority lookup)
lineage_cols_raw = [c for c in display_df.columns if re.search(r"(ancestral|lineage|tree|path|ancestor|line)", str(c or ""), re.I)]
AUTH_LINEAGE_COL = lineage_cols_raw[0] if lineage_cols_raw else None

# Compute authority first-ancestor values from the raw lineage column
if AUTH_LINEAGE_COL:
    fa_values = display_df[AUTH_LINEAGE_COL].astype(str).map(_first_ancestor_authority_value)
else:
    fa_values = pd.Series([""] * len(display_df))

# REPLACE COLUMN 1 with First Ancestor values and rename header
if display_df.shape[1] >= 1:
    first_col_name = display_df.columns[0]
    display_df[first_col_name] = fa_values.astype(str).map(lambda x: re.sub(r"\s+", "", x).replace('&', '&#38;'))
    display_df = display_df.rename(columns={first_col_name: "First Ancestor"})
    print("[OK] Column 1 replaced with First Ancestor (authority).")
else:
    print("[WARN] display_df has no columns to replace.")

# Remove any other "First Ancestor" columns to avoid duplication (keep the first one)
fa_cols = [c for c in display_df.columns if str(c) == "First Ancestor"]
if len(fa_cols) > 1:
    keep_first = fa_cols[0]
    drop_rest = fa_cols[1:]
    display_df = display_df.drop(columns=drop_rest, errors="ignore")
    print("[OK] Dropped duplicate First Ancestor columns:", ", ".join(drop_rest))

# Now apply lineage formatting (removes embedded IDs AND applies enriched exclusion)
display_df = _maybe_format_lineage_columns(display_df)

# Alpha sort by first couple (kept)
if ALPHA_BY_FIRST_ANCESTOR_FATHER:
    lineage_cols = [c for c in display_df.columns if re.search(r"(ancestral|lineage|tree|path|ancestor|line)", str(c or ""), re.I)]
    sort_col = lineage_cols[0] if lineage_cols else None
    if sort_col:
        sort_keys = display_df[sort_col].astype(str).map(_first_ancestor_sort_key)
        display_df["__sort_surname__"]    = [k[0] for k in sort_keys]
        display_df["__sort_given__"]      = [k[1] for k in sort_keys]
        display_df["__sort_momsurname__"] = [k[2] for k in sort_keys]

        by_cols = ["__sort_surname__", "__sort_given__"]
        if ALPHA_TIEBREAK_MOTHER_SURNAME:
            by_cols.append("__sort_momsurname__")

        display_df = display_df.sort_values(by=by_cols, ascending=[True]*len(by_cols), kind="mergesort").reset_index(drop=True)
        display_df = display_df.drop(columns=["__sort_surname__", "__sort_given__", "__sort_momsurname__"], errors="ignore")
        print("[OK] Alpha sort applied by first couple:", sort_col, "| keys=", ",".join(by_cols))

# ---------- HTML table ----------
visible_cols = [c for c in display_df.columns if c]

table_html = display_df.to_html(
    index=False,
    columns=visible_cols,
    escape=False,
    border=1,
    classes="dataframe sortable"
)

if 'id="refactor-table"' not in table_html:
    table_html = re.sub(r"<table([^>]*)>", r'<table\1 id="refactor-table">', table_html, count=1)

if 'class="dataframe sortable"' not in table_html and "sortable" not in table_html:
    table_html = table_html.replace('class="dataframe"', 'class="dataframe sortable"', 1)

table_html = table_html.replace("<tbody>\n<tr>", "<tbody>\n<tr id=\"first-row\">", 1)

SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div id="top-scroll" class="scroll-sync-top">'
    '<div class="scroll-sync-top-inner" style="width:%dpx;"></div>'
    '</div>'
    '<div id="bottom-scroll" class="table-scroll">%s</div>'
    '</div>'
) % (TABLE_WIDTH_PX, table_html)

LATE_STYLE = r"""
<style type="text/css">
#nav-slot, #nav-slot nav, #nav-slot .oldnav, #nav-slot .navbar{
  display:block !important;
  visibility:visible !important;
  opacity:1 !important;
}
table.sortable thead{ display:table-header-group !important; visibility:visible !important; }
table.sortable thead th{ display:table-cell !important; visibility:visible !important; }
</style>
"""

JS_NAV_REPAIR = r"""
<script type="text/javascript">
//<![CDATA[
(function(){
  function hasNavContainer(el){
    if(!el) return false;
    var n = el.querySelector('nav.oldnav, nav.navbar, .oldnav, .navbar');
    return !!n;
  }
  function wrapFirstUL(el){
    if(!el) return false;
    var ul = el.querySelector('ul');
    if(!ul) return false;
    var nav = document.createElement('nav');
    nav.className = 'oldnav';
    nav.appendChild(ul);
    while(el.firstChild){ el.removeChild(el.firstChild); }
    el.appendChild(nav);
    return true;
  }
  function looksLikeSSICommentOnly(el){
    if(!el) return true;
    var txt = (el.textContent || '').replace(/\s+/g,'').toLowerCase();
    if(!txt) return true;
    if(txt.indexOf('<!--#include') >= 0) return true;
    return false;
  }
  function injectRemoteNav(el){
    try{
      var xhr = new XMLHttpRequest();
      xhr.open('GET', '/partials/nav_block.shtml', true);
      xhr.onreadystatechange = function(){
        if(xhr.readyState === 4){
          if(xhr.status >= 200 && xhr.status < 300){
            el.innerHTML = xhr.responseText;
            if(!hasNavContainer(el)){
              wrapFirstUL(el);
            }
          }
        }
      };
      xhr.send(null);
    }catch(e){}
  }

  function repairNav(){
    var slot = document.getElementById('nav-slot');
    if(!slot) return;

    if(looksLikeSSICommentOnly(slot)){
      injectRemoteNav(slot);
      return;
    }
    if(!hasNavContainer(slot)){
      wrapFirstUL(slot);
      return;
    }
  }

  if(document.readyState === 'loading'){
    document.addEventListener('DOMContentLoaded', repairNav, false);
  } else {
    repairNav();
  }
})();
//]]>
</script>
"""

page_tpl = _T(r"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Ancestor Register (Trees View)</title>
$HEAD_LINK
$LATE_STYLE
<style type="text/css">
/* Sticky second column (index 2) for Trees table */
#refactor-table th:nth-child(2),
#refactor-table td:nth-child(2){
  position:sticky;
  left:0;
  z-index:6;
  background:#ffffff;
}
#refactor-table th:nth-child(2){
  z-index:7;
}
</style>
</head>
<body id="top">
<div class="wrap">
  <h1 class="centerline">Ancestor Register (Trees View)</h1>
  $DOWNLOADS_BLOCK
  $UPDATED_BLOCK
  $NAV_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>

<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '').replace(/\s+/g,' ').trim().toLowerCase();
  }
  function sortTable(tbl, colIndex, dir, keyColIndex){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;
    var rows = [].slice.call(tb.rows || []);
    var asc  = (dir === 'asc');
    var kIdx = (typeof keyColIndex === 'number') ? keyColIndex : colIndex;
    rows.sort(function(a,b){
      var A = textOf(a.cells[kIdx]), B = textOf(b.cells[kIdx]);
      var nA = parseFloat(A.replace(/[^0-9.\-]/g,'')),
          nB = parseFloat(B.replace(/[^0-9.\-]/g,''));
      if(!isNaN(nA) && !isNaN(nB)){ return asc ? (nA-nB) : (nB-nA); }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });
    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++) frag.appendChild(rows[i]);
    tb.appendChild(frag);
    updateShowing();
  }
  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;
    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;
    for(var i=0;i<ths.length;i++)(function(idx){
      var th = ths[idx];
      var dir = 'asc';
      th.addEventListener('click', function(){
        dir = (dir === 'asc') ? 'desc' : 'asc';
        var hdr = (th.textContent || th.innerText || '');
        hdr = hdr.replace(/\s+\(asc\)|\s+\(desc\)/,'').replace(/\s+/g,' ').trim().toLowerCase();
        // Golden rule sync: sorting the lineage column uses First Ancestor (authority) as the key.
        // Column 1 is 'First Ancestor' (index 0) in this build.
        var keyColIndex = null;
        if(hdr === 'yates dna ancestral line'){
          keyColIndex = 0;
        }

        for (var j = 0; j < ths.length; j++){
          ths[j].innerHTML = ths[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');
        }
        th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');
        sortTable(tbl, idx, dir, (keyColIndex === null ? undefined : keyColIndex));
      }, false);
    })(i);
  }
  function formatWithCommas(n){
    try{
      var x = parseInt(String(n||'').replace(/[^0-9\-]/g,''),10);
      if(isNaN(x)) return '';
      return x.toLocaleString('en-US');
    }catch(e){ return String(n||''); }
  }
  function visibleRowCount(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return 0;
    var rows = tbl.tBodies[0].rows, n = 0;
    for(var i=0;i<rows.length;i++){
      if(rows[i].style.display !== 'none') n++;
    }
    return n;
  }
  function updateShowing(){
    var el = document.getElementById('showing-count');
    if(!el) return;
    el.textContent = formatWithCommas(visibleRowCount());
  }
  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\+/g,' ')) : '';
  }
  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;
    var tb = tbl.tBodies[0];
    var rows = [].slice.call(tb.rows || []);
    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\s+/g,' ').toLowerCase();
    }
    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
      updateShowing();
    }
    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }
    box.addEventListener('input', onInput, false);
    box.addEventListener('search', onInput, false);
    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }
  function bindBackToTop(){
    var btn = document.getElementById('back-to-top');
    if(!btn) return;
    function toggle(){ btn.style.display = (window.scrollY > 200 ? 'block' : 'none'); }
    toggle();
    window.addEventListener('scroll', toggle, {passive:true});
    btn.addEventListener('click', function(){
      try{
        window.scrollTo({top:0, behavior:'smooth'});
      } catch(e){
        window.scrollTo(0,0);
      }
    }, false);
  }
  function bindSyncedScrollbars(){
    var topScroll    = document.getElementById('top-scroll');
    var bottomScroll = document.getElementById('bottom-scroll');
    if(!(topScroll && bottomScroll)) return;
    var syncing = false;
    topScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      bottomScroll.scrollLeft = topScroll.scrollLeft;
      syncing = false;
    }, false);
    bottomScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      topScroll.scrollLeft = bottomScroll.scrollLeft;
      syncing = false;
    }, false);
  }
  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindBackToTop();
    bindSearch();
    bindSyncedScrollbars();
    updateShowing();
  });
})();
//]]>
</script>

$JS_NAV_REPAIR
</body>
</html>
""")

final_html = page_tpl.safe_substitute(
    HEAD_LINK=HEAD_LINK,
    LATE_STYLE=LATE_STYLE,
    JS_NAV_REPAIR=JS_NAV_REPAIR,
    DOWNLOADS_BLOCK=DOWNLOADS_BLOCK,
    UPDATED_BLOCK=UPDATED_BLOCK,
    NAV_BLOCK=NAV_BLOCK,
    CONTROLS_BLOCK=CONTROLS_BLOCK,
    SCROLL_WRAPPER=SCROLL_WRAPPER,
)

# ---------- Exports ----------
export_df = display_df.copy()
export_df.to_csv(LOCAL_CSV, index=False, encoding="iso-8859-15")
try:
    export_df.to_excel(LOCAL_XLSX, index=False)
except Exception:
    from pandas import ExcelWriter
    with ExcelWriter(LOCAL_XLSX) as _w:
        export_df.to_excel(_w, index=False)
print("[OK] Wrote exports:", os.path.abspath(LOCAL_CSV), "and", os.path.abspath(LOCAL_XLSX))

# ---------- Save page locally ----------
try:
    with open(OUTPUT_NAME, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(final_html)
    print("[OK] Saved locally:", os.path.abspath(OUTPUT_NAME))
except Exception as e:
    print("[ERROR] Save failed:", e)
    traceback.print_exc()

# ---------- Upload to /partials ----------
def _ftps_ensure_dir(ftps, path):
    if not path:
        return
    for seg in [p for p in path.split("/") if p]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)

ftp_host = os.environ.get("FTP_HOST")
ftp_user = os.environ.get("FTP_USER")
ftp_pass = os.environ.get("FTP_PASS")
ftp_port = int(os.environ.get("FTP_PORT", "21") or "21")

if ftp_host and ftp_user and ftp_pass:
    print("[INFO] Attempting FTP upload ...")
    try:
        socket.setdefaulttimeout(30)
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(ftp_host, ftp_port)
            ftps.login(ftp_user, ftp_pass)
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass

            _ftps_ensure_dir(ftps, FTP_DIR)
            _ftps_ensure_dir(ftps, "partials")

            with open(OUTPUT_NAME, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_HTML), fh)
            print("[OK] Uploaded HTML -> /partials/%s" % os.path.basename(REMOTE_HTML))

            with open(LOCAL_CSV, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_CSV), fh)
            with open(LOCAL_XLSX, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_XLSX), fh)
            print("[OK] Uploaded exports -> /partials/ (%s, %s)" % (LOCAL_CSV, LOCAL_XLSX))

            print("\n--- Open URLs ---")
            print("Trees page:       https://yates.one-name.net/partials/just-trees.shtml")
            print("CSV export:       https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_CSV))
            print("Excel export:     https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_XLSX))
    except Exception as e:
        print("[ERROR] FTP session failed:", e)
        traceback.print_exc()
else:
    print("[INFO] Skipping FTP upload (missing credentials).")

print("\n--- Cell 3 Complete (Column 1 replaced with First Ancestor from /partials/first_ancestor_pairs.csv; enriched prefix exclusion applied) ---")
# ====== CUT STOP  [1/1] CELL 3 ==================================================================


[CONFIRM] Golden Rules active | Cell=Cell3_Trees_SSI_NavRepair | Version=2026.02.02-CELL3-COL1=FIRST-ANCESTOR-EXCLFIX2+ENRICHED-EXCLUDE1 | Encoding=ISO-8859-15
[OK] Loaded CSV: final_combined_df_with_value_labels.csv rows=1700, cols=6
Using resolver: /content/match_to_unmasked.csv
[OK] Column B -> C mapping: 1700 / 1700  unmatched: 0
Using authority: /content/first_ancestor_pairs.server.csv
[OK] Authority map ready: 255 pairs
[OK] Column 1 replaced with First Ancestor (authority).
[OK] Lineage formatting applied to columns: Yates DNA Ancestral Line
[OK] Alpha sort applied by first couple: First Ancestor | keys= __sort_surname__,__sort_given__,__sort_momsurname__
[OK] Wrote exports: /content/yates_ancestor_register.csv and /content/yates_ancestor_register.xlsx
[OK] Saved locally: /content/just-trees.shtml
[INFO] Attempting FTP upload ...
[OK] Uploaded HTML -> /partials/just-trees.shtml
[OK] Uploaded exports -> /partials/ (yates_ancestor_register.csv, yates_ancestor_register.xlsx)

--- O

# debug

In [11]:
# ====== CUT START [1/1] CELL 2 (3-column register) - NAV+HEADER REPAIR + SSI-safe headers ======
# RON GOLDEN RULES - CLIFF NOTES (v2026.02.01-UNIFIED-BASELINE-NAVREPAIR1)
# - Complete and runnable Colab cell, one contiguous block.
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography/layout/colors via /partials/partials_unified.css (single baseline).
# - Fixes in this build:
#     (1) SSI nav include kept, PLUS JS fallback if SSI not parsed / markup missing.
#     (2) Header "Showing:" count remains dynamic (search-filter aware).
#     (3) SSI-safe header survivability: inline THEAD/TH display styles + late overrides after nav.
# - VITALS FIX (aligned to repaired Cell2k/Cell2c behavior):
#     (A) Parse dna_vitals.csv via 'line' column when present (preferred).
#     (B) Detect last_updated_text case-insensitively (supports LAST_UPDATED_TEXT and last_updated_text).
#     (C) Keep autosomal matches parsing (Records tagged and filtered by NPFX) but also accept
#         After manual filter, total records as fallback.
# - Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell2_3Col_AuthorityFirstAncestor | Version=2026.02.01-UNIFIED-BASELINE-NAVREPAIR1+VITALSFIX1 | Encoding=ISO-8859-15

print("[CONFIRM] Golden Rules active | Cell=Cell2_3Col_AuthorityFirstAncestor | Version=2026.02.01-UNIFIED-BASELINE-NAVREPAIR1+VITALSFIX1 | Encoding=ISO-8859-15")

import os
import re
import posixpath
import socket
import traceback
from ftplib import FTP_TLS
from datetime import datetime, timedelta
import pandas as pd
import html as _html
from string import Template

# ---------- A) LAYOUT CONTROL BLOCK ----------
COL_1_PX = 220
COL_2_PX = 420
COL_3_PX = 1240

COL_WIDTHS = [COL_1_PX, COL_2_PX, COL_3_PX]
TABLE_TOTAL_WIDTH_PX = sum(COL_WIDTHS)

print("[LAYOUT] TABLE_TOTAL_WIDTH_PX=%d" % TABLE_TOTAL_WIDTH_PX)
print("[LAYOUT] Column widths (px): 1=%d 2=%d 3=%d" % (COL_1_PX, COL_2_PX, COL_3_PX))

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

# Authority file created by Cell 1 (already on server)
AUTH_REMOTE_DIR   = "partials"
AUTH_BASENAME     = "first_ancestor_pairs.csv"
AUTH_REMOTE_NAME  = posixpath.join(AUTH_REMOTE_DIR, AUTH_BASENAME)
AUTH_LOCAL_CACHE  = "first_ancestor_pairs.server.csv"

# NOTE: main register pages now .shtml (SSI pages only)
LOCAL_HTML        = "yates_ancestor_register.shtml"
REMOTE_HTML_CANON = posixpath.join("partials", "yates_ancestor_register.shtml")
REMOTE_HTML_LEG   = posixpath.join("partials", "ons_yates_dna_register.shtml")

FTP_DIR  = (os.environ.get("FTP_DIR", "") or "").strip()

TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

REMOVE_PERIOD_AT_END = True

# Baseline stylesheet (single canonical CSS)
UNIFIED_CSS_BASENAME = "partials_unified.css"
# Cache buster helps Cloudflare / browser refresh.
UNIFIED_CSS_VERSION  = "v2026-02-01-unified-blue-refactor1"
UNIFIED_CSS_HREF     = "/partials/%s?%s" % (UNIFIED_CSS_BASENAME, UNIFIED_CSS_VERSION)
HEAD_LINK            = '<link rel="stylesheet" type="text/css" href="%s" />' % UNIFIED_CSS_HREF

# SSI navigation include (kept)
NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

# Fallback nav markup (used only if SSI is not parsed / not present)
NAV_FALLBACK_HTML = ''

# Path for vitals from Cell 1
VITALS_CSV = "dna_vitals.csv"

# Resolver for Match to (masked -> unmasked)
SERVER_PARTIALS_DIR        = "partials"
SERVER_MAPPING_BASENAME    = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE      = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

# ---------- 2) FTP ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    for seg in [p for p in remote_path.split("/")[:-1] if p and p != "."]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

# ---------- 3) CSV helpers ----------
def _read_csv_anyenc(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    dfx = None
    for enc in encs:
        try:
            dfx = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            dfx = None
    if dfx is None:
        raise RuntimeError("Unable to read CSV %s: %s" % (path, last))
    return dfx

def _read_mapping_csv(path: str) -> pd.DataFrame:
    dfm = _read_csv_anyenc(path)
    if dfm.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    dfm = dfm.iloc[:, :2].copy()
    dfm.columns = ["code", "unmasked"]
    dfm["code"]     = dfm["code"].astype(str).str.strip().str.lower()
    dfm["unmasked"] = dfm["unmasked"].astype(str).str.strip()
    dfm = dfm[dfm["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if dfm.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return dfm

# ---------- 3.1) Resolver ----------
def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

def resolve_match_to(code: str) -> str:
    if not isinstance(code, str):
        return ""
    return MATCH_TO_UNMASKED.get(code.strip().lower(), code)

# ---------- 4) Text utils ----------
SEP_RE = re.compile(r"\s*(?:\u2192|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")
ID_PAT = re.compile(r"\bI\d+\b", re.I)

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(['&#8217;])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])",  lambda m: "Mc"  + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i-1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_name_html, cm_val, matchee_name_html, gens, couple_text_html):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        couple_text_html,
    ]
    s = " ".join(parts)
    if REMOVE_PERIOD_AT_END:
        s = re.sub(r"\.\s*$", "", s)
    return s

# ---------- 4.1) Parse enriched first ancestor token ----------
LINEAGE_SPOUSE_SEP = " & "

def _scrub_side_keep_name_years(side_raw: str):
    side_raw = _clean_piece(side_raw or "")
    if not side_raw:
        return ("", "", "")

    if "~" in side_raw:
        bits = [b.strip() for b in side_raw.split("~")]
        bits = [b for b in bits if b != ""]
        if bits and re.match(r"^I\d+$", bits[0], re.I):
            pid = bits[0].upper()
            nm  = normalize_person_name(bits[1]) if len(bits) >= 2 else ""
            yrs = _clean_piece(bits[2]) if len(bits) >= 3 else ""
            return (pid, nm, yrs)

    m = re.match(r"^(I\d+)\s+(.*)$", side_raw, flags=re.I)
    if m:
        pid  = m.group(1).upper()
        rest = _clean_piece(m.group(2))
        yrs  = ""
        m2 = re.search(r"(\b\d{4}\s*-\s*(?:\d{4})?\b)\s*$", rest)
        if m2:
            yrs  = _clean_piece(m2.group(1).replace(" ", ""))
            rest = _clean_piece(rest[:m2.start()])
        nm = normalize_person_name(rest) if rest else ""
        return (pid, nm, yrs)

    nm2 = smart_titlecase(side_raw) if " " in side_raw else smart_titlecase(surname_given_from_token(side_raw)[0])
    return ("", nm2, "")

def _first_ancestor_display_and_ids(tokens):
    if not tokens:
        return ("", "", "")
    raw = _clean_piece(tokens[0])
    if not raw:
        return ("", "", "")

    parts = re.split(r"\s*(?:&| and )\s*", raw, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        pid, nm, yrs = _scrub_side_keep_name_years(raw)
        disp = nm or raw
        if yrs:
            disp = disp + " (%s)" % yrs
        return (disp, pid, "")

    f_id, f_nm, f_yrs = _scrub_side_keep_name_years(parts[0])
    m_id, m_nm, m_yrs = _scrub_side_keep_name_years(parts[1])

    f_disp = f_nm or normalize_person_name(parts[0])
    m_disp = m_nm or normalize_person_name(parts[1])

    if f_yrs:
        f_disp = f_disp + " (%s)" % f_yrs
    if m_yrs:
        m_disp = m_disp + " (%s)" % m_yrs

    disp = "%s%s%s" % (f_disp, LINEAGE_SPOUSE_SEP, m_disp)
    return (disp, f_id, m_id)

# ---------- 4.2) Load authority first-ancestor pairs ----------
def load_authority_first_ancestor_map() -> dict:
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        raise RuntimeError("Missing FTP creds; cannot download authority file %s" % AUTH_REMOTE_NAME)

    with ftp_connect() as ftps:
        try:
            ftps.cwd(AUTH_REMOTE_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, AUTH_BASENAME, AUTH_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass

    if not ok:
        raise RuntimeError("Authority file not found on server: /%s" % _remote_path(AUTH_REMOTE_NAME))

    adf = _read_csv_anyenc(AUTH_LOCAL_CACHE)

    def find_col(df0, patterns, prefer_exact=None):
        cols = list(df0.columns)
        lowmap = {c.lower(): c for c in cols}
        if prefer_exact:
            for name in prefer_exact:
                if name in df0.columns:
                    return name
                if name and name.lower() in lowmap:
                    return lowmap[name.lower()]
        for pat in patterns:
            rx = re.compile(pat, re.I)
            for c in cols:
                if rx.search(c):
                    return c
        return None

    a1_col  = find_col(adf, [r"ancestor1.*_id$", r"firstpair_ancestor1_id$"], ["FirstPair_Ancestor1_ID"])
    a2_col  = find_col(adf, [r"ancestor2.*_id$", r"firstpair_ancestor2_id$"], ["FirstPair_Ancestor2_ID"])
    key_col = find_col(adf, [r"firstpair_lastfirst$"], ["FirstPair_LastFirst"])

    if not (a1_col and a2_col and key_col):
        raise RuntimeError("Authority CSV missing required columns. Need Ancestor1_ID, Ancestor2_ID, FirstPair_LastFirst.")

    m = {}
    for _, r in adf.iterrows():
        a1 = str(r.get(a1_col, "")).strip()
        a2 = str(r.get(a2_col, "")).strip()
        k  = str(r.get(key_col, "")).strip()
        if not (a1 and a2 and k):
            continue
        m[(a1.upper(), a2.upper())] = k
        m[(a2.upper(), a1.upper())] = k

    if not m:
        raise RuntimeError("Authority mapping built empty from %s" % AUTH_LOCAL_CACHE)

    print("[OK] Authority first-ancestor map: %d pairs" % (len(m)//2))
    return m

AUTH_COUPLE_KEY_MAP = load_authority_first_ancestor_map()

# ---------- 5) Read main CSV ----------
def find_col(df0, patterns, prefer_exact=None):
    cols = list(df0.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df0.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

df = _read_csv_anyenc(CSV_IN)
print("[OK] Loaded CSV: %d rows, %d cols" % (len(df), len(df.columns)))

id_col    = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
name_col  = find_col(df, [r"^name$"], ["Name"])
cm_col    = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
path_col  = find_col(df, [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
                     ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"])

for req, nm in [(id_col,"ID#/PersonID"), (match_col,"Match to"), (name_col,"Name"), (cm_col,"cM"), (path_col,"Lineage")]:
    if not req:
        raise ValueError("CSV missing required column: %s" % nm)

# ---------- 5.1) Vitals (FIXED to match repaired Cell2k/Cell2c style) ----------
AUTOSOMAL_MATCHES = ""
LAST_UPDATED_TEXT = ""

def _friendly_ts_from_utc(raw):
    s = str(raw or "").strip()
    if not s:
        return "(unknown)"
    s = s.replace("UTC", "").replace("utc", "").strip()
    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(s, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    dt_est = dt_utc - timedelta(hours=5)  # site convention: UTC-5, no DST
    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24  = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12  = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _format_int_with_commas(s):
    if s is None:
        return ""
    t = re.sub(r"[^0-9\-]", "", str(s))
    if not t:
        return ""
    try:
        return "{:,}".format(int(t))
    except Exception:
        return t

def _scan_vitals_text_iter(text_iter):
    """
    Shared parser used by Cell2k/Cell2c pattern:
    - last_updated_text: <utc timestamp>  (case-insensitive)
    - Records tagged and filtered by NPFX ... <number>
    - After manual filter, total records: <number> (fallback for autosomal if needed)
    """
    autosomal_raw = None
    last_text_raw = None

    rx_last = re.compile(r"^\s*last_updated_text\s*:\s*(.+)\s*$", re.I)
    rx_npf  = re.compile(r"records\s+tagged\s+and\s+filtered\s+by\s+npfx", re.I)
    rx_after = re.compile(r"^\s*after\s+manual\s+filter,\s*total\s+records\s*:\s*(.+)\s*$", re.I)

    for raw in text_iter:
        line = str(raw or "").strip()
        if not line:
            continue

        # last_updated_text
        if last_text_raw is None:
            m = rx_last.match(line)
            if m:
                last_text_raw = m.group(1).strip()

        # autosomal preferred: NPFX line
        if autosomal_raw is None and rx_npf.search(line):
            m2 = re.search(r"(\d[\d,]*)", line)
            if m2:
                autosomal_raw = m2.group(1).strip()

        # autosomal fallback: "After manual filter..." line
        if autosomal_raw is None:
            m3 = rx_after.match(line)
            if m3:
                mnum = re.search(r"(\d[\d,]*)", m3.group(1))
                if mnum:
                    autosomal_raw = mnum.group(1).strip()

        if autosomal_raw is not None and last_text_raw is not None:
            break

    return autosomal_raw, last_text_raw

def _load_vitals(path):
    global AUTOSOMAL_MATCHES, LAST_UPDATED_TEXT
    AUTOSOMAL_MATCHES = ""
    LAST_UPDATED_TEXT = ""

    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; header will omit counts and last-updated text.")
        return

    vdf = _read_csv_anyenc(path)

    # Preferred: parse from 'line' column (matches repaired Cell2k/Cell2c)
    autosomal_raw = None
    last_text_raw = None
    if "line" in [c.lower() for c in vdf.columns]:
        # find actual column name preserving case
        col_map = {c.lower(): c for c in vdf.columns}
        line_col = col_map.get("line")
        autosomal_raw, last_text_raw = _scan_vitals_text_iter(vdf[line_col].astype(str).tolist())
    else:
        # Fallback: scan all cells flattened
        flat = [str(cell) for row in vdf.astype(str).values.tolist() for cell in row]
        autosomal_raw, last_text_raw = _scan_vitals_text_iter(flat)

    if last_text_raw is not None:
        LAST_UPDATED_TEXT = _friendly_ts_from_utc(last_text_raw)
    else:
        LAST_UPDATED_TEXT = ""

    AUTOSOMAL_MATCHES = _format_int_with_commas(autosomal_raw) if autosomal_raw else ""

    print("[VITALS] autosomal=%s  last_updated=%s" % (
        AUTOSOMAL_MATCHES or "(blank)",
        LAST_UPDATED_TEXT or "(blank)",
    ))

_load_vitals(VITALS_CSV)

# ---------- 6) Transform + compute authority sort key ----------
_setup_resolver()

out_match_to = []
out_auth_key = []
out_summary  = []
out_sort_key = []

def _canon_side(txt):
    t = re.sub(r"\([^)]*\)", "", str(txt or ""))
    t = re.sub(r"[^A-Za-z0-9]+", "", t).lower()
    return t

for _, row in df.iterrows():
    subject_raw  = row.get(match_col, "")
    subject_name = normalize_person_name(resolve_match_to(subject_raw))
    subject_name_html = "<strong>%s</strong>" % _html.escape(subject_name or "", quote=False)

    pid          = extract_person_id(row.get(id_col, ""))
    matchee_raw  = row.get(name_col, "")
    matchee_name = norm_matchee_name(matchee_raw) or subject_name

    if pid:
        matchee_url = (
            "%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15"
            % (TNG_BASE, pid, TNG_TREE)
        )
        matchee_name_html = '<a href="%s" target="_blank" rel="noopener">%s</a>' % (
            _html.escape(matchee_url, quote=True),
            _html.escape(matchee_name or "", quote=False),
        )
    else:
        matchee_name_html = _html.escape(matchee_name or "", quote=False)

    cm_val     = row.get(cm_col, "0")
    tokens     = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)

    couple_disp, f_id, m_id = _first_ancestor_display_and_ids(tokens)

    auth_key = ""
    if f_id and m_id:
        auth_key = AUTH_COUPLE_KEY_MAP.get((f_id.upper(), m_id.upper()), "")

    if not auth_key:
        parts = [p.strip() for p in re.split(r"\s*&\s*", couple_disp, maxsplit=1)]
        if len(parts) == 2:
            auth_key = _canon_side(parts[0]) + "&" + _canon_side(parts[1])
        else:
            auth_key = _canon_side(couple_disp)

    couple_html = _html.escape(couple_disp or "", quote=False) if couple_disp else ""
    summary_html = build_header(subject_name_html, cm_val, matchee_name_html, gens_total, couple_html)

    out_match_to.append(_html.escape(subject_name or "", quote=False))
    out_auth_key.append(_html.escape(auth_key or "", quote=False))
    out_summary.append(summary_html)
    out_sort_key.append(auth_key or "zzzzzzzzzzzzzzzzzzzzzzzz")

df_out = pd.DataFrame({
    "Match to": out_match_to,
    "First Ancestor Key": out_auth_key,
    "Match Summary": out_summary,
    "__sort__": out_sort_key,
})
df_out = df_out.sort_values(by="__sort__", kind="mergesort").drop(columns=["__sort__"]).reset_index(drop=True)

# ---------- 7) HTML ----------
ROOT_VAR_STYLE = '<style type="text/css">:root{--table-width-px:%dpx;}</style>' % int(TABLE_TOTAL_WIDTH_PX)

updated_label = 'Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_TEXT or "")
_updated_parts = [updated_label]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
_updated_parts.append('Showing: <span id="showing-count"></span>')

UPDATED_BLOCK = '<div class="updated centerline">' + ' &nbsp;|&nbsp; '.join([p for p in _updated_parts if p.strip()]) + '</div>'

CONTROLS_BLOCK = (
    '<div class="controls controls-spaced centerline">'
    '<input type="text" id="search-box" class="search" size="28" value="" placeholder="Search&amp;hellip;" />'
    "</div>"
)

LATE_OVERRIDE_BLOCK = ""

# Build fixed-width table (3 cols)
col_headers = [
    ("Match to", "center"),
    ("First Ancestor", "center"),
    ("Match Summary", "left"),
]
col_data = [
    df_out["Match to"].tolist(),
    df_out["First Ancestor Key"].tolist(),
    df_out["Match Summary"].tolist(),
]

thead_cells = []
for idx, (hdr, align) in enumerate(col_headers):
    wpx = COL_WIDTHS[idx]
    style_attr = "width:%dpx; display:table-cell !important;" % wpx
    if align == "center":
        thead_cells.append('<th class="center-header" style="%s">%s</th>' % (style_attr, hdr))
    else:
        thead_cells.append('<th class="left-header" style="%s">%s</th>' % (style_attr, hdr))

thead_html = (
    '<thead style="display:table-header-group !important;">\n'
    '  <tr style="display:table-row !important;">'
    + "".join(thead_cells)
    + "</tr>\n</thead>"
)

tbody_lines = ["<tbody>"]
for r in range(len(df_out)):
    cells = []
    for c in range(len(col_headers)):
        wpx = COL_WIDTHS[c]
        val = col_data[c][r]
        val_str = "" if val is None else str(val)
        cells.append('<td style="width:%dpx;">%s</td>' % (wpx, val_str))
    tbody_lines.append("  <tr>" + "".join(cells) + "</tr>")
tbody_lines.append("</tbody>")
tbody_html = "\n".join(tbody_lines)

html_table = (
    '<table border="1" class="dataframe sortable dna-register-table" id="refactor-table">'
    + thead_html + "\n" + tbody_html + "</table>"
)

SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div class="table-scroll" id="bottom-scroll">%s</div>'
    "</div>"
) % (html_table,)

JS_NAV_REPAIR = ""

page_tpl = Template("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ONS Yates Study Autosomal DNA Register</title>
$HEAD_LINK
$ROOT_VAR_STYLE
</head>
<body id="top">
<div class="wrap">
  <h1 class="centerline">ONS Yates Study Autosomal DNA Register</h1>
  $UPDATED_BLOCK
  $NAV_BLOCK
  $LATE_OVERRIDE_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>

$JS_NAV_REPAIR

<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '')
      .replace(/\\s+/g,' ')
      .trim()
      .toLowerCase();
  }

  function formatWithCommas(n){
    try{
      var x = parseInt(String(n||'').replace(/[^0-9\\-]/g,''), 10);
      if(isNaN(x)) return '';
      return x.toLocaleString('en-US');
    }catch(e){
      return String(n||'');
    }
  }

  function visibleRowCount(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return 0;
    var rows = tbl.tBodies[0].rows, n = 0;
    for(var i=0;i<rows.length;i++){
      if(rows[i].style.display !== 'none') n++;
    }
    return n;
  }

  function updateShowing(){
    var el = document.getElementById('showing-count');
    if(!el) return;
    el.textContent = formatWithCommas(visibleRowCount());
  }

  function sortTable(tbl, colIndex, dir){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;
    var rows = Array.prototype.slice.call(tb.rows || []);
    var asc  = (dir === 'asc');

    rows.sort(function(a,b){
      var A = textOf(a.cells[colIndex]),
          B = textOf(b.cells[colIndex]);

      var nA = parseFloat(A.replace(/[^0-9.\\-]/g,'')),
          nB = parseFloat(B.replace(/[^0-9.\\-]/g,''));

      if(!isNaN(nA) && !isNaN(nB)){
        return asc ? (nA - nB) : (nB - nA);
      }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });

    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++){
      frag.appendChild(rows[i]);
    }
    tb.appendChild(frag);
    updateShowing();
  }

  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;

    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;

    for(var i=0;i<ths.length;i++){
      (function(idx){
        var th  = ths[idx];
        var dir = 'asc';
        th.addEventListener('click', function(){
          dir = (dir === 'asc') ? 'desc' : 'asc';
          for (var j = 0; j < ths.length; j++){
            ths[j].innerHTML = ths[j].innerHTML.replace(/\\s+\\(asc\\)|\\s+\\(desc\\)/,'');
          }
          th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');
          sortTable(tbl, idx, dir);
        }, false);
      })(i);
    }
  }

  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\\+/g,' ')) : '';
  }

  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;

    var tb   = tbl.tBodies[0];
    var rows = Array.prototype.slice.call(tb.rows || []);

    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\\s+/g,' ').toLowerCase();
    }

    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt  = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
      updateShowing();
    }

    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }

    box.addEventListener('input',  onInput, false);
    box.addEventListener('search', onInput, false);

    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }

  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindSearch();
    updateShowing();
  });
})();
//]]>
</script>

</body>
</html>
""")

final_html = page_tpl.safe_substitute(
    HEAD_LINK           = HEAD_LINK,
    ROOT_VAR_STYLE      = ROOT_VAR_STYLE,
    UPDATED_BLOCK       = UPDATED_BLOCK,
    NAV_BLOCK           = NAV_BLOCK,
    NAV_FALLBACK_HTML   = NAV_FALLBACK_HTML,
    LATE_OVERRIDE_BLOCK = LATE_OVERRIDE_BLOCK,
    CONTROLS_BLOCK      = CONTROLS_BLOCK,
    SCROLL_WRAPPER      = SCROLL_WRAPPER,
    JS_NAV_REPAIR       = JS_NAV_REPAIR,
)

with open(LOCAL_HTML, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
    f.write(final_html)
print("[OK] Saved render: %s" % os.path.abspath(LOCAL_HTML))

print("[DEBUG] SSI nav include present:", ("<!--#include" in final_html))

# ---------- 8) Upload ----------
def save_and_upload_all():
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return
    try:
        ftps = ftp_connect()

        try:
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_CANON))
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_LEG))
        except Exception as e:
            print("[WARN] Upload main HTML failed: %s" % e)

        print("\\n--- SIZE Verification (if supported) ---")
        for p in [_remote_path(REMOTE_HTML_CANON), _remote_path(REMOTE_HTML_LEG)]:
            sz = ftp_size(ftps, p)
            print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\\n--- Open URLs ---")
        print("Canonical: https://yates.one-name.net/partials/yates_ancestor_register.shtml")
        print("Legacy:    https://yates.one-name.net/partials/ons_yates_dna_register.shtml")
        print("CSS:       https://yates.one-name.net/partials/partials_unified.css")
    except Exception as e:
        print("[FAIL] FTP session: %s" % e)
        traceback.print_exc()

save_and_upload_all()

print("\\n--- Cell 2 complete (NAVREPAIR1+VITALSFIX1: vitals parsing aligned to Cell2k/Cell2c) ---")
# ====== CUT STOP [1/1] CELL 2 (3-column register) - NAV+HEADER REPAIR + SSI-safe headers ======


[CONFIRM] Golden Rules active | Cell=Cell2_3Col_AuthorityFirstAncestor | Version=2026.02.01-UNIFIED-BASELINE-NAVREPAIR1+VITALSFIX1 | Encoding=ISO-8859-15
[LAYOUT] TABLE_TOTAL_WIDTH_PX=1880
[LAYOUT] Column widths (px): 1=220 2=420 3=1240
[PULL] first_ancestor_pairs.csv -> /content/first_ancestor_pairs.server.csv
[OK] Authority first-ancestor map: 56 pairs
[OK] Loaded CSV: 226 rows, 6 cols
[VITALS] autosomal=1,700  last_updated=February 1, 2026 8:32 PM
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 94 codes
[OK] Saved render: /content/yates_ancestor_register.shtml
[DEBUG] SSI nav include present: True
[PUT] yates_ancestor_register.shtml -> partials/yates_ancestor_register.shtml
[PUT] yates_ancestor_register.shtml -> partials/ons_yates_dna_register.shtml
\n--- SIZE Verification (if supported) ---
partials/yates_ancestor_register.shtml : 116693
partials/ons_yates_dna_register.shtml : 116693
\n--- Open URLs ---
Canonical: https://yates.one-name.n