<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/New_Gold__Orchestrator_20251130_1200.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PIP

In [1]:
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter
!pip install mlxtend
!pip caas_jupyter_tools

Collecting python-gedcom
  Downloading python_gedcom-1.0.0-py2.py3-none-any.whl.metadata (15 kB)
Downloading python_gedcom-1.0.0-py2.py3-none-any.whl (35 kB)
Installing collected packages: python-gedcom
Successfully installed python-gedcom-1.0.0
Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9
ERROR: unknown command "caas_jupyter_tools"


# Cell 0

In [None]:
# ====== CUT START [1/1] CELL 0 - Orchestrator (Authority Scripts + Latest GEDCOM) ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.11.30-G3)
# - Complete & runnable Colab cell: one contiguous block, no fragments.
# - Source ASCII-only; any file writes must use encoding="iso-8859-15", errors="xmlcharrefreplace".
# - Authority:
#     * Colab work dir: /content
#     * Scripts pulled from: /partials/colab_cells/*.py  (server)
#     * Latest GEDCOM pulled from: /tng/gedcom/*.ged     (server)
# - Deterministic audit banner:
#   [CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2025.11.30-G3 | Encoding=ISO-8859-15
# - Execution order (after pulls): cell1.py, cell2.py, cell2b.py, cell2c.py, cell3.py
# ====================================================================

print("[CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2025.11.30-G3 | Encoding=ISO-8859-15")

import os, socket, traceback, hashlib
from ftplib import FTP_TLS, all_errors

# ---------- 0) Env / secrets ----------

def _get_env(k, default=""):
    try:
        from google.colab import userdata  # type: ignore
        v = userdata.get(k)
        return v if v is not None else os.environ.get(k, default)
    except Exception:
        return os.environ.get(k, default)

FTP_HOST = (_get_env("FTP_HOST", "") or "").strip()
FTP_USER = (_get_env("FTP_USER", "") or "").strip()
FTP_PASS = _get_env("FTP_PASS", "") or ""
FTP_PORT = int(_get_env("FTP_PORT", "21") or "21")
FTP_DIR  = (_get_env("FTP_DIR", "") or "").strip().strip("/")

def _mask(s, keep=3):
    s = "" if s is None else str(s)
    if not s:
        return "(empty)"
    return (s[:keep] + "***" + s[-keep:]) if len(s) > keep * 2 else s[0:1] + "***"

print(
    "[ENV] HOST=%s  USER=%s  PASS=%s  PORT=%d  DIR=%s"
    % (_mask(FTP_HOST), _mask(FTP_USER, 2), "***", FTP_PORT, ("/" + FTP_DIR) if FTP_DIR else "(root)")
)

if not (FTP_HOST and FTP_USER and FTP_PASS):
    raise SystemExit("[FATAL] Missing FTP_HOST/FTP_USER/FTP_PASS; cannot run orchestrator.")

# ---------- 1) FTPS helpers ----------

FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
PASSIVE_MODE = True

def _ftps_connect():
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    ftps.connect(FTP_HOST, FTP_PORT)
    ftps.auth()  # Explicit FTPS
    ftps.login(FTP_USER, FTP_PASS)
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(PASSIVE_MODE)
    except Exception:
        pass
    if FTP_DIR:
        for seg in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.cwd(seg)
            except all_errors:
                try:
                    ftps.mkd(seg)
                except all_errors:
                    pass
                ftps.cwd(seg)
    return ftps

def _sha256_of_file(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            h.update(chunk)
    return h.hexdigest()[:16]

# ---------- 2) Pull authority scripts from /partials/colab_cells ----------

SCRIPT_REMOTE_DIR = "/partials/colab_cells"
SCRIPT_NAMES = ["cell1.py", "cell2.py", "cell2b.py", "cell2c.py", "cell3.py"]

def pull_authority_scripts():
    print("[STEP] Pulling authority scripts from server ...")
    pulled = 0
    try:
        ftps = _ftps_connect()
        try:
            pwd0 = ftps.pwd()
        except Exception:
            pwd0 = "(unknown)"
        print("[OK] Connected via FTPS (explicit AUTH TLS).")
        print("[INFO] Initial PWD on server: %s" % pwd0)

        # Move to /partials/colab_cells, independent of FTP_DIR
        try:
            # go to root, then walk to /partials/colab_cells
            try:
                ftps.cwd("/")
            except Exception:
                pass
            for seg in [p for p in SCRIPT_REMOTE_DIR.split("/") if p]:
                ftps.cwd(seg)
        except Exception as e:
            raise RuntimeError("Unable to cwd to %s (%s)" % (SCRIPT_REMOTE_DIR, e))

        print("[INFO] Using remote dir for scripts: %s" % SCRIPT_REMOTE_DIR)

        os.chdir("/content")
        for name in SCRIPT_NAMES:
            try:
                local_path = os.path.join("/content", name)
                with open(local_path, "wb") as f:
                    ftps.retrbinary("RETR " + name, f.write)
                sz = os.path.getsize(local_path)
                sh = _sha256_of_file(local_path)
                print("[PULL] %s -> %s  size=%d  sha256=%s" % (name, local_path, sz, sh))
                pulled += 1
            except Exception as e:
                print("[MISS] Could not pull %s: %s" % (name, e))

        try:
            ftps.quit()
        except Exception:
            pass
        print("[OK] Pulled %d script(s) from authority shelf." % pulled)
    except Exception as e:
        print("[ERROR] Script pull failed:", e)
        traceback.print_exc()
    return pulled

# ---------- 3) Pull latest GEDCOM from /tng/gedcom ----------

GEDCOM_REMOTE_DIR = "/tng/gedcom"
LOCAL_GED_DIR = "/content"

def _choose_latest_gedcom(ftps, names):
    """
    Pick the newest *.ged using MDTM if available; fallback = last alphabetically.
    """
    ged_files = [n for n in names if n.lower().endswith(".ged")]
    if not ged_files:
        return None

    latest_name = None
    latest_ts = None

    for nm in ged_files:
        ts = None
        try:
            # MDTM response: '213 YYYYMMDDhhmmss'
            resp = ftps.sendcmd("MDTM " + nm)
            parts = resp.strip().split()
            if len(parts) == 2 and parts[0] == "213":
                ts = parts[1]
        except Exception:
            ts = None
        if ts is None:
            # Fallback: lexical key
            ts = "00000000000000" + nm
        if latest_ts is None or ts > latest_ts:
            latest_ts = ts
            latest_name = nm
    return latest_name

def pull_latest_gedcom():
    print("\n[STEP] Pulling latest GEDCOM from %s ..." % GEDCOM_REMOTE_DIR)
    try:
        ftps = _ftps_connect()
        # Go to absolute /tng/gedcom irrespective of FTP_DIR
        try:
            ftps.cwd("/")
        except Exception:
            pass
        for seg in [p for p in GEDCOM_REMOTE_DIR.split("/") if p]:
            ftps.cwd(seg)

        names = ftps.nlst()
        if not names:
            print("[WARN] No files listed in %s" % GEDCOM_REMOTE_DIR)
            try:
                ftps.quit()
            except Exception:
                pass
            return

        latest = _choose_latest_gedcom(ftps, names)
        if not latest:
            print("[WARN] No .ged files found in %s" % GEDCOM_REMOTE_DIR)
            try:
                ftps.quit()
            except Exception:
                pass
            return

        print("[INFO] Latest GEDCOM on server: %s" % latest)

        # Clean any old local GEDCOMs so Cell 1 can't accidentally pick the wrong one
        try:
            for fname in os.listdir(LOCAL_GED_DIR):
                if fname.lower().endswith(".ged"):
                    try:
                        os.remove(os.path.join(LOCAL_GED_DIR, fname))
                        print("[CLEAN] Removed old local GEDCOM:", fname)
                    except Exception as e:
                        print("[WARN] Could not remove %s: %s" % (fname, e))
        except Exception as e:
            print("[WARN] Could not scan local GED dir:", e)

        # Download latest into /content with same filename
        local_path = os.path.join(LOCAL_GED_DIR, latest)
        with open(local_path, "wb") as f:
            ftps.retrbinary("RETR " + latest, f.write)
        try:
            ftps.quit()
        except Exception:
            pass

        sz = os.path.getsize(local_path)
        print("[OK] Pulled GEDCOM -> %s  size=%d bytes" % (local_path, sz))
        print("[INFO] Cell 1 will now see exactly one *.ged in /content.")
    except Exception as e:
        print("[WARN] GEDCOM pull failed; Cell 1 will use any existing local *.ged instead.")
        traceback.print_exc()

# ---------- 4) Run scripts in order ----------

def run_script(path):
    print("\n[RUN] %s" % path)
    if not os.path.exists(path):
        print("[SKIP] %s not found in /content." % path)
        return
    try:
        with open(path, "r", encoding="iso-8859-15", errors="ignore") as f:
            code = f.read()
        compiled = compile(code, path, "exec")
        exec(compiled, globals(), globals())
        print("[DONE] %s" % path)
    except SystemExit as e:
        print("[EXIT] %s exited: %s" % (path, e))
    except Exception as e:
        print("[ERROR] Exception while running %s: %s" % (path, e))
        traceback.print_exc()

def main():
    os.chdir("/content")
    pulled = pull_authority_scripts()
    if pulled == 0:
        print("[FATAL] No authority scripts pulled; aborting.")
        return

    # Always pull the latest GEDCOM snapshot before running Cell 1
    pull_latest_gedcom()

    script_order = ["cell1.py", "cell2.py", "cell2b.py", "cell2c.py", "cell3.py"]
    print("\n[STEP] Running scripts in order:", ", ".join(script_order))
    for s in script_order:
        run_script(os.path.join("/content", s))

main()

print("\n--- Cell 0 That's all folks, Orchestrator complete (authority scripts + latest GEDCOM pulled, then Cell1/2/2b/2c/3 executed) ---")
# ====== CUT STOP  [1/1] CELL 0 - Orchestrator (Authority Scripts + Latest GEDCOM) ======


[CONFIRM] Golden Rules active | Cell=Cell0_Orchestrator | Version=2025.11.30-G3 | Encoding=ISO-8859-15
[ENV] HOST=ftp***net  USER=ad***et  PASS=***  PORT=21  DIR=(root)
[STEP] Pulling authority scripts from server ...
[OK] Connected via FTPS (explicit AUTH TLS).
[INFO] Initial PWD on server: /
[INFO] Using remote dir for scripts: /partials/colab_cells
[PULL] cell1.py -> /content/cell1.py  size=25624  sha256=f4a9f8aaa5be4317
[PULL] cell2.py -> /content/cell2.py  size=39821  sha256=857ed34e45060d8d
[PULL] cell2b.py -> /content/cell2b.py  size=38691  sha256=610b68482191f255
[PULL] cell2c.py -> /content/cell2c.py  size=16653  sha256=91c1a583dd9a776d
[PULL] cell3.py -> /content/cell3.py  size=23320  sha256=e7091d2d79a4d827
[OK] Pulled 5 script(s) from authority shelf.

[STEP] Pulling latest GEDCOM from /tng/gedcom ...
[INFO] Latest GEDCOM on server: yates_study_2025.ged
[CLEAN] Removed old local GEDCOM: yates_study_2025.ged
[OK] Pulled GEDCOM -> /content/yates_study_2025.ged  size=38705063 

  last_updated_text = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")


Processing 93 individuals with chunk-based parallel...


Building Yates Lines (Stage 1):   0%|          | 0/93 [00:00<?, ?it/s]

# Cell 1

In [None]:
# ====== CUT START [1/1] CELL 1 - GEDCOM -> CSV + HTML + Upload (Explicit FTPS, ISO-8859-15) ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.11.27-G2)
# - Complete and runnable Colab cell, one contiguous block (no fragments).
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography via /partials/dna_tree_styles.css (HTML export only).
# - Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell1_FTPS_Explicit | Version=2025.11.27-G2 | Encoding=ISO-8859-15
# - Enforce ISO-8859-15 printable chars on writes; replace non-Latin with XML entities.
# =================================================================================================

import os, re, glob, logging, functools, socket, traceback, hashlib
from datetime import datetime
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
import numpy as np
from tqdm import tqdm
from ftplib import FTP_TLS, all_errors
from string import Template

CELL_NAME = "Cell1_FTPS_Explicit"
VERSION   = "2025.11.27-G2"

print("[CONFIRM] Golden Rules active | Cell=%s | Version=%s | Encoding=ISO-8859-15" % (CELL_NAME, VERSION))

# ---------- Logging ----------
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(CELL_NAME)

# ---------- Timezone helper (EST) ----------
def _now_est_string():
    """
    Return a human formatted EST timestamp like:
    November 27, 2025 8:09 PM
    """
    try:
        # Python 3.9+ standard library
        from zoneinfo import ZoneInfo
        tz = ZoneInfo("America/New_York")
        now = datetime.now(tz)
    except Exception:
        # Fallback: naive local time if zoneinfo is unavailable
        now = datetime.now()

    month_name = now.strftime("%B")
    day = now.day
    year = now.year
    hour_24 = now.hour
    minute = now.minute
    ampm = "AM" if hour_24 < 12 else "PM"
    hour_12 = hour_24 % 12
    if hour_12 == 0:
        hour_12 = 12
    return "%s %d, %d %d:%02d %s" % (month_name, day, year, hour_12, minute, ampm)

# ---------- Secrets (env or userdata) ----------
def _get_env(k, default=""):
    try:
        from google.colab import userdata  # type: ignore
        v = userdata.get(k)
        return v if v is not None else os.environ.get(k, default)
    except Exception:
        return os.environ.get(k, default)

FTP_HOST = (_get_env("FTP_HOST","") or "").strip()
FTP_USER = (_get_env("FTP_USER","") or "").strip()
FTP_PASS = _get_env("FTP_PASS","") or ""
FTP_PORT = int(_get_env("FTP_PORT","21") or "21")
FTP_DIR  = (_get_env("FTP_DIR","") or "").strip().strip("/")
PASSIVE_MODE = True

def _mask(s, keep=3):
    s = "" if s is None else str(s)
    if not s:
        return "(empty)"
    return (s[:keep] + "***" + s[-keep:]) if len(s) > keep * 2 else s[0:1] + "***"

print("[ENV] HOST=%s  USER=%s  PASS=%s  PORT=%d  DIR=%s" %
      (_mask(FTP_HOST), _mask(FTP_USER, 2), "***", FTP_PORT, ("/" + FTP_DIR) if FTP_DIR else "(root)"))

# ---------- FTPS (Explicit AUTH TLS) ----------
def _ftps_connect():
    if not (FTP_HOST and FTP_USER and FTP_PASS):
        raise RuntimeError("Missing FTP_HOST/FTP_USER/FTP_PASS.")
    socket.setdefaulttimeout(30)
    ftps = FTP_TLS(timeout=30)
    ftps.connect(FTP_HOST, FTP_PORT)
    ftps.auth()                 # Explicit FTPS: AUTH TLS before login
    ftps.login(FTP_USER, FTP_PASS)
    try:
        ftps.prot_p()           # Encrypt data channel
    except Exception:
        pass
    try:
        ftps.set_pasv(PASSIVE_MODE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            ftps.cwd(p)
    return ftps

def _ftps_ensure_dir(ftps, path):
    if not path:
        return
    for p in [p for p in path.split("/") if p]:
        try:
            ftps.cwd(p)
        except all_errors:
            try:
                ftps.mkd(p)
            except all_errors:
                pass
            ftps.cwd(p)

def _ftps_upload(ftps, local_path, remote_name):
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR " + remote_name, fh)
    print("[OK] Uploaded: %s -> %s/%s" % (local_path, ftps.pwd().rstrip("/"), remote_name))

# ---------- Outputs / Paths ----------
REMOTE_DIR        = "partials"
CSV_OUT_LOCAL     = "final_combined_df_with_value_labels.csv"
HTML_OUT_LOCAL    = "cell1_work_table.htm"
ABS_CSV_URL       = "/%s/%s" % (REMOTE_DIR, os.path.basename(CSV_OUT_LOCAL))
ABS_HOME_URL      = "/index.htm"

# vitals file used by Cell 2 / Cell 3
VITALS_CSV_PATH        = "dna_vitals.csv"
AUTOSOMAL_COUNT_TXT    = "autosomal_count.txt"  # legacy count for Cell 3 JS

# ---------- Minimal GEDCOM parse helpers ----------
anchor_gen1 = None
visited_pairs = set()
generation_table = []

class GedcomDataset:
    def __init__(self, gen_person):
        self.gen_person = gen_person
        self.extractable_detail = {}
        self.anchor_gen1 = None

    def add_extractable_detail(self, key, value):
        self.extractable_detail[key] = value

    def get_gen_person(self):
        name = self.extractable_detail.get("NAME", "") or ""
        parts = name.split("/", 1)
        first_name = parts[0].split(" ")[0] if parts else ""
        last_name  = parts[1].rstrip("/") if len(parts) > 1 else ""
        self.anchor_gen1 = last_name.replace(" ", "") + first_name.replace(" ", "")
        global anchor_gen1
        anchor_gen1 = self.anchor_gen1
        return self.gen_person.strip("@")

    def get_extractable_NPFX(self):
        return self.extractable_detail.get("NPFX", "") or ""

    def get_extractable_cm(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        if "&" in v:
            cm = v.split("&")[0].strip()
        elif "**" in v:
            cm = v.split("**")[0].strip()
        else:
            cm = v.strip()
        try:
            int(cm)
            return cm
        except Exception:
            return ""

    def get_extractable_sort(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        if "&" in v:
            s = v.split("&")[1]
            return (s.split("**")[0] if "**" in s else s).strip()
        return ""

    def get_extractable_YDNA(self):
        v = self.extractable_detail.get("NPFX", "") or ""
        return v.split("**")[1].strip() if "**" in v else ""

    def get_extractable_FAMC(self):
        return (self.extractable_detail.get("FAMC", "") or "").strip("@")

class Gedcom:
    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []
        self.filter_pool = []

        # Counters / vitals
        self.total_records = 0
        self.npfx_count = 0
        self.ydna_count = 0
        self.autosomal_count = 0
        self.after_manual_filter_total = 0

    def parse_gedcom(self):
        with open(self.file_name, "r", encoding="utf-8-sig") as f:
            lines = f.readlines()

        current = None
        npfx_count = 0
        ydna_count = 0
        total = 0

        for line in lines:
            parts = line.strip().split(" ", 2)
            if not parts or not parts[0].isdigit():
                continue
            level = int(parts[0])
            tag = parts[1]
            value = parts[2] if len(parts) > 2 else None

            if level == 0 and tag.startswith("@") and tag.endswith("@") and value == "INDI":
                total += 1
                current = GedcomDataset(tag)
                self.gedcom_datasets.append(current)
            elif current is not None:
                if level == 1 and tag in ["NAME", "FAMC"]:
                    current.add_extractable_detail(tag, value)
                elif level == 2 and tag == "NPFX":
                    npfx_count += 1
                    current.add_extractable_detail(tag, value)
                    if value and "**" in value:
                        ydna_count += 1

        autosomal = npfx_count - ydna_count

        # Store vitals on the instance
        self.total_records = total
        self.npfx_count = npfx_count
        self.ydna_count = ydna_count
        self.autosomal_count = autosomal

        print("GEDCOM contained %d total records" % total)
        print("Records tagged and filtered by NPFX: %d" % npfx_count)
        print("Records with YDNA information: %d" % ydna_count)
        print("Autosomal matches (NPFX minus YDNA): %d" % autosomal)

        # First-level filter: keep only records with NPFX
        for ds in self.gedcom_datasets:
            if ds.get_extractable_NPFX():
                self.filter_pool.append(ds)

        # Second-level manual filter via filtered_ids.xlsx (if present)
        try:
            df_filter = pd.read_excel("filtered_ids.xlsx")
            manual_ids = set(str(x) for x in df_filter["ID"])
            self.filter_pool = [d for d in self.filter_pool if d.get_gen_person() in manual_ids]
            print("After manual filter, total records: %d" % len(self.filter_pool))
        except FileNotFoundError:
            logger.warning("filtered_ids.xlsx not found. Skipping second-level manual filter.")

        # Final: record the post-filter count as a vital
        self.after_manual_filter_total = len(self.filter_pool)

        return autosomal

def _chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def _quick_extract_name(full_text):
    name_marker = "\n1 NAME "
    idx = full_text.find(name_marker)
    if idx == -1:
        if full_text.startswith("1 NAME "):
            idx = 0
        else:
            return "UnknownName"
    start = idx + len(name_marker)
    end = full_text.find("\n", start)
    end = len(full_text) if end == -1 else end
    name_line = full_text[start:end].strip()
    if "/" not in name_line:
        return name_line[:10].replace(" ", "")
    first_name, last_name = name_line.split("/", 1)
    last_name = last_name.replace("/", "").strip()
    return last_name[:10].replace(" ", "") + first_name[:10].replace(" ", "")

def _find_parents(individual_id, generation, parents_map):
    global visited_pairs, generation_table
    if individual_id not in parents_map:
        return
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return
    pair = (father_id, mother_id)
    if pair not in visited_pairs:
        visited_pairs.add(pair)
        generation_table.append((generation, pair))
    if father_id:
        _find_parents(father_id, generation + 1, parents_map)
    if mother_id:
        _find_parents(mother_id, generation + 1, parents_map)

def _find_distant(individual_id, parents_map, path=None):
    if path is None:
        path = []
    path.append(individual_id)
    if individual_id not in parents_map:
        return [path]
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return [path]
    paths = []
    if father_id:
        paths.extend(_find_distant(father_id, parents_map, path[:]))
    if mother_id:
        paths.extend(_find_distant(mother_id, parents_map, path[:]))
    return paths if paths else [path]

def _filter_lineage(winning_ids, gen_table, names_map):
    matching = []
    for generation, pair in gen_table:
        id1, id2 = pair
        if id1 in winning_ids or id2 in winning_ids:
            matching.append((generation, pair))
    matching.sort(key=lambda x: x[0])
    lines = []
    for _, pair in matching:
        name_pair = [names_map.get(pid, "UnknownName") for pid in pair]
        lines.append("%s&%s" % (name_pair[0], name_pair[1]))
    lines.reverse()
    return "~~~".join(lines)

def _process_record(individual_id, ged, parents_map, names_map):
    global generation_table, visited_pairs, anchor_gen1
    generation_table = []
    visited_pairs = set()
    _find_parents(individual_id, 1, parents_map)
    paths = _find_distant(individual_id, parents_map)
    best_score, best_path = None, None
    for path in paths:
        name_path = [names_map.get(pid, "UnknownName") for pid in path]
        score = sum((idx + 1) for idx, nm in enumerate(name_path) if "Yates" in nm)
        if best_score is None or score > best_score:
            best_score, best_path = score, path
    best_path = best_path or []
    best_ids  = [pid for pid in best_path if pid != individual_id]
    line_str  = _filter_lineage(set(best_ids), generation_table, names_map)
    cm_value = ""
    sort_value = ""
    ydna_value = ""
    for ds in ged.filter_pool:
        if ds.get_gen_person() == individual_id:
            cm_value   = ds.get_extractable_cm()
            sort_value = ds.get_extractable_sort()
            ydna_value = ds.get_extractable_YDNA()
            break
    short_name = names_map.get(individual_id, "UnknownName")
    return [individual_id, sort_value, short_name, cm_value, line_str, ydna_value]

# ---------- Main build ----------
def main():
    files = glob.glob("*.ged")
    if not files:
        print("No GEDCOM files found.")
        return False
    print("Automatically selecting the first GEDCOM file.")
    gedcom_path = files[0]

    # Parse GEDCOM, build datasets, and compute vitals
    ged = Gedcom(gedcom_path)
    autosomal_count = ged.parse_gedcom()

    # Legacy autosomal_count.txt (used by Cell 3 JS)
    with open(AUTOSOMAL_COUNT_TXT, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(str(autosomal_count))
    print("[OK] Wrote %s = %d" % (AUTOSOMAL_COUNT_TXT, autosomal_count))

    # EST-based LAST_UPDATED_TEXT for dna_vitals and HTML
    last_updated_text = _now_est_string()
    print("[OK] LAST_UPDATED_TEXT (EST) for dna_vitals.csv: %s" % last_updated_text)

    npfx_count = int(ged.npfx_count)
    after_manual_filter_total = int(ged.after_manual_filter_total or len(ged.filter_pool))

    vitals_lines = [
        "Records tagged and filtered by NPFX: %d" % npfx_count,
        "After manual filter, total records: %d" % after_manual_filter_total,
        "LAST_UPDATED_TEXT: %s" % last_updated_text,
    ]
    vitals_df = pd.DataFrame({"line": vitals_lines})
    vitals_df.to_csv(
        VITALS_CSV_PATH,
        index=False,
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    )
    print("[OK] Wrote dna_vitals.csv -> %s" % os.path.abspath(VITALS_CSV_PATH))

    # Re-read GEDCOM raw text for ancestor-building
    with open(gedcom_path, "r", encoding="utf-8") as f:
        raw = f.read()

    blocks = raw.split("\n0 ")
    all_records = {}
    for blk in blocks:
        blk = blk.strip()
        if not blk:
            continue
        flend = blk.find("\n")
        flend = len(blk) if flend == -1 else flend
        first_line = blk[:flend]
        if "@" in first_line:
            s = first_line.find("@") + 1
            e = first_line.find("@", s)
            rec_id = first_line[s:e].strip()
            all_records[rec_id] = blk

    parents_map, names_map, families = {}, {}, {}
    for rec_id, txt in all_records.items():
        if "FAM" in txt[:50]:
            father_idx = txt.find("1 HUSB @")
            husb_id = txt[father_idx + len("1 HUSB @"):txt.find("@", father_idx + len("1 HUSB @"))] if father_idx != -1 else None
            wife_idx = txt.find("1 WIFE @")
            wife_id = txt[wife_idx + len("1 WIFE @"):txt.find("@", wife_idx + len("1 WIFE @"))] if wife_idx != -1 else None
            kids = [ln.split("@")[1] for ln in txt.split("\n") if ln.strip().startswith("1 CHIL @")]
            families[rec_id] = (husb_id, wife_id, kids)

    for fam_id, (f_id, m_id, k_list) in families.items():
        for kid in k_list:
            parents_map[kid] = (f_id, m_id)

    for rec_id, txt in all_records.items():
        names_map[rec_id] = _quick_extract_name("\n" + txt)

    individual_ids = [d.get_gen_person() for d in ged.filter_pool]
    print("Processing %d individuals with chunk-based parallel..." % len(individual_ids))

    combined_rows = []
    chunk_size = 50
    max_workers = os.cpu_count() or 4
    from functools import partial as _partial

    with ProcessPoolExecutor(max_workers=max_workers) as ex, tqdm(
        total=len(individual_ids),
        desc="Building Yates Lines (Stage 1)"
    ) as pbar:
        for chunk in _chunks(individual_ids, chunk_size):
            func = _partial(_process_record, ged=ged, parents_map=parents_map, names_map=names_map)
            results = list(ex.map(func, chunk))
            combined_rows.extend(results)
            pbar.update(len(chunk))

    columns = ["ID#", "Match to", "Name", "cM", "Yates DNA Ancestral Line", "haplogroup"]
    df = pd.DataFrame(combined_rows, columns=columns)
    df.index += 1

    def _trim_prefix(row):
        prefix = (
            "YatesJohn&SearchingStill~~~"
            "YatesWilliam&SearchingStill~~~"
            "YatesWilliam&SearchingStill~~~"
            "YatesEdmund&CornellMargaret~~~"
            "YatesRichard&AshendonJoan~~~"
            "YatesJohn&HydeAlice~~~"
            "YatesThomas&FauconerElizabeth~~~"
        )
        s = str(row["Yates DNA Ancestral Line"])
        if s.startswith(prefix):
            row["Yates DNA Ancestral Line"] = s[len(prefix):]
        return row

    df = df.apply(_trim_prefix, axis=1)
    df.sort_values(by=["Yates DNA Ancestral Line"], inplace=True)

    # CSV (ISO-8859-15 as required)
    with open(CSV_OUT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(df.to_csv(index=False))
    logger.info("Exported CSV -> %s", CSV_OUT_LOCAL)

    # HTML (XHTML 1.0 Transitional; Times via external CSS is implied; inline minimal styles ok)
    final_cols = ["ID#", "cM", "haplogroup", "Match to", "Yates DNA Ancestral Line"]
    table_html = df.to_html(index=False, columns=final_cols, escape=False, border=1)

    page_tpl = Template("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Cell 1 Working Table</title>
<link rel="stylesheet" type="text/css" href="/partials/dna_tree_styles.css" />
<style type="text/css">
  html { scroll-behavior: smooth; }
  body { background:#ffffff; color:#222222; margin:0; padding:20px; }
  h1 { margin:0 0 8px 0; font-size:24px; line-height:1.2; text-align:center; }
  .meta { text-align:center; font-size:12px; color:#555555; margin:2px 0 12px 0; }
  .downloads { text-align:center; margin:4px 0 12px 0; font-size:13px; }
  a { color:#154b8b; text-decoration:none; }
  a:hover { text-decoration:underline; }
  table { width:100%%; border-collapse:collapse; }
  th, td { border:1px solid #333333; padding:6px 8px; vertical-align:top; }
  th { background:#e3eaf8; text-align:left; }
  td:nth-child(5) { text-align:left; white-space:normal; }
</style>
</head>
<body>
  <h1>Cell 1 Working Table</h1>
  <div class="meta">
    <a href="$HOME" target="_blank" rel="noopener">Home</a>
    &nbsp;|&nbsp; Last updated: $LAST_UPDATED_TEXT
    &nbsp;|&nbsp; Download: <a href="$CSV">$CSV</a>
  </div>
  <div class="downloads"><a href="$CSV">/partials/$CSV_NAME</a></div>
  $TABLE
</body>
</html>""")

    page = page_tpl.safe_substitute(
        HOME=ABS_HOME_URL,
        CSV=ABS_CSV_URL,
        CSV_NAME=os.path.basename(ABS_CSV_URL),
        TABLE=table_html,
        LAST_UPDATED_TEXT=last_updated_text,
    )

    with open(HTML_OUT_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(page)
    logger.info("Exported HTML -> %s", HTML_OUT_LOCAL)

    print("[SUMMARY] GEDCOM total records: %d" % ged.total_records)
    print("[SUMMARY] NPFX-tagged records: %d" % ged.npfx_count)
    print("[SUMMARY] Autosomal matches (NPFX minus YDNA): %d" % ged.autosomal_count)
    print("[SUMMARY] After manual filter, total records: %d" % ged.after_manual_filter_total)

    return True

ok = main()

# ---------- Upload to /partials (Explicit FTPS AUTH TLS) ----------
if ok and FTP_HOST and FTP_USER and FTP_PASS:
    print("[INFO] Uploading artifacts to /partials/ ...")
    try:
        ftps = _ftps_connect()
        _ftps_ensure_dir(ftps, "partials")
        try:
            _ftps_upload(ftps, CSV_OUT_LOCAL, os.path.basename(CSV_OUT_LOCAL))
        except Exception as e:
            print("[ERROR] CSV upload failed:", e)
        try:
            _ftps_upload(ftps, HTML_OUT_LOCAL, os.path.basename(HTML_OUT_LOCAL))
        except Exception as e:
            print("[ERROR] HTML upload failed:", e)

        # Also upload dna_vitals.csv and autosomal_count.txt so they are in the same authority path
        try:
            _ftps_upload(ftps, VITALS_CSV_PATH, os.path.basename(VITALS_CSV_PATH))
        except Exception as e:
            print("[ERROR] dna_vitals.csv upload failed:", e)
        try:
            _ftps_upload(ftps, AUTOSOMAL_COUNT_TXT, os.path.basename(AUTOSOMAL_COUNT_TXT))
        except Exception as e:
            print("[ERROR] autosomal_count.txt upload failed:", e)

        try:
            ftps.quit()
        except Exception:
            pass
        print("[OK] Uploads complete to /partials/")
    except Exception as e:
        print("[ERROR] FTP session failed:", e)
        traceback.print_exc()
else:
    print("[INFO] Skipping FTP upload (missing creds or build failed).")

print("\n--- Cell 1 Complete: CSV + HTML + dna_vitals.csv + autosomal_count.txt built with ISO-8859-15; explicit FTPS used. ---")
# ====== CUT STOP  [1/1] CELL 1 - GEDCOM -> CSV + HTML + Upload (Explicit FTPS, ISO-8859-15) ======


[CONFIRM] Golden Rules active | Cell=Cell1_FTPS_Explicit | Version=2025.11.27-G2 | Encoding=ISO-8859-15
[ENV] HOST=ftp***net  USER=ad***et  PASS=***  PORT=21  DIR=(root)
Automatically selecting the first GEDCOM file.
GEDCOM contained 62553 total records
Records tagged and filtered by NPFX: 1604
Records with YDNA information: 0
Autosomal matches (NPFX minus YDNA): 1604
After manual filter, total records: 7
[OK] Wrote autosomal_count.txt = 1604
[OK] LAST_UPDATED_TEXT (EST) for dna_vitals.csv: December 2, 2025 7:36 PM
[OK] Wrote dna_vitals.csv -> /content/dna_vitals.csv
Processing 7 individuals with chunk-based parallel...


Building Yates Lines (Stage 1): 100%|██████████| 7/7 [00:03<00:00,  1.88it/s]


[SUMMARY] GEDCOM total records: 62553
[SUMMARY] NPFX-tagged records: 1604
[SUMMARY] Autosomal matches (NPFX minus YDNA): 1604
[SUMMARY] After manual filter, total records: 7
[INFO] Uploading artifacts to /partials/ ...
[OK] Uploaded: final_combined_df_with_value_labels.csv -> /partials/final_combined_df_with_value_labels.csv
[OK] Uploaded: cell1_work_table.htm -> /partials/cell1_work_table.htm
[OK] Uploaded: dna_vitals.csv -> /partials/dna_vitals.csv
[OK] Uploaded: autosomal_count.txt -> /partials/autosomal_count.txt
[OK] Uploads complete to /partials/

--- Cell 1 Complete: CSV + HTML + dna_vitals.csv + autosomal_count.txt built with ISO-8859-15; explicit FTPS used. ---


# Cell 2

In [3]:
# ====== CUT START [1/1] CELL 2 - Build + Publish DNA Register (All styling via stylesheet) ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.12.01-G1)
# - Complete and runnable Colab cell, one contiguous block.
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography/layout via /partials/dna_tree_styles.css (this cell writes it).
# - Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell2_AllStyles_ExternalCSS | Version=2025.12.01-G1 | Encoding=ISO-8859-15
# - Enforce ISO-8859-15 printable chars on writes.

print("[CONFIRM] Golden Rules active | Cell=Cell2_AllStyles_ExternalCSS | Version=2025.12.01-G1 | Encoding=ISO-8859-15")

import os, re, posixpath, socket, traceback, urllib.parse as _u
from ftplib import FTP_TLS
from datetime import datetime, timedelta
import pandas as pd
import html as _html
from string import Template

# ---------- A) LAYOUT CONTROL BLOCK (ONE PLACE TO TUNE WIDTHS) ----------
# Col 1 = Match to
# Col 2 = Name
# Col 3 = cM
# Col 4 = Match Summary
# Col 5 = Website
# Col 6 = Yates DNA Ancestral Lines

COL_1_PX = 80
COL_2_PX = 220
COL_3_PX = 60
COL_4_PX = 1200
COL_5_PX = 120
COL_6_PX = 1800

COL_WIDTHS = [COL_1_PX, COL_2_PX, COL_3_PX, COL_4_PX, COL_5_PX, COL_6_PX]
TABLE_TOTAL_WIDTH_PX = sum(COL_WIDTHS)

# Hide these column indices (zero-based) in the rendered table: 2,3,5
HIDE_COLS_ZERO_BASED = {1, 2, 4}

print("[LAYOUT] TABLE_TOTAL_WIDTH_PX=%d" % TABLE_TOTAL_WIDTH_PX)
print("[LAYOUT] Column widths (px): 1=%d 2=%d 3=%d 4=%d 5=%d 6=%d" %
      (COL_1_PX, COL_2_PX, COL_3_PX, COL_4_PX, COL_5_PX, COL_6_PX))

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

# NOTE: main register pages now .shtml (SSI pages only)
LOCAL_HTML        = "yates_ancestor_register.shtml"
REMOTE_HTML_CANON = posixpath.join("partials", "yates_ancestor_register.shtml")
REMOTE_HTML_LEG   = posixpath.join("partials", "ons_yates_dna_register.shtml")

DNA_REGISTER_ABS = "https://yates.one-name.net/partials/ons_yates_dna_register.shtml"
TREES_ABS        = "https://yates.one-name.net/partials/just-trees.htm"

EXPORT_BASENAME = "yates_ancestor_register"
LOCAL_CSV  = "%s.csv"  % EXPORT_BASENAME
LOCAL_XLSX = "%s.xlsx" % EXPORT_BASENAME
REMOTE_CSV  = posixpath.join("partials", os.path.basename(LOCAL_CSV))
REMOTE_XLSX = posixpath.join("partials", os.path.basename(LOCAL_XLSX))

FTP_DIR  = (os.environ.get("FTP_DIR", "") or "").strip()
TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

HOME_URL        = "https://yates.one-name.net/partials/yates_ancestor_register.shtml"
REMOTE_NAME_ABS = HOME_URL

ARROW_ENTITY         = "&rarr;"
REMOVE_PERIOD_AT_END = True

SERVER_PARTIALS_DIR        = "partials"
SERVER_MAPPING_BASENAME    = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE      = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

# Stylesheet
STYLESHEET_BASENAME = "dna_tree_styles.css"
STYLESHEET_LOCAL    = os.path.join("partials", STYLESHEET_BASENAME)
STYLESHEET_REMOTE   = posixpath.join("partials", STYLESHEET_BASENAME)
CSS_VERSION         = "v2025-12-01-g1"
STYLESHEET_HREF     = "/partials/%s?%s" % (STYLESHEET_BASENAME, CSS_VERSION)
HEAD_LINK           = '<link rel="stylesheet" type="text/css" href="%s" />' % STYLESHEET_HREF

# Path for vitals from Cell 1
VITALS_CSV = "dna_vitals.csv"

# ---------- 2) FTP ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    for seg in [p for p in remote_path.split("/")[:-1] if p and p != "."]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

# ---------- 3) Resolver ----------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError("Unable to read mapping CSV %s: %s" % (path, last))
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"]     = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return df

def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))

MATCH_TO_UNMASKED = {}

def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()

def resolve_match_to(code: str) -> str:
    if not isinstance(code, str):
        return ""
    return MATCH_TO_UNMASKED.get(code.strip().lower(), code)

# ---------- 4) Name/text utils ----------
SEP_RE = re.compile(r"\s*(?:\u2192|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

_PARTICLES = {
    "de", "del", "della", "der", "van", "von", "da", "dos", "das", "di", "la", "le", "du", "of"
}

def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(['’])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])",  lambda m: "Mc"  + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        if i > 0 and w.lower() in _PARTICLES:
            out.append(w.lower())
        else:
            out.append(_smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i-1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname      = token[:idx]
    given        = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)

def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)

_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm    = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname          = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()

def derive_common_from_first_token(tokens):
    if not tokens:
        return ("", "")
    first = _clean_piece(tokens[0])
    parts = re.split(r"\s*(?:&| and )\s*", first, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return ("", "")
    def _norm(s):
        if " " in s:
            return smart_titlecase(s)
        return smart_titlecase(surname_given_from_token(s)[0])
    return (_norm(parts[0]), _norm(parts[1]))

def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats

def build_header(subject_name_html, cm_val, matchee_name_html, gens, husband, wife):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        "%s & %s." % (husband, wife),
    ]
    s = " ".join(parts)
    if REMOVE_PERIOD_AT_END:
        s = re.sub(r"\.\s*$", "", s)
    return s

ID_PAT = re.compile(r"\bI\d+\b", re.I)

def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""

# ---------- 5) Read CSV ----------
def find_col(df, patterns, prefer_exact=None):
    cols   = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

_encs     = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
_last_err = None
df        = None
for _e in _encs:
    try:
        df = pd.read_csv(CSV_IN, encoding=_e, dtype=str, keep_default_na=False)
        break
    except Exception as _ex:
        _last_err = _ex
        df        = None
if df is None:
    raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, _last_err))
print("[OK] Loaded CSV: %d rows, %d cols" % (len(df), len(df.columns)))

id_col    = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
name_col  = find_col(df, [r"^name$"], ["Name"])
cm_col    = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
path_col  = find_col(
    df,
    [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
    ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"],
)

if not id_col:
    raise ValueError("CSV missing an ID#/PersonID column.")
if not match_col:
    raise ValueError("CSV missing 'Match to' column (try headings like 'Match to' or 'Match').")
if not name_col:
    raise ValueError("CSV missing 'Name' column.")
if not cm_col:
    raise ValueError("CSV missing 'cM' column.")
if not path_col:
    raise ValueError("CSV missing lineage/path column.")

# ---------- 5.1) Read vitals from dna_vitals.csv ----------
AUTOSOMAL_MATCHES = ""
SHOWING_STATIC    = ""
LAST_UPDATED_TEXT = ""

def _friendly_ts_from_utc(raw: str) -> str:
    """
    Convert a UTC timestamp like
      'YYYY-MM-DD HH:MM UTC'
      'YYYY-MM-DD HH:MM:SS UTC'
      'YYYY-MM-DDTHH:MM UTC'
      'YYYY-MM-DDTHH:MM:SS UTC'
    to 'Month D, YYYY H:MM AM/PM' in approx. US Eastern (UTC-5).
    If parsing fails, return the original string.
    """
    raw = str(raw or "").strip()
    if not raw:
        return "(unknown)"

    raw_clean = raw.replace("UTC", "").replace("utc", "").strip()

    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]

    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(raw_clean, fmt)
            break
        except Exception:
            dt_utc = None

    if dt_utc is None:
        return raw

    dt_est = dt_utc - timedelta(hours=5)

    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24 = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12 = h24 % 12
    if h12 == 0:
        h12 = 12

    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _format_int_with_commas(s):
    """
    Take a numeric-like string, strip non-digits, and return with thousands
    separators (e.g., '1604' -> '1,604'). On failure, return the input.
    """
    if s is None:
        return ""
    t = re.sub(r"[^0-9\-]", "", str(s))
    if not t:
        return str(s)
    try:
        n = int(t)
        return "{:,}".format(n)
    except Exception:
        return str(s)

def _load_vitals(path):
    global AUTOSOMAL_MATCHES, SHOWING_STATIC, LAST_UPDATED_TEXT
    AUTOSOMAL_MATCHES = ""
    SHOWING_STATIC    = ""
    LAST_UPDATED_TEXT = ""

    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; header will omit counts and last-updated text.")
        return

    try:
        vdf = pd.read_csv(path, dtype=str, encoding="iso-8859-15", keep_default_na=False)
    except Exception:
        encs = ("utf-8-sig", "utf-8", "cp1252", "latin1")
        last = None
        vdf  = None
        for enc in encs:
            try:
                vdf = pd.read_csv(path, dtype=str, encoding=enc, keep_default_na=False)
                break
            except Exception as e:
                last = e
        if vdf is None:
            print("[WARN] Unable to read dna_vitals.csv: %s" % last)
            return

    flat = []
    for row in vdf.astype(str).values.tolist():
        for cell in row:
            flat.append(str(cell))

    autosomal = None
    showing   = None
    last_text = None

    for cell in flat:
        s = str(cell)
        if "Records tagged and filtered by NPFX" in s and autosomal is None:
            m = re.search(r"(\d+)", s)
            if m:
                autosomal = m.group(1)
        if "After manual filter, total records" in s and showing is None:
            m = re.search(r"(\d+)", s)
            if m:
                showing = m.group(1)
        if "LAST_UPDATED_TEXT" in s and last_text is None:
            m = re.search(r"LAST_UPDATED_TEXT\s*:\s*(.+)", s)
            if m:
                last_text = m.group(1).strip()

    if autosomal is None or showing is None or last_text is None:
        all_text = " ".join(flat)
        nums = re.findall(r"\d+", all_text)
        if autosomal is None and len(nums) >= 1:
            autosomal = nums[0]
        if showing is None and len(nums) >= 2:
            showing = nums[1]

    if last_text is not None:
        last_text = _friendly_ts_from_utc(last_text)

    AUTOSOMAL_MATCHES = _format_int_with_commas(autosomal) if autosomal else ""
    SHOWING_STATIC    = _format_int_with_commas(showing)   if showing   else ""
    LAST_UPDATED_TEXT = last_text or ""

    print("[OK] Loaded vitals from %s -> autosomal=%s, showing=%s, last_updated_text=%s"
          % (path,
             AUTOSOMAL_MATCHES or "?",
             SHOWING_STATIC or "?",
             LAST_UPDATED_TEXT or "(blank)"))

_load_vitals(VITALS_CSV)

if SHOWING_STATIC:
    try:
        show_raw = re.sub(r"[^0-9\-]", "", SHOWING_STATIC)
        csv_rows = len(df)
        if show_raw:
            if int(show_raw) != csv_rows:
                print("[WARN] dna_vitals showing (%s) != CSV rows (%d)" %
                      (show_raw, csv_rows))
    except Exception as _e:
        print("[WARN] Unable to compare showing from dna_vitals.csv to CSV rows: %s" % _e)

# ---------- 6) Transform ----------
_setup_resolver()

headers          = []
lineages         = []
subjects         = []
first_ancestors  = []
display_match_to = []
display_name     = []

LINEAGE_HEADER_SAFE = "Yates DNA Ancestral Lines"

for _, row in df.iterrows():
    subject_raw    = row.get(match_col, "")
    subject_name   = normalize_person_name(resolve_match_to(subject_raw))
    subject_name_b = "<strong>%s</strong>" % subject_name if subject_name else subject_name

    pid          = extract_person_id(row.get(id_col, ""))
    matchee_raw  = row.get(name_col, "")
    matchee_name = norm_matchee_name(matchee_raw) or subject_name

    if pid:
        matchee_url = (
            "%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15"
            % (TNG_BASE, pid, TNG_TREE)
        )
        matchee_name_html = '<a href="%s" target="_blank" rel="noopener">%s</a>' % (matchee_url, matchee_name)
    else:
        matchee_name_html = matchee_name

    cm_val      = row.get(cm_col, "0")
    tokens      = split_tokens(row.get(path_col, ""))
    gens_total  = len(tokens)
    tokens_disp = tokens[:7]

    if "common_husband" in df.columns and "common_wife" in df.columns:
        husband_raw = str(row.get("common_husband", "")).strip()
        wife_raw    = str(row.get("common_wife", "")).strip()
        if not husband_raw and not wife_raw:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)
    else:
        husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(subject_name_b or subject_name, cm_val, matchee_name_html, gens_total, husband_raw, wife_raw)
    if tokens_disp:
        tokens_disp[0] = "<strong>%s</strong>" % tokens_disp[0]
    sep          = " %s " % ARROW_ENTITY
    lineage_text = sep.join(tokens_disp) if tokens_disp else ""

    headers.append(header_html)
    lineages.append(lineage_text)
    subjects.append(subject_name)
    first_ancestors.append(tokens[0] if tokens else "")

    display_match_to.append(subject_name)
    display_name.append(matchee_name_html)

df["Match Summary"]      = headers
df[LINEAGE_HEADER_SAFE]  = lineages
df["Subject"]            = subjects
df["First Ancestor"]     = [_clean_piece(x) for x in first_ancestors]

# ---------- 6.1) Clean exports ----------
TAG_RE = re.compile(r"<[^>]+>")

def _html_to_text(s: str) -> str:
    t = TAG_RE.sub("", str(s or ""))
    t = _html.unescape(t)
    t = t.replace("\u2192", "->")
    t = re.sub(r"\s+", " ", t)
    return t.strip()

def _extract_find_url(subject_name: str) -> str:
    if not subject_name:
        return ""
    q = _u.quote(subject_name)
    return "%s?q=%s" % (REMOTE_NAME_ABS, q)

website_urls = [_extract_find_url(subj) for subj in df["Subject"].tolist()]

export_df = pd.DataFrame({
    "Match to"      : df[match_col].tolist(),
    "Name"          : df[name_col].tolist(),
    "cM"            : df[cm_col].tolist(),
    "Match Summary" : [_html_to_text(v) for v in df["Match Summary"].tolist()],
    "Website URL"   : website_urls,
    "Lineage"       : [_html_to_text(v) for v in df[LINEAGE_HEADER_SAFE].tolist()],
})

export_df.to_csv(LOCAL_CSV, index=False, encoding="iso-8859-15", errors="xmlcharrefreplace")
try:
    export_df.to_excel(LOCAL_XLSX, index=False)
except Exception:
    with pd.ExcelWriter(LOCAL_XLSX) as _writer:
        export_df.to_excel(_writer, index=False)
print("[OK] Wrote exports: %s and %s" % (os.path.abspath(LOCAL_CSV), os.path.abspath(LOCAL_XLSX)))

# ---------- 7) Stylesheet content (includes print fix for scroll containers) ----------
CSS_TEXT = """/* yates.one-name.net - DNA pages (unified stylesheet)
   Version: %s
   Note: Typography, layout, colors, borders - centralized here. */

:root {
  --table-width-px: %dpx;
  --brand-blue: #5b79b8;
  --brand-blue-dark: #4668aa;
  --line: #dddddd;
  --line-strong: #999999;
}

html, body {
  margin:0; padding:0;
  font-family: "Times New Roman", Times, serif;
  font-size: 16px; line-height: 1.35;
  color:#111111; background:#ffffff;
}

.wrap {
  max-width:100%%;
  margin:0 auto;
  background:#ffffff;
  padding:16px;
  padding-bottom:48px;
}
.centerline { text-align:center; }

.downloads { text-align:center; margin:4px 0 10px 0; font-size: 13px; }
.updated   { font-size: 12px; color:#555555; text-align:center; margin:2px 0 10px 0; }

.left-align { text-align:left; }

/* Simple header alignment helpers */
th.center-header { text-align:center; }
th.left-header   { text-align:left; }

/* Wrapper for scroll container */
.table-scroll-wrapper {
  width:100%%;
  max-width:100%%;
  margin:0 auto;
}

/* Single scroll container (optimized: no fake top scrollbar) */
.table-scroll {
  width:100%%;
  max-width:100%%;
  max-height:80vh;
  overflow-x:auto;
  overflow-y:auto;
  border:1px solid var(--line);
  position:relative;
  -webkit-overflow-scrolling:touch;
  scrollbar-width:auto;
  -ms-overflow-style:auto;
}

/* Table: let widths and content drive horizontal size */
table.sortable {
  border-collapse:separate;
  border-spacing:0;
}

table.sortable th,
table.sortable td {
  border:1px solid var(--line);
  padding:6px 8px;
  vertical-align:top;
  white-space:nowrap;
}

/* Sticky header row */
table.sortable th {
  background:#e3eaf8;
  position:-webkit-sticky;
  position:sticky;
  top:0;
  z-index:5;
  box-shadow:0 1px 0 #cccccc;
  cursor:pointer;
}

/* Sticky first column (Match to) for DNA Register table */
table.dna-register-table th:nth-child(1),
table.dna-register-table td:nth-child(1) {
  position:-webkit-sticky;
  position:sticky;
  left:0;
  z-index:6;
  background:#ffffff;
}
table.dna-register-table th:nth-child(1) {
  z-index:7;
}

/* First data row marker */
#first-row td { border-top:2px solid var(--line-strong); }

/* Back-to-top button */
.back-to-top {
  position:fixed; right:16px; bottom:16px; padding:6px 10px;
  border:1px solid #3e5a97; background:var(--brand-blue);
  color:#ffffff; cursor:pointer; border-radius:6px; display:none; z-index:9999;
}
.back-to-top:hover { background:var(--brand-blue-dark); }

/* Controls */
.controls { text-align:center; }
.controls-spaced { margin:6px 0 10px 0; }
.search { font-size: 14px; padding:5px 8px; }

/* Old-school blue nav menu */
.oldnav {
  margin:8px auto 6px auto; padding:0; background:var(--brand-blue);
  border-radius:6px; overflow:hidden; max-width: var(--table-width-px);
}
.oldnav ul { list-style:none; margin:0; padding:0; display:flex; flex-wrap:wrap; }
.oldnav li { margin:0; padding:0; }
.oldnav a, .oldnav a:link, .oldnav a:visited, .oldnav a:active { color:#ffffff !important; }
.oldnav a {
  display:block;
  padding:8px 12px;
  text-decoration:none;
  white-space:nowrap;
  border-right:1px solid #ffffff;
  font-weight:600;
}
.oldnav li:last-child a { border-right:none; }
.oldnav a:hover { background:var(--brand-blue-dark); color:#ffffff !important; }

/* Responsive tweaks */
@media screen and (min-width: 1200px) {
  .wrap { max-width: var(--table-width-px); }
}
@media screen and (max-width: 1199px) {
  .oldnav { border-radius:0; }
}
@media screen and (max-width: 700px) {
  table.sortable th, table.sortable td { padding:5px 6px; }
}

/* Print: avoid scroll clipping for long tables (e.g., Cousin List, DNA register) */
@media print {
  .table-scroll-wrapper,
  .table-scroll {
    max-height:none !important;
    overflow:visible !important;
    border:none !important;
  }
  html, body {
    overflow:visible !important;
  }
}
""" % (
    CSS_VERSION,
    TABLE_TOTAL_WIDTH_PX,
)

os.makedirs("partials", exist_ok=True)
with open(STYLESHEET_LOCAL, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as _css:
    _css.write(CSS_TEXT)
print("[OK] Wrote stylesheet: %s" % os.path.abspath(STYLESHEET_LOCAL))

# ---------- 8) Main HTML ----------
page_tpl = Template("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ONS Yates Study Autosomal DNA Register</title>
$HEAD_LINK
</head>
<body id="top">
<!-- Cell2 build: Version=2025.12.01-G1 | HIDE_COLS=2,3,5 | CousinListFullPrint=on | Scroll=single-bottom -->
<div class="wrap">
  <h1 class="centerline">ONS Yates Study Autosomal DNA Register</h1>
  $DOWNLOADS_BLOCK
  $UPDATED_BLOCK
  $NAV_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>
<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '')
      .replace(/\\s+/g,' ')
      .trim()
      .toLowerCase();
  }

  function sortTable(tbl, colIndex, dir){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;
    var rows = Array.prototype.slice.call(tb.rows || []);
    var asc  = (dir === 'asc');

    rows.sort(function(a,b){
      var A = textOf(a.cells[colIndex]),
          B = textOf(b.cells[colIndex]);

      var nA = parseFloat(A.replace(/[^0-9.\\-]/g,'')),
          nB = parseFloat(B.replace(/[^0-9.\\-]/g,''));

      if(!isNaN(nA) && !isNaN(nB)){
        return asc ? (nA - nB) : (nB - nA);
      }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });

    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++){
      frag.appendChild(rows[i]);
    }
    tb.appendChild(frag);
  }

  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;

    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;

    for(var i=0;i<ths.length;i++){
      (function(idx){
        var th  = ths[idx];
        var dir = 'asc';
        th.addEventListener('click', function(){
          dir = (dir === 'asc') ? 'desc' : 'asc';

          for (var j = 0; j < ths.length; j++){
            ths[j].innerHTML = ths[j].innerHTML.replace(/\\s+\\(asc\\)|\\s+\\(desc\\)/,'');
          }
          th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');
          sortTable(tbl, idx, dir);
        }, false);
      })(i);
    }
  }

  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\\+/g,' ')) : '';
  }

  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;

    var tb   = tbl.tBodies[0];
    var rows = Array.prototype.slice.call(tb.rows || []);

    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\\s+/g,' ').toLowerCase();
    }

    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt  = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
    }

    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }

    box.addEventListener('input',  onInput, false);
    box.addEventListener('search', onInput, false);

    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }

  function bindBackToTop(){
    var btn = document.getElementById('back-to-top');
    if(!btn) return;

    window.addEventListener('scroll', function(){
      btn.style.display = (window.pageYOffset > 200 ? 'block' : 'none');
    }, false);

    btn.addEventListener('click', function(){
      try{
        window.scrollTo({top:0, behavior:'smooth'});
      } catch(e){
        window.scrollTo(0,0);
      }
    }, false);
  }

  function bindScrollSync(){
    // No-op in the optimized version (no separate top-scroll container).
    return;
  }

  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindSearch();
    bindBackToTop();
    bindScrollSync();
  });
})();
//]]>
</script>

</body>
</html>
""")

# ---------- 8.1) Build HTML table manually with inline widths ----------
website_links = []
for subj in df["Subject"].tolist():
    url = _extract_find_url(subj)
    if url:
        website_links.append('<a href="%s" target="_blank" rel="noopener">Website</a>' %
                             _html.escape(url, quote=True))
    else:
        website_links.append("")

col_headers = [
    ("Match to", "center"),
    ("Name", "center"),
    ("cM", "center"),
    ("Match Summary", "center"),
    ("Website", "center"),
    ("Yates DNA Ancestral Lines", "left"),
]

col_data = [
    display_match_to,
    display_name,
    df[cm_col].tolist(),
    df["Match Summary"].tolist(),
    website_links,
    df[LINEAGE_HEADER_SAFE].tolist(),
]

thead_cells = []
for (idx, (hdr, align)) in enumerate(col_headers):
    wpx = COL_WIDTHS[idx]
    style_bits = ["width:%dpx" % wpx]
    if idx in HIDE_COLS_ZERO_BASED:
        style_bits.append("display:none")
    style_attr = "; ".join(style_bits)
    if align == "center":
        cell_html = '<th class="center-header" style="%s;">%s</th>' % (style_attr, hdr)
    else:
        cell_html = '<th class="left-header" style="%s;">%s</th>' % (style_attr, hdr)
    thead_cells.append(cell_html)
thead_html = "<thead>\n  <tr>" + "".join(thead_cells) + "</tr>\n</thead>"

nrows = len(df)
tbody_lines = ["<tbody>"]
for r in range(nrows):
    tr_open = '  <tr id="first-row">' if r == 0 else '  <tr>'
    cells = []
    for c in range(len(col_headers)):
        wpx = COL_WIDTHS[c]
        val = col_data[c][r]
        val_str = "" if val is None else str(val)
        style_bits = ["width:%dpx" % wpx]
        if c in HIDE_COLS_ZERO_BASED:
            style_bits.append("display:none")
        style_attr = "; ".join(style_bits)
        cells.append('<td style="%s;">%s</td>' % (style_attr, val_str))
    tbody_lines.append(tr_open + "".join(cells) + "</tr>")
tbody_lines.append("</tbody>")
tbody_html = "\n".join(tbody_lines)

html_table = (
    '<table border="1" class="dataframe sortable dna-register-table" id="refactor-table">'
    + thead_html +
    "\n" +
    tbody_html +
    "</table>"
)

# Optimized: single bottom scroll container (no fake top scroll shim)
SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div class="table-scroll" id="bottom-scroll">%s</div>'
    '</div>'
) % (html_table,)

# ---------- 8.2) Page assembly ----------
if LAST_UPDATED_TEXT:
    updated_label = 'Last updated: <span id="last-updated">%s</span>' % _html.escape(LAST_UPDATED_TEXT)
else:
    updated_label = 'Last updated: <span id="last-updated"></span>'

_updated_parts = [updated_label]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
if SHOWING_STATIC:
    _updated_parts.append('Showing: %s' % _html.escape(SHOWING_STATIC))

UPDATED_BLOCK = (
    '<div class="updated centerline">' +
    ' &nbsp;|&nbsp; '.join(_updated_parts) +
    '</div>'
)

# No "Download:" paragraph in the body; downloads live in nav
DOWNLOADS_BLOCK = ""

NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'


CONTROLS_BLOCK = (
  '<div class="controls controls-spaced centerline">'
  '<input type="text" id="search-box" class="search" size="28" value="" placeholder="Search&amp;hellip;" />'
  '</div>'
)

final_html = page_tpl.safe_substitute(
    HEAD_LINK       = HEAD_LINK,
    UPDATED_BLOCK   = UPDATED_BLOCK,
    NAV_BLOCK       = NAV_BLOCK,
    CONTROLS_BLOCK  = CONTROLS_BLOCK,
    DOWNLOADS_BLOCK = DOWNLOADS_BLOCK,
    SCROLL_WRAPPER  = SCROLL_WRAPPER,
)

with open(LOCAL_HTML, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
    f.write(final_html)
print("[OK] Saved canonical render: %s" % os.path.abspath(LOCAL_HTML))

# ---------- 9) Upload ----------
def save_and_upload_all():
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return
    try:
        ftps = ftp_connect()

        try:
            ftp_upload_overwrite(ftps, STYLESHEET_LOCAL, _remote_path(STYLESHEET_REMOTE))
        except Exception as e:
            print("[WARN] Upload stylesheet failed: %s" % e)

        try:
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_CANON))
            ftp_upload_overwrite(ftps, LOCAL_HTML, _remote_path(REMOTE_HTML_LEG))
        except Exception as e:
            print("[WARN] Upload main HTML failed: %s" % e)

        try:
            if os.path.exists(LOCAL_CSV):
                ftp_upload_overwrite(ftps, LOCAL_CSV, _remote_path(REMOTE_CSV))
            if os.path.exists(LOCAL_XLSX):
                ftp_upload_overwrite(ftps, LOCAL_XLSX, _remote_path(REMOTE_XLSX))
            print("[OK] Uploaded CSV/XLSX -> /partials/")
        except Exception as e:
            print("[WARN] Upload CSV/XLSX failed: %s" % e)

        print("\n--- SIZE Verification (if supported) ---")
        for p in [
            _remote_path(REMOTE_HTML_CANON),
            _remote_path(REMOTE_HTML_LEG),
            _remote_path(REMOTE_CSV),
            _remote_path(REMOTE_XLSX),
            _remote_path(STYLESHEET_REMOTE),
        ]:
            sz = ftp_size(ftps, p)
            print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URLs ---")
        print("Canonical:        https://yates.one-name.net/partials/yates_ancestor_register.shtml")
        print("Legacy (ons_):    https://yates.one-name.net/partials/ons_yates_dna_register.shtml")
        print("Match Count:      https://yates.one-name.net/partials/match_count.shtml")
        print("Lineage Count:    https://yates.one-name.net/partials/lineage_count.shtml")
        print("Cousin Printable: https://yates.one-name.net/partials/cousin_list_print.htm")
        print("Trees (Cell 3):   https://yates.one-name.net/partials/just-trees.shtml")
        print("Stylesheet:       https://yates.one-name.net/partials/dna_tree_styles.css")
        print("\nBust cache once if needed by appending ?v=%s to the URL." % CSS_VERSION)
    except Exception as e:
        print("[FAIL] FTP session: %s" % e)
        traceback.print_exc()

save_and_upload_all()
# ====== CUT STOP [1/1] CELL 2 ==================================================================


[CONFIRM] Golden Rules active | Cell=Cell2_AllStyles_ExternalCSS | Version=2025.12.01-G1 | Encoding=ISO-8859-15
[LAYOUT] TABLE_TOTAL_WIDTH_PX=3480
[LAYOUT] Column widths (px): 1=80 2=220 3=60 4=1200 5=120 6=1800
[OK] Loaded CSV: 93 rows, 6 cols
[OK] Loaded vitals from dna_vitals.csv -> autosomal=1,605, showing=93, last_updated_text=December 4, 2025 8:00 PM
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 83 codes
[OK] Wrote exports: /content/yates_ancestor_register.csv and /content/yates_ancestor_register.xlsx
[OK] Wrote stylesheet: /content/partials/dna_tree_styles.css
[OK] Saved canonical render: /content/yates_ancestor_register.shtml
[PUT] partials/dna_tree_styles.css -> partials/dna_tree_styles.css
[PUT] yates_ancestor_register.shtml -> partials/yates_ancestor_register.shtml
[PUT] yates_ancestor_register.shtml -> partials/ons_yates_dna_register.shtml
[PUT] yates_ancestor_register.csv -> partials/yates_ancestor_register.csv
[PUT] yates_ance

# Cell 2b

In [10]:
# ====== CUT START [1/1] CELL 2b — Build + Upload Match/Lineage Count Partials (Counts only) ======
# RON GOLDEN RULES -- CLIFF NOTES (v2025.11.30-G3)
# • Complete & runnable Colab cell — one contiguous block.
# • Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# • XHTML 1.0 Transitional; ALL typography/layout via /partials/dna_tree_styles.css (linked only).
# • Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell2b_Counts | Version=2025.11.30-G3 | Encoding=ISO-8859-15
# • Enforce ISO-8859-15 printable chars on writes;
print("[CONFIRM] Golden Rules active | Cell=Cell2b_Counts | Version=2025.11.30-G3 | Encoding=ISO-8859-15")

DOWNLOADS_BLOCK = ""  # moved into nav_block.shtml

import os, re, posixpath, socket, traceback
from datetime import datetime, timedelta
from ftplib import FTP_TLS
import pandas as pd
import html as _html

# ---------- 0) Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

# ---------- 1) Config ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

SERVER_PARTIALS_DIR = "partials"
SERVER_MAPPING_BASENAME = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"

FTP_DIR = (os.environ.get("FTP_DIR", "") or "").strip()

# Shared stylesheet link (must already be present on server from Cell 2)
STYLESHEET_BASENAME = "dna_tree_styles.css"
CSS_VERSION = "v2025-11-12-max"
STYLESHEET_HREF = "/partials/%s?%s" % (STYLESHEET_BASENAME, CSS_VERSION)
HEAD_LINK = '<link rel="stylesheet" type="text/css" href="%s" />' % STYLESHEET_HREF

# Shared nav include (SSI)
NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

# NEW: vitals authority (built by Cell 1)
VITALS_LOCAL = "dna_vitals.csv"

# TNG settings for cousin links (match vertical chart behavior in main register)
TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"

# Local partial paths
MATCH_COUNT_LOCAL   = os.path.join("partials", "match_count.shtml")
LINEAGE_COUNT_LOCAL = os.path.join("partials", "lineage_count.shtml")
COUSIN_PRINT_LOCAL  = os.path.join("partials", "cousin_list_print.htm")

# Remote partial paths (server-side)
MATCH_COUNT_REMOTE   = posixpath.join("partials", "match_count.shtml")
LINEAGE_COUNT_REMOTE = posixpath.join("partials", "lineage_count.shtml")
COUSIN_PRINT_REMOTE  = posixpath.join("partials", "cousin_list_print.htm")

# ---------- 1a) Load vitals authority from dna_vitals.csv ----------
def _friendly_ts_from_utc(raw: str) -> str:
    """
    Convert a UTC timestamp like
      'YYYY-MM-DD HH:MM UTC'
      'YYYY-MM-DD HH:MM:SS UTC'
      'YYYY-MM-DDTHH:MM UTC'
      'YYYY-MM-DDTHH:MM:SS UTC'
    to 'Month D, YYYY H:MM AM/PM' in approx. US Eastern (UTC-5).
    If parsing fails, return the original string.
    """
    raw = str(raw or "").strip()
    if not raw:
        return "(unknown)"

    # Strip trailing 'UTC' (any case) and extra spaces
    raw_clean = raw.replace("UTC", "").replace("utc", "").strip()

    # Try several common formats
    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]

    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(raw_clean, fmt)
            break
        except Exception:
            dt_utc = None

    if dt_utc is None:
        # Could not parse; return as-is
        return raw

    # Approximate Eastern as UTC-5; DST handling can be layered later if needed.
    dt_est = dt_utc - timedelta(hours=5)

    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24 = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12 = h24 % 12
    if h12 == 0:
        h12 = 12

    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _load_vitals(path: str):
    autosomal = "(unknown)"
    last_updated_display = "(unknown)"
    if not os.path.exists(path):
        print("[WARN] dna_vitals.csv not found; using '(unknown)' for header vitals.")
        return autosomal, last_updated_display

    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    df_v = None
    last_err = None
    for enc in encs:
        try:
            df_v = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as ex:
            last_err = ex
            df_v = None
    if df_v is None or "line" not in df_v.columns:
        print("[WARN] Unable to read dna_vitals.csv or missing 'line' column:", last_err)
        return autosomal, last_updated_display

    for raw_line in df_v["line"].astype(str):
        line = raw_line.strip()
        low = line.lower()
        if low.startswith("after manual filter, total records:"):
            # Allow optional commas in the stored number and normalize
            m = re.search(r"(\d[\d,]*)", line)
            if m:
                num_raw = m.group(1).replace(",", "")
                try:
                    autosomal = "{:,}".format(int(num_raw))
                except Exception:
                    autosomal = num_raw  # fallback
        elif low.startswith("last_updated_text:"):
            ts = line.split(":", 1)[1].strip()
            last_updated_display = _friendly_ts_from_utc(ts)

    print("[VITALS] autosomal (after manual filter): %s" % autosomal)
    print("[VITALS] last updated (display): %s" % last_updated_display)
    return autosomal, last_updated_display

AUTOSOMAL_MATCHES_TEXT, LAST_UPDATED_DISPLAY = _load_vitals(VITALS_LOCAL)

# ---------- 2) FTP helpers ----------
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True


def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps


def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name


def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    parts = [p for p in remote_path.split("/")[:-1] if p and p != "."]
    for seg in parts:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)


def ftp_download_if_exists(ftps, remote_name, local_name) -> bool:
    try:
        with open(local_name, "wb") as f:
            ftps.retrbinary("RETR %s" % remote_name, f.write)
        print("[PULL] %s -> %s" % (remote_name, os.path.abspath(local_name)))
        return True
    except Exception as e:
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        print("[MISS] %s (%s)" % (remote_name, e))
        return False


def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))


def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None


# ---------- 3) Resolver (match_to_unmasked.csv on server) ----------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError("Unable to read mapping CSV %s: %s" % (path, last))
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")
    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"] = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV empty after normalization.")
    return df


def load_resolver_from_server() -> dict:
    with ftp_connect() as ftps:
        try:
            ftps.cwd(SERVER_PARTIALS_DIR)
        except Exception:
            pass
        ok = ftp_download_if_exists(ftps, SERVER_MAPPING_BASENAME, SERVER_MAPPING_LOCAL_CACHE)
        try:
            ftps.quit()
        except Exception:
            pass
    if not ok:
        raise RuntimeError(
            "Resolver not found on server: /%s. Upload match_to_unmasked.csv into /partials/ and re-run."
            % _remote_path(SERVER_MAPPING_REMOTE)
        )
    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print("[OK] Resolver loaded: %d codes" % len(df_map))
    return dict(zip(df_map["code"], df_map["unmasked"]))


MATCH_TO_UNMASKED = {}


def _setup_resolver():
    global MATCH_TO_UNMASKED
    if not MATCH_TO_UNMASKED:
        MATCH_TO_UNMASKED = load_resolver_from_server()


# ---------- 4) CSV + name helpers ----------
def find_col(df, patterns, prefer_exact=None):
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None


SEP_RE = re.compile(r"\s*(?:\u2192|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")


def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]


def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()


_PARTICLES = {
    "de",
    "del",
    "della",
    "der",
    "van",
    "von",
    "da",
    "dos",
    "das",
    "di",
    "la",
    "le",
    "du",
    "of",
}


def _smart_title(token: str) -> str:
    if not token:
        return token
    token = re.sub(
        r"(^|\b)([a-z])(['’])([a-z])",
        lambda m: m.group(1) + m.group(2).upper() + m.group(3) + m.group(4).upper(),
        token.lower(),
    )
    token = "-".join([w.capitalize() for w in token.split("-")])
    token = re.sub(r"\bmc([a-z])", lambda m: "Mc" + m.group(1).upper(), token)
    token = re.sub(r"\bmac([a-z])", lambda m: "Mac" + m.group(1).upper(), token)
    return token


def smart_titlecase(name: str) -> str:
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)


def surname_given_from_token(token):
    token = token.strip()
    idx = None
    for i in range(1, len(token)):
        if token[i - 1].islower() and token[i].isupper():
            idx = i
            break
    if idx is None:
        for i in range(1, len(token)):
            if token[i].isupper():
                idx = i
                break
    if idx is None:
        return (token,)
    surname = token[:idx]
    given = token[idx:]
    given_spaced = re.sub(r"(?<!^)([A-Z])", r" \1", given)
    return ("%s %s" % (given_spaced.strip(), surname.strip()),)


_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")


def norm_matchee_name(raw: str) -> str:
    raw = str(raw or "").strip()
    if not raw:
        return ""
    if " " in raw or "," in raw:
        nm = smart_titlecase(raw)
        parts = nm.split()
        if len(parts) == 1:
            return nm
        return ("%s %s" % (parts[0], parts[-1])).strip()
    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)
    if not words:
        nm = smart_titlecase(surname_given_from_token(raw)[0])
        ps = nm.split()
        if len(ps) == 1:
            return nm
        return ("%s %s" % (ps[0], ps[-1])).strip()
    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    return ("%s %s" % (smart_titlecase(given_candidates[0]), surname)).strip()


def normalize_person_name(s: str) -> str:
    if pd.isna(s):
        return ""
    s = _clean_piece(str(s))
    if "," in s:
        last, first = [p.strip() for p in s.split(",", 1)]
        s = "%s %s" % (first, last)
    if " " not in s and s.isalpha():
        return smart_titlecase(surname_given_from_token(s)[0])
    return smart_titlecase(s)


ID_PAT = re.compile(r"\bI\d+\b", re.I)


def extract_person_id(s: str) -> str:
    m = ID_PAT.search(str(s or ""))
    return m.group(0).upper() if m else ""


def derive_common_from_first_token(tokens):
    if not tokens:
        return ("", "")
    first = _clean_piece(tokens[0])
    parts = re.split(r"\s*(?:&| and )\s*", first, maxsplit=1, flags=re.I)
    if len(parts) != 2:
        return ("", "")

    def _norm(s):
        return smart_titlecase(s) if " " in s else smart_titlecase(surname_given_from_token(s)[0])

    return (_norm(parts[0]), _norm(parts[1]))


def degree_label_from_generations(g):
    if g <= 1:
        return "parents" if g == 1 else "self"
    if g == 2:
        return "grandparents"
    greats = g - 2
    if greats == 1:
        return "great-grandparents"
    return "%dx-great-grandparents" % greats


def build_header(subject_name_html, cm_val, matchee_name_html, gens, husband, wife):
    try:
        cm_str = "%d" % int(round(float(cm_val)))
    except Exception:
        cm_str = (str(cm_val).strip() or "0")
    degree_label = degree_label_from_generations(gens)
    parts = [
        "%s is a %s cM cousin match to %s, whose" % (subject_name_html, cm_str, matchee_name_html),
        "%s (back %d Gens)" % (degree_label, gens),
        "are",
        "%s & %s." % (husband, wife),
    ]
    s = " ".join(parts)
    s = re.sub(r"\.\s*$", "", s)
    return s

# ---------- 5) Count helpers + partial HTML shells ----------
def _norm_code_for_count(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = re.sub(r"\s+", " ", t).strip().lower()
    return t


def _partial_head(title):
    return (
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
        " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n"
        + "%s\n" % HEAD_LINK
        + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-15\" />\n"
        + "<title>%s</title>\n" % _html.escape(title)
        + "</head>\n<body>\n<div class=\"wrap\">\n"
        + "<h1 class=\"centerline\">%s</h1>\n" % _html.escape(title)
        + "<div class=\"updated centerline\">"
          "Last updated: %s &nbsp;|&nbsp; "
          "Showing: %s"
          "</div>\n" % (
              _html.escape(LAST_UPDATED_DISPLAY),
              _html.escape(AUTOSOMAL_MATCHES_TEXT),
          )
        + NAV_BLOCK + "\n"
        + "<div class=\"selection-menu centerline\">"
          "Showing: "
          "<a href=\"#\" onclick=\"return ySelShowSelected('ref-tb');\">Selected</a> &nbsp;|&nbsp; "
          "<a href=\"#\" onclick=\"return ySelShowAll('ref-tb');\">All</a> &nbsp;|&nbsp; "
          "<a href=\"#\" onclick=\"return ySelReset('ref-tb');\">Reset</a>"
          "</div>\n"
        + "<div class=\"table-scroll\">\n"
    )


def _partial_tail():
    # JS now only handles row selection; no date stamping or XHR autosomal fetch.
    # Selection color is enforced INLINE for robustness and iOS compatibility.
    return (
        "</div>\n</div>\n"
        "<script type=\"text/javascript\">\n//<![CDATA[\n"
        "(function(){"
        "function ySelEachRow(tb, cb){"
        " if(!tb) return;"
        " var rows=tb.getElementsByTagName('tr');"
        " for(var i=0;i<rows.length;i++){cb(rows[i]);}"
        "}"
        "function ySelClear(tr){"
        " if(!tr) return;"
        " tr.removeAttribute('data-selected');"
        " var cls=tr.className||'';"
        " cls=cls.replace(/\\bsel-row\\b/g,'').replace(/\\s{2,}/g,' ').replace(/^\\s+|\\s+$/g,'');"
        " tr.className=cls;"
        " tr.style.backgroundColor='';"
        "}"
        "function ySelToggle(a){"
        " var tr=a;"
        " while(tr&&tr.tagName&&tr.tagName.toLowerCase()!=='tr'){tr=tr.parentNode;}"
        " if(!tr) return false;"
        " var sel=tr.getAttribute('data-selected')==='1';"
        " if(sel){"
        "  ySelClear(tr);"
        " }else{"
        "  tr.setAttribute('data-selected','1');"
        "  var cls=tr.className||'';"
        "  if(cls.indexOf('sel-row')===-1){tr.className=(cls?(cls+' '):'')+'sel-row';}"
        "  tr.style.backgroundColor='#fff2cc';"
        " }"
        " return false;"
        "}"
        "function ySelGetTBody(tbodyId){"
        " var tb=document.getElementById(tbodyId);"
        " if(tb) return tb;"
        " var t=document.getElementById('ref-table');"
        " if(!t) return null;"
        " if(t.tBodies&&t.tBodies.length){return t.tBodies[0];}"
        " return t;"
        "}"
        "function ySelShowSelected(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){"
        "  var sel=tr.getAttribute('data-selected')==='1';"
        "  tr.style.display=sel?'':'none';"
        " });"
        " var rl=document.getElementById('reg-list');"
        " if(rl){"
        "  var selVals=[];"
        "  ySelEachRow(tb,function(tr){"
        "    if(tr.getAttribute('data-selected')==='1'){"
        "      var v=tr.getAttribute('data-filter')"
        "        || tr.getAttribute('data-lineage')"
        "        || tr.getAttribute('data-code')"
        "        || tr.getAttribute('data-q')"
        "        || '';"
        "      if(v){selVals.push(v);}"
        "    }"
        "  });"
        "  if(selVals.length===0){"
        "    return false;"
        "  }"
        "  var rows=rl.getElementsByTagName('tr');"
        "  for(var i=0;i<rows.length;i++){"
        "    var r=rows[i];"
        "    var lv=r.getAttribute('data-filter')"
        "      || r.getAttribute('data-lineage')"
        "      || r.getAttribute('data-code')"
        "      || '';"
        "    var show=false;"
        "    for(var j=0;j<selVals.length;j++){"
        "      if(lv===selVals[j]){show=true; break;}"
        "    }"
        "    r.style.display=show?'':'none';"
        "  }"
        " }"
        " return false;"
        "}"
        "function ySelShowAll(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){tr.style.display='';});"
        " var rl=document.getElementById('reg-list');"
        " if(rl){"
        "  var rows=rl.getElementsByTagName('tr');"
        "  for(var i=0;i<rows.length;i++){rows[i].style.display='';}"
        " }"
        " return false;"
        "}"
        "function ySelReset(tbodyId){"
        " var tb=ySelGetTBody(tbodyId);"
        " if(!tb) return false;"
        " ySelEachRow(tb,function(tr){tr.style.display=''; ySelClear(tr);});"
        " var rl=document.getElementById('reg-list');"
        " if(rl){"
        "  var rows=rl.getElementsByTagName('tr');"
        "  for(var i=0;i<rows.length;i++){rows[i].style.display='';}"
        " }"
        " return false;"
        "}"
        "window.ySelToggle=ySelToggle;"
        "window.ySelShowSelected=ySelShowSelected;"
        "window.ySelShowAll=ySelShowAll;"
        "window.ySelReset=ySelReset;"
        "})();\n//]]>\n</script>\n</body>\n</html>"
    )

# ---------- 6) DNA-register-style row builder ----------
def build_register_row(
    row,
    id_col: str,
    match_col: str,
    name_col: str,
    cm_col: str,
    path_col: str,
):
    subject_raw = row.get(match_col, "")
    # Unmask subject if possible, then normalize to same style as main register
    key = str(subject_raw).strip().lower()
    subject_unmasked = MATCH_TO_UNMASKED.get(key, subject_raw)
    subject_name = normalize_person_name(subject_unmasked)
    subject_name_html = _html.escape(subject_name or "")

    pid = extract_person_id(row.get(id_col, ""))

    matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
    if pid:
        name_html = (
            '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
            'target="_blank" rel="noopener">%s</a>'
            % (TNG_BASE, pid, TNG_TREE, _html.escape(matchee_name or "", quote=False))
        )
    else:
        name_html = _html.escape(matchee_name or "", quote=False)

    cm_val = row.get(cm_col, "0")
    tokens = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)

    if "common_husband" in row.index and "common_wife" in row.index:
        husband_raw = str(row.get("common_husband", "")).strip()
        wife_raw = str(row.get("common_wife", "")).strip()
        if not husband_raw and not wife_raw:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)
    else:
        husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(
        subject_name_html or subject_name,
        cm_val,
        name_html,
        gens_total,
        husband_raw,
        wife_raw,
    )

    return subject_name_html, name_html, _html.escape(str(cm_val).strip()), header_html

# ---------- 7) Match Count partial ----------
def build_match_count_partial(
    main_df: pd.DataFrame,
    id_col: str,
    match_col: str,
    name_col: str,
    cm_col: str,
    path_col: str,
) -> str:
    codes_raw = main_df[match_col].astype(str).map(lambda x: x.strip())
    keys_norm = codes_raw.map(_norm_code_for_count)

    counts_series = keys_norm.value_counts(dropna=False)
    counts = counts_series.reset_index()
    if counts.shape[1] >= 2:
        counts.columns = ["norm_key", "Count"]
    else:
        counts["norm_key"] = counts.index.astype(str)
        counts["Count"] = counts_series.values
        counts = counts[["norm_key", "Count"]]

    first_display = {}
    raw_list = codes_raw.tolist()
    norm_list = keys_norm.tolist()
    for code_disp, k in zip(raw_list, norm_list):
        if k not in first_display and str(k) != "":
            first_display[k] = code_disp

    counts["Code"] = counts["norm_key"].map(lambda k: first_display.get(k, k))
    counts["Unmasked"] = counts["norm_key"].map(lambda k: MATCH_TO_UNMASKED.get(k, ""))

    counts = counts.sort_values(
        by=["Code", "Count"],
        ascending=[True, False],
        kind="mergesort",
    ).reset_index(drop=True)

    html = []
    html.append(_partial_head("Match Count"))
    html.append('<table id="ref-table" class="sortable" border="1"><thead><tr>')
    html.append(
        '<th style="width:35%">Code</th>'
        '<th style="width:35%">Unmasked</th>'
        '<th style="width:30%">Showing</th>'
    )
    html.append("</tr></thead><tbody id=\"ref-tb\">")

    for _, r in counts.iterrows():
        code = r.get("Code", "")
        unm = r.get("Unmasked", "")
        cnt = int(str(r.get("Count", "0")).strip() or "0")
        norm_key = _norm_code_for_count(code)
        label = (unm or code).strip()
        tr = (
            "<tr data-q=\"%s\" data-count=\"%d\" data-code=\"%s\" data-filter=\"%s\">"
            "<td>%s</td><td>%s</td>"
            "<td class=\"count\">"
            "<a href=\"#\" class=\"count-pick\" onclick=\"return ySelToggle(this);\" title=\"Toggle select\">%d</a>"
            "</td></tr>"
            % (
                _html.escape(label, quote=True),
                cnt,
                _html.escape(norm_key, quote=True),
                _html.escape(norm_key, quote=True),
                _html.escape(code),
                _html.escape(unm),
                cnt,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")

    # DNA Register-style table under Match Count
    html.append('<h2 class="centerline">DNA Register rows for selected code(s)</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead><tr>'
        '<th>Match to</th>'
        '<th>Name</th>'
        '<th>cM</th>'
        '<th>Match Summary</th>'
        '</tr></thead><tbody>'
    )

    for _, row in main_df.iterrows():
        code_raw = str(row.get(match_col, "")).strip()
        if not code_raw:
            continue
        norm_key = _norm_code_for_count(code_raw)

        match_to_html, name_html, cm_html, header_html = build_register_row(
            row, id_col, match_col, name_col, cm_col, path_col
        )

        tr = (
            "<tr data-code=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "</tr>"
            % (
                _html.escape(norm_key, quote=True),
                _html.escape(norm_key, quote=True),
                match_to_html,
                name_html,
                cm_html,
                header_html,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")
    html.append(_partial_tail())
    return "".join(html)

# ---------- 8) Lineage Count partial ----------
def build_lineage_count_partial(
    main_df: pd.DataFrame,
    id_col: str,
    match_col: str,
    name_col: str,
    cm_col: str,
    path_col: str,
) -> str:
    first_series = (
        main_df.get("First Ancestor", pd.Series(dtype=str))
        .astype(str)
        .map(lambda x: x.strip())
    )
    vc = first_series[first_series != ""].value_counts(dropna=False)

    lin_df = vc.reset_index()
    if lin_df.shape[1] >= 2:
        lin_df.columns = ["First Ancestor", "Count"]
    else:
        lin_df["First Ancestor"] = lin_df.index.astype(str)
        lin_df["Count"] = vc.values
        lin_df = lin_df[["First Ancestor", "Count"]]

    lin_df = lin_df.sort_values(
        ["Count", "First Ancestor"],
        ascending=[False, True],
        kind="mergesort",
    ).reset_index(drop=True)

    html = []
    html.append(_partial_head("Lineage Count"))
    html.append('<table id="ref-table" class="sortable" border="1"><thead><tr>')
    html.append(
        '<th style="width:80%">First Ancestor</th>'
        '<th style="width:20%">Showing</th>'
    )
    html.append("</tr></thead><tbody id=\"ref-tb\">")

    for _, r in lin_df.iterrows():
        first = str(r.get("First Ancestor", "")).strip()
        cnt = int(str(r.get("Count", "0")).strip() or "0")
        tr = (
            "<tr data-q=\"%s\" data-count=\"%d\" data-lineage=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td class=\"count\">"
            "<a href=\"#\" class=\"count-pick\" onclick=\"return ySelToggle(this);\" title=\"Toggle select\">%d</a>"
            "</td>"
            "</tr>"
            % (
                _html.escape(first, quote=True),
                cnt,
                _html.escape(first, quote=True),
                _html.escape(first, quote=True),
                _html.escape(first),
                cnt,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")

    # DNA Register-style table under Lineage Count
    html.append('<h2 class="centerline">DNA Register rows for selected lineage(s)</h2>')
    html.append(
        '<table id="reg-list" class="sortable" border="1">'
        '<thead><tr>'
        '<th>Match to</th>'
        '<th>Name</th>'
        '<th>cM</th>'
        '<th>Match Summary</th>'
        '</tr></thead><tbody>'
    )

    for _, row in main_df.iterrows():
        first = str(row.get("First Ancestor", "")).strip()
        if not first:
            continue

        match_to_html, name_html, cm_html, header_html = build_register_row(
            row, id_col, match_col, name_col, cm_col, path_col
        )

        tr = (
            "<tr data-lineage=\"%s\" data-filter=\"%s\">"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "<td>%s</td>"
            "</tr>"
            % (
                _html.escape(first, quote=True),
                _html.escape(first, quote=True),
                match_to_html,
                name_html,
                cm_html,
                header_html,
            )
        )
        html.append(tr)

    html.append("</tbody></table>")
    html.append(_partial_tail())
    return "".join(html)

# ---------- 9) Cousin printable partial ----------
def build_cousin_print_partial(main_df: pd.DataFrame, id_col: str, match_col: str, name_col: str, cm_col: str, path_col: str) -> str:
    rows = []

    for _, row in main_df.iterrows():
        subject_raw = row.get(match_col, "")
        subject_name = normalize_person_name(MATCH_TO_UNMASKED.get(str(subject_raw).strip().lower(), subject_raw))
        subject_name_html = "<strong>%s</strong>" % subject_name if subject_name else ""

        pid = extract_person_id(row.get(id_col, ""))

        matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
        if pid:
            matchee_name_html = (
                '<a href="%s/verticalchart.php?personID=%s&tree=%s&parentset=0&display=vertical&generations=15" '
                'target="_blank" rel="noopener">%s</a>'
                % (TNG_BASE, pid, TNG_TREE, matchee_name)
            )
        else:
            matchee_name_html = matchee_name

        cm_val = row.get(cm_col, "0")
        tokens = split_tokens(row.get(path_col, ""))
        gens_total = len(tokens)

        if "common_husband" in main_df.columns and "common_wife" in main_df.columns:
            husband_raw = str(row.get("common_husband", "")).strip()
            wife_raw = str(row.get("common_wife", "")).strip()
            if not husband_raw and not wife_raw:
                husband_raw, wife_raw = derive_common_from_first_token(tokens)
        else:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)

        header_html = build_header(
            subject_name_html or subject_name,
            cm_val,
            matchee_name_html,
            gens_total,
            husband_raw,
            wife_raw,
        )
        rows.append(header_html)

    rows_sorted = sorted(rows)

    html_rows = [
        '<table border="1" id="refactor-table" class="sortable"><thead><tr><th>Match Summary</th></tr></thead><tbody>'
    ]
    for v in rows_sorted:
        html_rows.append("<tr><td>%s</td></tr>" % v)
    html_rows.append("</tbody></table>")

    cousin_html = (
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
        "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\"><head>"
        "%s" % HEAD_LINK
        + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-15\" />"
        "<title>Cousin List (Printable)</title>"
        "</head><body onload=\"window.print();\">"
        "<div class=\"wrap\">"
        "<h1 class=\"centerline\">Cousin List (Printable)</h1>"
        "<div class=\"table-scroll\">%s</div>"
        "</div></body></html>"
        % "".join(html_rows)
    )
    return cousin_html

# ---------- 10) Main driver ----------
def main():
    encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
    last_err = None
    df = None
    for enc in encs:
        try:
            df = pd.read_csv(CSV_IN, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as ex:
            last_err = ex
            df = None
    if df is None:
        raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, last_err))

    print("[OK] Loaded CSV for counts: %d rows, %d cols" % (len(df), len(df.columns)))

    id_col = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
    match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
    name_col = find_col(df, [r"^name$"], ["Name"])
    cm_col = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
    path_col = find_col(
        df,
        [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
        ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"],
    )

    if not match_col:
        raise ValueError("CSV missing 'Match to' column (try headings like 'Match to' or 'Match').")
    if not path_col:
        raise ValueError("CSV missing lineage/path column for First Ancestor.")
    if not name_col:
        raise ValueError("CSV missing 'Name' column.")
    if not cm_col:
        raise ValueError("CSV missing 'cM' column.")
    if not id_col:
        raise ValueError("CSV missing an ID#/PersonID column.")

    # Rebuild First Ancestor column from lineage path
    first_ancestors = []
    for _, row in df.iterrows():
        tokens = split_tokens(row.get(path_col, ""))
        first_ancestors.append(_clean_piece(tokens[0]) if tokens else "")
    df["First Ancestor"] = first_ancestors

    _setup_resolver()
    os.makedirs("partials", exist_ok=True)

    mc_html = build_match_count_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(
        MATCH_COUNT_LOCAL,
        "w",
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    ) as f:
        f.write(mc_html)
    print("[OK] Wrote partial:", os.path.abspath(MATCH_COUNT_LOCAL))

    lc_html = build_lineage_count_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(
        LINEAGE_COUNT_LOCAL,
        "w",
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    ) as f:
        f.write(lc_html)
    print("[OK] Wrote partial:", os.path.abspath(LINEAGE_COUNT_LOCAL))

    cousin_html = build_cousin_print_partial(df, id_col, match_col, name_col, cm_col, path_col)
    with open(
        COUSIN_PRINT_LOCAL,
        "w",
        encoding="iso-8859-15",
        errors="xmlcharrefreplace",
    ) as f:
        f.write(cousin_html)
    print("[OK] Wrote partial:", os.path.abspath(COUSIN_PRINT_LOCAL))

    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return

    try:
        ftps = ftp_connect()

        try:
            ftp_upload_overwrite(ftps, MATCH_COUNT_LOCAL, _remote_path(MATCH_COUNT_REMOTE))
            ftp_upload_overwrite(ftps, LINEAGE_COUNT_LOCAL, _remote_path(LINEAGE_COUNT_REMOTE))
            ftp_upload_overwrite(ftps, COUSIN_PRINT_LOCAL, _remote_path(COUSIN_PRINT_REMOTE))
        except Exception as e:
            print("[WARN] Upload partials failed:", e)

        print("\n--- SIZE Verification (if supported) ---")
        for p in [
            _remote_path(MATCH_COUNT_REMOTE),
            _remote_path(LINEAGE_COUNT_REMOTE),
            _remote_path(COUSIN_PRINT_REMOTE),
        ]:
            sz = ftp_size(ftps, p)
            print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\n--- Open URLs ---")
        print("Match Count:      https://yates.one-name.net/partials/match_count.shtml")
        print("Lineage Count:    https://yates.one-name.net/partials/lineage_count.shtml")
        print("Cousin Printable: https://yates.one-name.net/partials/cousin_list_print.htm")
    except Exception as e:
        print("[FAIL] FTP session:", e)
        traceback.print_exc()


if __name__ == "__main__":
    main()
# ====== CUT STOP [1/1] CELL 2b ================================================================


[CONFIRM] Golden Rules active | Cell=Cell2b_Counts | Version=2025.11.30-G3 | Encoding=ISO-8859-15
[VITALS] autosomal (after manual filter): 93
[VITALS] last updated (display): December 4, 2025 10:46 AM
[OK] Loaded CSV for counts: 93 rows, 6 cols
[PULL] match_to_unmasked.csv -> /content/match_to_unmasked.server.csv
[OK] Resolver loaded: 83 codes
[OK] Wrote partial: /content/partials/match_count.shtml
[OK] Wrote partial: /content/partials/lineage_count.shtml
[OK] Wrote partial: /content/partials/cousin_list_print.htm
[PUT] partials/match_count.shtml -> partials/match_count.shtml
[PUT] partials/lineage_count.shtml -> partials/lineage_count.shtml
[PUT] partials/cousin_list_print.htm -> partials/cousin_list_print.htm

--- SIZE Verification (if supported) ---
partials/match_count.shtml : 65480
partials/lineage_count.shtml : 69825
partials/cousin_list_print.htm : 31260

--- Open URLs ---
Match Count:      https://yates.one-name.net/partials/match_count.shtml
Lineage Count:    https://yates.on

# Cell2c

In [6]:
# ====== CUT START [1/1] CELL 2c — Seed-based DNA Network (Match Count companion) ======
# RON GOLDEN RULES — CLIFF NOTES (v2025.11.30-G3)
# • Complete & runnable Colab cell — one contiguous block.
# • Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# • XHTML 1.0 Transitional; ALL typography/layout via /partials/dna_tree_styles.css (linked only).
# • Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell2c_SeedNetwork | Version=2025.12.04-G3 | Encoding=ISO-8859-15
# • Enforce ISO-8859-15 printable chars on writes.

print("[CONFIRM] Golden Rules active | Cell=Cell2c_SeedNetwork | Version=2025.12.04-G3 | Encoding=ISO-8859-15")
DECLARED_LINES = 999
print(f"[AUDIT] DECLARED_LINES={DECLARED_LINES}")

import os, re, html as _html, traceback, posixpath, socket
import pandas as pd
from ftplib import FTP_TLS

# ---------- 0) Secrets (FTP) ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_DIR", "")
    os.environ.setdefault("FTP_PORT", "21")

FTP_DIR = (os.environ.get("FTP_DIR", "") or "").strip()
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))
FTP_PASSIVE = True

def ftp_connect() -> FTP_TLS:
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    socket.setdefaulttimeout(FTP_TIMEOUT)
    ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", 21)))
    ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
    try:
        ftps.prot_p()
    except Exception:
        pass
    try:
        ftps.set_pasv(FTP_PASSIVE)
    except Exception:
        pass
    if FTP_DIR:
        for p in [p for p in FTP_DIR.split("/") if p]:
            try:
                ftps.mkd(p)
            except Exception:
                pass
            ftps.cwd(p)
    return ftps

def _remote_path(name: str) -> str:
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ensure_remote_dirs(ftps, remote_path):
    if "/" not in remote_path:
        return
    pwd0 = ftps.pwd()
    parts = [p for p in remote_path.split("/")[:-1] if p and p != "."]
    for seg in parts:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)
    ftps.cwd(pwd0)

def ftp_upload_overwrite(ftps, local_path, remote_name):
    ensure_remote_dirs(ftps, remote_name)
    with open(local_path, "rb") as fh:
        ftps.storbinary("STOR %s" % remote_name, fh)
    print("[PUT] %s -> %s" % (local_path, remote_name))

def ftp_size(ftps, remote_name):
    try:
        sz = ftps.size(remote_name)
        return int(sz) if sz is not None else None
    except Exception:
        return None

# ---------- 1) Config: use the SAME CSV as Cell 2b ----------
CSV_IN = "final_combined_df_with_value_labels.csv"

# Where to write outputs
NETWORK_LINEAGE_CSV = "dna_network_lineages.csv"
NETWORK_MATCHES_CSV = "dna_network_matches.csv"
NETWORK_PARTIAL     = os.path.join("partials", "dna_network.shtml")

# Remote path for the partial
SERVER_PARTIALS_DIR    = "partials"
REMOTE_NETWORK_PARTIAL = posixpath.join(SERVER_PARTIALS_DIR, "dna_network.shtml")

# Stylesheet used by the proof system (already on server from Cell 2)
STYLESHEET_BASENAME = "dna_tree_styles.css"
CSS_VERSION = "v2025-11-12-max"
STYLESHEET_HREF = "/partials/%s?%s" % (STYLESHEET_BASENAME, CSS_VERSION)

# ---------- 2) Seed list: codes exactly as shown in Match Count ----------
SEED_CODES = [
    "girtain,alma",
    "girtain,andy",
    "girtain,kathryn",
    "girtain,theresa",
]

NETWORK_TITLE = "DNA Network by Lineage (Seed-based from Match Count)"
NETWORK_SUBTITLE = "Seeds (by code): " + ", ".join(SEED_CODES)

# ---------- 3) Small helpers (matching Cell 2b behavior where needed) ----------
SEP_RE = re.compile(r"\s*(?:\u2192|&rarr;|;|>|,|~{2,}|/{2,}|\|{2,})\s*")

def _clean_piece(text: str) -> str:
    t = re.sub(r"~+", " ", str(text))
    t = re.sub(r"\s+", " ", t)
    return t.strip()

def split_tokens(s):
    if pd.isna(s):
        return []
    if not isinstance(s, str):
        s = str(s)
    return [p.strip() for p in SEP_RE.split(s) if str(p).strip()]

def _norm_code_for_count(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = re.sub(r"\s+", " ", t).strip().lower()
    return t

def safe_write(path, text):
    dirpath = os.path.dirname(path)
    if dirpath:
        os.makedirs(dirpath, exist_ok=True)
    with open(path, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(text)

def safe_to_csv(df, path):
    dirpath = os.path.dirname(path)
    if dirpath:
        os.makedirs(dirpath, exist_ok=True)
    with open(path, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        df.to_csv(f, index=False)

def escape_txt(x):
    if x is None:
        return ""
    return _html.escape(str(x), quote=True)

def find_col(df, patterns, prefer_exact=None):
    cols = list(df.columns)
    lowmap = {c.lower(): c for c in cols}
    if prefer_exact:
        for name in prefer_exact:
            if name in df.columns:
                return name
            if name and name.lower() in lowmap:
                return lowmap[name.lower()]
    for pat in patterns:
        rx = re.compile(pat, re.I)
        for c in cols:
            if rx.search(c):
                return c
    return None

# ---------- 4) Load the SAME working CSV as Cell 2b ----------
encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
last_err = None
df = None
for enc in encs:
    try:
        df = pd.read_csv(CSV_IN, encoding=enc, dtype=str, keep_default_na=False)
        break
    except Exception as ex:
        last_err = ex
        df = None

if df is None:
    raise RuntimeError("Unable to read CSV: %s (%s)" % (CSV_IN, last_err))

print("[OK] Loaded CSV for network: %d rows, %d cols" % (len(df), len(df.columns)))

id_col = find_col(df, [r"^(id#|personid)$"], ["ID#", "ID", "PersonID", "personID"])
match_col = find_col(df, [r"^match\s*to$"], ["Match to", "Match", "match_to", "Match_to"])
name_col = find_col(df, [r"^name$"], ["Name"])
cm_col = find_col(df, [r"^(c\s*:?m|cm)$", r"centi.?morgan"], ["cM", "cm"])
path_col = find_col(
    df,
    [r"(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)"],
    ["Yates DNA Ancestral Line", "Ancestral Line", "Lineage"],
)

if not match_col:
    raise ValueError("CSV missing 'Match to' column (tried headings like 'Match to', 'Match').")
if not path_col:
    raise ValueError("CSV missing lineage/path column (same one Cell 2b uses).")
if not name_col:
    raise ValueError("CSV missing 'Name' column.")
if not cm_col:
    raise ValueError("CSV missing 'cM' column.")
if not id_col:
    raise ValueError("CSV missing an ID#/PersonID column.")

print("[COLS] id_col=%r, match_col=%r, name_col=%r, cm_col=%r, path_col=%r"
      % (id_col, match_col, name_col, cm_col, path_col))

# ---------- 5) Define "lineage" and normalized codes ----------
df["_lineage_key"] = df[path_col].map(lambda x: _clean_piece(x))
df["_code_norm"]   = df[match_col].map(_norm_code_for_count)

seed_norm_set = {_norm_code_for_count(c) for c in SEED_CODES}
print("[SEEDS] Raw:", SEED_CODES)
print("[SEEDS] Normalized:", ", ".join(sorted(seed_norm_set)))

if not seed_norm_set:
    print("[ERROR] No seeds defined; nothing to do.")
    raise SystemExit(0)

# ---------- 6) Batch 1: rows for seed matches, and their lineages ----------
df_seed_rows = df[df["_code_norm"].isin(seed_norm_set)].copy()
print("[INFO] Rows in CSV matching seed codes:", len(df_seed_rows))

if df_seed_rows.empty:
    print("[WARN] No rows matched the seed codes. Network will be empty.")
    df_lineage = pd.DataFrame()
    df_net = pd.DataFrame()
else:
    lineage_keys = sorted(
        x for x in df_seed_rows["_lineage_key"].unique()
        if isinstance(x, str) and x.strip() != ""
    )
    print("[INFO] Unique lineage keys in Batch 1:", len(lineage_keys))

    if lineage_keys:
        df_net = df[df["_lineage_key"].isin(lineage_keys)].copy()
    else:
        df_net = df.iloc[0:0].copy()
    print("[INFO] Total rows (matches) in Batch 1 + Batch 2 universe:", len(df_net))

    rows_summary = []
    for lin in lineage_keys:
        sub_all  = df_net[df_net["_lineage_key"] == lin]
        sub_seed = sub_all[sub_all["_code_norm"].isin(seed_norm_set)]

        total_count = len(sub_all)
        seed_count  = len(sub_seed)
        pct_seed = round(100.0 * float(seed_count) / float(total_count), 2) if total_count else 0.0

        tokens = split_tokens(lin)
        first_ancestor = _clean_piece(tokens[0]) if tokens else ""

        rows_summary.append({
            "lineage_key": lin,
            "first_ancestor": first_ancestor,
            "seed_matches_in_lineage": seed_count,
            "total_matches_in_lineage": total_count,
            "percent_seed_in_lineage": pct_seed,
        })

    df_lineage = pd.DataFrame(rows_summary)
    if not df_lineage.empty:
        df_lineage = df_lineage.sort_values(
            by=["seed_matches_in_lineage", "total_matches_in_lineage", "first_ancestor"],
            ascending=[False, False, True],
        ).reset_index(drop=True)

# ---------- 8) Match-level listing with Batch flags ----------
if not df_net.empty:
    df_net = df_net.copy()
    df_net["batch_flag"] = df_net["_code_norm"].apply(
        lambda v: "Batch1_Seed" if v in seed_norm_set else "Batch2_Extended"
    )

    df_matches = df_net[[
        "batch_flag",
        "_code_norm",
        match_col,
        name_col,
        cm_col,
        "_lineage_key",
    ]].copy()

    df_matches = df_matches.rename(columns={
        "_code_norm": "code_norm",
        match_col: "match_code_raw",
        name_col: "match_name",
        cm_col: "cM",
        "_lineage_key": "lineage_key",
    })

    df_matches = df_matches.sort_values(
        by=["lineage_key", "batch_flag", "code_norm", "match_name"],
        ascending=[True, True, True, True],
    ).reset_index(drop=True)
else:
    df_matches = df_net.iloc[0:0].copy()

# ---------- 9) Write CSV outputs ----------
print("[WRITE] Lineage summary CSV:", NETWORK_LINEAGE_CSV)
safe_to_csv(df_lineage, NETWORK_LINEAGE_CSV)

print("[WRITE] Match-level network CSV:", NETWORK_MATCHES_CSV)
safe_to_csv(df_matches, NETWORK_MATCHES_CSV)

# ---------- 10) Build XHTML partial (dna_network.shtml) ----------
lineage_rows_html = []
if df_lineage.empty:
    lineage_rows_html.append(
        "<tr><td colspan=\"5\" class=\"dna-empty\">No lineages found for the selected seed codes.</td></tr>"
    )
else:
    for _, r in df_lineage.iterrows():
        lineage_rows_html.append(
            "<tr>"
            + "<td class=\"dna-cell-first-ancestor\">" + escape_txt(r.get("first_ancestor", "")) + "</td>"
            + "<td class=\"dna-cell-lineage-path\">"   + escape_txt(r.get("lineage_key", "")) + "</td>"
            + "<td class=\"dna-cell-count-seed\">"     + escape_txt(r.get("seed_matches_in_lineage", "")) + "</td>"
            + "<td class=\"dna-cell-count-total\">"    + escape_txt(r.get("total_matches_in_lineage", "")) + "</td>"
            + "<td class=\"dna-cell-pct\">"            + escape_txt(r.get("percent_seed_in_lineage", "")) + "</td>"
            + "</tr>"
        )
lineage_table_html = "\n".join(lineage_rows_html)

match_rows_html = []
if df_matches.empty:
    match_rows_html.append(
        "<tr><td colspan=\"6\" class=\"dna-empty\">No matches found in the Batch-1/Batch-2 universe.</td></tr>"
    )
else:
    for _, r in df_matches.iterrows():
        match_rows_html.append(
            "<tr>"
            + "<td class=\"dna-cell-batch-flag\">"   + escape_txt(r.get("batch_flag", "")) + "</td>"
            + "<td class=\"dna-cell-code-norm\">"    + escape_txt(r.get("code_norm", "")) + "</td>"
            + "<td class=\"dna-cell-code-raw\">"     + escape_txt(r.get("match_code_raw", "")) + "</td>"
            + "<td class=\"dna-cell-name\">"         + escape_txt(r.get("match_name", "")) + "</td>"
            + "<td class=\"dna-cell-cm\">"           + escape_txt(r.get("cM", "")) + "</td>"
            + "<td class=\"dna-cell-lineage-path\">" + escape_txt(r.get("lineage_key", "")) + "</td>"
            + "</tr>"
        )
match_table_html = "\n".join(match_rows_html)

xhtml = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
  <title>DNA Network (Seed-based)</title>
  <link rel="stylesheet" type="text/css" href="%s" />
</head>
<body>
<div class="wrap dna-network">
  <h1 class="centerline">%s</h1>
  <div class="centerline dna-subheading">%s</div>

  <h2 class="centerline">Lineage overview (Batch 1 universe)</h2>
  <div class="table-scroll">
    <table class="sortable dna-table dna-network-lineage" border="1">
      <thead>
        <tr>
          <th>First Ancestor (token 1)</th>
          <th>Lineage line (full path)</th>
          <th>Seed matches in lineage</th>
          <th>Total matches in lineage</th>
          <th>Percent seed in lineage</th>
        </tr>
      </thead>
      <tbody>
%s
      </tbody>
    </table>
  </div>

  <h2 class="centerline">Batch 1 and Batch 2 matches in those lineages</h2>
  <div class="table-scroll">
    <table class="sortable dna-table dna-network-matches" border="1">
      <thead>
        <tr>
          <th>Batch</th>
          <th>Code (normalized)</th>
          <th>Code (raw from CSV)</th>
          <th>Name</th>
          <th>cM</th>
          <th>Lineage line (full path)</th>
        </tr>
      </thead>
      <tbody>
%s
      </tbody>
    </table>
  </div>

  <p class="dna-footnote">
    Batch 1 (Seed) = rows where the normalized code is one of: %s.<br />
    Batch 2 (Extended) = all other matches sharing any lineage path with Batch 1.
  </p>
</div>
</body>
</html>
""" % (
    STYLESHEET_HREF,
    _html.escape(NETWORK_TITLE),
    _html.escape(NETWORK_SUBTITLE),
    lineage_table_html,
    match_table_html,
    _html.escape(", ".join(sorted(seed_norm_set))),
)

print("[WRITE] XHTML partial:", NETWORK_PARTIAL)
safe_write(NETWORK_PARTIAL, xhtml)

# ---------- 11) Upload partial to server ----------
def upload_network_partial():
    if not all(os.environ.get(k) for k in ["FTP_HOST", "FTP_USER", "FTP_PASS"]):
        print("[SKIP] Missing FTP creds; uploads skipped.")
        return
    try:
        ftps = ftp_connect()
        try:
            ftp_upload_overwrite(ftps, NETWORK_PARTIAL, _remote_path(REMOTE_NETWORK_PARTIAL))
            print("[OK] Uploaded dna_network partial ->", _remote_path(REMOTE_NETWORK_PARTIAL))
        except Exception as e:
            print("[WARN] Upload dna_network partial failed:", e)

        print("\n--- SIZE Verification (if supported) ---")
        p = _remote_path(REMOTE_NETWORK_PARTIAL)
        sz = ftp_size(ftps, p)
        print("%s : %s" % (p, sz if sz is not None else "(SIZE unsupported)"))

        try:
            ftps.quit()
        except Exception:
            pass

        print("\nOpen URL: https://yates.one-name.net/partials/dna_network.shtml")
    except Exception as e:
        print("[FAIL] FTP session:", e)
        traceback.print_exc()

upload_network_partial()

print("[DONE] Seed-based DNA network CSVs and partial generated + uploaded successfully.")
# ====== CUT STOP [1/1] CELL 2c — Seed-based DNA Network (Match Count companion) ======


[CONFIRM] Golden Rules active | Cell=Cell2c_SeedNetwork | Version=2025.12.04-G3 | Encoding=ISO-8859-15
[AUDIT] DECLARED_LINES=999
[OK] Loaded CSV for network: 93 rows, 6 cols
[COLS] id_col='ID#', match_col='Match to', name_col='Name', cm_col='cM', path_col='Yates DNA Ancestral Line'
[SEEDS] Raw: ['girtain,alma', 'girtain,andy', 'girtain,kathryn', 'girtain,theresa']
[SEEDS] Normalized: girtain,alma, girtain,andy, girtain,kathryn, girtain,theresa
[INFO] Rows in CSV matching seed codes: 11
[INFO] Unique lineage keys in Batch 1: 10
[INFO] Total rows (matches) in Batch 1 + Batch 2 universe: 11
[WRITE] Lineage summary CSV: dna_network_lineages.csv
[WRITE] Match-level network CSV: dna_network_matches.csv
[WRITE] XHTML partial: partials/dna_network.shtml
[PUT] partials/dna_network.shtml -> partials/dna_network.shtml
[OK] Uploaded dna_network partial -> partials/dna_network.shtml

--- SIZE Verification (if supported) ---
partials/dna_network.shtml : 14483

Open URL: https://yates.one-name.net/p

# Cell 3

In [None]:
# ====== CUT START [1/1] CELL 3 - Ancestor Register (Old-school Blue Menu; WHITE menu text; .shtml + SSI) ======
# RON GOLDEN RULES - CLIFF NOTES (v2025.12.01-G1)
# - Complete & runnable Colab cell, one contiguous block.
# - Source ASCII-only; outputs written with encoding="iso-8859-15", errors="xmlcharrefreplace".
# - XHTML 1.0 Transitional; typography comes ONLY from /partials/dna_tree_styles.css.
# - Deterministic audit:
#   [CONFIRM] Golden Rules active | Cell=Cell3_OldSchoolMenu_WhiteText | Version=2025.12.01-G1 | Encoding=ISO-8859-15
# - Enforce ISO-8859-15 printable chars on writes.

print("[CONFIRM] Golden Rules active | Cell=Cell3_OldSchoolMenu_WhiteText | Version=2025.12.01-G1 | Encoding=ISO-8859-15")

# ---------- Imports ----------
import os, re, socket, posixpath, traceback
from datetime import datetime, timedelta
import pandas as pd
import html as _html
from ftplib import FTP_TLS
from string import Template as _T

# Downloads paragraph is now suppressed (links live in nav_block.shtml)
DOWNLOADS_BLOCK = ""

# ---------- Secrets ----------
try:
    from google.colab import userdata  # type: ignore
    os.environ["FTP_HOST"] = userdata.get("FTP_HOST")
    os.environ["FTP_USER"] = userdata.get("FTP_USER")
    os.environ["FTP_PASS"] = userdata.get("FTP_PASS")
    try:
        os.environ["FTP_PORT"] = userdata.get("FTP_PORT")
    except Exception:
        os.environ.setdefault("FTP_PORT", "21")
    try:
        os.environ["FTP_DIR"] = userdata.get("FTP_DIR")
    except Exception:
        os.environ.setdefault("FTP_DIR", "")
except Exception:
    os.environ.setdefault("FTP_HOST", "")
    os.environ.setdefault("FTP_USER", "")
    os.environ.setdefault("FTP_PASS", "")
    os.environ.setdefault("FTP_PORT", "21")
    os.environ.setdefault("FTP_DIR", "")

FTP_DIR = os.environ.get("FTP_DIR", "").strip().strip("/")

# ---------- Config / Paths ----------
INPUT_CSV = "final_combined_df_with_value_labels.csv"

EXPORT_BASENAME = "yates_ancestor_register"
LOCAL_CSV   = EXPORT_BASENAME + ".csv"
LOCAL_XLSX  = EXPORT_BASENAME + ".xlsx"
REMOTE_CSV  = posixpath.join("partials", LOCAL_CSV)
REMOTE_XLSX = posixpath.join("partials", LOCAL_XLSX)

# This page is now .shtml so Apache will parse SSI
OUTPUT_NAME = "just-trees.shtml"
REMOTE_HTML = posixpath.join("partials", OUTPUT_NAME)

# Stylesheet + cache buster (shared with Cell 2)
STYLESHEET_HREF = "/partials/dna_tree_styles.css"
CSS_VERSION     = "v2025-11-23-g3"
HEAD_LINK = '<link rel="stylesheet" type="text/css" href="%s?%s" />' % (STYLESHEET_HREF, CSS_VERSION)

# Layout knob (used for top-scroll inner width)
TABLE_WIDTH_PX = 5550

# ---------- Load CSV (robust) ----------
df = None
_last_err = None
for enc in ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1"):
    try:
        df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False, encoding=enc)
        break
    except Exception as e:
        _last_err = e
        df = None
if df is None:
    raise SystemExit("[ERROR] Unable to read CSV: %s (%r)" % (INPUT_CSV, _last_err))
print("[OK] Loaded CSV: %s rows=%d, cols=%d" % (INPUT_CSV, len(df), len(df.columns)))

# Ensure haplogroup present (harmless for this view)
if "haplogroup" not in df.columns:
    df["haplogroup"] = ""
else:
    df["haplogroup"] = df["haplogroup"].fillna("")

# ---------- Resolver: Column B (masked) -> Column C (unmasked) ----------
A_IDX = 0
B_IDX = 1
C_IDX = 2

def _norm_code(s):
    t = str(s or "").strip()
    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
        t = t[1:-1]
    t = t.replace("\u00a0", " ")
    t = re.sub(r"\s{2,}", " ", t)
    return t.lower()

# Prefer local-first resolver cached by Cell 1; fall back to server
LOCAL_RESOLVER = "match_to_unmasked.csv"
if not os.path.exists(LOCAL_RESOLVER) and os.path.exists("/content/partials/match_to_unmasked.csv"):
    LOCAL_RESOLVER = "/content/partials/match_to_unmasked.csv"

def _pull_resolver_if_needed(local_path):
    if os.path.exists(local_path):
        print("Using resolver:", os.path.abspath(local_path))
        return local_path
    print("Resolver not found locally; attempting server pull ...")
    try:
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(os.environ.get("FTP_HOST", ""), int(os.environ.get("FTP_PORT", "21")))
            ftps.login(os.environ.get("FTP_USER", ""), os.environ.get("FTP_PASS", ""))
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass
            if FTP_DIR:
                for p in [p for p in FTP_DIR.split("/") if p]:
                    try:
                        ftps.cwd(p)
                    except Exception:
                        try:
                            ftps.mkd(p)
                        except Exception:
                            pass
                        ftps.cwd(p)
            try:
                ftps.cwd("partials")
            except Exception:
                pass
            with open("match_to_unmasked.csv", "wb") as f:
                ftps.retrbinary("RETR match_to_unmasked.csv", f.write)
        print("[OK] Pulled resolver from server -> match_to_unmasked.csv")
        return "match_to_unmasked.csv"
    except Exception as e:
        print("[WARN] Could not pull resolver from server:", e)
        return local_path

LOCAL_RESOLVER = _pull_resolver_if_needed(LOCAL_RESOLVER)

def _load_resolver_to_map(path):
    last = None
    m = None
    for enc in ("utf-8-sig", "iso-8859-15", "utf-8", "cp1252", "latin1"):
        try:
            m = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            break
        except Exception as e:
            last = e
            m = None
    if m is None:
        print("[WARN] Resolver not loaded:", last)
        return {}
    cols = {c.lower(): c for c in m.columns}
    if "code" not in cols or "unmasked" not in cols:
        print("[WARN] Resolver missing 'code'/'unmasked' cols; skipping map.")
        return {}
    m = m[[cols["code"], cols["unmasked"]]].copy()
    m["__key__"] = m[cols["code"]].map(_norm_code)
    m["__val__"] = m[cols["unmasked"]].astype(str)
    m = m.drop_duplicates(subset="__key__", keep="first")
    return dict(zip(m["__key__"], m["__val__"]))

resolver_map = _load_resolver_to_map(LOCAL_RESOLVER) if os.path.exists(LOCAL_RESOLVER) else {}

if df.shape[1] < 3:
    raise ValueError("Main df must have at least 3 columns: A(ID#), B(match to), C(unmasked).")

masked_raw = df.iloc[:, B_IDX].astype(str)
masked_key = masked_raw.map(_norm_code)
resolved   = masked_key.map(resolver_map)
df.iloc[:, C_IDX] = resolved.fillna("")

print(
    "[OK] Column B -> C mapping: %d / %d  unmatched: %d"
    % (int(resolved.notna().sum()), len(df), len(df) - int(resolved.notna().sum()))
)

# ---------- Load vitals from dna_vitals.csv (friendly date + autosomal count) ----------
VITALS_CSV = "dna_vitals.csv"
LAST_UPDATED_TEXT  = ""
AUTOSOMAL_MATCHES  = ""
SHOWING_STATIC     = ""

def _friendly_ts_from_utc(raw):
    """
    Convert a UTC timestamp like
      'YYYY-MM-DD HH:MM UTC'
      'YYYY-MM-DD HH:MM:SS UTC'
      'YYYY-MM-DDTHH:MM UTC'
      'YYYY-MM-DDTHH:MM:SS UTC'
    to 'Month D, YYYY H:MM AM/PM' in approximate US Eastern (UTC-5).
    On failure, return the original string.
    """
    s = str(raw or "").strip()
    if not s:
        return "(unknown)"
    s = s.replace("UTC", "").replace("utc", "").strip()
    fmts = [
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    ]
    dt_utc = None
    for fmt in fmts:
        try:
            dt_utc = datetime.strptime(s, fmt)
            break
        except Exception:
            dt_utc = None
    if dt_utc is None:
        return raw
    dt_est = dt_utc - timedelta(hours=5)
    months = [
        "January","February","March","April","May","June",
        "July","August","September","October","November","December"
    ]
    month_name = months[dt_est.month - 1]
    h24  = dt_est.hour
    ampm = "AM" if h24 < 12 else "PM"
    h12  = h24 % 12
    if h12 == 0:
        h12 = 12
    return "%s %d, %d %d:%02d %s" % (
        month_name,
        dt_est.day,
        dt_est.year,
        h12,
        dt_est.minute,
        ampm,
    )

def _format_num_with_commas(raw_val):
    """Normalize a numeric string (allowing commas) and format with thousands commas."""
    if raw_val is None:
        return ""
    s = str(raw_val)
    # Strip everything except digits and optional leading minus
    s_digits = re.sub(r"[^0-9\-]", "", s)
    if not s_digits:
        return ""
    try:
        n = int(s_digits)
        return "{:,}".format(n)
    except Exception:
        return s_digits

def _load_vitals(path):
    global LAST_UPDATED_TEXT, AUTOSOMAL_MATCHES, SHOWING_STATIC
    LAST_UPDATED_TEXT = ""
    AUTOSOMAL_MATCHES = ""
    SHOWING_STATIC    = ""

    if not os.path.exists(path):
        print("[INFO] dna_vitals.csv not found; LAST_UPDATED_TEXT and AUTOSOMAL_MATCHES will be blank.")
        return

    try:
        vdf = pd.read_csv(path, dtype=str, encoding="iso-8859-15", keep_default_na=False)
    except Exception:
        encs = ("utf-8-sig", "utf-8", "cp1252", "latin1")
        last = None
        vdf  = None
        for enc in encs:
            try:
                vdf = pd.read_csv(path, dtype=str, encoding=enc, keep_default_na=False)
                break
            except Exception as e:
                last = e
        if vdf is None:
            print("[WARN] Unable to read dna_vitals.csv: %s" % last)
            return

    flat = []
    for row in vdf.astype(str).values.tolist():
        for cell in row:
            flat.append(str(cell))

    autosomal_raw = None
    showing_raw   = None
    last_text     = None

    for cell in flat:
        s = str(cell)
        if "Records tagged and filtered by NPFX" in s and autosomal_raw is None:
            m = re.search(r"(\d[\d,]*)", s)
            if m:
                autosomal_raw = m.group(1)
        if "After manual filter, total records" in s and showing_raw is None:
            m = re.search(r"(\d[\d,]*)", s)
            if m:
                showing_raw = m.group(1)
        if "LAST_UPDATED_TEXT" in s and last_text is None:
            m = re.search(r"LAST_UPDATED_TEXT\s*:\s*(.+)", s)
            if m:
                last_text = m.group(1).strip()

    # Fallback: grab first two numeric-looking tokens if labels were not found
    if autosomal_raw is None or showing_raw is None or last_text is None:
        all_text = " ".join(flat)
        nums = re.findall(r"\d[\d,]*", all_text)
        if autosomal_raw is None and len(nums) >= 1:
            autosomal_raw = nums[0]
        if showing_raw is None and len(nums) >= 2:
            showing_raw = nums[1]

    if last_text is not None:
        last_text = _friendly_ts_from_utc(last_text)

    AUTOSOMAL_MATCHES = _format_num_with_commas(autosomal_raw)
    SHOWING_STATIC    = _format_num_with_commas(showing_raw)
    LAST_UPDATED_TEXT = last_text or ""

    print("[OK] Vitals from %s -> autosomal=%s, showing=%s, last_updated_text=%s"
          % (path,
             AUTOSOMAL_MATCHES or "?",
             SHOWING_STATIC or "?",
             LAST_UPDATED_TEXT or "(blank)"))

_load_vitals(VITALS_CSV)

# ---------- Blocks (updated, nav via SSI, controls) ----------
if LAST_UPDATED_TEXT:
    updated_str = (
        'Last updated: <span id="last-updated">%s</span>' %
        _html.escape(LAST_UPDATED_TEXT)
    )
else:
    updated_str = 'Last updated: <span id="last-updated"></span>'

_updated_parts = [updated_str]
if AUTOSOMAL_MATCHES:
    _updated_parts.append('Autosomal matches: %s' % _html.escape(AUTOSOMAL_MATCHES))
# Showing is still the live, on-page filtered count
_updated_parts.append('Showing: <span id="showing-count"></span>')

UPDATED_BLOCK = (
    '<div class="updated centerline">' +
    ' &nbsp;|&nbsp; '.join(_updated_parts) +
    '</div>'
)

NAV_BLOCK = '<!--#include virtual="/partials/nav_block.shtml" -->'

CONTROLS_BLOCK = (
    '<div class="controls centerline" style="margin:6px 0 10px 0;">'
    '<input type="text" id="search-box" class="search" size="28" value="" '
    'placeholder="Search&amp;hellip;" />'
    "</div>"
)

# ---------- HTML table ----------
visible_cols = [c for c in df.columns if c]

table_html = df.to_html(
    index=False,
    columns=visible_cols,
    escape=False,
    border=1,
    classes="dataframe sortable"
)

if 'id="refactor-table"' not in table_html:
    table_html = re.sub(r"<table([^>]*)>", r'<table\1 id="refactor-table">', table_html, count=1)

if 'class="dataframe sortable"' not in table_html and "sortable" not in table_html:
    table_html = table_html.replace('class="dataframe"', 'class="dataframe sortable"', 1)

table_html = table_html.replace("<tbody>\n<tr>", "<tbody>\n<tr id=\"first-row\">", 1)

SCROLL_WRAPPER = (
    '<div class="table-scroll-wrapper">'
    '<div id="top-scroll" class="scroll-sync-top">'
    '<div class="scroll-sync-top-inner" style="width:%dpx;"></div>'
    '</div>'
    '<div id="bottom-scroll" class="table-scroll">%s</div>'
    '</div>'
) % (TABLE_WIDTH_PX, table_html)

# ---------- XHTML page template ----------
page_tpl = _T("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Ancestor Register (Trees View)</title>
$HEAD_LINK
<style type="text/css">
/* Sticky second column (index 2) for Trees table */
#refactor-table th:nth-child(2),
#refactor-table td:nth-child(2){
  position:sticky;
  left:0;
  z-index:6;
  background:#ffffff;
}
#refactor-table th:nth-child(2){
  z-index:7;
}
</style>
</head>
<body id="top">
<div class="wrap">
  <h1 class="centerline">Ancestor Register (Trees View)</h1>
  $DOWNLOADS_BLOCK
  $UPDATED_BLOCK
  $NAV_BLOCK
  $CONTROLS_BLOCK
  $SCROLL_WRAPPER
</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>
<script type="text/javascript">
//<![CDATA[
(function(){
  function textOf(cell){
    return (cell && (cell.textContent || cell.innerText) || '').replace(/\\s+/g,' ').trim().toLowerCase();
  }
  function sortTable(tbl, colIndex, dir){
    var tb = tbl && tbl.tBodies ? tbl.tBodies[0] : null;
    if(!tb) return;
    var rows = [].slice.call(tb.rows || []);
    var asc  = (dir === 'asc');
    rows.sort(function(a,b){
      var A = textOf(a.cells[colIndex]), B = textOf(b.cells[colIndex]);
      var nA = parseFloat(A.replace(/[^0-9.\\-]/g,'')),
          nB = parseFloat(B.replace(/[^0-9.\\-]/g,''));
      if(!isNaN(nA) && !isNaN(nB)){ return asc ? (nA-nB) : (nB-nA); }
      if (A < B) return asc ? -1 : 1;
      if (A > B) return asc ?  1 : -1;
      return 0;
    });
    var frag = document.createDocumentFragment();
    for(var i=0;i<rows.length;i++) frag.appendChild(rows[i]);
    tb.appendChild(frag);
    updateShowing();
  }
  function bindHeaderSort(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return;
    var ths = tbl.tHead.rows[0].cells;
    if(!ths) return;
    for(var i=0;i<ths.length;i++)(function(idx){
      var th = ths[idx];
      var dir = 'asc';
      th.addEventListener('click', function(){
        dir = (dir === 'asc') ? 'desc' : 'asc';
        for (var j = 0; j < ths.length; j++){
          ths[j].innerHTML = ths[j].innerHTML.replace(/\\s+\\(asc\\)|\\s+\\(desc\\)/,'');
        }
        th.innerHTML += (dir === 'asc' ? ' (asc)' : ' (desc)');
        sortTable(tbl, idx, dir);
      }, false);
    })(i);
  }
  function formatWithCommas(n){
    try{
      var x = parseInt(String(n||'').replace(/[^0-9\\-]/g,''),10);
      if(isNaN(x)) return '';
      return x.toLocaleString('en-US');
    }catch(e){ return String(n||''); }
  }
  function visibleRowCount(){
    var tbl = document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return 0;
    var rows = tbl.tBodies[0].rows, n = 0;
    for(var i=0;i<rows.length;i++){
      if(rows[i].style.display !== 'none') n++;
    }
    return n;
  }
  function updateShowing(){
    var el = document.getElementById('showing-count');
    if(!el) return;
    el.textContent = formatWithCommas(visibleRowCount());
  }
  function getParam(name){
    var m = location.search.match(new RegExp('[?&]'+name+'=([^&]+)'));
    return m ? decodeURIComponent(m[1].replace(/\\+/g,' ')) : '';
  }
  function bindSearch(){
    var box = document.getElementById('search-box');
    var tbl = document.getElementById('refactor-table');
    if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return;
    var tb = tbl.tBodies[0];
    var rows = [].slice.call(tb.rows || []);
    function rowText(tr){
      var t = '';
      for(var i=0;i<tr.cells.length;i++){
        t += ' ' + (tr.cells[i].textContent || tr.cells[i].innerText || '');
      }
      return t.replace(/\\s+/g,' ').toLowerCase();
    }
    function apply(q){
      q = String(q || '').toLowerCase();
      for(var i=0;i<rows.length;i++){
        var txt = rowText(rows[i]);
        var show = !q || txt.indexOf(q) > -1;
        rows[i].style.display = show ? '' : 'none';
      }
      updateShowing();
    }
    var to = null;
    function onInput(){
      if(to) clearTimeout(to);
      to = setTimeout(function(){ apply(box.value); }, 60);
    }
    box.addEventListener('input', onInput, false);
    box.addEventListener('search', onInput, false);
    var q0 = getParam('q');
    if(q0){
      box.value = q0;
      apply(q0);
      try{ history.replaceState(null,'',location.pathname); }catch(e){}
    } else {
      box.value = '';
      apply('');
    }
  }
  function bindBackToTop(){
    var btn = document.getElementById('back-to-top');
    if(!btn) return;
    function toggle(){ btn.style.display = (window.scrollY > 200 ? 'block' : 'none'); }
    toggle();
    window.addEventListener('scroll', toggle, {passive:true});
    btn.addEventListener('click', function(){
      try{
        window.scrollTo({top:0, behavior:'smooth'});
      } catch(e){
        window.scrollTo(0,0);
      }
    }, false);
  }
  function bindSyncedScrollbars(){
    var topScroll    = document.getElementById('top-scroll');
    var bottomScroll = document.getElementById('bottom-scroll');
    if(!(topScroll && bottomScroll)) return;
    var syncing = false;
    topScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      bottomScroll.scrollLeft = topScroll.scrollLeft;
      syncing = false;
    }, false);
    bottomScroll.addEventListener('scroll', function(){
      if(syncing) return;
      syncing = true;
      topScroll.scrollLeft = bottomScroll.scrollLeft;
      syncing = false;
    }, false);
  }
  document.addEventListener('DOMContentLoaded', function(){
    bindHeaderSort();
    bindBackToTop();
    bindSearch();
    bindSyncedScrollbars();
    updateShowing();
  });
})();
//]]>
</script>
</body>
</html>
""")

final_html = page_tpl.safe_substitute(
    HEAD_LINK=HEAD_LINK,
    DOWNLOADS_BLOCK=DOWNLOADS_BLOCK,
    UPDATED_BLOCK=UPDATED_BLOCK,
    NAV_BLOCK=NAV_BLOCK,
    CONTROLS_BLOCK=CONTROLS_BLOCK,
    SCROLL_WRAPPER=SCROLL_WRAPPER,
)

# ---------- Exports ----------
export_df = df.copy()
export_df.to_csv(LOCAL_CSV, index=False, encoding="iso-8859-15")
try:
    export_df.to_excel(LOCAL_XLSX, index=False)
except Exception:
    from pandas import ExcelWriter
    with ExcelWriter(LOCAL_XLSX) as _w:
        export_df.to_excel(_w, index=False)
print("[OK] Wrote exports:", os.path.abspath(LOCAL_CSV), "and", os.path.abspath(LOCAL_XLSX))

# ---------- Save page locally ----------
try:
    with open(OUTPUT_NAME, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
        f.write(final_html)
    print("[OK] Saved locally:", os.path.abspath(OUTPUT_NAME))
except Exception as e:
    print("[ERROR] Save failed:", e)
    traceback.print_exc()

# ---------- Upload to /partials ----------
def _ftps_ensure_dir(ftps, path):
    if not path:
        return
    for seg in [p for p in path.split("/") if p]:
        try:
            ftps.cwd(seg)
        except Exception:
            try:
                ftps.mkd(seg)
            except Exception:
                pass
            ftps.cwd(seg)

ftp_host = os.environ.get("FTP_HOST")
ftp_user = os.environ.get("FTP_USER")
ftp_pass = os.environ.get("FTP_PASS")
ftp_port = int(os.environ.get("FTP_PORT", "21") or "21")

if ftp_host and ftp_user and ftp_pass:
    print("[INFO] Attempting FTP upload ...")
    try:
        socket.setdefaulttimeout(30)
        with FTP_TLS(timeout=30) as ftps:
            ftps.connect(ftp_host, ftp_port)
            ftps.login(ftp_user, ftp_pass)
            try:
                ftps.prot_p()
            except Exception:
                pass
            try:
                ftps.set_pasv(True)
            except Exception:
                pass

            _ftps_ensure_dir(ftps, FTP_DIR)
            _ftps_ensure_dir(ftps, "partials")

            with open(OUTPUT_NAME, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_HTML), fh)
            print("[OK] Uploaded HTML -> /partials/%s" % os.path.basename(REMOTE_HTML))

            with open(LOCAL_CSV, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_CSV), fh)
            with open(LOCAL_XLSX, "rb") as fh:
                ftps.storbinary("STOR " + os.path.basename(REMOTE_XLSX), fh)
            print("[OK] Uploaded exports -> /partials/ (%s, %s)" % (LOCAL_CSV, LOCAL_XLSX))

            print("\n--- Open URLs ---")
            print("Trees page:       https://yates.one-name.net/partials/just-trees.shtml")
            print("CSV export:       https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_CSV))
            print("Excel export:     https://yates.one-name.net/partials/%s" % os.path.basename(LOCAL_XLSX))
    except Exception as e:
        print("[ERROR] FTP session failed:", e)
        traceback.print_exc()
else:
    print("[INFO] Skipping FTP upload (missing credentials).")

print("\n--- Cell 3 Complete (.shtml + SSI nav; top+bottom scroll; sticky col 2; sortable/searchable with live 'Showing' count; LAST_UPDATED_TEXT + Autosomal matches from dna_vitals.csv, now comma-formatted; exports + upload ready) ---")
# ====== CUT STOP  [1/1] CELL 3 ==================================================================


[CONFIRM] Golden Rules active | Cell=Cell3_OldSchoolMenu_WhiteText | Version=2025.12.01-G1 | Encoding=ISO-8859-15
[OK] Loaded CSV: final_combined_df_with_value_labels.csv rows=1604, cols=6
Using resolver: /content/match_to_unmasked.csv
[OK] Column B -> C mapping: 1604 / 1604  unmatched: 0
[OK] Vitals from dna_vitals.csv -> autosomal=1,604, showing=1,604, last_updated_text=December 1, 2025 8:39 AM
[OK] Wrote exports: /content/yates_ancestor_register.csv and /content/yates_ancestor_register.xlsx
[OK] Saved locally: /content/just-trees.shtml
[INFO] Attempting FTP upload ...
[OK] Uploaded HTML -> /partials/just-trees.shtml
[OK] Uploaded exports -> /partials/ (yates_ancestor_register.csv, yates_ancestor_register.xlsx)

--- Open URLs ---
Trees page:       https://yates.one-name.net/partials/just-trees.shtml
CSV export:       https://yates.one-name.net/partials/yates_ancestor_register.csv
Excel export:     https://yates.one-name.net/partials/yates_ancestor_register.xlsx

--- Cell 3 Complete (

# debug