<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# @title [PIP]
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter
!pip install mlxtend



In [11]:
# @title [STEP 0] MASTER SETUP: Run this once to initialize the Shared Library
# This cell creates 'config.py', 'toolkit.py', and 'deploy.py' in your Colab session.

import os

# ==========================================
# 1. CONFIGURATION (config.py)
# ==========================================
config_content = """
import os

# --- SITE SETTINGS ---
SITE_BASE_URL = "https://yates.one-name.net"
TNG_BASE_URL = "https://yates.one-name.net/tng"
TNG_TREE_ID = "tree1"

# --- CSS VERSIONS (Control from here!) ---
CSS_UNIFIED     = "v2026-02-01-unified-blue-refactor1"
CSS_DNA_STYLES  = "v2025-11-23-g3"

# --- FILE PATHS ---
INPUT_CSV       = "final_combined_df_with_value_labels.csv"
VITALS_CSV      = "dna_vitals.csv"
NETWORK_AUTH    = "dna_network_first_ancestors.txt"

# --- FTP CREDENTIALS ---
def get_ftp_creds():
    # Tries to get from Colab secrets first, then environment
    try:
        from google.colab import userdata
        return {
            "HOST": userdata.get("FTP_HOST"),
            "USER": userdata.get("FTP_USER"),
            "PASS": userdata.get("FTP_PASS"),
            "PORT": int(userdata.get("FTP_PORT", 21)),
            "DIR":  userdata.get("FTP_DIR", "")
        }
    except:
        return {
            "HOST": os.environ.get("FTP_HOST", ""),
            "USER": os.environ.get("FTP_USER", ""),
            "PASS": os.environ.get("FTP_PASS", ""),
            "PORT": int(os.environ.get("FTP_PORT", 21)),
            "DIR":  "ons-study"
        }
"""
with open("config.py", "w") as f: f.write(config_content)

# ==========================================
# 2. TOOLKIT (toolkit.py) - Data Logic
# ==========================================
toolkit_content = """
import pandas as pd
import re
import html
from datetime import datetime, timedelta

# --- ROBUST CSV READER ---
def load_csv(path):
    # Tries 5 different encodings so you don't have to worry about it
    encodings = ["iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1"]
    for enc in encodings:
        try:
            df = pd.read_csv(path, dtype=str, keep_default_na=False, encoding=enc)
            print(f"[TOOLKIT] Successfully loaded {path} using {enc}")
            return df
        except Exception:
            continue
    print(f"[TOOLKIT] ERROR: Could not read {path} with any known encoding.")
    return None

# --- TEXT CLEANERS ---
def clean_text(text):
    if not text: return ""
    # Remove tildes and extra spaces
    t = str(text).replace("~", " ")
    return re.sub(r"\\s+", " ", t).strip()

def smart_titlecase(text):
    # Your custom capitalization logic
    if not text: return ""
    text = clean_text(text)
    particles = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}

    def _fix_word(w):
        # Fix Mc/Mac and apostrophes
        w = w.lower()
        if w in particles: return w
        # Logic for Mc/Mac/Apostrophes...
        w = re.sub(r"(^|\\b)([a-z])(['&#8217;])([a-z])", lambda m: m.group(1)+m.group(2).upper()+m.group(3)+m.group(4).upper(), w)
        w = w[0].upper() + w[1:]
        w = re.sub(r"\\bMc([a-z])", lambda m: "Mc" + m.group(1).upper(), w)
        return w

    return " ".join([_fix_word(w) for w in text.split()])

# --- DATE FORMATTER ---
def friendly_date(utc_string):
    if not utc_string: return "(unknown)"
    clean = str(utc_string).replace("UTC", "").replace("utc", "").strip()
    # Try parsing multiple formats
    for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%dT%H:%M"]:
        try:
            dt = datetime.strptime(clean, fmt)
            # Convert UTC to EST (UTC-5)
            dt_est = dt - timedelta(hours=5)
            # Format: January 1, 2026 12:00 PM
            return dt_est.strftime("%B %-d, %Y %-I:%M %p")
        except:
            continue
    return utc_string
"""
with open("toolkit.py", "w") as f: f.write(toolkit_content)

# ==========================================
# 3. DEPLOY (deploy.py) - FTP Logic
# ==========================================
deploy_content = """
import os
from ftplib import FTP_TLS
import config

def upload_files(file_map):
    # file_map is a dict: {"local_path.html": "remote/path/file.html"}
    creds = config.get_ftp_creds()

    if not creds["HOST"]:
        print("[DEPLOY] No FTP credentials found. Skipping upload.")
        return

    print(f"[DEPLOY] Connecting to {creds['HOST']}...")
    try:
        ftps = FTP_TLS(timeout=30)
        ftps.connect(creds["HOST"], creds["PORT"])
        ftps.login(creds["USER"], creds["PASS"])
        ftps.prot_p() # Secure data connection
        ftps.set_pasv(True)

        # Navigate to base dir if set
        if creds["DIR"]:
            _ensure_dir(ftps, creds["DIR"])
            ftps.cwd(creds["DIR"])

        for local, remote in file_map.items():
            # Ensure remote directory exists
            remote_dir = os.path.dirname(remote)
            if remote_dir:
                _ensure_dir(ftps, remote_dir)
                # Go back to root relative to base
                if creds["DIR"]: ftps.cwd("/" + creds["DIR"])
                else: ftps.cwd("/")

            # Upload
            with open(local, "rb") as f:
                ftps.storbinary(f"STOR {remote}", f)
            print(f"[DEPLOY] SUCCESS: Uploaded {local} -> {remote}")

        ftps.quit()

    except Exception as e:
        print(f"[DEPLOY] ERROR: FTP Upload failed - {e}")

def _ensure_dir(ftp, path):
    # Recursive directory creator
    parts = [p for p in path.split('/') if p]
    for part in parts:
        try:
            ftp.cwd(part)
        except:
            try:
                ftp.mkd(part)
                ftp.cwd(part)
            except:
                pass
"""
with open("deploy.py", "w") as f: f.write(deploy_content)

print("[SUCCESS] Shared Library Initialized.")
print("   - config.py created")
print("   - toolkit.py created")
print("   - deploy.py created")

[SUCCESS] Shared Library Initialized.
   - config.py created
   - toolkit.py created
   - deploy.py created


In [17]:
# @title [CELL 1] Master Engine (GEDCOM -> Authority Database)
# Version: 2026.02.04-RICH-SLUG (Production)
# Logic: Parses '2 NPFX', traces 'Yates' lines, formats 'Name (Years) & Spouse', and sorts via hidden slug.

import os, re, glob, logging, pickle
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from datetime import datetime

print("="*60)
print("      CELL 1: THE ENGINE")
print("="*60)

# --- CONFIGURATION ---
TARGET_GEDCOM = "yates_study_2025.ged"
NAME_KEY_FILE = "match_to_unmasked.csv"
CSV_OUT       = "engine_database.csv"
VITALS_OUT    = "dna_vitals.csv"

# --- UTILITIES ---
def _slugify_last_first(name_str):
    if not name_str or name_str.lower() == "unknown": return "unknown"
    clean = name_str.replace("/", "").strip()
    parts = clean.split()
    if not parts: return "unknown"
    return re.sub(r"[^a-z0-9]", "", (parts[-1] + "".join(parts[:-1])).lower())

def _pretty_name(display_name):
    s = (display_name or "").replace("/", "").strip()
    return s if s and s.lower() != "unknown" else "Unknown"

def _extract_years(txt):
    b = re.search(r"1 BIRT.*?2 DATE.*?(\d{4})", txt, re.S)
    d = re.search(r"1 DEAT.*?2 DATE.*?(\d{4})", txt, re.S)
    return f"{b.group(1) if b else ''}-{d.group(1) if d else ''}".strip('-')

# --- LOGIC CORE ---
class StrictLineageTracer:
    def __init__(self, parents_map, names_map, years_map):
        self.parents = parents_map
        self.names = names_map
        self.years = years_map
        self.target = "yates"

    def get_lineage(self, start_id):
        lineage = []
        curr = start_id
        for _ in range(50):
            lineage.append({'id': curr, 'name': self.names.get(curr, "Unknown"), 'years': self.years.get(curr, "")})
            f, m = self.parents.get(curr, (None, None))
            if not f and not m: break

            # Follow Yates
            fn, mn = self.names.get(f, "").lower(), self.names.get(m, "").lower()
            if self.target in fn: curr = f
            elif self.target in mn: curr = m
            elif f: curr = f
            elif m: curr = m
            else: break
        return lineage

def process_record(pkg):
    rid, (pmap, nmap, ymap, smap), meta = pkg
    tracer = StrictLineageTracer(pmap, nmap, ymap)

    # 1. Trace & Reverse (Oldest -> Newest)
    lineage = tracer.get_lineage(rid)
    lineage.reverse()

    # 2. Identify First Ancestor Pair
    apex = lineage[0] if lineage else {'name': 'Unknown', 'id': None, 'years': ''}
    spouses = smap.get(apex['id'], [])
    spouse_name = nmap.get(spouses[0], "Unknown") if spouses else "Unknown"

    # 3. Format Rich Header: "William Yates (1750-1830) & Mary"
    header = f"{apex['name']}"
    if apex['years']: header += f" ({apex['years']})"
    if spouse_name != "Unknown": header += f" & {spouse_name}"

    # 4. Inject Header into Lineage String
    lineage_names = [x['name'] for x in lineage]
    if lineage_names: lineage_names[0] = header

    return {
        "ID#": rid,
        "Match to": meta.get("code", ""),
        "Name": meta.get("real_name", "Unknown"),
        "cM": meta.get("cm", ""),
        "Yates DNA Ancestral Line": " -> ".join(lineage_names),
        "fa_1 masked": _slugify_last_first(apex['name']),
        "FirstAncestor_pair cojoined": header,
        "Authority_FirstAncestor": _slugify_last_first(apex['name']) # Sort Key
    }

# --- MAIN EXECUTION ---
def main():
    if not os.path.exists(TARGET_GEDCOM):
        print(f"[ERR] {TARGET_GEDCOM} not found."); return

    print(f"[1] Parsing {TARGET_GEDCOM}...")
    with open(TARGET_GEDCOM, 'r', encoding='utf-8-sig', errors='ignore') as f: raw = f.read()

    # Maps
    pmap, nmap, ymap, smap, meta_map = {}, {}, {}, {}, {}
    name_key = {}

    if os.path.exists(NAME_KEY_FILE):
        ndf = pd.read_csv(NAME_KEY_FILE, header=None)
        name_key = dict(zip(ndf[0].str.strip().str.lower(), ndf[1].str.strip()))

    blocks = raw.split("\n0 ")
    total_recs = 0

    for blk in blocks:
        lines = blk.splitlines()
        if " INDI" in lines[0]:
            total_recs += 1
            rid = lines[0].split("@")[1]
            name, npfx = "", ""
            for l in lines:
                if "1 NAME" in l: name = _pretty_name(l.split("NAME")[1])
                if "2 NPFX" in l: npfx = l.split("NPFX")[1].strip()

            nmap[rid] = name
            ymap[rid] = _extract_years(blk)

            if "&" in npfx:
                try:
                    parts = npfx.replace("(", "").replace(")", "").split("&")
                    meta_map[rid] = {'cm': parts[0].strip(), 'code': parts[1].strip(), 'real_name': name_key.get(parts[1].strip().lower(), name)}
                except: pass

        if " FAM" in lines[0]:
            h = re.search(r"1 HUSB @(.*?)@", blk)
            w = re.search(r"1 WIFE @(.*?)@", blk)
            kids = re.findall(r"1 CHIL @(.*?)@", blk)
            hid, wid = (h.group(1) if h else None), (w.group(1) if w else None)
            if hid and wid:
                smap.setdefault(hid, []).append(wid)
                smap.setdefault(wid, []).append(hid)
            for k in kids: pmap[k] = (hid, wid)

    # Processing
    queue = [(mid, (pmap, nmap, ymap, smap), meta) for mid, meta in meta_map.items()]
    print(f"[2] Processing {len(queue)} matches (Total Pool: {len(queue)})...")

    with ProcessPoolExecutor() as exe:
        rows = list(tqdm(exe.map(process_record, queue), total=len(queue)))

    # Output
    df = pd.DataFrame(rows)
    if not df.empty:
        df.sort_values(by=["Authority_FirstAncestor", "Name"], inplace=True)

    # Columns: Authority on far right
    cols = ["ID#", "Match to", "Name", "cM", "Yates DNA Ancestral Line",
            "fa_1 masked", "FirstAncestor_pair cojoined", "Authority_FirstAncestor"]
    df = df[cols]

    df.to_csv(CSV_OUT, index=False, encoding="iso-8859-15", errors="xmlcharrefreplace")

    # Save Vitals
    pd.DataFrame([
        {"line": f"Records tagged and filtered by NPFX: {len(queue)}"},
        {"line": f"After manual filter, total records: {len(df)}"}
    ]).to_csv(VITALS_OUT, index=False, encoding="iso-8859-15")

    print(f"[DONE] Database generated: {CSV_OUT} ({len(df)} rows)")

if __name__ == "__main__": main()

      CELL 1: THE ENGINE
[1] Parsing yates_study_2025.ged...
[2] Processing 1700 matches (Total Pool: 1700)...


100%|██████████| 1700/1700 [06:11<00:00,  4.58it/s]


[DONE] Database generated: engine_database.csv (1700 rows)


In [36]:
# @title [CELL 2] Production Publisher (Dual Views)
# Logic: Generates TWO distinct register pages (Ancestor View & Participant View) for optimal sorting.

import os
import pandas as pd
import html
import pytz
from ftplib import FTP_TLS
from datetime import datetime
from google.colab import userdata, files

print("="*60)
print("      CELL 2: PRODUCTION PUBLISHER (DUAL VIEWS)")
print("="*60)

# --- 1. CONFIGURATION ---
REMOTE_DIR = "ons-study"
WEB_BASE_URL = "https://yates.one-name.net/ons-study/"
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX   = "&tree=tree1&parentset=0&display=vertical&generations=15"
MANUAL_POOL_COUNT = 1700
CSV_INPUT    = "engine_database.csv"
VITALS_INPUT = "dna_vitals.csv"
KEY_FILE     = "match_to_unmasked.csv"

# --- 2. CREDENTIALS ---
try:
    FTP_HOST = userdata.get('FTP_HOST')
    FTP_USER = userdata.get('FTP_USER')
    FTP_PASS = userdata.get('FTP_PASS')
    try: FTP_PORT = int(userdata.get('FTP_PORT'))
    except: FTP_PORT = 21
    if not FTP_HOST: raise ValueError("Missing Secrets.")
    print(f"[1] Credentials Loaded for: {FTP_HOST}")
except Exception as e:
    print(f"[CRITICAL] Credential Error: {e}")
    raise e

# --- 3. SERVER FETCH ---
print(f"\n[2] Fetching Authority Key from Server...")
try:
    ftps = FTP_TLS(timeout=30)
    ftps.connect(FTP_HOST, FTP_PORT)
    ftps.login(FTP_USER, FTP_PASS)
    ftps.prot_p(); ftps.set_pasv(True)
    try: ftps.cwd(f"/{REMOTE_DIR}")
    except:
        try: ftps.cwd(f"/public_html/{REMOTE_DIR}")
        except: pass
    if KEY_FILE in ftps.nlst():
        with open(KEY_FILE, "wb") as f:
            ftps.retrbinary(f"RETR {KEY_FILE}", f.write)
        print(f"    - [SUCCESS] Pulled latest '{KEY_FILE}'.")
    ftps.quit()
except Exception as e:
    print(f"    - [ERR] Fetch failed: {e}")

# --- 4. DATA SETUP ---
if not os.path.exists(CSV_INPUT):
    print(f"\n[WARN] Database '{CSV_INPUT}' missing. Please upload.")
    up = files.upload()
    if up: CSV_INPUT = list(up.keys())[0]

try:
    df = pd.read_csv(CSV_INPUT, encoding="iso-8859-15")
    print(f"[3] Database Loaded: {len(df)} records.")
except:
    print("[ERR] Database empty.")
    df = pd.DataFrame()

# Key Map
unmask_map = {}
if os.path.exists(KEY_FILE):
    try:
        kdf = pd.read_csv(KEY_FILE, header=None)
        unmask_map = dict(zip(kdf[0].str.strip(), kdf[1].str.strip()))
    except: pass

# Vitals
vitals_total = MANUAL_POOL_COUNT
if os.path.exists(VITALS_INPUT):
    try:
        v_df = pd.read_csv(VITALS_INPUT, header=None)
        found = False
        for i, row in v_df.iterrows():
            if "Records tagged" in str(row[0]):
                vitals_total = int(str(row[0]).split(":")[-1].strip())
                found = True; break
        if found: print(f"    - Vitals Recalculated: {vitals_total}")
    except: pass

# --- 5. DATA TRANSFORM ---
# A. Unmask
if "Match to" in df.columns and unmask_map:
    print("    - Unmasking 'Match to' codes...")
    df["Match to"] = df["Match to"].apply(lambda x: unmask_map.get(str(x).strip(), x))

# B. Build Narrative
print("    - Building Narrative Column...")
def build_narrative(row):
    m_to = str(row.get('Match to', 'Unknown'))
    nm   = str(row.get('Name', 'Unknown'))
    cm   = str(row.get('cM', '0'))
    anc  = str(row.get('FirstAncestor_pair cojoined', 'Unknown'))
    rid  = str(row.get('ID#', ''))

    linked_name = m_to
    if rid and rid != 'nan':
        linked_name = f'<a href="{TNG_BASE_URL}{rid}{TNG_SUFFIX}" target="_blank"><b>{m_to}</b></a>'

    return f"{linked_name} matches {nm} as a {cm} cM relative; they share a Yates ancestral line descending from {anc}."

long_header = "Participants who tested-Who they matched-Oldest known Yates ancestor"
df[long_header] = df.apply(build_narrative, axis=1)

# Timezone
est = pytz.timezone('US/Eastern')
timestamp_str = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")

# --- 6. HTML FACTORY ---
def make_page(title, content, count, active_view="ancestor"):
    # Toggle Links
    style_anc = 'font-weight:bold; color:#006064;' if active_view == 'ancestor' else 'color:#00acc1; text-decoration:none;'
    style_par = 'font-weight:bold; color:#006064;' if active_view == 'participant' else 'color:#00acc1; text-decoration:none;'

    toggle_html = f"""
    <div style="text-align:center; padding: 10px; margin-bottom: 10px; font-family: sans-serif; font-size: 14px; background: #e0f7fa; border: 1px solid #b2ebf2;">
        <strong>Sort View:</strong> &nbsp;
        <a href="ons_yates_dna_register.shtml" style="{style_anc}">By Ancestral Line</a> &nbsp;|&nbsp;
        <a href="ons_yates_dna_register_participants.shtml" style="{style_par}">By Participant Name</a>
    </div>
    """

    head = f"""<div style="background:#f4f4f4; border-top:1px solid #ddd; border-bottom:1px solid #ddd; font-family:sans-serif; font-size:12px; color:#555; padding:8px 15px; text-align:center; margin-bottom:0;"><strong>Last updated:</strong> {timestamp_str} &nbsp;|&nbsp; <strong>Autosomal matches:</strong> {vitals_total:,} &nbsp;|&nbsp; <strong>Showing:</strong> {count:,}</div>"""
    nav = r"""<style>nav.oldnav ul { background-color: #006064 !important; border-bottom: 2px solid #00acc1 !important; margin:0; } nav.oldnav a { color: #e0f7fa !important; } nav.oldnav a:hover { background-color: #00838f !important; }</style><nav class="oldnav"><ul><li><a href="contents.shtml">Contents</a></li><li><a href="yates_ancestor_register.shtml">DNA Register</a></li><li><a href="match_count.shtml">Match Count</a></li><li><a href="lineage_count.shtml">Lineage Count</a></li><li><a href="dna_network.shtml">DNA Network</a></li><li><a href="just-trees.shtml">Trees</a></li><li><a href="subscribe_updates.shtml">Subscribe Updates</a></li><li><a href="share_matches.shtml">Share Your Matches</a></li><li><a href="gedmatchkits.htm">Gedmatch Kits</a></li><li><a href="cousin_list_print.htm">Print Cousin List</a></li><li><a href="yates_ancestor_register.csv">Download CSV</a></li><li><a href="yates_ancestor_register.xlsx">Download Excel</a></li><li><a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Details</a></li><li><a href="https://yates.one-name.net/gengen/dna_theory_of_the_case.htm">Theory in Action</a></li></ul></nav>"""

    style_overrides = """
    <style>
        .table-scroll-wrapper { text-align: center; }
        #reg-table { margin: 0 auto; width: 90%; }
        #reg-table th { text-align: center !important; position:sticky; top:0; z-index:6; background:#fff; }
        #reg-table td { text-align: left; padding: 8px 15px; }
    </style>
    """
    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="iso-8859-15"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{style_overrides}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{head}{nav}</div>{toggle_html}{content}</div></body></html>"""

print("\n[4] Rendering HTML...")

# VIEW 1: ANCESTOR SORT (Default)
print("    - Generating Ancestor View...")
if "fa_1 masked" in df.columns:
    # Sort Z-A by Ancestor Slug
    df.sort_values(by=['fa_1 masked', 'Match to'], ascending=[False, True], inplace=True)
tbl_anc = df.to_html(columns=[long_header], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")
html_anc = make_page("ONS Yates Study DNA Register (By Ancestor)", f'<div class="table-scroll-wrapper">{tbl_anc}</div>', len(df), "ancestor")

# VIEW 2: PARTICIPANT SORT (Surname)
print("    - Generating Participant View...")
# Create temp sort key for Surname
df['_sort_key'] = df["Match to"].astype(str).apply(lambda x: x.strip().split()[-1] if x.strip() else "")
df.sort_values(by=['_sort_key', 'Match to'], ascending=[True, True], inplace=True)
tbl_par = df.to_html(columns=[long_header], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")
html_par = make_page("ONS Yates Study DNA Register (By Participant)", f'<div class="table-scroll-wrapper">{tbl_par}</div>', len(df), "participant")


# Other Pages
tree_html = make_page("Ancestor Register (Trees View)", f'<div class="table-scroll-wrapper">{tbl_anc.replace("reg-table", "refactor-table")}</div>', len(df), "ancestor")
net_rows = "".join([f"<tr><td>{r.get('Match to','')}</td><td>{html.escape(str(r.get('FirstAncestor_pair cojoined', r.get('Authority_FirstAncestor',''))))}</td><td>Yes</td><td>{html.escape(str(r.get('Name','')))} via ...</td><td>{html.escape(str(r.get('Yates DNA Ancestral Line','')))}</td></tr>" for _, r in df.iterrows()])
net_html = make_page("DNA Network", f'<table id="reg-list" class="sortable" border="1"><thead><tr><th>Match to</th><th>First Ancestor</th><th>Include</th><th>Summary</th><th>Lineage</th></tr></thead><tbody>{net_rows}</tbody></table>', len(df), "")

# Stats
grp = 'FirstAncestor_pair cojoined' if 'FirstAncestor_pair cojoined' in df.columns else 'Authority_FirstAncestor'
lin = df[grp].value_counts().reset_index(); lin.columns = ['First Ancestor', 'Count']
lin_html = make_page("Lineage Count Report", f'<div class="centerline"><p>Total Lines: {len(lin)}</p></div>{lin.to_html(index=False, border=1, classes="dataframe sortable")}', len(lin), "")
mat = df['Name'].value_counts().reset_index(); mat.columns = ['Participant', 'Entries']
mat_html = make_page("Match Count Report", f'<div class="centerline"><p>Participants: {len(mat)}</p></div>{mat.to_html(index=False, border=1, classes="dataframe sortable")}', len(mat), "")

# --- 7. UPLOAD & VERIFY ---
print(f"\n[5] Uploading to {FTP_HOST}...")
ftps = FTP_TLS(timeout=30); ftps.connect(FTP_HOST, FTP_PORT); ftps.login(FTP_USER, FTP_PASS); ftps.prot_p(); ftps.set_pasv(True)

try: ftps.cwd(f"/{REMOTE_DIR}")
except:
    try: ftps.cwd(f"/public_html/{REMOTE_DIR}")
    except: pass
print(f"    - Target Directory: {ftps.pwd()}")

uploads = {
    "yates_ancestor_register.shtml": html_anc,              # Main File (Ancestor Sort)
    "ons_yates_dna_register.shtml": html_anc,               # Alias (Ancestor Sort)
    "ons_yates_dna_register_participants.shtml": html_par,  # NEW: Participant Sort
    "just-trees.shtml": tree_html,
    "dna_network.shtml": net_html,
    "lineage_count.shtml": lin_html,
    "match_count.shtml": mat_html,
    CSV_INPUT: None
}

for fn, content in uploads.items():
    if content:
        with open(fn, "w", encoding="iso-8859-15") as f: f.write(content)
    src = fn if content else CSV_INPUT
    if os.path.exists(src):
        with open(src, "rb") as fh: ftps.storbinary(f"STOR {fn}", fh)
        print(f"    - Uploaded {fn}")

print(f"\n[6] Server Verification ({ftps.pwd()}):")
try:
    file_list = ftps.nlst()
    relevant_files = [f for f in file_list if f in uploads.keys()]
    for f in relevant_files:
        print(f"    [OK] {f}")
        print(f"         {WEB_BASE_URL}{f}")
    print(f"    (Total files in directory: {len(file_list)})")
except Exception as e:
    print(f"    [WARN] Could not list directory: {e}")

ftps.quit()
print("\n[DONE] Dual Views Published.")

      CELL 2: PRODUCTION PUBLISHER (DUAL VIEWS)
[1] Credentials Loaded for: ftp.one-name.net

[2] Fetching Authority Key from Server...
    - [SUCCESS] Pulled latest 'match_to_unmasked.csv'.
[3] Database Loaded: 1700 records.
    - Vitals Recalculated: 1700
    - Unmasking 'Match to' codes...
    - Building Narrative Column...

[4] Rendering HTML...
    - Generating Ancestor View...
    - Generating Participant View...

[5] Uploading to ftp.one-name.net...
    - Target Directory: /ons-study
    - Uploaded yates_ancestor_register.shtml
    - Uploaded ons_yates_dna_register.shtml
    - Uploaded ons_yates_dna_register_participants.shtml
    - Uploaded just-trees.shtml
    - Uploaded dna_network.shtml
    - Uploaded lineage_count.shtml
    - Uploaded match_count.shtml
    - Uploaded engine_database.csv

[6] Server Verification (/ons-study):
    [OK] ons_yates_dna_register.shtml
         https://yates.one-name.net/ons-study/ons_yates_dna_register.shtml
    [OK] engine_database.csv
         