<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title [CELL 1] Setup + Global Variables (V8 Baseline + Bio Proof Nav)
import os, sys, re, csv, json, html, socket, pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

try:
    import tqdm
except ImportError:
    os.system('pip install tqdm')
    import tqdm

print("="*60)
print("      [CELL 1] SETUP LOADED (V8 Baseline - Clean Nav)")
print("="*60)

TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

# Orange background removed from Brick Wall Buster link
NAV_HTML = r"""<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print, .action-btn, .control-panel, .tabs { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li><li><a href="/ons-study/just-trees.shtml">Trees</a></li><li><a href="/ons-study/dna_network.shtml">DNA Network</a></li><li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li><li><a href="/ons-study/biological_proof.html" style="color:#fff !important; font-weight:bold;">Biological Proof</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/brick_wall_buster.shtml">Brick Wall Buster</a></li><li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li><li><a href="https://yates.one-name.net/gengen/images/cousin-calculator.jpg" target="_blank" style="color:#b2dfdb;">Cousin Calc</a></li><li><a href="https://yates.one-name.net/gengen/images/Shared_cM_Project_v4.jpg" target="_blank" style="color:#b2dfdb;">cM Chart</a></li><li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li><li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li></ul></nav>"""

SITE_INFO = r"""<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>‚Äîa method that blends genealogical reasoning with data-driven logic.</p></div>"""

JS_CORE = r"""<script type="text/javascript">(function(){ function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();} function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);} function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}} window.filterTable = function() { var input = document.getElementById("tableSearch"); var filter = input.value.toUpperCase(); var table = document.getElementById("reg-table") || document.querySelector("table.dataframe"); var tr = table.getElementsByTagName("tr"); for (var i = 1; i < tr.length; i++) { var tdArr = tr[i].getElementsByTagName("td"); var found = false; for (var j = 0; j < tdArr.length; j++) { if (tdArr[j]) { var txtValue = tdArr[j].textContent || tdArr[j].innerText; if (txtValue.toUpperCase().indexOf(filter) > -1) { found = true; break; } } } tr[i].style.display = found ? "" : "none"; } } function init(){ var t=document.getElementsByTagName('table'); for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]); } if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init(); })();</script>"""

# --- EXACT V8 MAKE_PAGE SIGNATURE RESTORED ---
def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    nav_blk = ""
    if view_type in ['ancestor', 'participant', 'tree_az', 'tree_za', 'proof', 'hot_paths', 'network', 'dossier', 'subscribe', 'share', 'buster', 'singleton']:
        nav_blk = SITE_INFO
    if view_type == 'subscribe' or view_type == 'theory' or view_type == 'share' or view_type == 'glossary':
        nav_blk = ""

    toggle = ""
    print_btn = ""
    search_bar = ""

    if view_type in ['ancestor', 'participant', 'singleton']:
        search_bar = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="üîç Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""

    if view_type in ['ancestor', 'participant', 'singleton']:
        view_name = "Register"
        if view_type == 'singleton': view_name = "Singleton List"
        print_btn = f"""<div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">üñ®Ô∏è Print {view_name}</button></div>"""

    if view_type == 'ancestor':
        toggle = f"""<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<span style="font-weight:bold;color:#006064;">By Ancestral Line</span> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="color:#00acc1;text-decoration:none;">By Participant Name</a></div>"""
    elif view_type == 'participant':
        toggle = f"""<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <span style="font-weight:bold;color:#006064;">By Participant Name</span></div>"""
    elif 'tree' in view_type:
        za = f'<span style="font-weight:bold;color:#000;">Z-A</span>' if 'za' in view_type else f'<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        az = f'<span style="font-weight:bold;color:#000;">A-Z</span>' if 'az' in view_type else f'<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}</div>{JS_CORE}</body></html>"""

print("‚úÖ Cell 1 Loaded.")

      [CELL 1] SETUP LOADED (V8 Baseline - Clean Nav)
‚úÖ Cell 1 Loaded.


In [78]:
# @title [CELL 3] The Data Engine (V123 - Deep Ancestry Radar)
def run_engine():
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V123 - DEEP RADAR)...")
    print("="*60)

    import os, sys, re, csv
    from ftplib import FTP_TLS
    from google.colab import userdata
    from datetime import datetime

    CSV_DB = "engine_database.csv"
    if os.path.exists(CSV_DB): os.remove(CSV_DB)

    try:
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")
    except: pass
    REMOTE_SUBDIR = "ons-study"
    KEY_FILE = "match_to_unmasked.csv"
    PROCESSED_GED = "_processed_unmasked.ged"

    def clean_and_standardize(raw_name):
        if not raw_name: return "findme"
        s = raw_name.replace("/", "").strip()
        triggers = ["unknown", "missing", "searching", "still searching", "living", "private", "nee", "nee ?", "wife", "mrs"]
        if s.lower() in triggers or s == "": return "findme"
        if "?" in s: return "findme"
        if "unknown" in s.lower(): return "findme"
        return s

    def get_surname(full_name):
        if not full_name or "findme" in full_name.lower(): return ""
        clean = re.sub(r'\b(jr\.?|sr\.?|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', full_name, flags=re.IGNORECASE)
        parts = clean.replace(',', '').split()
        return parts[-1] if parts else ""

    def make_directory_label(name, dates):
        if "findme" in name.lower(): return name
        sur = get_surname(name)
        if not sur: return name
        firsts = re.sub(f"{re.escape(sur)}$", "", name).strip()
        return f"{sur}, {firsts} {dates}"

    print("\n[STEP 1] Resolving Files (Local Priority)...")
    if os.path.exists(KEY_FILE):
        print(f"    ‚úÖ Found {KEY_FILE} locally. Skipping FTP download.")
    else:
        print(f"    üåê {KEY_FILE} not found locally. Attempting FTP fetch...")
        try:
            ftps = FTP_TLS()
            ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
            try:
                with open(KEY_FILE, "wb") as f: ftps.retrbinary(f"RETR /{REMOTE_SUBDIR}/{KEY_FILE}", f.write)
                print(f"    ‚úÖ Successfully downloaded {KEY_FILE}.")
            except Exception as e:
                print(f"    ‚ö†Ô∏è FTP download failed: {e}")
            ftps.quit()
        except Exception as e:
            print(f"    ‚ö†Ô∏è FTP connection failed: {e}")

    all_files = os.listdir('.')
    ged_files = [f for f in all_files if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files: return print("‚ùå No GEDCOM found. Please upload one.")
    ged_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
    DEFAULT_GEDCOM = ged_files[0]
    print(f"    üëâ Source GEDCOM: {DEFAULT_GEDCOM}")

    def resolve_code(payload):
        m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        return m.group(2).lower() if m else None

    print("\n[STEP 2] Loading Tester Authority CSV...")
    csv_auth = {}
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            reader = csv.reader(f)
            for i, row in enumerate(reader):
                if len(row) >= 2:
                    if i == 0 and ("tester" in row[0].lower() or "masked" in row[0].lower() or "code" in row[0].lower()):
                        continue
                    code = row[0].strip().lower()
                    name = row[1].strip()
                    tid = row[2].strip() if len(row) > 2 else ""
                    if tid: tid = "I" + re.sub(r'[^0-9]', '', tid)
                    csv_auth[code] = {"name": name, "id": tid}

    print("\n[STEP 3] Parsing GEDCOM for Study| Tags & Lineages...")
    import shutil
    shutil.copyfile(DEFAULT_GEDCOM, PROCESSED_GED)

    individuals = {}; families = {}; study_testers = {}

    def is_yates(name_str):
        n = (name_str or "").lower()
        return "yates" in n or "yeates" in n or "yate" in n

    current_id = None; current_fam = None; current_tag = None
    with open(PROCESSED_GED, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip(); parts = line.split(" ", 2)
            if len(parts) < 2: continue
            lvl, tag, val = parts[0], parts[1], parts[2] if len(parts)>2 else ""

            if lvl == "0" and "INDI" in val:
                current_id = tag.replace("@", "")
                individuals[current_id] = {"name": "findme", "famc": None, "fams": [], "match_code": "", "cm": 0, "birt": "", "deat": ""}
                current_fam = None; current_tag = "INDI"
            elif current_id and lvl != "0":
                if tag == "NAME" and lvl == "1":
                    individuals[current_id]["name"] = clean_and_standardize(val)
                elif tag == "FAMC" and lvl == "1":
                    individuals[current_id]["famc"] = val.replace("@", "")
                elif tag == "FAMS" and lvl == "1":
                    individuals[current_id]["fams"].append(val.replace("@", ""))

                elif tag == "NICK" and lvl == "2" and "Study|" in val:
                    tester_code = val.split("Study|")[-1].strip().lower()
                    study_testers[tester_code] = {"id": current_id, "name": individuals[current_id]["name"]}

                elif tag == "NPFX" and lvl == "2":
                    code = resolve_code(val)
                    if code: individuals[current_id]["match_code"] = code.lower()
                    m = re.search(r'^(\d+)|(\d+)\s*cM', val, re.IGNORECASE)
                    if m: individuals[current_id]["cm"] = int(m.group(1) or m.group(2))

                elif tag == "BIRT": current_tag = "BIRT"
                elif tag == "DEAT": current_tag = "DEAT"
                elif tag == "DATE" and current_tag:
                    m = re.search(r'\d{4}', val)
                    if m: individuals[current_id][current_tag.lower()] = m.group(0)
                    current_tag = None

            if lvl == "0" and "FAM" in val:
                current_fam = tag.replace("@", "")
                families[current_fam] = {"husb": None, "wife": None}
                current_id = None
            elif current_fam and lvl != "0":
                if tag == "HUSB": families[current_fam]["husb"] = val.replace("@", "")
                elif tag == "WIFE": families[current_fam]["wife"] = val.replace("@", "")

    def get_parents(pid):
        if not pid or pid not in individuals: return None, None
        famc = individuals[pid]["famc"]
        if not famc or famc not in families: return None, None
        return families[famc]["husb"], families[famc]["wife"]

    def get_mother_surname(pid):
        if not pid: return ""
        _, mom_id = get_parents(pid)
        if mom_id and mom_id in individuals: return get_surname(individuals[mom_id]["name"])
        return ""

    def to_spanish_name(pid, current_name):
        if "findme" in current_name.lower(): return current_name
        mom_surname = get_mother_surname(pid)
        if not mom_surname or "findme" in mom_surname.lower(): return current_name
        if mom_surname.lower() not in current_name.lower(): return f"{current_name}-{mom_surname}"
        return current_name

    # üåü NEW: THE DEEP ANCESTRY RADAR
    # This recursively checks if a specific person has ANY Yates in their documented ancestry
    yates_memo = {}
    def has_yates_ancestry(pid):
        if not pid or pid not in individuals: return False
        if pid in yates_memo: return yates_memo[pid]

        # Base case: Is this person a Yates?
        if is_yates(individuals[pid].get("name", "")):
            yates_memo[pid] = True
            return True

        # Recursive case: Check their parents
        dad_id, mom_id = get_parents(pid)
        res = has_yates_ancestry(dad_id) or has_yates_ancestry(mom_id)
        yates_memo[pid] = res
        return res

    def climb_full_line(start_id):
        curr = start_id; lineage_data = []
        while curr:
            p = individuals.get(curr)
            if not p: break
            spanish_name = to_spanish_name(curr, p["name"])
            spouse_name = "findme"; spouse_id = None
            if p["fams"]:
                fid = p["fams"][0]
                if fid in families:
                    f = families[fid]
                    sid = f["wife"] if f["husb"] == curr else f["husb"]
                    if sid and sid in individuals:
                        spouse_name = individuals[sid]["name"]; spouse_id = sid
            spouse_spanish = to_spanish_name(spouse_id, spouse_name) if spouse_id else spouse_name
            lineage_data.append({"name": spanish_name, "raw_name": p["name"], "id": curr, "spouse": spouse_spanish, "spouse_raw": spouse_name, "spouse_id": spouse_id})

            dad_id, mom_id = get_parents(curr)
            if not dad_id and not mom_id: break

            # üåü USE THE RADAR TO STEER THE CLIMB
            dad_has_yates = has_yates_ancestry(dad_id)
            mom_has_yates = has_yates_ancestry(mom_id)

            if dad_has_yates and not mom_has_yates: curr = dad_id
            elif mom_has_yates and not dad_has_yates: curr = mom_id
            else: curr = dad_id if dad_id else mom_id

        return lineage_data

    def format_dates(uid):
        if not uid or uid not in individuals: return "findme"
        b = individuals[uid]["birt"] or "findme"
        d = individuals[uid]["deat"] or "findme"
        b = re.sub(r'\?', 'findme', b); d = re.sub(r'\?', 'findme', d)
        if b == "findme" and d == "findme": return "findme"
        return f"({b} - {d})"

    testers = {}
    for code, data in csv_auth.items():
        testers[code] = {"name": data["name"], "id": data["id"]}

    for code, data in study_testers.items():
        if code not in testers:
            testers[code] = {"name": data["name"], "id": data["id"]}
        elif not testers[code]["id"]:
            testers[code]["id"] = data["id"]

    for kcode, tdata in testers.items():
        t_lin = ""; t_pids = ""
        if tdata["id"] and tdata["id"] in individuals:
            lin_data = climb_full_line(tdata["id"])
            if lin_data:
                full = list(reversed(lin_data))
                t_lin = " -> ".join([x["name"] for x in full])
                t_pids = ",".join([x["id"] for x in full])
        tdata["lineage_str"] = t_lin
        tdata["path_ids"] = t_pids

    print("\n[STEP 4] Constructing Database...")
    rows = []
    for uid, p in individuals.items():
        if p["match_code"]: # It's a found match!
            kit_code = p["match_code"]

            if kit_code in testers:
                t_name = testers[kit_code]["name"]
                t_id = testers[kit_code]["id"]
                t_lin = testers[kit_code]["lineage_str"]
                t_pids = testers[kit_code]["path_ids"]
                tester_display = f"{t_name} [{t_id}]" if t_id else f"{t_name} [{kit_code}]"
            else:
                t_name = kit_code
                t_id = ""
                t_lin = ""
                t_pids = ""
                tester_display = f"{kit_code} [{kit_code}]"

            lineage_data = climb_full_line(uid)
            if not lineage_data: continue

            full_line = list(reversed(lineage_data))
            gen1 = full_line[0]

            top_name = gen1["raw_name"]
            top_dates = format_dates(gen1["id"])
            spouse_name = gen1["spouse_raw"]
            spouse_id = gen1["spouse_id"]
            spouse_dates = format_dates(spouse_id)

            if spouse_name != "findme":
                husb_sur = get_surname(top_name); wife_sur = get_surname(spouse_name)
                if husb_sur.lower() == wife_sur.lower(): spouse_name += f" (n√©e {wife_sur})"

            pair_dated = f"{top_name} {top_dates}"
            if spouse_name != "findme": dir_label = make_directory_label(top_name, top_dates) + f" & {spouse_name}"
            else: dir_label = make_directory_label(top_name, top_dates)

            if spouse_name != "findme": pair_dated += f" & {spouse_name} {spouse_dates}"
            pair_simple = f"{top_name} & {spouse_name}" if spouse_name != "findme" else top_name

            clean_top = re.sub(r'[^a-zA-Z0-9]', '', top_name)
            clean_sp = re.sub(r'[^a-zA-Z0-9]', '', spouse_name.split('(')[0]) if spouse_name != "findme" else "ZZZ"
            sort_key = f"{clean_top}_{clean_sp}"

            path_names = []
            for i, x in enumerate(full_line):
                if i == 0: path_names.append(pair_dated)
                else: path_names.append(x["name"])

            lineage_str = " -> ".join(path_names)
            path_ids = ",".join([x["id"] for x in full_line])

            _, fa1_mom_id = get_parents(gen1["id"])
            fa1_mother = to_spanish_name(fa1_mom_id, individuals[fa1_mom_id]["name"]) if fa1_mom_id else "findme"

            fa2_mother = "findme"
            if spouse_id:
                _, fa2_mom_id = get_parents(spouse_id)
                if fa2_mom_id: fa2_mother = to_spanish_name(fa2_mom_id, individuals[fa2_mom_id]["name"])

            rows.append({
                "Tester_Code": kit_code,
                "Tester_Name": t_name,
                "Tester_ID": t_id,
                "Tester_Display": tester_display,
                "Tester_Lineage": t_lin,
                "Tester_Path_IDs": t_pids,
                "Match_Name": p["name"],
                "Match_ID": uid,
                "cM": p["cm"],
                "Match_Lineage": lineage_str,
                "Match_Path_IDs": path_ids,
                "Authority_Directory_Label": dir_label,
                "Authority_FirstAncestor": pair_simple,
                "Authority_FirstAncestor_alpha": sort_key,
                "Authority_FirstAncestor_dated": pair_dated,
                "fa_1 extracted": top_name, "fa_1_Dates": top_dates, "fa_1_Mother": fa1_mother,
                "fa_2 extracted": spouse_name, "fa_2 Dates": spouse_dates, "fa_2_Mother": fa2_mother,
                "Gen_Count": len(full_line)
            })

    rows.sort(key=lambda r: r["Authority_Directory_Label"])

    fieldnames = [
        "Tester_Code", "Tester_Name", "Tester_ID", "Tester_Display",
        "Tester_Lineage", "Tester_Path_IDs",
        "Match_Name", "Match_ID", "cM", "Match_Lineage", "Match_Path_IDs",
        "Authority_Directory_Label", "Authority_FirstAncestor", "Authority_FirstAncestor_alpha", "Authority_FirstAncestor_dated",
        "fa_1 extracted", "fa_1_Dates", "fa_1_Mother",
        "fa_2 extracted", "fa_2 Dates", "fa_2_Mother",
        "Gen_Count"
    ]

    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader(); writer.writerows(rows)

    print(f"\n[SUCCESS] Engine V123 Complete. Saved {len(rows)} verified matches to {CSV_DB}.")

print("‚úÖ Cell 3 (Engine V123 - Deep Ancestry Radar) Loaded.")

‚úÖ Cell 3 (Engine V123 - Deep Ancestry Radar) Loaded.


In [79]:
# @title [CELL 4] The Template Library (V31: Checkbox UI & Inferred Ancestors)
print("="*60)
print("      [CELL 4] TEMPLATE LIBRARY LOADING...")
print("="*60)

NAV_HTML = r"""<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li><li><a href="/ons-study/just-trees.shtml">Trees</a></li><li><a href="/ons-study/dna_network.shtml">DNA Network</a></li><li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li><li><a href="/ons-study/biological_proof.html">Biological Proof</a></li><li><a href="/ons-study/proof_consolidator.html" style="background-color:#4a148c; color:#fff !important; font-weight:bold; border-left:1px solid #7c43bd; border-right:1px solid #7c43bd;">Proof Consolidator</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/brick_wall_buster.shtml">Brick Wall Buster</a></li><li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li><li><a href="https://yates.one-name.net/gengen/images/cousin-calculator.jpg" target="_blank" style="color:#b2dfdb;">Cousin Calc</a></li><li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li><li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40; font-weight:bold;">Subscribe</a></li></ul></nav>"""
SITE_INFO = r"""<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;margin-bottom:0;">This register employs <em>Collateral DNA Saturation</em>‚Äîa method blending genealogical reasoning with data-driven logic to prove connections using multiple independent DNA cousins.</p></div>"""
CSS_BASE = r"""body{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px;display:flex;flex-direction:column;min-height:100vh;margin:0;} .wrap{flex:1;} .proof-card{background:white;max-width:1100px;margin:20px auto;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);padding:40px} .badge{padding:5px 10px;border-radius:4px;font-weight:bold;font-size:0.85em;text-transform:uppercase;border:1px solid #ccc;} .badge-platinum{background:#eceff1;color:#263238} .badge-gold{background:#fff8e1;color:#f57f17} .badge-silver{background:#f5f5f5;color:#616161} .badge-bronze{background:#efebe9;color:#5d4037} .badge-descendant{background:#e3f2fd;color:#0d47a1} .badge-terminal{background:#fff;color:#000;border-color:#000;font-style:italic;} table{width:100%;border-collapse:collapse;margin-top:15px;margin-bottom:40px;font-family:'Georgia',serif;font-size:15px;} th{background:#eceff1;color:#263238;padding:12px;text-align:left;border-bottom:2px solid #000;} td{padding:12px;border-bottom:1px solid #ddd;vertical-align:top;} @media print{ .no-print{display:none !important;} .only-print{display:block !important;} .proof-card{box-shadow:none;border:none;padding:0;margin:0;} body{background:white;padding:0;display:block;} th{background:#f0f0f0 !important;color:#000 !important;} .badge{border:1px solid #000;color:#000;background:transparent !important;} .legal-footer{background:transparent !important; border-top:2px solid #000 !important; color:#000 !important; page-break-inside:avoid !important; padding:10px 0 !important; margin-top:30px !important;} } .only-print{display:none;}"""
JS_CORE = r"""<script type="text/javascript">(function(){ function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/ +/g,' ').trim().toLowerCase();} function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.-]/g,'')),nB=parseFloat(B.replace(/[^0-9.-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);} function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(' (asc)','').replace(' (desc)','');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}} window.filterTable = function() { var input = document.getElementById("tableSearch"); var filter = input.value.toUpperCase(); var table = document.getElementById("reg-table") || document.querySelector("table.dataframe"); var tr = table.getElementsByTagName("tr"); for (var i = 1; i < tr.length; i++) { var tdArr = tr[i].getElementsByTagName("td"); var found = false; for (var j = 0; j < tdArr.length; j++) { if (tdArr[j]) { var txtValue = tdArr[j].textContent || tdArr[j].innerText; if (txtValue.toUpperCase().indexOf(filter) > -1) { found = true; break; } } } tr[i].style.display = found ? "" : "none"; } } function init(){ var t=document.getElementsByTagName('table'); for(var i=0;i<t.length;i++) if(t[i].className.indexOf('sortable') !== -1) makeSortable(t[i]); } if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init(); })();</script>"""
BTT_BTN = r"""<style>.btt{position:fixed;bottom:20px;right:20px;background:#00838f;color:white;padding:10px 15px;text-decoration:none;border-radius:4px;font-weight:bold;box-shadow:0 2px 5px rgba(0,0,0,0.3);z-index:1000;opacity:0.9;} .btt:hover{opacity:1;background:#006064;} @media print { .btt { display: none !important; } }</style><a href="#top" class="btt no-print">‚¨ÜÔ∏è Top</a>"""

LEGAL_FOOTER_TMPL = r"""<div class="legal-footer no-print" style="margin-top:50px;padding:20px;background:#f4f4f4;border-top:1px solid #ddd;text-align:center;color:#666;font-family:sans-serif;font-size:0.85em;clear:both;"><p style="margin-bottom:5px;font-size:1.1em;color:#333;"><strong>&copy; __YEAR__ Ronald Eugene Yates. All Rights Reserved.</strong></p><p style="margin-bottom:5px;">Generated by <em>The Forensic Genealogy Publisher&trade;</em></p><p style="font-style:italic;color:#888;margin-bottom:0;max-width:800px;margin-left:auto;margin-right:auto;">The terms "Forensic Handshake", "Brick Wall Buster", and "Collateral Saturation" are trademarks of the Yates One-Name Study.</p></div><div class="only-print" style="margin-top:40px;padding-top:10px;border-top:2px solid #000;text-align:center;color:#000;font-family:'Georgia',serif;font-size:12px;clear:both;page-break-inside:avoid;"><strong>&copy; __YEAR__ Ronald Eugene Yates. All Rights Reserved.</strong><br><em>Generated by The Forensic Genealogy Publisher&trade; | yates.one-name.net</em></div>"""

CONSOLIDATOR_CSS = r"""<style>
.consol-panel { background: #f3e5f5; border: 1px solid #ab47bc; padding: 25px; border-radius: 8px; margin-bottom: 25px; font-family: 'Segoe UI', sans-serif; text-align: center; }
.consol-panel select { padding: 8px; font-size: 14px; width: 100%; border: 1px solid #7b1fa2; border-radius: 4px; }
.consol-btn { background: #4a148c; color: white; border: none; padding: 12px 25px; font-size: 16px; font-weight: bold; border-radius: 4px; cursor: pointer; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-top: 10px; }
.consol-btn:hover { background: #38006b; }
.academic-brief { background: white; max-width: 1000px; margin: 0 auto 30px auto; padding: 60px 80px; box-shadow: 0 4px 20px rgba(0,0,0,0.1); font-family: 'Georgia', serif; color: #000; line-height: 1.6; }
.title-page { page-break-after: always; display: flex; flex-direction: column; justify-content: center; min-height: 70vh; padding: 20px; }
.brief-header { text-align: center; border-bottom: 3px solid #000; padding-bottom: 20px; margin-bottom: 30px; }
.brief-header h1 { font-size: 28px; text-transform: uppercase; margin: 0; letter-spacing: 1px; color: #000; }
.brief-header p { font-size: 16px; font-style: italic; color: #444; margin: 5px 0 0 0; }
.brief-meta { background: #fafafa; border: 1px solid #ddd; padding: 20px; margin-bottom: 30px; font-size: 15px; }
.brief-meta strong { color: #000; }
.brief-section-title { font-size: 18px; text-transform: uppercase; border-bottom: 1px solid #ccc; padding-bottom: 5px; margin-top: 40px; margin-bottom: 20px; font-weight: bold; }
.collateral-list { margin-left: 20px; font-size: 15px; }
.collateral-list li { margin-bottom: 8px; }
.brief-table { width: 100%; border-collapse: collapse; margin-top: 15px; font-size: 14px; }
.brief-table th { background: #f0f0f0; color: #000; border-bottom: 2px solid #000; border-top: 1px solid #000; padding: 10px; text-align: left; }
.brief-table td { padding: 10px; border-bottom: 1px solid #ddd; vertical-align: middle; }
.brief-table tr { page-break-inside: avoid; }
.verdict-stamp { border: 3px double #4a148c; padding: 20px; text-align: center; margin-top: 40px; font-size: 18px; font-weight: bold; color: #4a148c; background: #fafafa; text-transform: uppercase; letter-spacing: 1px; }
@media print { .no-print { display: none !important; } body { background: white; padding: 0; } .academic-brief { box-shadow: none; padding: 0; max-width: 100%; border: none; margin-bottom: 0; } .verdict-stamp { border: 3px double #000; color: #000; } }
.vg-checkbox-container { height:150px; overflow-y:auto; border:1px solid #7b1fa2; background:white; border-radius:4px; padding:10px; font-size:13px; text-align:left; }
.vg-checkbox-container label { display:block; margin-bottom:5px; cursor:pointer; }
.vg-checkbox-container label:hover { background-color:#f3e5f5; }
</style>"""

APPENDIX_A_HTML = r"""
<div class="academic-brief" style="max-width: 1000px; margin-top: 40px; page-break-before: always; text-align: left; padding: 60px 80px;">
  <h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Appendix A: CSS Field Definitions &amp; Data Sources</h2>
  <p><b>Purpose:</b> This appendix defines each field used in the Collateral Saturation Score (CSS) matrix, documents the intended data source(s) within the Yates DNA Study reporting system, and states calculation rules and quality checks so results are reproducible by an independent reviewer.</p>
  <h3 style="color: #4a148c; margin-top: 30px;">A1. Core Entities</h3>
  <ul>
    <li><b>Participant (ID):</b> A unique tester kit identifier.</li>
    <li><b>Cluster:</b> The set of proper matches and related evidence assigned to a participant‚Äôs claimed lineage path.</li>
    <li><b>Virtual Group:</b> A user-defined composite of multiple independent kits treated as a single "Super-Kit" to test combined evidence density.</li>
    <li><b>Handshake:</b> A cross-match instance where a kit in the participant‚Äôs cluster intersects a kit assigned to a known lineage.</li>
  </ul>
  <h3 style="color: #4a148c; margin-top: 30px;">A2. CSS Matrix Fields</h3>
  <table class="brief-table" style="font-size: 13px;">
    <thead><tr><th>Field</th><th>Abbrev</th><th>Definition</th><th>Computation</th><th>Desired Range</th></tr></thead>
    <tbody>
      <tr><td><b>Proper Matches</b></td><td>PM</td><td>Count of matches that meet study criteria assigned to the participant‚Äôs cluster.</td><td>PM = confirmed matches.</td><td>‚â•15 required; 50‚Äì150 strong.</td></tr>
      <tr><td><b>Target Handshakes</b></td><td>HC-T</td><td>Handshake instances supporting the rank #1 candidate lineage.</td><td>HC-T = participant's match instances intersecting target node.</td><td>30‚Äì100 strong; 100+ saturated.</td></tr>
      <tr><td><b>Second Handshakes</b></td><td>HC-2</td><td>Handshake instances for the rank #2 candidate lineage.</td><td>HC-2 = participant's match instances for runner-up lineage.</td><td>Lower is better; typical &lt;30.</td></tr>
      <tr><td><b>Dominance Ratio</b></td><td>DR</td><td>Relative support for target lineage over the runner-up lineage.</td><td>DR = HC-T / max(HC-2, 1).</td><td>3‚Äì10 strong; 10+ fully saturated.</td></tr>
      <tr><td><b>Independent Branches</b></td><td>BR</td><td>Count of independent descendant branches represented in the cluster.</td><td>BR = distinct descendant branches meeting criteria.</td><td>2‚Äì3 minimum; 4‚Äì10 strong.</td></tr>
      <tr><td><b>Unique Testers</b></td><td>TB</td><td>Number of unique tester kits contributing evidence to the participant's cluster.</td><td>TB = count of unique NPFX kits on target node (Node-Level).</td><td>15‚Äì40 strong; 40+ saturated.</td></tr>
      <tr><td><b>Node Saturation</b></td><td>NS</td><td>Maximum independent kit corroboration observed for the deepest proven ancestor node.</td><td>NS = max(independent kits) across the spine (Node-Level).</td><td>15‚Äì50 validated; 50‚Äì150 strong.</td></tr>
      <tr><td><b>Stability</b></td><td>ST</td><td>Sensitivity outcome (PASS/PARTIAL/FAIL) indicating robustness.</td><td>Multiplier applied: PASS=1.00, PARTIAL=0.85, FAIL=0.60.</td><td>Desired PASS.</td></tr>
      <tr><td><b>CSS v2a (score)</b></td><td>CSSv2a</td><td>Composite normalized score for cross-participant comparison.</td><td>100 √ó weighted_mean(component norms) √ó ST.</td><td>85‚Äì100 platinum.</td></tr>
    </tbody>
  </table>
  <h3 style="color: #4a148c; margin-top: 30px;">A3. Mathematical Weighting &amp; Normalization</h3>
  <ul style="font-size: 14px; margin-bottom: 0;">
    <li><b>Empirical Caps:</b> Logarithmic caps (e.g., PM=150, HC=100, TB=40) are not arbitrary; they were established empirically based on observed saturation limits within the Yates DNA Study dataset. Values exceeding these caps provide diminishing marginal evidence and are constrained to a maximum normalized score of 1.0.</li>
    <li><b>Weighted Mean:</b> To reflect genealogical reality, components are not weighted equally. Structural replication (BR, weight 2.0) and lineage dominance (DR, weight 1.5) carry more mathematical gravity. Raw match volume (PM) carries a weight of 1.0 to ensure participants with low personal evidence are properly separated from highly verified anchors. Handshakes, testers, and node saturation carry a standard weight of 1.0.</li>
    <li><b>Categorical Branch Logic:</b> Independent Branches (BR) is treated as a stepwise categorical variable (BR=2‚Üí0.25, BR=3‚Üí0.50... BR‚â•6‚Üí1.0) mapped onto a continuous scale, reflecting the rapid diminishing returns of branch replication beyond 6 independent lines.</li>
  </ul>
  <h3 style="color: #4a148c; margin-top: 30px;">A4. Matrix Inclusion Rules &amp; Flags</h3>
  <ul style="font-size: 14px; margin-bottom: 0;">
    <li><b>Strict PM Minimum:</b> Participants with PM &lt; 15 automatically trigger a FAIL stability penalty (0.60 multiplier) as they lack the minimum personal evidence mass for proof-grade analysis. They are included in the matrix strictly to demonstrate the noise-to-signal gradient.</li>
    <li><b>Node-Level Duplication:</b> In saturated clusters, descendants of the same ancestor will naturally display identical TB and NS values because these measure the shared ancestral node, not the individual kit.</li>
  </ul>
</div>
"""

CONSOLIDATOR_JS = r"""<script>__JS_GLOBALS__
const getCM = (val) => { let m = String(val).match(/(\d+)/); return m ? parseInt(m[1]) : 0; };
function cleanNum(str){let res="";for(let i=0;i<str.length;i++)if(str[i]>='0'&&str[i]<='9')res+=str[i];return res;}

const partSel = document.getElementById('testerSelect');
const groupDiv = document.getElementById('groupCheckboxes');
const ancSel = document.getElementById('ancestorSelect');

const validTesters = DB.filter(r => r.t_names && r.t_names.trim() !== "");
const uniqueTesters = [...new Set(validTesters.map(r => r.participant))];

uniqueTesters.sort((a, b) => {
    const keyA = DATA.participants[a] ? DATA.participants[a].sort_key : a.toLowerCase();
    const keyB = DATA.participants[b] ? DATA.participants[b].sort_key : b.toLowerCase();
    if (keyA === keyB) return a.localeCompare(b);
    return keyA.localeCompare(keyB);
});

// Populate Single Dropdown & Checkboxes
uniqueTesters.forEach(t => {
    const o1 = document.createElement('option'); o1.value = t; o1.innerText = t; partSel.appendChild(o1);

    const lbl = document.createElement('label');
    lbl.innerHTML = `<input type="checkbox" value="${t}" class="vg-checkbox"> ${t}`;
    groupDiv.appendChild(lbl);
});

const allAncestors = Object.keys(DATA.ancestors).sort((a,b) => DATA.ancestors[a].name.localeCompare(DATA.ancestors[b].name));
allAncestors.forEach(k => { const o = document.createElement('option'); o.value = DATA.ancestors[k].name; o.innerText = DATA.ancestors[k].name; ancSel.appendChild(o); });

const cleanName = (str) => {
    if(!str) return "";
    return str.replace(/findme/gi, '?').replace(/\(\? - \?\)/g, '').replace(/\( - \)/g, '').replace(/\(\? - /g, '(d. ').replace(/ - \?\)/g, '(b. ').replace(/& \?/g, '').trim();
};

let secCounter = 1;
const getSec = () => {
    let num = secCounter++;
    const rules = {M:1000,CM:900,D:500,CD:400,C:100,XC:90,L:50,XL:40,X:10,IX:9,V:5,IV:4,I:1};
    let res = '';
    for(let i in rules){ while(num >= rules[i]){ res += i; num -= rules[i]; } }
    return res + ".";
};

const getStudyStats = () => {
    const d = new Date();
    const datePart = d.toLocaleDateString('en-US', { month: 'long', day: 'numeric', year: 'numeric' });
    const timePart = d.toLocaleTimeString('en-US', { hour: 'numeric', minute: '2-digit', timeZoneName: 'short' });
    return `Study Data Current As Of: ${datePart} ${timePart} | Total Autosomal matches: ${DB.length.toLocaleString()}`;
};

const getTitlePage = () => {
    const year = new Date().getFullYear();
    return `
    <div class="academic-brief title-page">
      <div style="font-family: Arial, sans-serif; text-align:center; line-height:1.6;">
        <h1 style="font-size:36px; border-bottom:none; margin-bottom:5px;">Collateral Saturation</h1>
        <h2 style="font-size:20px; font-weight:normal; color:#444; margin-top:0;">A Quantitative Method for Autosomal Lineage Reconstruction</h2>
        <br><br><br><p style="font-size:18px;"><b>Ronald Eugene Yates, MPH</b><br>University of California, Los Angeles<br>1975</p>
        <br><br><br><p style="font-size:16px;">Yates DNA Study<br>Autosomal Lineage Reconstruction Project</p>
        <br><br><br><br><br><p style="font-size:16px;">${year}</p>
        <p style="font-size:14px; color:#004d40; margin-top:20px; font-weight:bold;">${getStudyStats()}</p>
        <br><br><br><p style="font-size:14px; color:#555;">&copy; ${year} Ronald Eugene Yates<br>All Rights Reserved.</p>
      </div>
    </div>`;
};

const getMethodologyPage = () => {
    return `
    <div class="academic-brief" style="max-width: 1000px; margin-top: 40px; page-break-before: always; text-align: left; padding: 60px 80px;">
        <h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Methodological Principles of Collateral Saturation</h2>
        <p style="font-size:15px; line-height:1.6; color:#333; margin-top:20px;">
            <b>Collateral Saturation</b> is a lineage-validation method in which autosomal DNA evidence is evaluated at the level of descendant networks rather than isolated matches. A lineage hypothesis is considered reliable when it is supported by sufficient descendant density, replicated across independent branches, and remains stable under perturbation tests. The following principles define the minimum standard for applying Collateral Saturation in the Yates DNA Study.
        </p>
        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">1) Minimum Descendant Count</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">The study requires adequate descendant representation to produce interpretable inheritance patterns. A lineage claim must be supported by a descendant network large enough to reduce reliance on any single match. In practice, this means that clusters should have multiple descendant lines and a sufficient number of corroborating kits to allow replication testing and competition testing (dominance over alternate hypotheses).</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">2) Minimum Proper Matches (PM &ge; 15)</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">A lineage cluster must contain at least <b>15 proper matches</b> before Collateral Saturation analysis is applied. A ‚Äúproper match‚Äù is a match that meets platform matching thresholds, includes sufficient match metadata, and has been attributable to the lineage cluster through study classification rules. This threshold establishes a minimum evidence mass so that measured patterns (handshakes, dominance, and stability) are based on multiple observations rather than chance alignment.</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">3) Minimum Branches (BR &ge; 2)</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">A cluster must be sourced from at least <b>two independent descendant branches</b>. Independence is defined genealogically when pedigree documentation exists (distinct child-lines descending from the target ancestor), and may be inferred genetically when documentary data are incomplete (e.g., absence of close-relationship shared DNA patterns that indicate a single nuclear-family source). This requirement prevents over-weighting of one family group and ensures that the lineage signal replicates across distinct inheritance pathways.</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">4) No Singleton Lines</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">No lineage conclusion may be based on a single tester or a single descendant line. A cluster must be sourced from at least <b>two participants</b>, and the branch requirement (BR &ge; 2) must be met. Singleton clusters may be cataloged as hypotheses, but they are not eligible for proof-grade inference under Collateral Saturation because they cannot be tested for replication or stability.</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">5) Handshake Replication</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">A lineage hypothesis must show <b>replicated cross-line corroboration</b> through ‚Äúhandshakes,‚Äù defined as cross-matches between kits assigned to the participant‚Äôs cluster and kits assigned to established reference lineages. The key principle is replication: the handshake signal must appear across multiple independent branches rather than being driven by one highly connected kit. In saturated lineages, handshake evidence is expected to show dominance over competing candidate lineages and to persist under stricter thresholds.</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">6) Stability Testing</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">Collateral Saturation requires that lineage conclusions be <b>robust</b> when subjected to sensitivity tests. Typical perturbations include: removing the largest bridge participant(s), removing close relatives above a defined cM threshold, raising the minimum match threshold, and recomputing lineage rankings. A lineage conclusion is considered proof-grade when it remains stable under these perturbations (recorded as <b>PASS</b>, <b>PARTIAL</b>, or <b>FAIL</b>), demonstrating that the inference is not dependent on a single individual, a single segment class, or a single permissive threshold.</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">7) Node-Level Saturation Properties</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">In highly saturated lineages, metrics such as Unique Testers (TB) and Node Saturation (NS) become node-level properties shared by all descendants of that ancestor. Variation between participants representing the same lineage is therefore primarily expressed through their personal Proper Matches (PM), Target Handshakes (HC-T), and Dominance Ratio (DR).</p>

        <h3 style="color: #4a148c; margin-top: 20px; font-size:16px;">Summary Principle</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">In Collateral Saturation, <b>proof emerges from replication</b>: adequate descendant density, multiple independent branches, repeated handshake corroboration, and stability under perturbation. These conditions collectively reduce the probability that an observed lineage signal is an artifact of chance, population structure, or data errors, and they establish a consistent, scalable standard for lineage reconstruction in autosomal surname studies.</p>
    </div>

    <div class="academic-brief" style="max-width: 1000px; margin-top: 40px; page-break-before: always; text-align: left; padding: 60px 80px;">
        <h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Virtual Group Protocol for Brick-Wall Ancestors</h2>
        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">Definition</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">A <strong>Virtual Group</strong> is a composite lineage cluster formed by combining multiple independently tested participants who share the same unresolved ancestral hypothesis. Virtual Groups are intended primarily for unresolved ancestral problems where multiple independent descendants exist but individual evidence clusters remain underpowered. Virtual Groups are used when individual participants lack sufficient evidence mass to meet Collateral Saturation criteria independently, but collectively demonstrate a stable and replicated lineage signal.<br><br>Under this protocol, Collateral Saturation is evaluated at the group level, while individual participant results are retained for transparency.</p>

        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">Eligibility Requirements</h3>
        <p style="font-size:15px; line-height:1.6; color:#333; margin-bottom:10px;"><strong>1. Shared Brick-Wall Ancestor</strong><br>All participants in a Virtual Group must descend from the same unresolved ancestor or ancestral couple representing the target lineage hypothesis. The brick-wall ancestor must be explicitly defined and documented. Each participant must independently claim descent from this ancestor through genealogical evidence or credible family tradition.</p>
        <p style="font-size:15px; line-height:1.6; color:#333; margin-bottom:10px;"><strong>2. Independent Participants</strong><br>Virtual Groups must include at least two or more participants, preferably representing multiple descendant branches (BR &ge; 2). Participants must represent independent lines of descent and may not consist solely of close relatives. Independence may be determined through documented pedigrees, genetic relationship distances, or the absence of close-family match patterns.</p>
        <p style="font-size:15px; line-height:1.6; color:#333; margin-bottom:10px;"><strong>3. Independent Lineage Signals</strong><br>Each participant must independently produce measurable evidence linking them to the target lineage. Evidence may include proper matches, handshake connections, shared lineage clusters, and supporting testers. Participants do not need to meet Collateral Saturation thresholds individually. However, each participant must contribute at least minimal independent evidence supporting the shared lineage hypothesis.</p>

        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">Evaluation Procedure</h3>
        <ul style="font-size:15px; line-height:1.6; color:#333; padding-left:20px;">
            <li style="margin-bottom:8px;"><strong>Step 1 ‚Äî Individual Assessment:</strong> Each participant is first evaluated independently using standard Collateral Saturation criteria. Individual results are recorded. Participants that fail individual thresholds remain eligible for Virtual Group inclusion.</li>
            <li style="margin-bottom:8px;"><strong>Step 2 ‚Äî Composite Cluster Construction:</strong> The Virtual Group is constructed by combining proper matches, handshake connections, bridge connections, and supporting testers. Duplicate matches and testers are removed to avoid double-counting. Metrics are recalculated for the composite cluster.</li>
            <li style="margin-bottom:8px;"><strong>Step 3 ‚Äî Target Node Confirmation:</strong> The Virtual Group must demonstrate a consistent dominant lineage signal pointing to the shared brick-wall ancestor. The target node must be the highest-ranked lineage hypothesis for the composite cluster. Dominance ratio must indicate a clear preference for the target lineage over competing hypotheses.</li>
            <li style="margin-bottom:8px;"><strong>Step 4 ‚Äî Stability Testing:</strong> The Virtual Group must pass sensitivity testing. Recommended perturbations include the removal of the strongest participant, bridge kits, or close relatives, and raising match thresholds. The lineage signal must remain stable under perturbation.</li>
        </ul>

        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">Interpretation &amp; Reporting Standard</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">Virtual Groups demonstrate that Collateral Saturation is a network-level property rather than an individual property. Multiple independent descendant kits may jointly produce a proof-grade lineage signal even when individual kits remain below threshold.<br><br>Virtual Group reports must include the list of participants included, individual participant metrics, composite cluster metrics, stability test results, and target ancestor definition. Both individual FAIL results and composite PASS results must be reported to ensure transparency.</p>

        <h3 style="color: #4a148c; margin-top: 25px; font-size:16px;">Key Principle</h3>
        <p style="font-size:15px; line-height:1.6; color:#333;">Collateral Saturation may be achieved collectively when independent descendant signals converge on the same unresolved ancestor. <strong>Proof emerges from replicated network evidence rather than from any single participant.</strong></p>
    </div>
    `;
};

const getAuthorshipPage = () => {
    return `
    <div class="academic-brief" style="max-width: 1000px; margin-top: 40px; page-break-before: always; text-align: left; padding: 60px 80px;">
        <h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Appendix B: Authorship &amp; Metric Development</h2>
        <p style="font-size:15px; line-height:1.6; color:#333; margin-top:20px;">The Collateral Saturation methodology originated from the long-term design and execution of the Yates DNA Study by Ron Yates. Over a period of several years, the study evolved from traditional autosomal DNA matching into a structured lineage-based analytical framework emphasizing dense collateral descendant sampling and cross-lineage corroboration.</p>
        <p style="font-size:15px; line-height:1.6; color:#333;">The conceptual framework underlying Collateral Saturation‚Äîincluding the emphasis on independent descendant branches, handshake replication, node saturation, and lineage dominance‚Äîwas developed through empirical observation of inheritance patterns across the Yates surname dataset. The study demonstrated that lineage signals become increasingly stable as collateral representation increases, leading to the formulation of Collateral Saturation as a generalizable method for lineage reconstruction.</p>
        <p style="font-size:15px; line-height:1.6; color:#333;">The formal metric definitions, normalization procedures, and scoring formulas were developed through collaborative methodological work between Ron Yates and ChatGPT (OpenAI). Ron Yates provided the empirical dataset, methodological insights, terminology, and structural requirements derived from the Yates DNA Study, while ChatGPT assisted in translating these concepts into explicit mathematical definitions, normalization procedures, and reproducible scoring formulas (including the CSS and CSS v2a/v2a variants).</p>
        <p style="font-size:15px; line-height:1.6; color:#333;">This collaboration produced a documented field dictionary and a quantitative scoring framework intended to support consistent cross-participant comparison, sensitivity testing, and publication-grade reporting. The methodology reflects the integration of empirical genealogical research with formal quantitative modeling and is grounded in observed match-network behavior within the study.</p>
    </div>`;
};

function getCSS(testerArray, customName = null) {
    const isGroup = testerArray.length > 1;
    const pName = isGroup ? (customName || `VIRTUAL GROUP (${testerArray.length} Kits)`) : testerArray[0];

    const myMatches = DB.filter(m => testerArray.includes(m.participant) && m.ancestor !== 'No Matches' && m.ancestor);
    let PM = myMatches.length;
    if(PM === 0) return null;

    let dirs = {};
    myMatches.forEach(m => { dirs[m.ancestor] = (dirs[m.ancestor] || 0) + 1; });
    let sortedDirs = Object.entries(dirs).sort((a,b) => b[1] - a[1]);
    let HC_T = sortedDirs.length > 0 ? sortedDirs[0][1] : PM;
    let HC_2 = sortedDirs.length > 1 ? sortedDirs[1][1] : 0;

    let TB = 0; let NS = 0; let BR = 0;

    let idCounts = {};
    myMatches.forEach(m => {
        if(m.search_ids) {
            let ids = m.search_ids.split(',').map(x=>cleanNum(x));
            ids.forEach(id => { if(id) idCounts[id] = (idCounts[id] || 0) + 1; });
        }
    });

    let highestHeat = 0; let targetID = null;
    for (let [id, count] of Object.entries(idCounts)) {
        const nodeMatches = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(id));
        const nodeUniqueKits = new Set(nodeMatches.map(m => m.participant)).size;
        if (nodeUniqueKits > highestHeat) { highestHeat = nodeUniqueKits; targetID = id; }
    }
    NS = highestHeat;

    if (targetID) {
        const collaterals = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(targetID));
        TB = new Set(collaterals.map(m => m.participant)).size;
        let branches = new Set();
        collaterals.forEach(r => {
            const ids = r.search_ids.split(',').map(x => cleanNum(x));
            const names = r.search_names.split('|');
            let idx = ids.indexOf(targetID);
            if(idx !== -1 && idx + 1 < names.length) {
                branches.add(names[idx+1].replace(/findme/gi, '?').split(' (')[0].trim());
            } else { branches.add("Direct Descendant"); }
        });
        BR = branches.size;
    }

    let DR = HC_T / (HC_2 > 0 ? HC_2 : 1);
    const norm = (val, cap) => Math.min(1, Math.log(1+val) / Math.log(1+cap));
    let PM_n = norm(PM, 150); let HC_n = norm(HC_T, 100); let DR_n = norm(DR, 10); let TB_n = norm(TB, 40); let NS_n = norm(NS, 150);

    let BR_n = 0;
    if(BR >= 6) BR_n = 1.0; else if(BR === 5) BR_n = 0.85; else if(BR === 4) BR_n = 0.70; else if(BR === 3) BR_n = 0.50; else if(BR === 2) BR_n = 0.25;

    let ST_str = "FAIL"; let ST_val = 0.60;
    if (PM >= 15) {
        if(BR >= 3 && DR >= 1.5) { ST_str = "PASS"; ST_val = 1.0; }
        else if(BR >= 2) { ST_str = "PARTIAL"; ST_val = 0.85; }
    }

    let weightedSum = (PM_n * 1.0) + (HC_n * 1.0) + (DR_n * 1.5) + (TB_n * 1.0) + (BR_n * 2.0) + (NS_n * 1.0);
    let cssBase = 100 * (weightedSum / 7.5);
    let cssFinal = cssBase * ST_val;

    return { pName, PM, HC_T, HC_2, DR, TB, BR, NS, ST_str, cssFinal, isGroup };
}

function getDiagnosticHTML(testerArray, ancName) {
    let css = getCSS(testerArray);
    if(!css) return ``;

    let stColor = css.ST_str === "PASS" ? "green" : (css.ST_str === "PARTIAL" ? "#f57f17" : "red");
    let brStr = css.BR >= 6 ? "&ge;6" : css.BR;

    let narrative = "";
    if(css.PM < 15) {
        narrative = `The evaluated subject triggers a <strong style="color:red;">FAIL</strong> stability warning.<br><br><strong>Limiting Factor:</strong> Insufficient match volume (PM = ${css.PM}), failing the minimum study threshold of 15 proper matches required to act as an independent lineage anchor.<br><br><strong>Recommendation:</strong> This subject cannot currently serve as a proof-grade anchor. More matches must be mapped to this cluster before an independent Collateral Saturation evaluation can be performed.`;
    } else if(css.ST_str === "PASS") {
        narrative = `The cluster demonstrates strong structural integrity (Stability: <strong style="color:green;">PASS</strong>) with ${css.BR} independent branches and a dominance ratio of ${css.DR.toFixed(1)}. This lineage signature is highly saturated and reliable. No immediate corrective action is required.`;
    } else if (css.ST_str === "PARTIAL") {
        narrative = `The cluster demonstrates moderate integrity but flags a <strong style="color:#f57f17;">PARTIAL</strong> stability warning.<br><br><strong>Limiting Factor:</strong> The genetic evidence relies on only ${css.BR} independent branches, creating a potential single-family bias.<br><br><strong>Recommendation:</strong> To upgrade this line to Platinum/PASS status, targeted DNA testing should be recruited from descendants of alternate children of the target ancestor to broaden the Forensic Handshake.`;
    } else {
        narrative = `The cluster triggers a <strong style="color:red;">FAIL</strong> stability warning.<br><br><strong>Limiting Factor:</strong> The evidence lacks independent branch replication (Branch Count: ${css.BR}) and/or fails to establish dominance over competing lineage theories (Dominance Ratio: ${css.DR.toFixed(1)}).<br><br><strong>Recommendation:</strong> This connection is currently considered structurally insufficient. Targeted recruitment of distant cousins from entirely different branches is strictly required to resolve the biological proof.`;
    }

    return `
    <div class="brief-section-title">${getSec()} Diagnostic Review & Lineage Viability</div>
    <p>This section evaluates the structural viability of the DNA cluster using the Collateral Saturation Score (CSS v2a) methodology.</p>
    <table class="brief-table" style="text-align:center; margin-bottom:20px;">
        <thead><tr><th style="text-align:left;">Evaluated Subject</th><th>PM</th><th>HC-T</th><th>HC-2</th><th>DR</th><th>TB</th><th>BR</th><th>NS</th><th>ST</th><th style="background:#f3e5f5; color:#4a148c;">CSS v2a</th></tr></thead>
        <tbody>
            <tr>
                <td style="text-align:left;"><strong>${css.pName}</strong></td>
                <td>${css.PM}</td><td>${css.HC_T}</td><td>${css.HC_2}</td><td>${css.DR.toFixed(1)}</td><td>${css.TB}</td><td>${brStr}</td><td>${css.NS}</td>
                <td style="font-weight:bold; color:${stColor};">${css.ST_str}</td>
                <td style="font-weight:bold; color:#4a148c; background:#fafafa; font-size:1.1em;">${css.cssFinal.toFixed(2)}</td>
            </tr>
        </tbody>
    </table>
    <div style="background:#fbfbfb; border-left:4px solid ${stColor}; padding:15px; text-align:left; font-size:14px; line-height:1.6;">
        <strong>Diagnostic Analysis:</strong><br>${narrative}
    </div>
    `;
}

function getMatrixHTML(vgCSS = null) {
    let matrixRows = [];
    uniqueTesters.forEach(t => { let c = getCSS([t]); if(c) matrixRows.push(c); });
    if (vgCSS && vgCSS.isGroup) matrixRows.push(vgCSS);

    matrixRows.sort((a,b) => b.cssFinal - a.cssFinal);
    let tableHTML = `<table class="brief-table sortable" style="text-align:center; font-family:sans-serif;">
        <thead><tr><th style="text-align:left; cursor:pointer; width:22%;">Participant Kit</th><th title="Proper Matches" style="cursor:pointer;">PM</th><th title="Target Handshakes" style="cursor:pointer;">HC-T</th><th title="Secondary Handshakes" style="cursor:pointer;">HC-2</th><th title="Dominance Ratio" style="cursor:pointer;">DR</th><th title="Unique Testers" style="cursor:pointer;">TB</th><th title="Independent Branches" style="cursor:pointer;">BR</th><th title="Node Saturation" style="cursor:pointer;">NS</th><th title="Stability" style="cursor:pointer;">ST</th><th style="background:#f3e5f5; color:#4a148c; cursor:pointer;">CSS v2a</th></tr></thead><tbody>`;

    matrixRows.forEach(r => {
        let stColor = r.ST_str === "PASS" ? "green" : (r.ST_str === "PARTIAL" ? "#f57f17" : "red");
        let brStr = r.BR >= 6 ? "&ge;6" : r.BR;
        let rowStyle = r.isGroup ? 'background:#fff8e1; border:2px solid #fbc02d;' : '';
        let nameFmt = r.isGroup ? `<span style="color:#f57f17; font-weight:bold;">‚òÖ ${r.pName}</span>` : `<strong>${r.pName}</strong>`;

        tableHTML += `<tr style="${rowStyle}"><td style="text-align:left;" data-sort="${r.pName}">${nameFmt}</td><td>${r.PM}</td><td>${r.HC_T}</td><td>${r.HC_2}</td><td>${r.DR.toFixed(1)}</td><td>${r.TB}</td><td data-sort="${r.BR}">${brStr}</td><td>${r.NS}</td><td data-sort="${r.ST_str}"><span style="color:${stColor};font-weight:bold;">${r.ST_str}</span></td><td style="background:#fafafa; font-weight:bold; color:#4a148c; font-size:1.1em;" data-sort="${r.cssFinal}">${r.cssFinal.toFixed(2)}</td></tr>`;
    });
    tableHTML += `</tbody></table>`;

    return `<div class="academic-brief" style="max-width: 1100px; padding: 60px 80px; page-break-before: always;">
        <div class="brief-section-title" style="margin-top:20px;">${getSec()} Master CSS v2a Evaluation Matrix</div>
        <p style="font-size:14px; color:#004d40; margin-bottom:10px; font-weight:bold; border-bottom:1px solid #ddd; padding-bottom:10px;">${getStudyStats()}</p>
        <p style="font-size:13px; color:#555; margin-bottom:15px;"><strong>Note:</strong> Scores are provisional CSS v2a computed with empirically-weighted logarithmic normalization. This matrix evaluates and ranks active study participants based on their structural viability as keystone lineage representatives. <b style="color:#b71c1c;">Participants with PM &lt; 15 are included for comparison but do not meet minimum Collateral Saturation criteria.</b> Within a single saturated ancestral node, CSS primarily differentiates participants by PM, HC-T, and DR; between different nodes, TB/NS/BR drive separation.</p>
        ${tableHTML}
    </div>`;
}

function runConsolidator(mode) {
    const singleTester = partSel.value;
    const checkedBoxes = document.querySelectorAll('.vg-checkbox:checked');
    const groupTesters = Array.from(checkedBoxes).map(cb => cb.value);
    const ancName = ancSel.value;

    let activeTesters = [];
    if (groupTesters.length > 0) activeTesters = groupTesters;
    else if (singleTester) activeTesters = [singleTester];

    if(activeTesters.length === 0 && !ancName && mode !== 'matrix') return alert("Please select a Tester, a Virtual Group, or an Ancestor.");
    secCounter = 1;

    let pdfTitle = "Proof_Consolidator";
    if (activeTesters.length > 1) {
        pdfTitle = `Virtual_Group_${activeTesters.length}_Kits`;
    } else if (activeTesters.length === 1) {
        let m = activeTesters[0].match(/(.+) \[I?(\d+)\]/i);
        if(m) {
            let tName = m[1].trim().replace(/\./g, '').replace(/\s+/g, '-');
            pdfTitle = m[2] + "_" + tName;
        } else {
            pdfTitle = activeTesters[0].replace(/[^a-zA-Z0-9]/g, '-');
        }
    } else if (ancName) {
        pdfTitle = cleanName(ancName).replace(/[^a-zA-Z0-9]/g, '-');
    }
    if (mode === 'assembled') pdfTitle = pdfTitle + "_Assembled";
    document.title = pdfTitle;

    let vgCSS = null;
    if (activeTesters.length > 1) {
        vgCSS = getCSS(activeTesters);
    }

    if(mode === 'matrix') {
        document.title = "Master_CSS_Matrix";
        document.getElementById('report-container').innerHTML = getTitlePage() + getMethodologyPage() + getMatrixHTML(vgCSS) + `__APPENDIX_A_HTML__` + getAuthorshipPage();
        setTimeout(() => { if(window.init) window.init(); }, 100);
        return;
    }

    const isAssembled = (mode === 'assembled');
    const dateStr = new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' });
    let spineHTML = "", branchesHTML = "", manifestHTML = "", verdict = "INSUFFICIENT DATA", kitCount = 0, totalCM = 0, targetGen = 0, targetAncestor = cleanName(ancName);
    let inferredAncestorText = "";

    if (activeTesters.length > 1) {
        const collaterals = DB.filter(m => activeTesters.includes(m.participant) && m.ancestor !== 'No Matches' && m.ancestor);
        kitCount = new Set(collaterals.map(r => r.participant)).size;
        totalCM = collaterals.reduce((sum, r) => sum + getCM(r.cm), 0);

        let idCounts = {};
        collaterals.forEach(m => {
            if(m.search_ids) {
                let ids = m.search_ids.split(',').map(x=>cleanNum(x));
                ids.forEach(id => { if(id) idCounts[id] = (idCounts[id] || 0) + 1; });
            }
        });

        let highestHeat = 0; let targetID = null;
        for (let [id, count] of Object.entries(idCounts)) {
            const nodeMatches = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(id));
            const nodeUniqueKits = new Set(nodeMatches.map(m => m.participant)).size;
            if (nodeUniqueKits > highestHeat) { highestHeat = nodeUniqueKits; targetID = id; }
        }

        if (targetID) {
            const globalCollaterals = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(targetID));
            let branches = new Set();
            let discoveredName = "";
            globalCollaterals.forEach(r => {
                const ids = r.search_ids.split(',').map(x => cleanNum(x));
                const names = r.search_names.split('|');
                let idx = ids.indexOf(targetID);
                if(idx !== -1) {
                    discoveredName = cleanName(names[idx]);
                    if (idx + 1 < names.length) {
                        branches.add(names[idx+1].replace(/findme/gi, '?').split(' (')[0].trim());
                    } else { branches.add("Direct Descendant"); }
                }
            });
            branchesHTML = `<ul class="collateral-list">`;
            Object.keys(branches).sort().forEach(b => { branchesHTML += `<li>Descendants via <strong>${b}</strong>: (${branches[b].size} corroborating kits)</li>`; });
            branchesHTML += `</ul>`;

            manifestHTML = `<table class="brief-table"><thead><tr><th>Supporting DNA Kit</th><th>Shared cM</th><th>Branch Intersecting Path</th></tr></thead><tbody>`;
            globalCollaterals.sort((a,b) => getCM(b.cm) - getCM(a.cm)).slice(0, 50).forEach(r => {
                const rIDs = r.search_ids.split(',').map(x => cleanNum(x));
                const rNames = r.search_names.split('|');
                let idx = rIDs.indexOf(targetID);
                let downPath = (idx !== -1) ? rNames.slice(idx).slice(0, 4).map(cleanName).join(' &rarr; ') + (rNames.length - idx > 4 ? "..." : "") : cleanName(r.lineage);
                manifestHTML += `<tr><td><strong>${r.participant}</strong></td><td>${r.cm}</td><td style="font-size:0.9em;color:#555;">${downPath}</td></tr>`;
            });
            if(globalCollaterals.length > 50) manifestHTML += `<tr><td colspan="3" style="text-align:center;font-style:italic;">... plus ${globalCollaterals.length - 50} additional supporting kits.</td></tr>`;
            manifestHTML += `</tbody></table>`;

            if(!targetAncestor) {
                targetAncestor = discoveredName || `Ancestor I${targetID}`;
                inferredAncestorText = " (Discovered via cluster convergence)";
            }
        }

        if(highestHeat >= 30) verdict = "PLATINUM STANDARD - FULLY VERIFIED";
        else if(highestHeat >= 15) verdict = "GOLD STANDARD - STRONGLY VALIDATED";
        else if(highestHeat >= 5) verdict = "SILVER STANDARD - VERIFIED";
        else if(highestHeat >= 2) verdict = "BRONZE STANDARD - EMERGING";
    }
    else if(activeTesters.length === 1 && ancName) {
        const testerName = activeTesters[0];
        const myRow = DB.find(r => r.participant === testerName);
        if(!myRow || !myRow.t_names) return alert("Tester paper trail not found.");
        const ancRow = DB.find(r => r.ancestor === ancName);
        if(!ancRow) return alert("Ancestor node not found in database.");
        const targetID = cleanNum(ancRow.id);
        const tNames = myRow.t_names.split('|');
        const tIDs = myRow.t_ids.split('|').map(x => cleanNum(x.split('+')[0]));
        const targetIdx = tIDs.indexOf(targetID);
        if(targetIdx === -1) return alert("This tester's documented lineage does not intersect with the selected ancestor.");
        targetGen = tNames.length - targetIdx;
        targetAncestor = cleanName(tNames[targetIdx]);

        spineHTML = `<table class="brief-table"><thead><tr><th style="width:50px;">Gen</th><th>Documented Ancestral Lineage</th></tr></thead><tbody>`;
        for(let i = targetIdx; i < tNames.length; i++) { spineHTML += `<tr><td><strong>${tNames.length - i}</strong></td><td>${cleanName(tNames[i])}</td></tr>`; }
        spineHTML += `</tbody></table>`;

        const collaterals = DB.filter(r => r.search_ids && r.search_ids.split(',').map(x => cleanNum(x)).includes(targetID));
        kitCount = new Set(collaterals.map(r => r.participant)).size;
        totalCM = collaterals.reduce((sum, r) => sum + getCM(r.cm), 0);

        let branches = {};
        collaterals.forEach(r => {
            const ids = r.search_ids.split(',').map(x => cleanNum(x));
            const names = r.search_names.split('|');
            let idx = ids.indexOf(targetID);
            let branchName = (idx !== -1 && idx + 1 < names.length) ? cleanName(names[idx+1]) : "Direct Descendant";
            if(!branches[branchName]) branches[branchName] = new Set();
            branches[branchName].add(r.participant);
        });
        branchesHTML = `<ul class="collateral-list">`;
        Object.keys(branches).sort().forEach(b => { branchesHTML += `<li>Descendants via <strong>${b}</strong>: (${branches[b].size} corroborating kits)</li>`; });
        branchesHTML += `</ul>`;

        manifestHTML = `<table class="brief-table"><thead><tr><th>Supporting DNA Kit</th><th>Shared cM</th><th>Branch Intersection Path</th></tr></thead><tbody>`;
        collaterals.sort((a,b) => getCM(b.cm) - getCM(a.cm)).slice(0, 50).forEach(r => {
            const rIDs = r.search_ids.split(',').map(x => cleanNum(x));
            const rNames = r.search_names.split('|');
            let idx = rIDs.indexOf(targetID);
            let downPath = (idx !== -1) ? rNames.slice(idx).slice(0, 4).map(cleanName).join(' &rarr; ') + (rNames.length - idx > 4 ? "..." : "") : cleanName(r.lineage);
            manifestHTML += `<tr><td><strong>${r.participant}</strong></td><td>${r.cm}</td><td style="font-size:0.9em;color:#555;">${downPath}</td></tr>`;
        });
        if(collaterals.length > 50) manifestHTML += `<tr><td colspan="3" style="text-align:center;font-style:italic;">... plus ${collaterals.length - 50} additional supporting kits.</td></tr>`;
        manifestHTML += `</tbody></table>`;
    }
    else if (activeTesters.length === 1 && !ancName) {
        const testerName = activeTesters[0];
        const myRow = DB.find(r => r.participant === testerName);
        if(!myRow || !myRow.t_names) return alert("Tester paper trail not found.");
        const tNames = myRow.t_names.split('|');
        const tIDs = myRow.t_ids.split('|').map(x => cleanNum(x.split('+')[0]));
        const totalGens = tNames.length;
        targetAncestor = cleanName(tNames[0]);
        targetGen = totalGens;

        spineHTML = `<table class="brief-table"><thead><tr><th style="width:50px;">Gen</th><th>Ancestral Node</th><th style="text-align:center;">Independent Kits</th><th>Status</th></tr></thead><tbody>`;
        let highestHeat = 0; let targetID = null;
        for(let i=0; i<totalGens; i++) {
            const primaryID = tIDs[i];
            const nodeMatches = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(primaryID));
            const nodeHeat = new Set(nodeMatches.map(m => m.participant)).size;
            if(nodeHeat > highestHeat) { highestHeat = nodeHeat; targetID = primaryID; }
            let statusText = nodeHeat >= 30 ? "Confirmed Standard (30+)" : (nodeHeat >= 15 ? "Confirmed Validation (15+)" : (nodeHeat >= 5 ? "Verified Node (5+)" : (nodeHeat >= 2 ? "Emerging Node (2+)" : "Private Line")));
            if(i === totalGens - 1) statusText = "Subject Tester";
            spineHTML += `<tr><td><strong>${totalGens - i}</strong></td><td>${cleanName(tNames[i])}</td><td style="text-align:center;">${nodeHeat}</td><td>${statusText}</td></tr>`;
        }
        spineHTML += `</tbody></table>`;
        kitCount = highestHeat;
        verdict = highestHeat >= 30 ? "PLATINUM STANDARD" : (highestHeat >= 15 ? "GOLD STANDARD" : (highestHeat >= 5 ? "SILVER STANDARD" : "INSUFFICIENT DATA"));

        if (targetID && highestHeat > 0) {
            const collaterals = DB.filter(m => m.search_ids && m.search_ids.split(',').map(x=>cleanNum(x)).includes(targetID));
            totalCM = collaterals.reduce((sum, r) => sum + getCM(r.cm), 0);
            let branches = {};
            collaterals.forEach(r => {
                const ids = r.search_ids.split(',').map(x => cleanNum(x));
                const names = r.search_names.split('|');
                let idx = ids.indexOf(targetID);
                let branchName = (idx !== -1 && idx + 1 < names.length) ? cleanName(names[idx+1]) : "Direct Descendant";
                if(!branches[branchName]) branches[branchName] = new Set();
                branches[branchName].add(r.participant);
            });
            branchesHTML = `<ul class="collateral-list">`;
            Object.keys(branches).sort().forEach(b => { branchesHTML += `<li>Descendants via <strong>${b}</strong>: (${branches[b].size} corroborating kits)</li>`; });
            branchesHTML += `</ul>`;

            manifestHTML = `<table class="brief-table"><thead><tr><th>Supporting DNA Kit</th><th>Shared cM</th><th>Branch Intersecting Path</th></tr></thead><tbody>`;
            collaterals.sort((a,b) => getCM(b.cm) - getCM(a.cm)).slice(0, 50).forEach(r => {
                const rIDs = r.search_ids.split(',').map(x => cleanNum(x));
                const rNames = r.search_names.split('|');
                let idx = rIDs.indexOf(targetID);
                let downPath = (idx !== -1) ? rNames.slice(idx).slice(0, 4).map(cleanName).join(' &rarr; ') + (rNames.length - idx > 4 ? "..." : "") : cleanName(r.lineage);
                manifestHTML += `<tr><td><strong>${r.participant}</strong></td><td>${r.cm}</td><td style="font-size:0.9em;color:#555;">${downPath}</td></tr>`;
            });
            if(collaterals.length > 50) manifestHTML += `<tr><td colspan="3" style="text-align:center;font-style:italic;">... plus ${collaterals.length - 50} additional supporting kits.</td></tr>`;
            manifestHTML += `</tbody></table>`;
        }
    }
    else if (activeTesters.length === 0 && ancName) {
        const ancRow = DB.find(r => r.ancestor === ancName);
        if(!ancRow) return alert("Ancestor node not found in database.");
        const targetID = cleanNum(ancRow.id);
        const collaterals = DB.filter(r => r.search_ids && r.search_ids.split(',').map(x => cleanNum(x)).includes(targetID));
        kitCount = new Set(collaterals.map(r => r.participant)).size;
        totalCM = collaterals.reduce((sum, r) => sum + getCM(r.cm), 0);

        let branches = {};
        collaterals.forEach(r => {
            const ids = r.search_ids.split(',').map(x => cleanNum(x));
            const names = r.search_names.split('|');
            let idx = ids.indexOf(targetID);
            let branchName = (idx !== -1 && idx + 1 < names.length) ? cleanName(names[idx+1]) : "Direct Descendant";
            if(!branches[branchName]) branches[branchName] = new Set();
            branches[branchName].add(r.participant);
        });

        branchesHTML = `<ul class="collateral-list">`;
        Object.keys(branches).sort().forEach(b => { branchesHTML += `<li>Descendants via <strong>${b}</strong>: (${branches[b].size} corroborating kits)</li>`; });
        branchesHTML += `</ul>`;

        manifestHTML = `<table class="brief-table"><thead><tr><th>Supporting DNA Kit</th><th>Shared cM</th><th>Branch Intersecting Path</th></tr></thead><tbody>`;
        collaterals.sort((a,b) => getCM(b.cm) - getCM(a.cm)).slice(0, 50).forEach(r => {
            const rIDs = r.search_ids.split(',').map(x => cleanNum(x));
            const rNames = r.search_names.split('|');
            let idx = rIDs.indexOf(targetID);
            let downPath = (idx !== -1) ? rNames.slice(idx).slice(0, 4).map(cleanName).join(' &rarr; ') + (rNames.length - idx > 4 ? "..." : "") : cleanName(r.lineage);
            manifestHTML += `<tr><td><strong>${r.participant}</strong></td><td>${r.cm}</td><td style="font-size:0.9em;color:#555;">${downPath}</td></tr>`;
        });
        if(collaterals.length > 50) manifestHTML += `<tr><td colspan="3" style="text-align:center;font-style:italic;">... plus ${collaterals.length - 50} additional supporting kits.</td></tr>`;
        manifestHTML += `</tbody></table>`;

        if(kitCount >= 30) verdict = "PLATINUM STANDARD - FULLY VERIFIED";
        else if(kitCount >= 15) verdict = "GOLD STANDARD - STRONGLY VALIDATED";
        else if(kitCount >= 5) verdict = "SILVER STANDARD - VERIFIED";
        else if(kitCount >= 2) verdict = "BRONZE STANDARD - EMERGING";
    }

    let reportHTML = isAssembled ? getTitlePage() + getMethodologyPage() : '';
    let pbStyle = isAssembled ? 'page-break-before: always;' : '';

    let subjectHeader = "";
    if (activeTesters.length > 1) {
        let kitList = activeTesters.map(t => `<li style="margin-bottom:3px;">${t}</li>`).join("");
        subjectHeader = `<strong>Subject Tester:</strong> VIRTUAL GROUP (${activeTesters.length} Kits)<br><ul style="margin:5px 0 10px 0; padding-left:20px; font-size:13px; color:#333; font-weight:bold;">${kitList}</ul>`;
    } else if (activeTesters.length === 1) {
        subjectHeader = `<strong>Subject Tester:</strong> ${activeTesters[0]}<br>`;
    }

    let execPara = activeTesters.length > 1 ?
        `This brief evaluates the composite genetic evidence of a Virtual Group comprising ${activeTesters.length} individual testers. By pooling their isolated matches into a single structural cluster, we test if their combined evidence satisfies the Collateral Saturation threshold for proof-grade lineage assignment. The documentary paper trail is corroborated by <strong>${kitCount} independent participant kits</strong> sharing an aggregate of <strong>${totalCM.toLocaleString()} cM</strong> of Autosomal DNA.` :
        `This brief evaluates the genetic evidence corroborating the descent from the Target Ancestral Node. Through the application of Collateral DNA Saturation methodology, the documentary paper trail is corroborated by <strong>${kitCount} independent participant kits</strong> sharing an aggregate of <strong>${totalCM.toLocaleString()} cM</strong> of Autosomal DNA.`;

    reportHTML += `
    <div class="academic-brief" style="${pbStyle}">
        ${!isAssembled ? `<div class="brief-header"><h1>Formal Genetic Lineage Brief</h1><p>Biological Verification via Collateral Saturation</p></div>` : ''}
        <div class="brief-meta">
            ${subjectHeader}
            <strong>${(!ancName && activeTesters.length > 1) ? 'Inferred Target Node (Consensus):' : 'Target Ancestral Node:'}</strong> ${targetAncestor} ${inferredAncestorText} ${targetGen ? `(Gen ${targetGen})` : ''}<br>
            <strong>Date Compiled:</strong> ${dateStr}<br>
            <strong>Study Authority:</strong> Yates One-Name Study (ONS)<br>
            <strong>Database Status:</strong> ${getStudyStats()}
        </div>

        <div class="brief-section-title">${getSec()} Executive Abstract</div>
        <p>${execPara}</p>

        ${isAssembled ? getDiagnosticHTML(activeTesters, ancName) : ''}

        ${spineHTML !== "" ? `
        <div class="brief-section-title">${getSec()} Documented Lineage Spine</div>
        <p>The following genealogical paper trail connects the Subject Tester directly to the Target Ancestral Node:</p>
        ${spineHTML}` : ''}

        ${branchesHTML !== "" ? `
        <div class="brief-section-title" style="page-break-before: always;">${getSec()} Genetic Convergence (The Forensic Handshake)</div>
        <p>To rule out isolated coincidence or false-positive segment sharing, genetic genealogy relies on triangulation and cluster saturation. The biological integrity of this node is verified by independent lines of descent converging from the following documented children:</p>
        ${branchesHTML}` : ''}

        ${manifestHTML !== "" ? `
        <div class="brief-section-title">${getSec()} Empirical Data Manifest (Top 50 Kits)</div>
        ${manifestHTML}` : ''}

        <div class="verdict-stamp">FORENSIC STATUS: ${verdict}</div>
    </div>`;

    if(isAssembled) {
        reportHTML += getMatrixHTML(vgCSS);
        reportHTML += `__APPENDIX_A_HTML__`;
        reportHTML += getAuthorshipPage();
    }

    document.getElementById('report-container').innerHTML = reportHTML;
    setTimeout(() => { if(window.init) window.init(); }, 100);
}

// Ensure mutual exclusivity in UI
document.getElementById('testerSelect').addEventListener('change', function() {
    if(this.value) {
        document.querySelectorAll('.vg-checkbox').forEach(cb => cb.checked = false);
    }
});
document.getElementById('groupCheckboxes').addEventListener('change', function(e) {
    if(e.target.classList.contains('vg-checkbox')) {
        if(e.target.checked) document.getElementById('testerSelect').value = "";
    }
});
</script>""".replace('__APPENDIX_A_HTML__', APPENDIX_A_HTML)

CONSOLIDATOR_HTML = f"""
<div class="no-print consol-panel">
    <h2 style="color:#4a148c; margin-top:0;">The Omni-Proof Consolidator</h2>
    <p style="color:#555; margin-bottom:20px;">Generate a formal academic white-paper. Select a Tester, an Ancestor, or compute the Master Matrix.</p>

    <div style="display:flex; justify-content:center; gap:15px; flex-wrap:wrap; margin-bottom:15px;">
        <div style="flex:1; min-width:250px; text-align:left;">
            <label style="font-size:12px; font-weight:bold; color:#4a148c;">1A. Select Single Tester</label>
            <select id="testerSelect" style="width:100%; padding:8px; border:1px solid #7b1fa2; border-radius:4px;"><option value="">-- Choose One --</option></select>
        </div>
        <div style="flex:1; min-width:250px; text-align:left;">
            <label style="font-size:12px; font-weight:bold; color:#e65100;">1B. OR Create Virtual Group</label>
            <div id="groupCheckboxes" class="vg-checkbox-container"></div>
            <div style="font-size:11px; color:#666; margin-top:3px;">*Check multiple kits to pool evidence</div>
        </div>
        <div style="flex:1; min-width:250px; text-align:left;">
            <label style="font-size:12px; font-weight:bold; color:#4a148c;">2. Select Target Ancestor (Optional)</label>
            <select id="ancestorSelect" style="width:100%; padding:8px; border:1px solid #7b1fa2; border-radius:4px;"><option value="">-- Choose Target --</option></select>
        </div>
    </div>

    <button class="consol-btn" onclick="runConsolidator('brief')">Generate Formal Brief</button>
    <button class="consol-btn" style="background:#2e7d32; margin-left:10px;" onclick="runConsolidator('matrix')">üìä Generate CSS Matrix</button>
    <button class="consol-btn" style="background:#e65100; margin-left:10px;" onclick="runConsolidator('assembled')">üìö Assembled Report</button>
    <button class="consol-btn no-print" style="background:#0277bd; margin-left:10px;" onclick="window.print()">üñ®Ô∏è Print to PDF</button>
</div>
<div id="report-container"></div>
"""

# Bind Legal Footer to regular tools
BIO_TMPL = BIO_TMPL.replace('</div></div></div><script>', '</div>__LEGAL_FOOTER__</div></div><script>')
PROOF_TMPL = PROOF_TMPL.replace('</table></div></div></div></div><script>', '</table></div></div>__LEGAL_FOOTER__</div></div><script>')
DOSS_TMPL = DOSS_TMPL.replace('<div id="report-stack"></div></div><script>', '<div id="report-stack"></div>__LEGAL_FOOTER__</div><script>')
BUST_TMPL = BUST_TMPL.replace('<div id="cluster-table-div"></div></div></div></div><script>', '<div id="cluster-table-div"></div></div>__LEGAL_FOOTER__</div></div><script>')

print("‚úÖ Cell 4 (Template Library) Loaded Successfully.")

      [CELL 4] TEMPLATE LIBRARY LOADING...
‚úÖ Cell 4 (Template Library) Loaded Successfully.


‚úÖ Cell 4 Loaded.


      MASTER ORCHESTRATOR (V81)
      (Running Engine -> Publisher -> Upload)

>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...
      [CELL 3] ENGINE STARTING (V113 - CLEAN)...

[STEP 1] Setup...
    üëâ Source: yates_study_2025.ged

[STEP 4] Tracing Lineages...

[SUCCESS] Engine V113 Complete. Saved 1712 verified matches to engine_database.csv.
‚úÖ PHASE 1 COMPLETE.

>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...
      [CELL 4] PUBLISHER STARTING (Upgraded V8)...
    ‚úÖ Core Registers and Static Pages Built Locally.

[PHASE 3] Uploading via FTP to Live Server...
    üì§ Uploaded: contents.shtml
    üì§ Uploaded: subscribe.shtml
    üì§ Uploaded: ons_yates_dna_register.shtml
    üì§ Uploaded: research_admin.html
    üì§ Uploaded: brick_wall_buster.shtml
    üì§ Uploaded: ons_yates_dna_register_participants.shtml
    üì§ Uploaded: dna_dossier.html
    üì§ Uploaded: engine_database.csv
    üì§ Uploaded: share_dna.shtml
    üì§ Uploaded: lineage_proof.html
    üì§ Uploaded: admi

‚úÖ Cell 5 Loaded.


‚úÖ Cell 5 Loaded.


In [82]:
# @title [CELL 5] Core Publisher & FTP Uploader (V26: Admin Hub Hotlinks Restored)
def run_publisher():
    print("="*60)
    print("      [CELL 5] PUBLISHER STARTING (Upgraded V26 - Hotlinks Restored)...")
    print("="*60)

    import os, re, pytz, json, csv
    import pandas as pd
    from datetime import datetime
    from google.colab import userdata
    from ftplib import FTP_TLS

    if 'LEGAL_FOOTER_TMPL' not in globals():
        return print("‚ùå ERROR: Templates not found. You must run Cell 4 first!")

    try:
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")
    except Exception as e:
        return print(f"‚ùå Credential Error: {e}")

    REMOTE_SUBDIR = "ons-study"
    CSV_DB = "engine_database.csv"
    KEY_FILE = "match_to_unmasked.csv"

    if not os.path.exists(CSV_DB): return print("‚ùå ERROR: engine_database.csv not found.")

    # üåü LOCAL FIRST LOGIC
    print(f"\n[STEP 1] Resolving {KEY_FILE}...")
    if os.path.exists(KEY_FILE):
        print(f"    ‚úÖ Found {KEY_FILE} locally. Skipping FTP download.")
    else:
        try:
            ftps = FTP_TLS()
            ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
            try:
                with open(KEY_FILE, "wb") as f: ftps.retrbinary(f"RETR /{REMOTE_SUBDIR}/{KEY_FILE}", f.write)
                print(f"    ‚úÖ Successfully downloaded {KEY_FILE}.")
            except Exception as e:
                print(f"    ‚ö†Ô∏è FTP download failed: {e}")
            ftps.quit()
        except Exception as e:
            pass

    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")
    df.fillna('', inplace=True)
    df.replace('nan', '', inplace=True)

    # üåü PRIVACY-FOCUSED TRUNCATION ALGORITHM (P.K. Musick) üåü
    def shorten_name(full_name):
        if pd.isna(full_name) or str(full_name).lower() == 'nan' or not str(full_name).strip(): return "Unknown"
        s = re.sub(r'\[.*?\]', '', str(full_name)).strip() # Remove any existing brackets
        parts = s.split()
        if len(parts) <= 1: return s

        # Handle Suffixes cleanly
        suffix = ""
        if parts[-1].lower() in ['jr', 'jr.', 'sr', 'sr.', 'iii', 'iv', 'v', 'md', 'm.d.', 'esq', 'esq.']:
            suffix = " " + parts.pop()

        if len(parts) == 1: return parts[0] + suffix

        # Convert all first/middle names to concatenated initials (e.g. "P.K.")
        initials = "".join([p[0].upper() + "." for p in parts[:-1]])
        last_name = parts[-1]

        return f"{initials} {last_name}{suffix}"

    # üåü EXTRACT GEDCOM IDS TO RESTORE ADMIN HOTLINKS
    db_ids = {}
    for _, r in df.iterrows():
        c = str(r.get('Tester_Code', '')).strip().lower()
        i = str(r.get('Tester_ID', '')).replace('I', '').strip()
        if c and i and i != 'nan':
            db_ids[c] = i

    tester_auth = []
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            reader = csv.reader(f)
            for i, row in enumerate(reader):
                if len(row) >= 2:
                    if i == 0 and ("tester" in row[0].lower() or "masked" in row[0].lower() or "code" in row[0].lower()):
                        continue
                    code = row[0].strip().lower()
                    name = row[1].strip()
                    tid = row[2].strip() if len(row) > 2 else ""
                    tid = re.sub(r'[^0-9]', '', tid)
                    if code in db_ids:
                        tid = db_ids[code]  # Prioritize the exact TNG ID from the GEDCOM
                    tester_auth.append({'Kit_Code': code, 'Tester_Name': shorten_name(name), 'Tester_ID': tid})

    df_testers = pd.DataFrame(tester_auth)
    if df_testers.empty:
        fallback = []
        for kcode, grp in df.groupby('Tester_Code'):
            tid = str(grp.iloc[0]['Tester_ID']).replace('I','').strip()
            fallback.append({'Kit_Code': kcode, 'Tester_Name': shorten_name(grp.iloc[0]['Tester_Name']), 'Tester_ID': tid})
        df_testers = pd.DataFrame(fallback)

    # Apply truncation directly to the main dataframe
    df['Kit_Name'] = df.apply(
        lambda r: f"{shorten_name(r['Tester_Name'])} [I{re.sub(r'[^0-9]','',str(r['Tester_ID']))}]"
        if pd.notna(r['Tester_ID']) and re.sub(r'[^0-9]','',str(r['Tester_ID']))
        else f"{shorten_name(r['Tester_Name'])} [{r['Tester_Code']}]",
        axis=1
    )

    df.rename(columns={
        "Authority_Directory_Label": "Dir_Label",
        "Authority_FirstAncestor_alpha": "Alpha_Key",
        "Tester_Code": "Kit_Code",
        "Match_Lineage": "Lineage",
        "Match_Path_IDs": "s_ids"
    }, inplace=True)

    def normalize_id(val): return f"I{str(val).replace('@', '').strip()}" if str(val).replace('@', '').strip().isdigit() else str(val).replace('@', '').strip()

    df['search_ids'] = df['s_ids']
    df['search_names'] = df['Lineage'].astype(str).str.replace(' -> ', '|')
    df['t_names'] = df['Tester_Lineage'].astype(str).str.replace(' -> ', '|')
    df['t_ids'] = df['Tester_Path_IDs'].astype(str).str.replace(',', '|')
    df['Linked_Tree_Line'] = df.apply(lambda r: str(r.get('Lineage', '')).replace(str(r.get('Match_Name', '')), f'<a href="https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r.get("Match_ID", ""))}&tree=tree1&parentset=0&display=vertical&generations=15" target="_blank" style="color:#006064;text-decoration:none;font-weight:bold;">{r.get("Match_Name", "")}</a>') if str(r.get('Match_Name', '')) in str(r.get('Lineage', '')) else str(r.get('Lineage', '')), axis=1)

    est = pytz.timezone('US/Eastern')
    current_year = datetime.now(est).year
    timestamp = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")
    stats_bar_full = f'<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Study Data Current As Of:</strong> {timestamp} | <strong>Total Autosomal matches:</strong> {len(df):,}</div>'
    LEGAL_FOOTER = LEGAL_FOOTER_TMPL.replace('__YEAR__', str(current_year))

    def get_sort_key(name):
        if pd.isna(name) or not name: return "zzz"
        s = str(name)
        s = re.sub(r'\[.*?\]', '', s) # Strip ID before sorting
        cleaned = re.sub(r'\b(jr\.?|sr\.?|iii|iv|v|md|m\.d\.|esq\.?)\b', '', s, flags=re.IGNORECASE)
        parts = re.split(r'\bnee\b|\bn√©e\b', cleaned.lower())[0].replace(',', '').replace('.', '').strip().split()
        return parts[-1] if parts else "zzz"

    match_counts = df.groupby('Kit_Code').size().reset_index(name='Match_Count')
    part_stats = pd.merge(df_testers, match_counts, on='Kit_Code', how='left')
    part_stats['Match_Count'] = part_stats['Match_Count'].fillna(0).astype(int)
    part_stats['Sort_Key'] = part_stats['Tester_Name'].apply(get_sort_key)

    total_m = part_stats['Match_Count'].sum()
    total_participants = len(part_stats)

    def make_admin_row(r):
        tid = str(r['Tester_ID']).strip()
        tname = str(r["Tester_Name"])
        kcode = str(r["Kit_Code"])
        mc = r['Match_Count']
        if tid and tid != 'nan' and tid != '':
            t_link = f'<a href="https://yates.one-name.net/tng/getperson.php?personID=I{tid}&tree=tree1" target="_blank" style="color:#00838f;text-decoration:underline;font-weight:bold;">{tname}</a>'
            tid_display = f" <span style='color:#777;font-size:0.85em;'>[I{tid}]</span>"
        else:
            t_link = f'<b style="color:#333;">{tname}</b>'
            tid_display = ""
        mc_str = f"<span style='color:#d32f2f;font-weight:bold;'>0</span>" if mc == 0 else str(mc)
        return f"<tr><td data-sort='{r['Sort_Key']}'>{t_link}{tid_display}<br><span style='color:#666;font-size:0.85em;'>Kit: {kcode}</span></td><td style='text-align:center;font-size:1.1em;vertical-align:middle;'>{mc_str}</td></tr>"

    part_stats_az = part_stats.sort_values(['Sort_Key', 'Tester_Name'], ascending=[True, True])
    admin_rows_az = [make_admin_row(r) for _, r in part_stats_az.iterrows()]
    part_stats_asc = part_stats.sort_values(['Match_Count', 'Sort_Key'], ascending=[True, True])
    admin_rows_asc = [make_admin_row(r) for _, r in part_stats_asc.iterrows()]

    anc_data = {}; part_data = {}
    for lbl, grp in df.groupby('Dir_Label'):
        if len(grp)<2: continue
        unique_t = len(grp['Kit_Name'].unique())
        integ = min(100, (len(grp)*2) + (unique_t*10))
        anc_data[grp.iloc[0]['Alpha_Key']] = {
            "name": lbl, "matches": len(grp), "cm": int(grp['cM'].sum()),
            "badge": "Platinum" if len(grp)>=30 else "Gold" if len(grp)>=15 else "Silver" if len(grp)>=5 else "Bronze",
            "list_data": grp['Kit_Name'].value_counts().head(3).to_dict(),
            "verdict": "Verified.", "integrity": integ, "testers": unique_t
        }

    for kname, grp in df.groupby('Kit_Name'):
        dir_lbl = grp.iloc[0]['Dir_Label']
        same_dir = df[df['Dir_Label'] == dir_lbl] if pd.notna(dir_lbl) else pd.DataFrame()
        integ = min(100, len(same_dir) * 5)
        part_data[kname] = {
            "name": kname, "sort_key": get_sort_key(kname), "matches": len(grp), "cm": int(grp['cM'].sum()),
            "badge": "Keystone Tester" if len(grp)>=15 else "Study Participant",
            "list_data": grp['Dir_Label'].value_counts().head(3).to_dict(),
            "verdict": f"Verified matches across {len(grp['Dir_Label'].unique())} ancestral lines.",
            "integrity": integ, "testers": 1
        }

    for _, r in part_stats[part_stats['Match_Count'] == 0].iterrows():
        tid = str(r['Tester_ID']).strip()
        kname = f"{r['Tester_Name']} [I{tid}]" if tid else f"{r['Tester_Name']} [{r['Kit_Code']}]"
        if kname not in part_data:
            part_data[kname] = {
                "name": kname, "sort_key": r['Sort_Key'], "matches": 0, "cm": 0,
                "badge": "Pending Matches", "list_data": {"No Matching DNA in Database": 0},
                "verdict": "This kit has no matching DNA records in the study database yet.",
                "integrity": 0, "testers": 1
            }

    smart_json = json.dumps({"ancestors": anc_data, "participants": part_data})
    db_json = df[['Dir_Label', 'Kit_Name', 'cM', 'Match_ID', 'Lineage', 'search_ids', 'search_names', 't_names', 't_ids', 'Tester_ID']].rename(columns={'Dir_Label':'ancestor', 'Kit_Name':'participant', 'cM':'cm', 'Match_ID':'id', 'Lineage':'lineage', 'Tester_ID':'tester_id'}).to_json(orient='records')
    JS_GLOBALS = f"const DATA={smart_json}; const DB={db_json};"

    def make_page(title, content, nav_b, bar, extra_css=""):
        s_info = SITE_INFO if nav_b else ""
        return f"<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<title>{title}</title>\n<link rel=\"stylesheet\" href=\"partials_unified.css\">\n<link rel=\"stylesheet\" href=\"dna_tree_styles.css\">\n{extra_css}\n</head>\n<body id=\"top\">\n<div class=\"wrap\">\n<h1 class=\"centerline no-print\">{title}</h1>\n<div id=\"nav-slot\">{bar}{NAV_HTML}</div>\n{s_info}{content}\n</div>\n{LEGAL_FOOTER}{JS_CORE}{BTT_BTN}\n</body>\n</html>"

    print("\n[STEP 2] Building HTML Files...")
    pages_to_upload = {}

    pages_to_upload["proof_consolidator.html"] = make_page("Proof Consolidator", CONSOLIDATOR_HTML, False, stats_bar_full, extra_css=CONSOLIDATOR_CSS).replace('</body>', CONSOLIDATOR_JS.replace('__JS_GLOBALS__', JS_GLOBALS) + '</body>')
    pages_to_upload["biological_proof.html"] = BIO_TMPL.replace('__CSS_BASE__', CSS_BASE).replace('__STATS_BAR__', stats_bar_full).replace('__NAV_HTML__', NAV_HTML).replace('__JS_GLOBALS__', JS_GLOBALS).replace('__PRINT_STATS__', stats_bar_full).replace('__LEGAL_FOOTER__', LEGAL_FOOTER)
    pages_to_upload["lineage_proof.html"] = PROOF_TMPL.replace('__STATS_BAR__', stats_bar_full).replace('__NAV_HTML__', NAV_HTML).replace('__JS_GLOBALS__', JS_GLOBALS).replace('__PRINT_STATS__', stats_bar_full).replace('__LEGAL_FOOTER__', LEGAL_FOOTER)
    pages_to_upload["dna_dossier.html"] = DOSS_TMPL.replace('__STATS_BAR__', stats_bar_full).replace('__NAV_HTML__', NAV_HTML).replace('__JS_GLOBALS__', JS_GLOBALS).replace('__LEGAL_FOOTER__', LEGAL_FOOTER)
    pages_to_upload["brick_wall_buster.shtml"] = BUST_TMPL.replace('__STATS_BAR__', stats_bar_full).replace('__NAV_HTML__', NAV_HTML).replace('__JS_GLOBALS__', JS_GLOBALS).replace('__LEGAL_FOOTER__', LEGAL_FOOTER)

    admin_content = f"""<div class="dashboard-grid"><a href="ons_yates_dna_register.shtml" class="dash-card"><span class="dash-icon">üìã</span><span class="dash-title">DNA Register</span></a><a href="dna_network.shtml" class="dash-card"><span class="dash-icon">üï∏Ô∏è</span><span class="dash-title">DNA Network</span></a><a href="proof_consolidator.html" class="dash-card" style="border-color:#4a148c; background:#f3e5f5;"><span class="dash-icon">üéì</span><span class="dash-title" style="color:#4a148c;">Proof Consolidator</span></a><a href="biological_proof.html" class="dash-card"><span class="dash-icon">üìú</span><span class="dash-title">Bio Proof</span></a><a href="lineage_proof.html" class="dash-card"><span class="dash-icon">üß¨</span><span class="dash-title">Proof Engine</span></a><a href="dna_dossier.html" class="dash-card"><span class="dash-icon">üìÅ</span><span class="dash-title">Forensic Dossier</span></a></div>
    <div class="audit-table-wrapper">
        <h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">Participant Activity Report - {total_participants} Official Testers</h2>
        <div style="text-align:center;margin:20px 0;"><a href="admin_singletons.shtml" style="padding:10px 20px;text-decoration:none;border-radius:4px;font-weight:bold;display:inline-block;background:#fbc02d;color:#333;margin-right:10px;">üîç View Singleton Lines</a><a href="engine_database.csv" style="padding:10px 20px;text-decoration:none;border-radius:4px;font-weight:bold;display:inline-block;background:#455a64;color:white;">‚¨áÔ∏è Download CSV</a></div>
        <div style="display:flex; gap:30px; flex-wrap:wrap;">
            <div style="flex: 1; min-width: 400px;">
                <h3 style="color:#006064;background:#e0f7fa;padding:10px;border-radius:4px;text-align:center;margin-bottom:0;border:1px solid #b2ebf2;">View 1: Sorted by Matches (Lowest &rarr; Highest)</h3>
                <div style="max-height:600px;overflow-y:auto;border:1px solid #ddd;border-top:none;background:#fafafa;"><table style="width:100%;border-collapse:collapse;"><thead><tr><th style="background:#004d40;color:white;padding:12px;text-align:left;position:sticky;top:0;">Participant Kit (TNG Linked)</th><th style="background:#004d40;color:white;padding:12px;text-align:center;position:sticky;top:0;">Matches</th></tr></thead><tbody>{''.join(admin_rows_asc)}</tbody></table></div>
            </div>
            <div style="flex: 1; min-width: 400px;">
                <h3 style="color:#006064;background:#e0f7fa;padding:10px;border-radius:4px;text-align:center;margin-bottom:0;border:1px solid #b2ebf2;">View 2: Sorted Alphabetically (A &rarr; Z)</h3>
                <div style="max-height:600px;overflow-y:auto;border:1px solid #ddd;border-top:none;background:#fafafa;"><table style="width:100%;border-collapse:collapse;"><thead><tr><th style="background:#004d40;color:white;padding:12px;text-align:left;position:sticky;top:0;">Participant Kit (TNG Linked)</th><th style="background:#004d40;color:white;padding:12px;text-align:center;position:sticky;top:0;">Matches</th></tr></thead><tbody>{''.join(admin_rows_az)}</tbody></table></div>
            </div>
        </div>
    </div>"""
    pages_to_upload["research_admin.html"] = make_page("Yates Research Admin Hub", admin_content, False, stats_bar_full, extra_css="<style>.dashboard-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:20px;margin:30px auto;max-width:1200px}.dash-card{background:white;padding:20px;border-radius:8px;text-align:center;box-shadow:0 4px 6px rgba(0,0,0,0.1);transition:transform 0.2s;text-decoration:none;color:#333;border:1px solid #ddd}.dash-card:hover{transform:translateY(-5px);border-color:#006064;background:#e0f7fa}.dash-icon{font-size:40px;margin-bottom:10px;display:block}.dash-title{font-weight:bold;font-size:1.1em;color:#006064}.audit-table-wrapper{background:white;padding:25px;border-radius:8px;box-shadow:0 4px 6px rgba(0,0,0,0.1);max-width:1400px;margin:0 auto} td{padding:10px;border-bottom:1px solid #eee}</style>")

    pages_to_upload["contents.shtml"] = make_page("Yates Study User Guide", CONTENTS_CONTENT, False, stats_bar_full, extra_css=CONTENTS_CSS)
    pages_to_upload["share_dna.shtml"] = make_page("Share Your Ancestry DNA Matches", SHARE_CONTENT, False, stats_bar_full)
    pages_to_upload["subscribe.shtml"] = make_page("Join the Yates Research Community", SUBSCRIBE_CONTENT, False, stats_bar_full)
    pages_to_upload["dna_theory_of_the_case.htm"] = make_page("The Yates DNA Strategy", THEORY_CONTENT, False, stats_bar_full)
    pages_to_upload["data_glossary.shtml"] = make_page("Data Glossary", GLOSSARY_CONTENT, False, stats_bar_full, extra_css=GLOSS_CSS)

    df_p = df[df['cM'] > 0].copy()
    df_p['sort_key'] = df_p['Kit_Name'].apply(get_sort_key)
    df_p.sort_values(by=['sort_key', 'Match_Name'], ascending=[True, True], inplace=True)
    df_p['Long_Narrative'] = df_p.apply(lambda r: f"<b>{r['Kit_Name']}</b> is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['Match_ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match_Name']}</b></a> via {r.get('Dir_Label', '').split('(')[0]} back {len(str(r.get('Lineage', '')).split('->'))} generations.", axis=1)
    df_p.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["ons_yates_dna_register_participants.shtml"] = make_page("ONS Yates Study DNA Register", sb_str_part + f'<div class="table-scroll-wrapper">{df_p.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, stats_bar_full, extra_css=REGISTER_CSS)

    mc = df['Dir_Label'].value_counts()
    df_a = df[(df['Dir_Label'].isin(mc[mc >= 2].index)) & (df['Dir_Label'] != 'No Matches')].copy().sort_values(by=['Dir_Label', 'Lineage'], ascending=[True, True])
    df_a['Long_Narrative'] = df_a.apply(lambda r: f"{r['Kit_Name']} is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['Match_ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match_Name']}</b></a> via {r.get('Dir_Label', '').split('(')[0]} back {len(str(r.get('Lineage', '')).split('->'))} generations.", axis=1)
    df_a.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    sbar_a = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Validated Matches (2+):</strong> {len(df_a):,} <span style="color:#d32f2f;">(Singleton matches hidden)</span></div>"""
    pages_to_upload["ons_yates_dna_register.shtml"] = make_page("ONS Yates Study DNA Register", sb_str_anc + f'<div class="table-scroll-wrapper">{df_a.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, sbar_a, extra_css=REGISTER_CSS)
    pages_to_upload["yates_ancestor_register.shtml"] = pages_to_upload["ons_yates_dna_register.shtml"]

    df_s = df[(df['Dir_Label'].isin(mc[mc == 1].index)) & (df['Dir_Label'] != 'No Matches')].copy()
    stats_bar_single = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>HOUSEKEEPING VIEW:</strong> Showing {len(df_s):,} singleton matches.</div>"""
    df_s_anc = df_s.sort_values(by=['Dir_Label'], ascending=False).copy()
    df_s_anc['Long_Narrative'] = df_s_anc.apply(lambda r: f"{r['Kit_Name']} is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['Match_ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match_Name']}</b></a> via {r.get('Dir_Label', '').split('(')[0]} back {len(str(r.get('Lineage', '')).split('->'))} generations.", axis=1)
    df_s_anc.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["admin_singletons.shtml"] = make_page("Singleton Match Register", sb_str_single_anc + f'<div class="table-scroll-wrapper">{df_s_anc.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, stats_bar_single, extra_css=REGISTER_CSS)

    df_s_part = df_s.copy()
    df_s_part['sort_key'] = df_s_part['Kit_Name'].apply(get_sort_key)
    df_s_part.sort_values(by=['sort_key', 'Match_Name'], ascending=[True, True], inplace=True)
    df_s_part['Long_Narrative'] = df_s_part.apply(lambda r: f"<b>{r['Kit_Name']}</b> is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['Match_ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match_Name']}</b></a> via {r.get('Dir_Label', '').split('(')[0]} back {len(str(r.get('Lineage', '')).split('->'))} generations.", axis=1)
    df_s_part.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["admin_singletons_participants.shtml"] = make_page("Singleton Match Register", sb_str_single_part + f'<div class="table-scroll-wrapper">{df_s_part.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, stats_bar_single, extra_css=REGISTER_CSS)

    df_tree = df_a[['Linked_Tree_Line', 'Dir_Label']].copy()
    df_tree.rename(columns={'Linked_Tree_Line': 'TEMP'}, inplace=True)
    df_tree.sort_values(by=['Dir_Label'], ascending=[False], inplace=True)
    pages_to_upload["just-trees.shtml"] = make_page("Ancestor Register (Trees View)", tree_za_toggle + f'<div class="table-scroll-wrapper">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', True, sbar_a, extra_css=TREE_CSS)
    df_tree.sort_values(by=['Dir_Label'], ascending=[True], inplace=True)
    pages_to_upload["just-trees-az.shtml"] = make_page("Ancestor Register (Trees View)", tree_az_toggle + f'<div class="table-scroll-wrapper">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', True, sbar_a, extra_css=TREE_CSS)

    net_buf = []
    for anc, g in sorted(df.groupby('Dir_Label'), key=lambda x: len(x[1]), reverse=True):
        if len(g) < 2 or anc == 'No Matches': continue
        net_buf.append(f"""<details style="background:white;margin-bottom:15px;border:1px solid #ddd;border-radius:5px;overflow:hidden;"><summary style="background:#e0f2f1;padding:15px;cursor:pointer;font-weight:bold;color:#006064;list-style:none;"><span style="font-size:1.1em;">{anc}</span> <span style="float:right;color:#004d40;font-size:0.9em;">Matches: {len(g)} | Total cM: {g['cM'].sum()}</span></summary><div style="padding:15px;"><div style="background:#fffde7;border-left:6px solid #fbc02d;padding:10px;margin-bottom:15px;font-family:sans-serif;color:#333;font-size:0.95em;"><strong>Collateral Saturation Analysis:</strong> Validated by <b>{len(g['Kit_Name'].unique())} independent tester kits</b>.</div><table class="dataframe" border="1"><thead><tr style="text-align:left;"><th>Tester Kit</th><th>cM</th><th>Lineage</th></tr></thead><tbody>""")
        for _, r in g.sort_values('cM', ascending=False).iterrows(): net_buf.append(f"<tr><td>{r['Kit_Name']}</td><td>{r['cM']}</td><td>{r['Lineage']}</td></tr>")
        net_buf.append("</tbody></table></div></details>")

    NET_TMPL = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Participating DNA Network</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css"><style>summary::-webkit-details-marker{{display:none}}summary{{outline:none}}</style></head><body id="top"><div class="wrap"><h1 class="centerline">Participating DNA Network</h1><div id="nav-slot">{stats_bar_full}{NAV_HTML}</div>{SITE_INFO}<div style="margin:20px auto;max-width:1400px;width:95%;">{"".join(net_buf)}</div></div>{LEGAL_FOOTER}{JS_CORE}{BTT_BTN}</body></html>"""
    pages_to_upload["dna_network.shtml"] = NET_TMPL

    print("\n[STEP 3] Uploading via FTP to Live Server...")
    try:
        ftps = FTP_TLS()
        ftps.connect(HOST, 21)
        ftps.auth()
        ftps.login(USER, PASS)
        ftps.prot_p()

        found_dir = False
        for d in [f"/{REMOTE_SUBDIR}", f"/{REMOTE_SUBDIR}/", "htdocs/ons-study", REMOTE_SUBDIR]:
            try:
                ftps.cwd(d)
                found_dir = True
                break
            except: pass

        if not found_dir:
            print("‚ùå FTP Directory Not Found.")
        else:
            upload_count = 0
            if os.path.exists(CSV_DB):
                with open(CSV_DB, "rb") as fh:
                    ftps.storbinary(f"STOR {CSV_DB}", fh)
                print(f"    üì§ Uploaded: {CSV_DB}")

            for fn, content in pages_to_upload.items():
                with open(fn, "w", encoding="utf-8") as f: f.write(content)
                with open(fn, "rb") as fh:
                    ftps.storbinary(f"STOR {fn}", fh)
                print(f"    üì§ Uploaded: {fn}")
                upload_count += 1

            print(f"\nüéâ MASTER PIPELINE COMPLETE. Successfully uploaded {upload_count} pages + {CSV_DB} to the live site.")
        ftps.quit()
    except Exception as e:
        print(f"‚ùå Upload Failed: {e}")

print("‚úÖ Cell 5 (Pure Logic Engine) Loaded.")

‚úÖ Cell 5 (Pure Logic Engine) Loaded.


In [83]:
# @title [CELL 6] MASTER ORCHESTRATOR (Run This Button)
import os, sys
print("="*60)
print("      MASTER ORCHESTRATOR")
print("      (Running Engine -> Publisher -> Upload)")
print("="*60)

if 'run_engine' not in globals() or 'run_publisher' not in globals():
    print("‚ùå ERROR: Modules not loaded! Please run the Engine and Publisher setup cells first.")
else:
    print("\n>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...")
    try:
        run_engine()
        print("‚úÖ PHASE 1 COMPLETE.")

        print("\n>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...")
        run_publisher()
        print("‚úÖ PHASE 2 COMPLETE.")

        print("\n" + "="*60)
        print("      üèÜ MASTER PIPELINE SUCCESSFUL")
        print("="*60)
    except Exception as e:
        print(f"\n‚ùå CRITICAL FAILURE: {e}")

      MASTER ORCHESTRATOR
      (Running Engine -> Publisher -> Upload)

>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...
      [CELL 3] ENGINE STARTING (V123 - DEEP RADAR)...

[STEP 1] Resolving Files (Local Priority)...
    ‚úÖ Found match_to_unmasked.csv locally. Skipping FTP download.
    üëâ Source GEDCOM: yates_study_2025.ged

[STEP 2] Loading Tester Authority CSV...

[STEP 3] Parsing GEDCOM for Study| Tags & Lineages...

[STEP 4] Constructing Database...

[SUCCESS] Engine V123 Complete. Saved 1713 verified matches to engine_database.csv.
‚úÖ PHASE 1 COMPLETE.

>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...
      [CELL 5] PUBLISHER STARTING (Upgraded V26 - Hotlinks Restored)...

[STEP 1] Resolving match_to_unmasked.csv...
    ‚úÖ Found match_to_unmasked.csv locally. Skipping FTP download.

[STEP 2] Building HTML Files...

[STEP 3] Uploading via FTP to Live Server...
    üì§ Uploaded: engine_database.csv
    üì§ Uploaded: proof_consolidator.html
    üì§ Uploaded: biological_proof

In [71]:
# @title [CELL The Time Machine (Archiver + Dropbox Sync)]
import zipfile
import os
import pytz
import time
from datetime import datetime
from google.colab import files
from google.colab import userdata

# --- 1. INSTALL DROPBOX (IF MISSING) ---
try:
    import dropbox
    from dropbox.exceptions import AuthError
except ImportError:
    os.system('pip install dropbox')
    import dropbox
    from dropbox.exceptions import AuthError

def run_archiver():
    print("="*60)
    print("      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC")
    print("="*60)

    # --- 2. CREATE ZIP (SAFE MODE) ---
    # We explicitly exclude .zip to prevent "Zip Bombs"
    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css')
    files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

    if not files_to_pack:
        print("‚ùå No generated files found! Run the Publisher (Cell 4) first.")
        return

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    print(f"üì¶ Compressing {len(files_to_pack)} files into {zip_name}...")
    try:
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file in files_to_pack:
                zf.write(file)
        print(f"    ‚úÖ Archive Created: {zip_name} ({os.path.getsize(zip_name)/1024:.1f} KB)")
    except Exception as e:
        print(f"    ‚ùå Compression Failed: {e}")
        return

    # --- 3. FTP UPLOAD (BACKUPS FOLDER) ---
    print("\n[STEP 2] Uploading to Web Server (FTP)...")
    try:
        from ftplib import FTP_TLS
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

        ftps = FTP_TLS()
        ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()

        try:
            ftps.cwd("/ons-study/backups")
        except:
            try:
                ftps.mkd("/ons-study/backups")
                ftps.cwd("/ons-study/backups")
            except:
                pass

        with open(zip_name, "rb") as fh:
            ftps.storbinary(f"STOR {zip_name}", fh)
        print(f"    ‚úÖ FTP Success: /ons-study/backups/{zip_name}")
        ftps.quit()
    except Exception as e:
        print(f"    ‚ö†Ô∏è FTP Upload skipped: {e}")

    # --- 4. DROPBOX SYNC (NEW) ---
    print("\n[STEP 3] Syncing to Dropbox...")
    try:
        # Initialize with Refresh Token (Long-term access)
        dbx = dropbox.Dropbox(
            app_key=userdata.get('DBX_APP_KEY'),
            app_secret=userdata.get('DBX_APP_SECRET'),
            oauth2_refresh_token=userdata.get('DBX_REFRESH_TOKEN')
        )

        # Upload the Zip
        target_path = f"/Backups/{zip_name}"
        with open(zip_name, "rb") as f:
            dbx.files_upload(f.read(), target_path, mode=dropbox.files.WriteMode.overwrite)

        print(f"    ‚úÖ Dropbox Success: {target_path}")

    except Exception as e:
        print(f"    ‚ùå Dropbox Upload Failed: {e}")
        print("       (Check DBX_APP_KEY, DBX_APP_SECRET, DBX_REFRESH_TOKEN in Colab Secrets)")

    # --- 5. LOCAL DOWNLOAD (SAFETY NET) ---
    print("\n[STEP 4] Triggering Local Download...")
    try:
        files.download(zip_name)
    except Exception as e:
        print(f"    ‚ö†Ô∏è Auto-download blocked: {e}")

    print("‚úÖ Archival Process Complete.")

# Run it
run_archiver()

      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC
üì¶ Compressing 20 files into Yates_Study_Backup_2026-02-22_1750.zip...
    ‚úÖ Archive Created: Yates_Study_Backup_2026-02-22_1750.zip (2041.9 KB)

[STEP 2] Uploading to Web Server (FTP)...
    ‚úÖ FTP Success: /ons-study/backups/Yates_Study_Backup_2026-02-22_1750.zip

[STEP 3] Syncing to Dropbox...
    ‚úÖ Dropbox Success: /Backups/Yates_Study_Backup_2026-02-22_1750.zip

[STEP 4] Triggering Local Download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Archival Process Complete.


In [8]:
# @title [CELL Manual Zip & Download]
import os
import zipfile
import pytz
from datetime import datetime
from google.colab import files

print("="*60)
print("      [CELL 7] MANUAL ZIP & DOWNLOADER")
print("="*60)

# Create a timestamped zip file name
est = pytz.timezone('US/Eastern')
timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
zip_filename = f"Yates_Study_Manual_Upload_{timestamp}.zip"

# Find all the files we normally FTP
extensions = ('.html', '.shtml', '.htm', '.csv')
files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

if not files_to_pack:
    print("‚ùå No files found to zip! Make sure you ran the Builder cells first.")
else:
    print(f"üì¶ Found {len(files_to_pack)} files. Compressing into {zip_filename}...\n")

    # Create the zip archive
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files_to_pack:
            zf.write(file)
            print(f"  + Added: {file}")

    print(f"\n‚úÖ Zip file created successfully! ({os.path.getsize(zip_filename)/1024:.1f} KB)")

    # Trigger the browser download
    print("‚¨áÔ∏è Prompting browser to download...")
    try:
        files.download(zip_filename)
        print("üéâ Download initiated! You can now manually upload these via FileZilla/Cyberduck.")
    except Exception as e:
        print(f"‚ùå Auto-download blocked by browser: {e}")
        print(f"üëâ You can manually download '{zip_filename}' by clicking the Folder icon üìÅ on the far left menu.")

      [CELL 7] MANUAL ZIP & DOWNLOADER
üì¶ Found 21 files. Compressing into Yates_Study_Manual_Upload_2026-02-22_0937.zip...

  + Added: contents.shtml
  + Added: subscribe.shtml
  + Added: match_to_unmasked.csv
  + Added: ons_yates_dna_register.shtml
  + Added: research_admin.html
  + Added: brick_wall_buster.shtml
  + Added: ons_yates_dna_register_participants.shtml
  + Added: dna_dossier.html
  + Added: engine_database.csv
  + Added: share_dna.shtml
  + Added: lineage_proof.html
  + Added: admin_singletons_participants.shtml
  + Added: proof_consolidator.html
  + Added: dna_theory_of_the_case.htm
  + Added: just-trees-az.shtml
  + Added: yates_ancestor_register.shtml
  + Added: just-trees.shtml
  + Added: data_glossary.shtml
  + Added: biological_proof.html
  + Added: admin_singletons.shtml
  + Added: dna_network.shtml

‚úÖ Zip file created successfully! (1639.3 KB)
‚¨áÔ∏è Prompting browser to download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

üéâ Download initiated! You can now manually upload these via FileZilla/Cyberduck.


In [None]:
# @title GEDCOM Search: The Gremlin Hunter
import os

def find_errant_participant(search_term="yatesjohnrob"):
    print("="*75)
    print(f"      GEDCOM SEARCH: LOOKING FOR '{search_term}'")
    print("="*75)

    # Find the original GEDCOM file
    ged_files = [f for f in os.listdir('.') if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files:
        return print("‚ùå No original GEDCOM found.")

    ged_file = sorted(ged_files, key=lambda x: os.path.getmtime(x), reverse=True)[0]
    print(f"üîç Scanning File: {ged_file}\n")

    current_id = None
    current_name = "Unknown"
    matches_found = 0

    print(f"{'ID#'.ljust(12)} | {'NAME'.ljust(30)} | EXACT LINE FOUND")
    print("-" * 75)

    with open(ged_file, 'r', encoding='utf-8', errors='replace') as f:
        for line_num, line in enumerate(f, 1):
            line_clean = line.strip()
            parts = line_clean.split(" ", 2)
            if len(parts) < 2: continue

            lvl = parts[0]
            tag = parts[1]
            val = parts[2] if len(parts) > 2 else ""

            # Track the current individual block
            if lvl == "0" and "INDI" in val:
                current_id = tag.replace("@", "")
                current_name = "Unknown"

            # Track the Name so we know who we are looking at
            elif lvl == "1" and tag == "NAME":
                current_name = val.replace("/", "").strip()

            # Trigger if the search term is anywhere in this line
            if search_term.lower() in line_clean.lower():
                if current_id:
                    print(f"{current_id.ljust(12)} | {current_name[:28].ljust(30)} | {line_clean}")
                else:
                    print(f"{'N/A'.ljust(12)} | {'(Outside INDI block)'.ljust(30)} | {line_clean}")
                matches_found += 1

    print("-" * 75)
    print(f"‚úÖ Found {matches_found} total mentions of '{search_term}'.")

find_errant_participant("yatesjohnrob")

      GEDCOM SEARCH: LOOKING FOR 'yatesjohnrob'
üîç Scanning File: yates_study_2025.ged

ID#          | NAME                           | EXACT LINE FOUND
---------------------------------------------------------------------------
I51017       | Terri Ann Yates                | 2 NPFX 361&yatesjohnrob
I51033       | Cynthia Lou Miller             | 2 NPFX 20&yatesjohnrob
I51044       | Rhonda Rowe                    | 2 NPFX 19&yatesjohnrob
---------------------------------------------------------------------------
‚úÖ Found 3 total mentions of 'yatesjohnrob'.


In [None]:
# @title DIAGNOSTIC: The Orphan Kit Hunter
import os
import csv
import re

def find_missing_kits():
    print("="*60)
    print("      DIAGNOSTIC: FINDING THE MISSING KITS")
    print("="*60)

    KEY_FILE = "match_to_unmasked.csv"
    csv_kits = {}

    # 1. Load the 94 kits from the CSV
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            for row in csv.reader(f):
                if len(row) >= 2:
                    code = row[0].strip().lower()
                    name = row[1].strip()
                    csv_kits[code] = name
    else:
        return print("‚ùå Cannot find match_to_unmasked.csv")

    # 2. Find the GEDCOM
    ged_files = [f for f in os.listdir('.') if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files:
        return print("‚ùå No original GEDCOM found.")
    ged_file = sorted(ged_files, key=lambda x: os.path.getmtime(x), reverse=True)[0]

    def resolve_code(payload):
        m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        return m.group(2).lower() if m else None

    # 3. Scan GEDCOM for every unique NPFX code actually in use
    gedcom_codes = set()
    with open(ged_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if "NPFX" in line:
                parts = line.strip().split(" ", 2)
                if len(parts) > 2:
                    code = resolve_code(parts[2])
                    if code:
                        gedcom_codes.add(code)

    # 4. Compare CSV against GEDCOM matches
    missing = []
    for csv_code, csv_name in csv_kits.items():
        if csv_code not in gedcom_codes:
            missing.append(f"{csv_name} (Masked Code: {csv_code})")

    print(f"üìä Total Kits in CSV: {len(csv_kits)}")
    print(f"üß¨ Kits with active Matches in GEDCOM: {len(csv_kits) - len(missing)}")
    print(f"‚ö†Ô∏è Orphaned Kits (0 matches found): {len(missing)}\n")

    print("--- THE MISSING 5 ---")
    for m in missing:
        print(f" ‚ùå {m}")

find_missing_kits()

      DIAGNOSTIC: FINDING THE MISSING KITS
üìä Total Kits in CSV: 95
üß¨ Kits with active Matches in GEDCOM: 89
‚ö†Ô∏è Orphaned Kits (0 matches found): 6

--- THE MISSING 5 ---
 ‚ùå unmasked (Masked Code: code)
 ‚ùå Anaya Yates (Masked Code: aanya)
 ‚ùå Donald Coram (Masked Code: coram)
 ‚ùå Fiona Houston (Masked Code: houston)
 ‚ùå Jan King (Masked Code: king)
 ‚ùå How Yates (Masked Code: yates_nj-h)
