<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v23.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# @title [CELL 1] Setup + Global Variables (V8 Baseline + Bio Proof Nav)
import os, sys, re, csv, json, html, socket, pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# Make sure tqdm is importable, installing it on first use.
try:
    import tqdm
except ImportError:
    # Install with the interpreter that is running this notebook: a bare
    # `pip` found on PATH may belong to a different Python environment, and
    # os.system() would also hide a failed install behind a second ImportError.
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tqdm"])
    import tqdm

# Startup banner for Cell 1: a single print call emits the same three lines.
print(
    "=" * 60,
    "      [CELL 1] SETUP LOADED (V8 Baseline - Clean Nav)",
    "=" * 60,
    sep="\n",
)

# URL fragments for the TNG vertical chart page.
# NOTE(review): presumably joined as TNG_BASE_URL + <person id> + TNG_SUFFIX
# by code outside this cell - confirm against the caller.
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

# Orange background removed from Brick Wall Buster link
NAV_HTML = r"""<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print, .action-btn, .control-panel, .tabs { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li><li><a href="/ons-study/just-trees.shtml">Trees</a></li><li><a href="/ons-study/dna_network.shtml">DNA Network</a></li><li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li><li><a href="/ons-study/biological_proof.html" style="color:#fff !important; font-weight:bold;">Biological Proof</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/brick_wall_buster.shtml">Brick Wall Buster</a></li><li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li><li><a href="https://yates.one-name.net/gengen/images/cousin-calculator.jpg" target="_blank" style="color:#b2dfdb;">Cousin Calc</a></li><li><a href="https://yates.one-name.net/gengen/images/Shared_cM_Project_v4.jpg" target="_blank" style="color:#b2dfdb;">cM Chart</a></li><li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li><li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li></ul></nav>"""

# Methodology banner shown under the nav bar on data views.
# The dash is written as the &mdash; entity so it survives any re-encoding of
# this file (the previous literal had been garbled into mojibake).
SITE_INFO = r"""<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>&mdash;a method that blends genealogical reasoning with data-driven logic.</p></div>"""

JS_CORE = r"""<script type="text/javascript">(function(){ function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();} function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);} function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}} window.filterTable = function() { var input = document.getElementById("tableSearch"); var filter = input.value.toUpperCase(); var table = document.getElementById("reg-table") || document.querySelector("table.dataframe"); var tr = table.getElementsByTagName("tr"); for (var i = 1; i < tr.length; i++) { var tdArr = tr[i].getElementsByTagName("td"); var found = false; for (var j = 0; j < tdArr.length; j++) { if (tdArr[j]) { var txtValue = tdArr[j].textContent || tdArr[j].innerText; if (txtValue.toUpperCase().indexOf(filter) > -1) { found = true; break; } } } tr[i].style.display = found ? 
"" : "none"; } } function init(){ var t=document.getElementsByTagName('table'); for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]); } if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init(); })();</script>"""

# --- EXACT V8 MAKE_PAGE SIGNATURE RESTORED ---
def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    """Wrap ``content`` in the study's standard HTML page shell.

    Args:
        title: Text used for both ``<title>`` and the page ``<h1>``; inserted
            verbatim (not HTML-escaped).
        content: Pre-rendered HTML placed after the optional page chrome.
        count: Not used by this function; kept so existing positional callers
            keep working.
        view_type: Selects the optional chrome:
            * methodology banner (``SITE_INFO``) for the data views listed in
              ``banner_views`` below;
            * search box and print button for 'ancestor', 'participant' and
              'singleton';
            * a sort/order toggle for 'ancestor', 'participant' and any value
              containing 'tree'.
        extra: Raw HTML appended inside ``<head>``.
        stats_bar: Raw HTML placed in the nav slot ahead of ``NAV_HTML``.

    Returns:
        str: The complete HTML document.
    """
    # Views that show the methodology banner. 'subscribe' and 'share' used to
    # be listed here and were then cleared again straight away, so they (like
    # 'theory' and 'glossary') have never shown it; they are simply left out.
    banner_views = ('ancestor', 'participant', 'tree_az', 'tree_za', 'proof',
                    'hot_paths', 'network', 'dossier', 'buster', 'singleton')
    nav_blk = SITE_INFO if view_type in banner_views else ""

    toggle = ""
    print_btn = ""
    search_bar = ""

    # Emoji are written as numeric character references so the markup stays
    # correct whatever encoding this file or the generated page is saved in.
    if view_type in ('ancestor', 'participant', 'singleton'):
        search_bar = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="&#128269; Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""
        view_name = "Singleton List" if view_type == 'singleton' else "Register"
        print_btn = f"""<div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">&#128424;&#65039; Print {view_name}</button></div>"""

    if view_type == 'ancestor':
        # Current view is plain text; the other view is a link.
        toggle = """<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<span style="font-weight:bold;color:#006064;">By Ancestral Line</span> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="color:#00acc1;text-decoration:none;">By Participant Name</a></div>"""
    elif view_type == 'participant':
        toggle = """<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <span style="font-weight:bold;color:#006064;">By Participant Name</span></div>"""
    elif 'tree' in view_type:
        # The active ordering is rendered as bold text, the other as a link.
        if 'za' in view_type:
            za = '<span style="font-weight:bold;color:#000;">Z-A</span>'
        else:
            za = '<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        if 'az' in view_type:
            az = '<span style="font-weight:bold;color:#000;">A-Z</span>'
        else:
            az = '<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}</div>{JS_CORE}</body></html>"""

# Confirmation that the cell ran to completion (check mark restored from mojibake).
print("✅ Cell 1 Loaded.")

      [CELL 1] SETUP LOADED (V8 Baseline - Clean Nav)
✅ Cell 1 Loaded.


In [11]:
# @title [CELL 3] The Data Engine (V124 - Sort Authority Inject)
def run_engine():
    """Build ``engine_database.csv`` from the newest local GEDCOM and the tester key file.

    Steps:
      1. Use ``match_to_unmasked.csv`` from the working directory, or try to
         fetch it over FTPS when it is missing.
      2. Load that CSV into a tester table (code -> name, ID, sort key).
      3. Copy the most recently modified ``*.ged`` file to
         ``_processed_unmasked.ged`` and parse individuals, families and
         ``Study|`` tester tags from the copy.
      4. For every individual carrying a match code, climb the ancestral line
         and write one row to ``engine_database.csv``.

    Returns:
        None. Progress is reported with ``print``; the function returns early
        when no GEDCOM file is found in the working directory.
    """
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V124 - SORT AUTHORITY)...")
    print("="*60)

    import csv
    import os
    import re
    import shutil
    from ftplib import FTP_TLS

    # Colab secrets are only a fallback for the FTP_* environment variables,
    # so the engine can still run where google.colab is not installed.
    try:
        from google.colab import userdata
    except ImportError:
        userdata = None

    CSV_DB = "engine_database.csv"
    if os.path.exists(CSV_DB): os.remove(CSV_DB)

    REMOTE_SUBDIR = "ons-study"
    KEY_FILE = "match_to_unmasked.csv"
    PROCESSED_GED = "_processed_unmasked.ged"

    def read_secret(key):
        """Return ``key`` from the environment, else from Colab secrets, else None."""
        value = os.environ.get(key)
        if value or userdata is None:
            return value
        try:
            return userdata.get(key)
        except Exception as e:
            print(f"    ⚠️ Could not read secret {key}: {e}")
            return None

    def clean_and_standardize(raw_name):
        """Strip GEDCOM slashes; map empty or placeholder names to 'findme'."""
        if not raw_name: return "findme"
        s = raw_name.replace("/", "").strip()
        triggers = ["unknown", "missing", "searching", "still searching", "living", "private", "nee", "nee ?", "wife", "mrs"]
        if s.lower() in triggers or s == "": return "findme"
        if "?" in s: return "findme"
        if "unknown" in s.lower(): return "findme"
        return s

    def get_surname(full_name):
        """Return the last word of the name after removing common suffixes."""
        if not full_name or "findme" in full_name.lower(): return ""
        clean = re.sub(r'\b(jr\.?|sr\.?|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', full_name, flags=re.IGNORECASE)
        parts = clean.replace(',', '').split()
        return parts[-1] if parts else ""

    def make_directory_label(name, dates):
        """Format a name as 'Surname, Given dates' for directory ordering."""
        if "findme" in name.lower(): return name
        sur = get_surname(name)
        if not sur: return name
        firsts = re.sub(f"{re.escape(sur)}$", "", name).strip()
        return f"{sur}, {firsts} {dates}"

    print("\n[STEP 1] Resolving Files (Local Priority)...")
    if os.path.exists(KEY_FILE):
        print(f"    ✅ Found {KEY_FILE} locally. Skipping FTP download.")
    else:
        print(f"    🌐 {KEY_FILE} not found locally. Attempting FTP fetch...")
        HOST = read_secret("FTP_HOST")
        USER = read_secret("FTP_USER")
        PASS = read_secret("FTP_PASS")
        if not (HOST and USER and PASS):
            print("    ⚠️ FTP connection failed: FTP_HOST / FTP_USER / FTP_PASS are not all set.")
        else:
            # Download to a side file and rename on success, so a failed
            # transfer never leaves an empty key file that the next run
            # would treat as a valid local copy.
            partial = KEY_FILE + ".part"
            ftps = None
            try:
                ftps = FTP_TLS()
                ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
                try:
                    with open(partial, "wb") as f: ftps.retrbinary(f"RETR /{REMOTE_SUBDIR}/{KEY_FILE}", f.write)
                    os.replace(partial, KEY_FILE)
                    print(f"    ✅ Successfully downloaded {KEY_FILE}.")
                except Exception as e:
                    print(f"    ⚠️ FTP download failed: {e}")
                    if os.path.exists(partial): os.remove(partial)
            except Exception as e:
                print(f"    ⚠️ FTP connection failed: {e}")
            finally:
                # Always release the control connection, even after a failed login.
                if ftps is not None:
                    try:
                        ftps.quit()
                    except Exception:
                        ftps.close()

    all_files = os.listdir('.')
    ged_files = [f for f in all_files if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files:
        print("❌ No GEDCOM found. Please upload one.")
        return
    # Newest file first: the most recently modified GEDCOM is the source.
    ged_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
    DEFAULT_GEDCOM = ged_files[0]
    print(f"    👉 Source GEDCOM: {DEFAULT_GEDCOM}")

    def resolve_code(payload):
        """Return the lower-cased token that follows the leading number (and optional '&')."""
        m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        return m.group(2).lower() if m else None

    print("\n[STEP 2] Loading Tester Authority CSV...")
    csv_auth = {}
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            reader = csv.reader(f)
            for i, row in enumerate(reader):
                if len(row) >= 2:
                    # Skip a header row, recognised by its first cell.
                    if i == 0 and ("tester" in row[0].lower() or "masked" in row[0].lower() or "code" in row[0].lower()):
                        continue
                    code = row[0].strip().lower()
                    name = row[1].strip()
                    tid = row[2].strip() if len(row) > 2 else ""
                    if tid: tid = "I" + re.sub(r'[^0-9]', '', tid)
                    # ✨ Column D (index 3) is the explicit sort key.
                    sort_key = row[3].strip().lower() if len(row) > 3 else ""
                    csv_auth[code] = {"name": name, "id": tid, "sort_key": sort_key}

    print("\n[STEP 3] Parsing GEDCOM for Study| Tags & Lineages...")
    shutil.copyfile(DEFAULT_GEDCOM, PROCESSED_GED)

    individuals = {}; families = {}; study_testers = {}

    def is_yates(name_str):
        """True when the name contains a Yates spelling ('yate...' or 'yeates')."""
        n = (name_str or "").lower()
        # "yate" already covers "yates".
        return "yate" in n or "yeates" in n

    current_id = None; current_fam = None; current_tag = None
    with open(PROCESSED_GED, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            parts = line.strip().split(" ", 2)
            if len(parts) < 2: continue
            lvl, tag = parts[0], parts[1]
            val = parts[2] if len(parts) > 2 else ""

            if lvl == "0":
                # Every level-0 line starts a new record, so whatever was open
                # is closed first. Otherwise the sub-lines of a SOUR/REPO/NOTE
                # record (e.g. "1 NAME ...") would be applied to the previous
                # person or family.
                current_id = None; current_fam = None; current_tag = None
                tokens = val.split()
                rec_type = tokens[0] if tokens else ""
                if rec_type == "INDI":
                    current_id = tag.replace("@", "")
                    individuals[current_id] = {"name": "findme", "famc": None, "fams": [], "match_code": "", "cm": 0, "birt": "", "deat": ""}
                elif rec_type == "FAM":
                    current_fam = tag.replace("@", "")
                    families[current_fam] = {"husb": None, "wife": None}
                continue

            if current_id:
                person = individuals[current_id]
                # Any other level-1 tag ends a pending BIRT/DEAT, so a DATE
                # under a later event (CHR, BURI, ...) is not recorded as the
                # birth or death year.
                if lvl == "1" and tag not in ("BIRT", "DEAT"):
                    current_tag = None

                if tag == "NAME" and lvl == "1":
                    person["name"] = clean_and_standardize(val)
                elif tag == "FAMC" and lvl == "1":
                    person["famc"] = val.replace("@", "")
                elif tag == "FAMS" and lvl == "1":
                    person["fams"].append(val.replace("@", ""))

                elif tag == "NICK" and lvl == "2" and "Study|" in val:
                    tester_code = val.split("Study|")[-1].strip().lower()
                    study_testers[tester_code] = {"id": current_id, "name": person["name"]}

                elif tag == "NPFX" and lvl == "2":
                    code = resolve_code(val)
                    if code: person["match_code"] = code.lower()
                    # cM value: a leading number, or a number followed by "cM".
                    m = re.search(r'^(\d+)|(\d+)\s*cM', val, re.IGNORECASE)
                    if m: person["cm"] = int(m.group(1) or m.group(2))

                elif tag == "BIRT": current_tag = "BIRT"
                elif tag == "DEAT": current_tag = "DEAT"
                elif tag == "DATE" and current_tag:
                    # Only the first four-digit year of the date is kept.
                    m = re.search(r'\d{4}', val)
                    if m: person[current_tag.lower()] = m.group(0)
                    current_tag = None

            elif current_fam:
                if tag == "HUSB": families[current_fam]["husb"] = val.replace("@", "")
                elif tag == "WIFE": families[current_fam]["wife"] = val.replace("@", "")

    def get_parents(pid):
        """Return (husband_id, wife_id) of the person's FAMC family, or (None, None)."""
        if not pid or pid not in individuals: return None, None
        famc = individuals[pid]["famc"]
        if not famc or famc not in families: return None, None
        return families[famc]["husb"], families[famc]["wife"]

    def get_mother_surname(pid):
        """Return the surname of the person's mother, or '' when unavailable."""
        if not pid: return ""
        _, mom_id = get_parents(pid)
        if mom_id and mom_id in individuals: return get_surname(individuals[mom_id]["name"])
        return ""

    def to_spanish_name(pid, current_name):
        """Append '-<mother's surname>' unless it is unknown or already in the name."""
        if "findme" in current_name.lower(): return current_name
        mom_surname = get_mother_surname(pid)
        if not mom_surname or "findme" in mom_surname.lower(): return current_name
        if mom_surname.lower() not in current_name.lower(): return f"{current_name}-{mom_surname}"
        return current_name

    # 🌟 THE DEEP ANCESTRY RADAR
    yates_memo = {}
    def has_yates_ancestry(pid):
        """True when the person or any recorded ancestor has a Yates name."""
        if not pid or pid not in individuals: return False
        if pid in yates_memo: return yates_memo[pid]

        if is_yates(individuals[pid].get("name", "")):
            yates_memo[pid] = True
            return True

        # Provisional answer: if bad data makes a person their own ancestor,
        # the recursion meets this entry and stops instead of looping forever.
        yates_memo[pid] = False
        dad_id, mom_id = get_parents(pid)
        res = has_yates_ancestry(dad_id) or has_yates_ancestry(mom_id)
        yates_memo[pid] = res
        return res

    def climb_full_line(start_id):
        """Walk upward from ``start_id``; return one dict per generation, youngest first."""
        curr = start_id; lineage_data = []
        seen = set()  # guards against ancestry loops in the data
        while curr and curr not in seen:
            seen.add(curr)
            p = individuals.get(curr)
            if not p: break
            spanish_name = to_spanish_name(curr, p["name"])
            spouse_name = "findme"; spouse_id = None
            if p["fams"]:
                # Only the first listed FAMS family supplies the spouse.
                fid = p["fams"][0]
                if fid in families:
                    fam = families[fid]
                    sid = fam["wife"] if fam["husb"] == curr else fam["husb"]
                    if sid and sid in individuals:
                        spouse_name = individuals[sid]["name"]; spouse_id = sid
            spouse_spanish = to_spanish_name(spouse_id, spouse_name) if spouse_id else spouse_name
            lineage_data.append({"name": spanish_name, "raw_name": p["name"], "id": curr, "spouse": spouse_spanish, "spouse_raw": spouse_name, "spouse_id": spouse_id})

            dad_id, mom_id = get_parents(curr)
            if not dad_id and not mom_id: break

            dad_has_yates = has_yates_ancestry(dad_id)
            mom_has_yates = has_yates_ancestry(mom_id)

            # Follow the only parent with Yates ancestry; on a tie (both or
            # neither) prefer the father when one is recorded.
            if dad_has_yates and not mom_has_yates: curr = dad_id
            elif mom_has_yates and not dad_has_yates: curr = mom_id
            else: curr = dad_id if dad_id else mom_id

        return lineage_data

    def format_dates(uid):
        """Return '(birth - death)' with 'findme' for gaps, or plain 'findme'."""
        if not uid or uid not in individuals: return "findme"
        b = individuals[uid]["birt"] or "findme"
        d = individuals[uid]["deat"] or "findme"
        b = b.replace("?", "findme"); d = d.replace("?", "findme")
        if b == "findme" and d == "findme": return "findme"
        return f"({b} - {d})"

    testers = {}
    # The CSV is the authority for testers, including the sort key.
    for code, data in csv_auth.items():
        testers[code] = {"name": data["name"], "id": data["id"], "sort_key": data.get("sort_key", "")}

    # GEDCOM Study| tags add testers missing from the CSV, or fill a blank ID.
    for code, data in study_testers.items():
        if code not in testers:
            testers[code] = {"name": data["name"], "id": data["id"], "sort_key": ""}
        elif not testers[code]["id"]:
            testers[code]["id"] = data["id"]

    # Pre-compute each tester's own line, oldest ancestor first.
    for tdata in testers.values():
        t_lin = ""; t_pids = ""
        if tdata["id"] and tdata["id"] in individuals:
            lin_data = climb_full_line(tdata["id"])
            if lin_data:
                full = list(reversed(lin_data))
                t_lin = " -> ".join([x["name"] for x in full])
                t_pids = ",".join([x["id"] for x in full])
        tdata["lineage_str"] = t_lin
        tdata["path_ids"] = t_pids

    print("\n[STEP 4] Constructing Database...")
    rows = []
    for uid, p in individuals.items():
        if not p["match_code"]: continue  # only individuals tagged as a match
        kit_code = p["match_code"]

        if kit_code in testers:
            t_name = testers[kit_code]["name"]
            t_id = testers[kit_code]["id"]
            t_lin = testers[kit_code]["lineage_str"]
            t_pids = testers[kit_code]["path_ids"]
            t_sort = testers[kit_code]["sort_key"]
            tester_display = f"{t_name} [{t_id}]" if t_id else f"{t_name} [{kit_code}]"
        else:
            # Unknown kit: fall back to the raw code everywhere.
            t_name = kit_code
            t_id = ""
            t_lin = ""
            t_pids = ""
            t_sort = ""
            tester_display = f"{kit_code} [{kit_code}]"

        lineage_data = climb_full_line(uid)
        if not lineage_data: continue

        full_line = list(reversed(lineage_data))
        gen1 = full_line[0]  # oldest ancestor reached

        top_name = gen1["raw_name"]
        top_dates = format_dates(gen1["id"])
        spouse_name = gen1["spouse_raw"]
        spouse_id = gen1["spouse_id"]
        spouse_dates = format_dates(spouse_id)

        if spouse_name != "findme":
            husb_sur = get_surname(top_name); wife_sur = get_surname(spouse_name)
            # "née" is a plain Latin-9 character, so it survives the
            # iso-8859-15 CSV encoding used below.
            if husb_sur.lower() == wife_sur.lower(): spouse_name += f" (née {wife_sur})"

        pair_dated = f"{top_name} {top_dates}"
        if spouse_name != "findme": dir_label = make_directory_label(top_name, top_dates) + f" & {spouse_name}"
        else: dir_label = make_directory_label(top_name, top_dates)

        if spouse_name != "findme": pair_dated += f" & {spouse_name} {spouse_dates}"
        pair_simple = f"{top_name} & {spouse_name}" if spouse_name != "findme" else top_name

        clean_top = re.sub(r'[^a-zA-Z0-9]', '', top_name)
        clean_sp = re.sub(r'[^a-zA-Z0-9]', '', spouse_name.split('(')[0]) if spouse_name != "findme" else "ZZZ"
        sort_key = f"{clean_top}_{clean_sp}"

        # The first generation is shown as the dated couple, the rest by name.
        path_names = [pair_dated] + [x["name"] for x in full_line[1:]]

        lineage_str = " -> ".join(path_names)
        path_ids = ",".join([x["id"] for x in full_line])

        # A WIFE pointer may reference an ID with no INDI record; treat that
        # as unknown instead of raising KeyError.
        _, fa1_mom_id = get_parents(gen1["id"])
        fa1_mother = to_spanish_name(fa1_mom_id, individuals[fa1_mom_id]["name"]) if fa1_mom_id in individuals else "findme"

        fa2_mother = "findme"
        if spouse_id:
            _, fa2_mom_id = get_parents(spouse_id)
            if fa2_mom_id in individuals: fa2_mother = to_spanish_name(fa2_mom_id, individuals[fa2_mom_id]["name"])

        rows.append({
            "Tester_Code": kit_code,
            "Tester_Name": t_name,
            "Tester_ID": t_id,
            "Tester_Display": tester_display,
            "Tester_Sort_Key": t_sort,
            "Tester_Lineage": t_lin,
            "Tester_Path_IDs": t_pids,
            "Match_Name": p["name"],
            "Match_ID": uid,
            "cM": p["cm"],
            "Match_Lineage": lineage_str,
            "Match_Path_IDs": path_ids,
            "Authority_Directory_Label": dir_label,
            "Authority_FirstAncestor": pair_simple,
            "Authority_FirstAncestor_alpha": sort_key,
            "Authority_FirstAncestor_dated": pair_dated,
            "fa_1 extracted": top_name, "fa_1_Dates": top_dates, "fa_1_Mother": fa1_mother,
            "fa_2 extracted": spouse_name, "fa_2 Dates": spouse_dates, "fa_2_Mother": fa2_mother,
            "Gen_Count": len(full_line)
        })

    rows.sort(key=lambda r: r["Authority_Directory_Label"])

    fieldnames = [
        "Tester_Code", "Tester_Name", "Tester_ID", "Tester_Display", "Tester_Sort_Key",
        "Tester_Lineage", "Tester_Path_IDs",
        "Match_Name", "Match_ID", "cM", "Match_Lineage", "Match_Path_IDs",
        "Authority_Directory_Label", "Authority_FirstAncestor", "Authority_FirstAncestor_alpha", "Authority_FirstAncestor_dated",
        "fa_1 extracted", "fa_1_Dates", "fa_1_Mother",
        "fa_2 extracted", "fa_2 Dates", "fa_2_Mother",
        "Gen_Count"
    ]

    # Characters outside iso-8859-15 are written as "?" (errors="replace").
    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader(); writer.writerows(rows)

    print(f"\n[SUCCESS] Engine V124 Complete. Saved {len(rows)} verified matches to {CSV_DB}.")

# Confirmation that run_engine is defined (check mark restored from mojibake).
print("✅ Cell 3 (Engine V124 - Sort Authority Inject) Loaded.")

✅ Cell 3 (Engine V124 - Sort Authority Inject) Loaded.


In [48]:
# @title [CELL 4] The Template Library (Patch 55: Transmittal Cover & SPA Align)
# Startup banner for Cell 4: a single print call emits the same three lines.
print(
    "=" * 60,
    "      [CELL 4] TEMPLATE LIBRARY LOADING (Vault & UI Fix)...",
    "=" * 60,
    sep="\n",
)

# 🌟 0. CATCH-ALL CSS
# Inline <style> block for register tables: a horizontally scrollable, centred
# white card (.table-scroll-wrapper) with the table.dataframe inside it
# stretched to the card's full width.
REGISTER_CSS = "<style>.table-scroll-wrapper{overflow-x:auto; background:white; padding:20px; border-radius:8px; box-shadow:0 4px 6px rgba(0,0,0,0.1); max-width:1400px; margin:20px auto; display:flex; justify-content:center; flex-direction:column;} .table-scroll-wrapper table.dataframe { margin: 0 auto; width: 100%; }</style>"

# 🌟 1. GLOBAL CSS & NAV
NAV_HTML = r"""<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/ons_yates_dna_register.shtml">DNA Register</a></li><li><a href="/ons-study/just-trees.shtml">Trees</a></li><li><a href="/ons-study/dna_network.shtml">DNA Network</a></li><li><a href="/ons-study/proof_engine.html">Proof Engine</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/proof_consolidator.html" style="background-color:#4a148c; color:#fff !important; font-weight:bold; border-left:1px solid #7c43bd; border-right:1px solid #7c43bd;">Report</a></li><li><a href="/ons-study/anchor_frame.htm" style="background-color:#d84315; color:#fff !important; font-weight:bold; border-left:1px solid #ffab91; border-right:1px solid #ffab91;">ANCHOR</a></li><li><a href="/ons-study/dna_theory_of_the_case.htm" style="color:#b2dfdb;">Theory</a></li><li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li><li><a href="/ons-study/gedmatch_integration.shtml" style="color:#81d4fa;">GEDmatch</a></li><li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li><li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40; font-weight:bold;">Subscribe</a></li></ul></nav>"""
# Methodology banner shown under the nav bar.
# The dash is written as the &mdash; entity so it survives any re-encoding of
# this file (the previous literal had been garbled into mojibake).
SITE_INFO = r"""<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;margin-bottom:0;">This register employs <em>Collateral DNA Saturation</em>&mdash;a method blending genealogical reasoning with data-driven logic to prove connections using multiple independent DNA cousins.</p></div>"""
CSS_BASE = r"""body{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px;display:flex;flex-direction:column;min-height:100vh;margin:0;} .wrap{flex:1;} .proof-card{background:white;max-width:1100px;margin:20px auto;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);padding:40px} .badge{padding:5px 10px;border-radius:4px;font-weight:bold;font-size:0.85em;text-transform:uppercase;border:1px solid #ccc;} .badge-platinum{background:#eceff1;color:#263238} .badge-gold{background:#fff8e1;color:#f57f17} .badge-silver{background:#f5f5f5;color:#616161} .badge-bronze{background:#efebe9;color:#5d4037} table{width:100%;border-collapse:collapse;margin-top:15px;margin-bottom:40px;font-family:'Georgia',serif;font-size:15px;} th{background:#eceff1;color:#263238;padding:12px;text-align:left;border-bottom:2px solid #000;} td{padding:12px;border-bottom:1px solid #ddd;vertical-align:top;} @media print{ .no-print{display:none !important;} .only-print{display:block !important;} .proof-card{box-shadow:none;border:none;padding:0;margin:0;} body{background:white;padding:0;display:block;} th{background:#f0f0f0 !important;color:#000 !important;} .badge{border:1px solid #000;color:#000;background:transparent !important;} .legal-footer{background:transparent !important; border-top:2px solid #000 !important; color:#000 !important; page-break-inside:avoid !important; padding:10px 0 !important; margin-top:30px !important;} } .only-print{display:none;}"""
JS_CORE = r"""<script type="text/javascript">(function(){ function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/ +/g,' ').trim().toLowerCase();} function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.-]/g,'')),nB=parseFloat(B.replace(/[^0-9.-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);} function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(' (asc)','').replace(' (desc)','');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}} window.filterTable = function() { var input = document.getElementById("tableSearch"); var filter = input.value.toUpperCase(); var table = document.getElementById("reg-table") || document.querySelector("table.dataframe"); var tr = table.getElementsByTagName("tr"); for (var i = 1; i < tr.length; i++) { var tdArr = tr[i].getElementsByTagName("td"); var found = false; for (var j = 0; j < tdArr.length; j++) { if (tdArr[j]) { var txtValue = tdArr[j].textContent || tdArr[j].innerText; if (txtValue.toUpperCase().indexOf(filter) > -1) { found = true; break; } } } tr[i].style.display = found ? 
"" : "none"; } } function init(){ var t=document.getElementsByTagName('table'); for(var i=0;i<t.length;i++) if(t[i].className.indexOf('sortable') !== -1) makeSortable(t[i]); } if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init(); })();</script>"""

# 🌟 4. ACADEMIC PROOF CONSOLIDATOR (WITH TL;DR COVER INTEGRATION) 🌟
CONSOLIDATOR_CSS = r"""<style>.consol-panel { background: #f3e5f5; border: 1px solid #ab47bc; padding: 25px; border-radius: 8px; margin-bottom: 25px; font-family: 'Segoe UI', sans-serif; text-align: center; } .consol-panel select { padding: 8px; font-size: 14px; width: 100%; border: 1px solid #7b1fa2; border-radius: 4px; } .consol-btn { background: #4a148c; color: white; border: none; padding: 12px 25px; font-size: 16px; font-weight: bold; border-radius: 4px; cursor: pointer; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-top: 10px; } .consol-btn:hover { background: #38006b; } .vg-checkbox-container { height:150px; overflow-y:auto; border:1px solid #7b1fa2; background:white; border-radius:4px; padding:10px; font-size:13px; text-align:left; } .vg-checkbox-container label { display:block; margin-bottom:5px; cursor:pointer; } .vg-checkbox-container label:hover { background-color:#f3e5f5; } .academic-brief { background: white; max-width: 1100px; margin: 0 auto 30px auto; padding: 60px 80px; box-shadow: 0 4px 20px rgba(0,0,0,0.1); font-family: 'Georgia', serif; color: #000; line-height: 1.6; } .title-page { page-break-after: always; display: flex; flex-direction: column; justify-content: center; min-height: 70vh; padding: 20px; } .brief-section-title { font-size: 18px; text-transform: uppercase; border-bottom: 1px solid #ccc; padding-bottom: 5px; margin-top: 40px; margin-bottom: 20px; font-weight: bold; } .brief-table { width: 100%; border-collapse: collapse; margin-top: 15px; font-size: 13px; } .brief-table th { background: #f0f0f0; color: #000; border-bottom: 2px solid #000; border-top: 1px solid #000; padding: 10px; text-align: left; } .brief-table td { padding: 10px; border-bottom: 1px solid #ddd; vertical-align: middle; } @media print { .no-print { display: none !important; } body { background: white; padding: 0; } .academic-brief { box-shadow: none; padding: 0; max-width: 100%; border: none; margin-bottom: 0; } }</style>"""
# Control panel for the consolidator page: a checkbox container
# (#groupCheckboxes), an optional custom group name, the button that calls
# runConsolidator('matrix'), and the empty #report-container.
# The button icon is a numeric character reference (page-facing-up emoji) so it
# survives any re-encoding of this file; the literal has no placeholders, so it
# is a plain string rather than an f-string.
CONSOLIDATOR_HTML = """<div class="no-print consol-panel"><h2 style="color:#4a148c; margin-top:0;">Virtual Group & White Paper Builder</h2><div style="display:flex; justify-content:center; gap:20px; flex-wrap:wrap; margin-bottom:15px;"><div style="flex:1; max-width:400px; min-width:250px; text-align:left;"><label style="font-size:12px; font-weight:bold; color:#4a148c;">Select Kits for Virtual Group Analysis</label><div id="groupCheckboxes" class="vg-checkbox-container"></div></div><div style="flex:1; max-width:350px; min-width:250px; text-align:left;"><label style="font-size:12px; font-weight:bold; color:#4a148c;">Custom Group Name (e.g. "VA Yates Protocol")</label><input type="text" id="customGroupName" style="width:100%; box-sizing:border-box; padding:8px; border:1px solid #7b1fa2; border-radius:4px; margin-bottom:10px;" placeholder="Optional"><button class="consol-btn" style="width:100%; box-sizing:border-box;" onclick="runConsolidator('matrix')">&#128196; Generate Academic White Paper</button></div></div></div><div id="report-container"></div>"""
CONSOLIDATOR_JS = r"""<script>__JS_GLOBALS__
// Score rows read by getCSS (looked up by pName) and getMatrixHTML (listed in the matrix).
// NOTE(review): the identifier tested on the next line is presumably a placeholder that the
// page generator swaps for a JSON array - confirm. If it is left undefined, an empty list is used.
const PRECOMPUTED = typeof __PRECOMPUTED_JSON__ !== 'undefined' ? __PRECOMPUTED_JSON__ : [];
// Keep only the ASCII digits of `str`, in order (e.g. "I1234" -> "1234").
function cleanNum(str) {
    const kept = [];
    for (let idx = 0; idx < str.length; idx += 1) {
        const ch = str[idx];
        if (ch >= '0' && ch <= '9') kept.push(ch);
    }
    return kept.join("");
}

// Render a lineage string ("A->B->C") as an indented cascade of <div> rows.
// The first entry is bold; every later entry is indented 15px further and
// prefixed with an up-arrow. Returns "" for an empty/missing lineage.
window.makeCascade = function(lin) {
    if (!lin) return "";
    const rows = String(lin).split('->').map((name, depth) => {
        const indent = depth * 15;
        const arrow = depth === 0 ? '' : '&uarr; ';
        const emphasis = depth === 0 ? 'font-weight:bold; color:#000;' : 'color:#444;';
        return `<div style="margin-left:${indent}px; ${emphasis}">${arrow}${name.trim()}</div>`;
    });
    const opening = `<div style="text-align:left; font-size:13px; line-height:1.6; font-family:'Georgia',serif; margin:8px 0;">`;
    return opening + rows.join('') + '</div>';
};

// Fill the kit checkbox list once the DOM is ready.
// Every distinct, non-blank participant found in DB gets one checkbox, ordered
// case-insensitively by the participant's sort_key (falling back to the
// participant key itself). Does nothing when the container is absent.
document.addEventListener('DOMContentLoaded', function() {
    const groupDiv = document.getElementById('groupCheckboxes');
    if (!groupDiv) return;

    const sortKeyOf = (t) => {
        const rec = DATA.participants[t];
        return String(rec ? (rec.sort_key || t) : t).toLowerCase();
    };
    const validTesters = DB.filter(r => r.participant && r.participant.trim() !== "");
    const uniqueTesters = [...new Set(validTesters.map(r => r.participant))]
        .sort((a, b) => sortKeyOf(a).localeCompare(sortKeyOf(b)));

    uniqueTesters.forEach(t => {
        const rec = DATA.participants[t];
        const kcode = rec ? rec.kit_code : '';
        const displayStr = kcode ? `${t} [${kcode}]` : t;

        // Build the nodes with DOM APIs instead of an HTML string: the checkbox
        // value must round-trip the participant key exactly (it is compared with
        // DB rows later), which string-built markup breaks for quotes or "&".
        const box = document.createElement('input');
        box.type = 'checkbox';
        box.value = t;
        box.className = 'vg-checkbox';

        const lbl = document.createElement('label');
        lbl.appendChild(box);
        lbl.appendChild(document.createTextNode(' ' + displayStr));
        groupDiv.appendChild(lbl);
    });
});
// One-line status banner: the current date/time (en-US formatting) and the
// number of rows in DB.
const getStudyStats = () => {
    const now = new Date();
    const datePart = now.toLocaleDateString('en-US', { month: 'long', day: 'numeric', year: 'numeric' });
    const timePart = now.toLocaleTimeString('en-US', { hour: 'numeric', minute: '2-digit', timeZoneName: 'short' });
    const matchTotal = DB.length.toLocaleString();
    return `Study Data Current As Of: ${datePart} ${timePart} | Total Autosomal matches: ${matchTotal}`;
};

// ✨ NEW: The Transmittal Cover Injector
// Build the plain-language cover letter that precedes the white paper.
//   vgCSS       : score object from getCSS (or null/undefined -> no cover, returns "")
//   testerArray : participant keys that were selected
// For a single kit the named ancestor and the lineage cascade come from that
// kit's highest-cM match; for a group the cover names the group's target node.
const getTransmittalCover = (vgCSS, testerArray) => {
    if (!vgCSS) return "";
    let tName = vgCSS.isGroup ? vgCSS.pName : (DATA.participants[testerArray[0]] ? DATA.participants[testerArray[0]].name : testerArray[0]);
    let mda = "Pending Verification";
    let anchor = vgCSS.ANCHOR ? vgCSS.ANCHOR.toFixed(1) : "UNSCORED";
    let linHtml = "<p style='font-style:italic; color:#666;'>Direct pedigree path rendered for individual profiles only.</p>";

    if (!vgCSS.isGroup && testerArray.length === 1) {
        // Rank by the full cM value (fractions included); unparseable values sort last.
        const cmOf = (m) => { const n = parseFloat(m.cm); return isNaN(n) ? 0 : n; };
        // Same ancestor filter as getCSS, so a placeholder row ('No Matches' or a
        // blank ancestor) can never be presented as the confirmed ancestor.
        let matches = DB
            .filter(m => m.participant === testerArray[0] && m.ancestor && m.ancestor !== 'No Matches')
            .sort((x, y) => cmOf(y) - cmOf(x));
        if (matches.length > 0) {
            mda = matches[0].ancestor;
            linHtml = window.makeCascade(matches[0].lineage);
        }
    } else if (vgCSS.isGroup) {
        mda = vgCSS.pName + " Target Node";
    }

    return `<div class="academic-brief transmittal-cover" style="page-break-after: always; max-width: 800px; margin: 40px auto; padding: 40px; border: 2px solid #00838f; border-radius: 8px; background-color: #f9f9f9; box-shadow: 0 4px 15px rgba(0,0,0,0.1);">
        <h1 style="color: #2c3e50; text-align: center; border-bottom: 2px solid #00838f; padding-bottom: 10px; font-family: sans-serif;">Yates DNA Study Results: The Bottom Line</h1>
        <p style="font-size: 1.1em; line-height: 1.6; font-family: sans-serif;">Dear <strong>${tName}</strong>,</p>
        <p style="font-size: 1.1em; line-height: 1.6; font-family: sans-serif;">Thank you for participating in the study. We know the genetic and genealogical science in the attached report can be incredibly dense, so we have summarized exactly what your results mean below.</p>
        <div style="background-color: #ffffff; padding: 20px; border-left: 5px solid #00838f; margin: 20px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <h2 style="margin-top: 0; color: #d32f2f; font-family: sans-serif;">Your Confirmed Lineage</h2>
            <p style="font-size: 1.2em; font-family: sans-serif;">Based on the combined DNA evidence and historical records, here is your proven most distant DNA Yates ancestor:</p>
            <p style="font-size: 1.5em; font-weight: bold; color: #2c3e50; text-align: center; margin: 20px 0; font-family: sans-serif;">${mda}</p>
        </div>
        <div style="background-color: #ffffff; padding: 20px; border-left: 5px solid #00838f; margin: 20px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <h2 style="margin-top: 0; color: #d32f2f; font-family: sans-serif;">Your Evidence Score</h2>
            <p style="font-size: 1.2em; font-family: sans-serif;">Your genetic connection to this ancestor has achieved an <strong>ANCHOR Score of ${anchor}</strong>.</p>
            <p style="font-size: 1em; color: #555; font-family: sans-serif;"><em>(This score represents the validated strength of your specific DNA proof linking you to the ancestor above.)</em></p>
        </div>
        <h2 style="color: #2c3e50; margin-top: 30px; font-family: sans-serif;">Your Direct Line of Descent</h2>
        <div style="margin: 20px 0; padding: 15px; background: #fff; border: 1px dashed #ccc;">${linHtml}</div>
        <p style="font-size: 1.1em; line-height: 1.6; margin-top: 30px; font-family: sans-serif;">For the comprehensive breakdown of the DNA matches, source citations, and the rigorous methodology used to prove this connection, please review the complete Academic White Paper that follows.</p>
        <p style="font-size: 1.1em; line-height: 1.6; margin-top: 40px; font-family: sans-serif;">Best regards,<br><br><strong>Ron Yates</strong><br><em>Director, Yates One-Name Study</em></p>
    </div>`;
};

// Build the white-paper title page.
//   testerArray : selected participant keys (may be empty)
//   customName  : optional label that overrides the default target name
// The "Analysis Target" line shows the custom name if given, otherwise a
// virtual-group label (2+ kits), the single kit key, or a study-wide label
// when nothing is selected (previously this rendered the text "undefined").
const getTitlePage = (testerArray, customName) => {
    const year = new Date().getFullYear();
    let titleName = customName;
    if (!titleName) {
        if (testerArray.length > 1) titleName = `Virtual Group (${testerArray.length} Kits)`;
        else if (testerArray.length === 1) titleName = testerArray[0];
        else titleName = 'All Study Participants';
    }
    return `<div class="academic-brief title-page"><div style="font-family: Arial, sans-serif; text-align:center; line-height:1.6;"><h1 style="font-size:36px; border-bottom:none; margin-bottom:5px;">Collateral Saturation</h1><h2 style="font-size:20px; font-weight:normal; color:#444; margin-top:0;">A Quantitative Method for Autosomal Lineage Reconstruction</h2><br><br><br><p style="font-size:18px;"><b>Ronald Eugene Yates, MPH</b><br>University of California, Los Angeles<br>1975</p><br><br><br><p style="font-size:16px;">Yates DNA Study<br>Autosomal Lineage Reconstruction Project</p><br><br><br><h3 style="color:#4a148c;">Analysis Target: ${titleName}</h3><br><br><p style="font-size:16px;">${year}</p><p style="font-size:14px; color:#004d40; margin-top:20px; font-weight:bold;">${getStudyStats()}</p><br><br><br><p style="font-size:14px; color:#555;">&copy; ${year} Ronald Eugene Yates<br>All Rights Reserved.</p></div></div>`;
};
// Static "Methodological Principles" page: heading, one explanatory paragraph
// and the three numbered thresholds (PM, BR, DR). Takes no input.
const getMethodologyPage = () => {
    const heading = `<h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Methodological Principles of Collateral Saturation</h2>`;
    const summary = `<p style="font-size:15px; line-height:1.6; color:#333; margin-top:20px;"><b>Collateral Saturation</b> is a lineage-validation method in which autosomal DNA evidence is evaluated at the level of descendant networks rather than isolated matches. A lineage hypothesis is considered reliable when it is supported by sufficient descendant density, replicated across independent branches, and remains stable under perturbation tests.</p>`;
    const principles = [
        `<li><b>Minimum Descendant Count (PM &ge; 15):</b> The network must have sufficient representation to filter noise.</li>`,
        `<li><b>Branch Independence (BR &ge; 3):</b> Triangulation must occur across distinct, non-overlapping descendent lines.</li>`,
        `<li><b>Dominance Ratio (DR &ge; 1.5):</b> The primary genetic signal must clearly overpower secondary pedigree collapse signals.</li>`
    ];
    return `<div class="academic-brief" style="page-break-before: always;">`
        + heading
        + summary
        + `<ol style="line-height:1.8;">` + principles.join('') + `</ol>`
        + `</div>`;
};
// "Virtual Group Protocol" page listing the merged kits (with kit code when one
// is recorded in DATA.participants). Returns "" unless 2+ kits are selected.
const getVirtualGroupPage = (testerArray) => {
    if (testerArray.length <= 1) return "";
    let kitsHtml = "";
    for (const kit of testerArray) {
        const entry = DATA.participants[kit];
        const kcode = entry ? entry.kit_code : '';
        const suffix = kcode ? `[${kcode}]` : '';
        kitsHtml += `<li>${kit} ${suffix}</li>`;
    }
    const kitTotal = testerArray.length;
    return `<div class="academic-brief" style="page-break-before: always;"><h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Virtual Group Protocol</h2><p style="font-size:15px; line-height:1.6;">In cases where isolated DNA kits lack sufficient power to achieve Collateral Saturation independently, multiple verifiable descendants of a specific ancestor can be logically joined into a <b>Virtual Group</b>. This protocol aggregates their match networks, treating them as a single proof-grade evaluation unit to reconstruct the older lineage.</p><h3 style="margin-top:20px; font-size:16px;">Kits Formally Merged for this Analysis (${kitTotal}):</h3><ul style="line-height:1.6; color:#111;">${kitsHtml}</ul></div>`;
};
// Extract the first run of four digits from a date value as an integer year;
// null when the value is empty or contains no such run.
function yearFromDateStr(s) {
    if (!s) return null;
    const found = /(\d{4})/.exec(String(s));
    if (found === null) return null;
    return parseInt(found[1], 10);
}
// Grade how precise a date string is (checked in this order):
//   1.0  contains YYYY-MM-DD
//   0.8  contains a three-letter month word and four digits
//   0.45 stand-alone four-digit year plus ABT/AFT/BEF/BET/CAL
//   0.6  stand-alone four-digit year
//   0.3  anything else non-empty;  0 for an empty/missing value
function dateQualityScore(s) {
    if (!s) return 0;
    const text = String(s).toUpperCase().trim();
    if (/\d{4}-\d{2}-\d{2}/.test(text)) return 1.0;
    const namesMonth = /\b(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\b/.test(text);
    if (namesMonth && /\d{4}/.test(text)) return 0.8;
    if (!/\b\d{4}\b/.test(text)) return 0.3;
    const qualifiers = ["ABT", "AFT", "BEF", "BET", "CAL"];
    return qualifiers.some(q => text.includes(q)) ? 0.45 : 0.6;
}
// Grade a place string by how many non-empty comma-separated levels it has:
// 3 or more -> 1.0, two -> 0.7, one -> 0.4, none (or empty input) -> 0.
function placeQualityScore(p) {
    if (!p) return 0;
    const levels = String(p)
        .split(",")
        .map(x => x.trim())
        .filter(Boolean)
        .length;
    if (levels >= 3) return 1.0;
    const byLevelCount = [0, 0.4, 0.7];
    return byLevelCount[levels];
}
// Lower-case a place string and split it on commas/spaces into a Set of
// non-empty tokens; an empty/missing value yields an empty Set.
function tokenSet(placeStr) {
    const tokens = new Set();
    if (!placeStr) return tokens;
    for (const piece of String(placeStr).toLowerCase().split(/[, ]+/)) {
        const word = piece.trim();
        if (word) tokens.add(word);
    }
    return tokens;
}
// True when the two token Sets have at least one member in common; false if
// either Set is empty.
function shareAnyToken(a, b) {
    if (!a.size || !b.size) return false;
    return Array.from(a).some(tok => b.has(tok));
}
// Log-scale `val` against `cap` into the range 0..1:
// log(1 + val) / log(1 + cap), with negative/missing values treated as 0 and
// the result capped at 1.
function normLog(val, cap) {
    const clamped = Math.max(0, val || 0);
    const ratio = Math.log(1 + clamped) / Math.log(1 + cap);
    return Math.min(1, ratio);
}
// Weighted average of [{ v: value, w: weight }, ...]; 0 when the total weight
// is not positive (including an empty list).
function weightedMean(pairs) {
    let weightedTotal = 0;
    let weightTotal = 0;
    for (const pair of pairs) {
        weightedTotal += (pair.v * pair.w);
        weightTotal += pair.w;
    }
    if (weightTotal > 0) return weightedTotal / weightTotal;
    return 0;
}
// Score the documentary quality of one pedigree "spine" against a target node.
//   spineIDs : ordered list of person IDs; each is looked up in DATA.persons
//   targetID : ID that the spine is expected to contain
// Returns an object with the composite DOCS score and its component metrics.
// Note: the empty-spine early return carries only DOCS, AX, CC and TP.
function getDOCS(spineIDs, targetID) {
  const total = spineIDs.length; if (total === 0) return { DOCS: 0, AX: 0, CC: 0, TP: 0 };
  // AX: 1 when targetID is present in the spine. GD: spine length, log-normalised against a cap of 14.
  const AX = spineIDs.includes(targetID) ? 1 : 0; const GD = total; const GD_n = normLog(GD, 14);
  // Per-person score arrays plus running counters for citations, temporal checks (tp*) and place checks (gc*).
  let bcArr = [], dcArr = [], dqArr = [], pqArr = [], citedCount = 0, totalSources = 0, totalCites = 0, tpChecks = 0, tpViol = 0, gcChecks = 0, gcGood = 0;
  // idCollision: the spine lists the same ID more than once. prev* carry state from the previous spine entry.
  let idCollision = (new Set(spineIDs).size !== spineIDs.length); let prevTokens = null, prevBirthYear = null;
  for (let i = 0; i < spineIDs.length; i++) {
    // A person missing from DATA.persons contributes null facts and zero counts.
    const id = spineIDs[i]; const person = DATA.persons && DATA.persons[id] ? DATA.persons[id] : null;
    const bdate = person ? person.bdate : null; const bplace = person ? person.bplace : null; const ddate = person ? person.ddate : null; const dplace = person ? person.dplace : null;
    // Boolean addition: how many of (date, place) are present, 0..2.
    const bHas = (!!bdate) + (!!bplace); const dHas = (!!ddate) + (!!dplace);
    // Completeness: 1.0 for both facts, 0.6 for one, 0 for none.
    bcArr.push(bHas === 2 ? 1.0 : (bHas === 1 ? 0.6 : 0)); dcArr.push(dHas === 2 ? 1.0 : (dHas === 1 ? 0.6 : 0));
    // Quality: the better of the birth and death values for this person.
    dqArr.push(Math.max(dateQualityScore(bdate), dateQualityScore(ddate))); pqArr.push(Math.max(placeQualityScore(bplace), placeQualityScore(dplace)));
    const sc = person ? (person.sources_count || 0) : 0; const cc = person ? (person.citations_count || 0) : 0;
    totalSources += sc; totalCites += cc; if ((sc + cc) > 0) citedCount++;
    const by = yearFromDateStr(bdate); const dy = yearFromDateStr(ddate);
    // Lifespan check: a violation when death year is not after birth year, or the span exceeds 110 years.
    if (by && dy) { tpChecks++; if (dy <= by) tpViol++; if ((dy - by) > 110) tpViol++; }
    // Spacing check against the most recent earlier entry that had a birth year: gap must be 12..60 years.
    if (prevBirthYear && by) { tpChecks++; const gap = Math.abs(prevBirthYear - by); if (gap < 12 || gap > 60) tpViol++; }
    if (by) prevBirthYear = by;
    // Place check: does this entry share any place token with the previous entry? (birth place preferred, else death place)
    const toks = tokenSet(bplace || dplace || ""); if (prevTokens) { gcChecks++; if (shareAnyToken(prevTokens, toks)) gcGood++; } prevTokens = toks;
  }
  // Means over the whole spine.
  const BC = bcArr.reduce((a,b)=>a+b,0) / total; const DC = dcArr.reduce((a,b)=>a+b,0) / total; const DQ = dqArr.reduce((a,b)=>a+b,0) / total; const PQ = pqArr.reduce((a,b)=>a+b,0) / total;
  // CC: share of persons with any source/citation. SD: total source+citation count, log-normalised against 50.
  const CC = citedCount / total; const SD = normLog(totalSources + totalCites, 50);
  // With no checks performed, TP defaults to 0.75 and GC to 0.6.
  const TP = tpChecks > 0 ? Math.max(0, 1 - (tpViol / tpChecks)) : 0.75; const GC = gcChecks > 0 ? (gcGood / gcChecks) : 0.6;
  // NC: share of persons that have at least one vital fact, place, source or citation.
  let covered = 0; spineIDs.forEach(id => { const p = DATA.persons && DATA.persons[id] ? DATA.persons[id] : null; if (p && (p.bdate || p.bplace || p.ddate || p.dplace || (p.sources_count||0) > 0 || (p.citations_count||0) > 0)) covered++; });
  // IDm: multiplier of 0.85 when the spine has duplicate IDs, otherwise 1.0.
  const NC = total > 0 ? (covered / total) : 0; const IDm = idCollision ? 0.85 : 1.0;
  // Weighted mean of the components (AX and CC weigh 2.0; SD and TP 1.5; the rest 1.0), scaled to 0..100.
  const docsBase = 100 * weightedMean([{ v: AX, w: 2.0 }, { v: GD_n, w: 1.0 }, { v: NC, w: 1.0 }, { v: BC, w: 1.0 }, { v: DC, w: 1.0 }, { v: DQ, w: 1.0 }, { v: PQ, w: 1.0 }, { v: GC, w: 1.0 }, { v: CC, w: 2.0 }, { v: SD, w: 1.5 }, { v: TP, w: 1.5 }]) * IDm;
  // AX gate: a spine that does not contain the target is capped at 35.
  const DOCS = AX === 1 ? docsBase : Math.min(docsBase, 35);
  return { DOCS, AX, GD, NC, BC, DC, DQ, PQ, GC, CC, SD, TP, IDm };
}
// Combine the DNA score (CSSv2a) and documentary score (DOCS) into ANCHOR:
// a 65/35 blend plus a synergy bonus of up to 10 points driven by the weaker
// of the two scores, capped at 100.
function getANCHOR(CSSv2a, DOCS) {
    const weaker = Math.min(CSSv2a, DOCS);
    const synergy = 10 * (weaker / 100);
    const blend = (0.65 * CSSv2a) + (0.35 * DOCS);
    const combined = blend + synergy;
    return Math.min(100, combined);
}
// Compute the DNA-network score (CSS), documentary score (DOCS) and combined
// ANCHOR score for one kit or for several kits merged into a virtual group.
//   testerArray : participant keys to evaluate
//   customName  : optional label for a group row; when given for a single kit
//                 it bypasses the PRECOMPUTED lookup
// Returns the PRECOMPUTED row as-is for a single unnamed kit that has one,
// null when the kits have no match rows with a real ancestor, otherwise a
// freshly computed score object.
function getCSS(testerArray, customName = null) {
    if (testerArray.length === 1 && !customName) {
        const exist = PRECOMPUTED.find(x => x.pName === testerArray[0]);
        if (exist) return exist;
    }
    const isGroup = testerArray.length > 1;
    const pName = isGroup ? (customName || `VIRTUAL GROUP (${testerArray.length} Kits)`) : testerArray[0];
    const myMatches = DB.filter(m => testerArray.includes(m.participant) && m.ancestor !== 'No Matches' && m.ancestor);
    const PM = myMatches.length;
    if (PM === 0) return null;

    // A row's comma-separated search_ids as digit-only ID strings.
    const idsOf = (row) => row.search_ids.split(',').map(x => cleanNum(x));

    // HC_T / HC_2: match counts of the most and second-most frequent ancestor.
    const dirs = {};
    myMatches.forEach(m => { dirs[m.ancestor] = (dirs[m.ancestor] || 0) + 1; });
    const sortedDirs = Object.entries(dirs).sort((a, b) => b[1] - a[1]);
    const HC_T = sortedDirs.length > 0 ? sortedDirs[0][1] : PM;
    const HC_2 = sortedDirs.length > 1 ? sortedDirs[1][1] : 0;

    // Candidate node IDs: every ID that appears on a path of the selected kits.
    const idCounts = {};
    myMatches.forEach(m => {
        if (!m.search_ids) return;
        idsOf(m).forEach(id => { if (id) idCounts[id] = (idCounts[id] || 0) + 1; });
    });

    // Index DB once (candidate ID -> rows whose path contains it, in DB order)
    // instead of re-filtering and re-splitting the whole DB for every candidate.
    const rowsByNode = new Map();
    DB.forEach(row => {
        if (!row.search_ids) return;
        new Set(idsOf(row)).forEach(id => {
            if (!idCounts[id]) return;
            if (!rowsByNode.has(id)) rowsByNode.set(id, []);
            rowsByNode.get(id).push(row);
        });
    });

    // Target node: the candidate reached by the most distinct kits study-wide
    // (the first candidate in object-key order wins a tie).
    let highestHeat = 0;
    let targetID = null;
    for (const id of Object.keys(idCounts)) {
        const rows = rowsByNode.get(id) || [];
        const nodeUniqueKits = new Set(rows.map(m => m.participant)).size;
        if (nodeUniqueKits > highestHeat) { highestHeat = nodeUniqueKits; targetID = id; }
    }
    const NS = highestHeat;

    let TB = 0;
    let BR = 0;
    let spineIDs = [];
    if (targetID) {
        const collaterals = rowsByNode.get(targetID) || [];
        TB = new Set(collaterals.map(m => m.participant)).size;
        // A branch is the name that follows the target node on each path.
        const branches = new Set();
        collaterals.forEach(r => {
            const names = r.search_names ? r.search_names.split('|') : [];
            const idx = idsOf(r).indexOf(targetID);
            if (idx !== -1 && idx + 1 < names.length) {
                branches.add(names[idx + 1].replace(/findme/gi, '?').split(' (')[0].trim());
            } else {
                branches.add("Direct Descendant");
            }
        });
        BR = branches.size;
        // Pick the spine by exact ID membership. A raw substring test on
        // search_ids would let target "12" match a path containing "I123".
        const bestMatch = myMatches.find(m => m.search_ids && idsOf(m).includes(targetID));
        if (bestMatch) spineIDs = idsOf(bestMatch).filter(Boolean);
    }

    const DR = HC_T / (HC_2 > 0 ? HC_2 : 1);
    const norm = (val, cap) => Math.min(1, Math.log(1 + val) / Math.log(1 + cap));
    const PM_n = norm(PM, 150);
    const HC_n = norm(HC_T, 100);
    const DR_n = norm(DR, 10);
    const TB_n = norm(TB, 40);
    const NS_n = norm(NS, 150);
    let BR_n = 0;
    if (BR >= 6) BR_n = 1.0; else if (BR === 5) BR_n = 0.85; else if (BR === 4) BR_n = 0.70; else if (BR === 3) BR_n = 0.50; else if (BR === 2) BR_n = 0.25;

    // Saturation gate: PASS keeps the full score, PARTIAL scales it by 0.85, FAIL by 0.60.
    let ST_str = "FAIL";
    let ST_val = 0.60;
    if (PM >= 15) {
        if (BR >= 3 && DR >= 1.5) { ST_str = "PASS"; ST_val = 1.0; }
        else if (BR >= 2) { ST_str = "PARTIAL"; ST_val = 0.85; }
    }
    const weightedSum = (PM_n * 1.0) + (HC_n * 1.0) + (DR_n * 1.5) + (TB_n * 1.0) + (BR_n * 2.0) + (NS_n * 1.0);
    const cssBase = 100 * (weightedSum / 7.5);
    const cssFinal = cssBase * ST_val;
    const docsRes = getDOCS(spineIDs, targetID);
    const ANCHOR = getANCHOR(cssFinal, docsRes.DOCS);
    return { pName, PM, HC_T, HC_2, DR, TB, BR, NS, ST_str, cssFinal, isGroup, DOCS: docsRes.DOCS, ANCHOR, AX: docsRes.AX, CC: docsRes.CC, TP: docsRes.TP };
}
// Render the "Master ANCHOR Evaluation Matrix": every PRECOMPUTED row plus,
// when vgCSS is a group result, that group row (highlighted and starred),
// sorted by ANCHOR descending.
//   vgCSS : optional score object from getCSS; only added when vgCSS.isGroup
// The star marker is written as a character reference; it had been stored as
// mis-decoded bytes and rendered as garbage.
function getMatrixHTML(vgCSS = null) {
    const matrixRows = PRECOMPUTED.slice();
    if (vgCSS && vgCSS.isGroup) matrixRows.push(vgCSS);
    matrixRows.sort((a, b) => b.ANCHOR - a.ANCHOR);

    const headHTML = `<table class="brief-table sortable" style="text-align:center; font-family:sans-serif;"><thead><tr><th style="text-align:left; cursor:pointer; width:18%;">Participant Kit</th><th title="Proper Matches" style="cursor:pointer;">PM</th><th title="Target Handshakes" style="cursor:pointer;">HC-T</th><th title="Dominance Ratio" style="cursor:pointer;">DR</th><th title="Independent Branches" style="cursor:pointer;">BR</th><th title="Apex Reach" style="cursor:pointer;">AX</th><th title="Citation Coverage" style="cursor:pointer;">CC</th><th title="Temporal Plausibility" style="cursor:pointer;">TP</th><th style="background:#e0f2f1; color:#004d40; cursor:pointer;" title="Documentary Score">DOCS</th><th style="background:#fff8e1; color:#f57f17; cursor:pointer; font-weight:bold;" title="Combined Synergy Score">ANCHOR</th></tr></thead><tbody>`;

    const bodyHTML = matrixRows.map(r => {
        const brStr = r.BR >= 6 ? "&ge;6" : r.BR;
        const rowStyle = r.isGroup ? 'background:#fbe9e7; border:2px solid #ffab91;' : '';
        const nameFmt = r.isGroup ? `<span style="color:#d84315; font-weight:bold;">&#9733; ${r.pName}</span>` : `<strong>${r.pName}</strong>`;
        return `<tr style="${rowStyle}"><td style="text-align:left;" data-sort="${r.pName}">${nameFmt}</td><td>${r.PM}</td><td>${r.HC_T}</td><td>${r.DR.toFixed(1)}</td><td data-sort="${r.BR}">${brStr}</td><td>${r.AX}</td><td>${r.CC.toFixed(2)}</td><td>${r.TP.toFixed(2)}</td><td style="background:#e0f2f1; color:#004d40; font-weight:bold;" data-sort="${r.DOCS}">${r.DOCS.toFixed(1)}</td><td style="background:#fff8e1; font-weight:bold; color:#f57f17; font-size:1.1em;" data-sort="${r.ANCHOR}">${r.ANCHOR.toFixed(2)}</td></tr>`;
    }).join('');

    const tableHTML = headHTML + bodyHTML + `</tbody></table>`;
    return `<div class="academic-brief" style="page-break-before: always; max-width: 1200px;"><div class="brief-section-title" style="margin-top:20px;">Master ANCHOR Evaluation Matrix</div>${tableHTML}</div>`;
}
// Static "Appendix A" page: field-by-field definitions of the documentary
// metrics (A5), the DOCS composite (A6) and the ANCHOR composite (A7).
// Takes no input. Typographic characters (apostrophe, en dashes, curly quotes)
// are written as character references; they had been stored as mis-decoded
// bytes and rendered as garbage. The pieces are joined without separators, so
// the output is a single line of markup.
const getAppendixA = () => {
    const fieldRows = [
        `<tr><td><b>Apex Reach</b></td><td>AX</td><td>Whether the participant&rsquo;s documented spine reaches the target ancestral node (prevents pedigree-depth bias).</td><td>AX = 1 if targetID is present in participant t_ids; else 0 (gate/penalty).</td><td><b>Must be 1</b> for proof-grade documentary scoring.</td></tr>`,
        `<tr><td><b>GEDCOM Depth</b></td><td>GD</td><td>Generations/person-nodes in the scored spine segment (participant &rarr; target node).</td><td>GD = count(spineIDs) (log-capped normalization).</td><td>8&ndash;14 typical; higher is better if sourced.</td></tr>`,
        `<tr><td><b>Node Coverage</b></td><td>NC</td><td>Fraction of spine persons meeting minimum documentation presence.</td><td>NC = (# persons with any vital/place/source) / (# persons scored).</td><td>&ge;0.70 good; &ge;0.85 strong.</td></tr>`,
        `<tr><td><b>Birth Completeness</b></td><td>BC</td><td>Completeness of birth facts across the spine.</td><td>Per person: 1.0 if birth date+place; 0.6 if one present; 0 if none; BC=mean.</td><td>&ge;0.60 good; &ge;0.80 strong.</td></tr>`,
        `<tr><td><b>Death Completeness</b></td><td>DC</td><td>Completeness of death facts across the spine.</td><td>Per person: 1.0 if death date+place; 0.6 if one present; 0 if none; DC=mean.</td><td>&ge;0.50 good; &ge;0.75 strong.</td></tr>`,
        `<tr><td><b>Date Quality</b></td><td>DQ</td><td>Precision/quality of dates (prefers exactness; penalizes ambiguity).</td><td>Full date (YYYY-MM-DD) &gt; month/year &gt; year-only &gt; qualified (ABT/BEF/AFT) &gt; missing; DQ=mean.</td><td>&ge;0.60 good; &ge;0.80 strong.</td></tr>`,
        `<tr><td><b>Place Quality</b></td><td>PQ</td><td>Granularity/quality of place strings (more locality levels = stronger).</td><td>&ge;3 comma-levels=1.0; 2=0.7; 1=0.4; none=0; PQ=mean.</td><td>&ge;0.60 good; &ge;0.80 strong.</td></tr>`,
        `<tr><td><b>Geo Consistency</b></td><td>GC</td><td>Stability/coherence of locations across adjacent generations (flags implausible &ldquo;jumps&rdquo;).</td><td>GC = fraction of adjacent gen-pairs sharing at least one place token (county/state/country heuristic).</td><td>&ge;0.60 good; &ge;0.75 strong.</td></tr>`,
        `<tr><td><b>Citation Coverage</b></td><td>CC</td><td>Fraction of spine persons with &ge;1 citation/source (core proof requirement).</td><td>CC = (# persons with sources/citations) / (# persons scored).</td><td>&ge;0.50 good; &ge;0.70 strong.</td></tr>`,
        `<tr><td><b>Source Density</b></td><td>SD</td><td>Overall density of sources/citations across the scored spine.</td><td>SD = log_norm(total_sources + total_citations, cap&approx;50).</td><td>&ge;0.40 good; &ge;0.65 strong.</td></tr>`,
        `<tr><td><b>Temporal Plausibility</b></td><td>TP</td><td>Sanity checks for time realism (birth/death order, lifespan bounds, generation spacing).</td><td>TP = 1 &minus; (violations / checks), clipped 0&ndash;1.</td><td>&ge;0.85 good; &ge;0.95 strong.</td></tr>`,
        `<tr><td><b>Identity Collision Multiplier</b></td><td>IDm</td><td>Penalty when spine contains duplicate IDs or internal contradictions (signals GEDCOM hygiene issues).</td><td>IDm = 1.0 (none); 0.85 (minor); 0.70 (severe).</td><td>Desired 1.0.</td></tr>`
    ];
    return [
        `<div class="academic-brief" style="max-width: 1200px; margin-top: 40px; page-break-before: always; text-align: left;">`,
        `<h2 style="color: #4a148c; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top:0; font-size:22px; text-transform:uppercase;">Appendix A (Addendum): ANCHOR Documentary Evidence Matrix (GEDCOM-Derived)</h2>`,
        `<p style="font-size:14px;"><b>Purpose:</b> These fields quantify documentary pedigree robustness using information commonly available in a standard GEDCOM (vital events, places, sources/citations, and plausibility checks). These measures are combined into <b>DOCS</b> (Documentary Score), which is then blended with <b>CSS v2a</b> to produce the combined <b>ANCHOR</b> score.</p>`,
        `<h3 style="color: #4a148c; margin-top: 30px;">A5. ANCHOR Documentary Matrix Fields</h3>`,
        `<table class="brief-table" style="font-size: 13px;"><thead><tr><th>Field</th><th>Abbrev</th><th>Definition</th><th>Computation</th><th>Desired Range</th></tr></thead><tbody>`,
        fieldRows.join(''),
        `</tbody></table>`,
        `<h3 style="color: #4a148c; margin-top: 30px;">A6. Composite Documentary Score</h3>`,
        `<p style="font-size:14px;"><b>DOCS</b> is a 0&ndash;100 composite that summarizes documentary strength:</p>`,
        `<table class="brief-table" style="font-size: 13px;"><thead><tr><th>Composite</th><th>Abbrev</th><th>Definition</th><th>Computation</th><th>Interpretation</th></tr></thead><tbody>`,
        `<tr><td><b>Documentary Score</b></td><td>DOCS</td><td>Normalized, weighted documentary robustness of the scored spine (GEDCOM-derived).</td><td>DOCS = 100 &times; weighted_mean(AX, GD, NC, BC, DC, DQ, PQ, GC, CC, SD, TP) &times; IDm<br><span style="color:#555;">Recommended weights: CC=2.0; SD=1.5; TP=1.5; AX=2.0; others=1.0 (v1).</span><br><span style="color:#b71c1c;"><b>AX Gate:</b> if AX=0, DOCS is capped (e.g., &le;35) to prevent strong scores from incomplete pedigrees.</span></td><td>85&ndash;100 Platinum; 70&ndash;85 Strong; 50&ndash;70 Moderate; 35&ndash;50 Weak; &lt;35 Insufficient.</td></tr>`,
        `</tbody></table>`,
        `<h3 style="color: #4a148c; margin-top: 30px;">A7. ANCHOR Combined Score (DNA + Documentary)</h3>`,
        `<table class="brief-table" style="font-size: 13px;"><thead><tr><th>Composite</th><th>Abbrev</th><th>Definition</th><th>Computation</th><th>Interpretation</th></tr></thead><tbody>`,
        `<tr><td><b>ANCHOR Score</b></td><td>ANCH</td><td>Combined proof-strength score blending CSS v2a (DNA network evidence) with DOCS (GEDCOM documentary evidence).</td><td><b>v1 (simple):</b> ANCH = 0.65 &times; CSSv2a + 0.35 &times; DOCS<br><b>v1b (with synergy):</b> ANCH = min(100, blend + 10 &times; (min(CSSv2a, DOCS)/100))<br><span style="color:#555;">Synergy rewards agreement (high DNA + high documentary support).</span></td><td>Higher scores indicate both a saturated genetic network and a robust documented spine. ANCH is intended for cross-participant ranking and publication-grade reporting.</td></tr>`,
        `</tbody></table>`,
        `<p style="font-size:12px; color:#666; margin-top:18px;"><b>Implementation note:</b> DOCS uses only fields commonly found in GEDCOM (birth/death facts, places, sources/citations). Where GEDCOM detail is sparse, DOCS will appropriately remain low, preventing documentary overconfidence.</p>`,
        `</div>`
    ].join('');
};

// ✨ NEW: Integration of getTransmittalCover into the Report HTML Stack
// Entry point for the "Generate Academic White Paper" button.
//   mode : only 'matrix' is handled; any other value is ignored.
// Reads the checked kits and the optional group name, scores the selection,
// then renders cover + title + methodology + group page + matrix + appendix
// into #report-container and calls window.init (if defined) shortly after.
window.runConsolidator = function(mode) {
    if (mode !== 'matrix') return;

    // The group name is typed by the user and ends up inside HTML text and a
    // double-quoted attribute, so neutralise markup characters once, here.
    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const boxes = document.querySelectorAll('.vg-checkbox:checked');
    const selectedKits = Array.from(boxes).map(b => b.value);

    // Tolerate a page without the name input instead of throwing.
    const nameInput = document.getElementById('customGroupName');
    const typedName = nameInput ? nameInput.value.trim() : "";
    const customName = typedName === "" ? null : escapeHtml(typedName);

    let vgCSS = null;
    if (selectedKits.length > 0) { vgCSS = getCSS(selectedKits, customName); }
    document.title = "Academic_White_Paper";

    const reportHTML = getTransmittalCover(vgCSS, selectedKits) + getTitlePage(selectedKits, customName) + getMethodologyPage() + getVirtualGroupPage(selectedKits) + getMatrixHTML(vgCSS) + getAppendixA();
    document.getElementById('report-container').innerHTML = reportHTML;
    setTimeout(() => { if (window.init) window.init(); }, 100);
}
</script>"""

# 🌟 5. THE UNIFIED PROOF ENGINE 🌟
PROOF_ENGINE_TMPL = r"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Forensic Proof Engine</title><style>__CSS_BASE__
.search-tabs { display:flex; justify-content:center; flex-wrap:wrap; gap:10px; margin-bottom:25px; }
.search-tab { padding:12px 20px; border:2px solid #ddd; background:#fff; cursor:pointer; font-weight:bold; border-radius:25px; color:#555; font-size:15px; transition:all 0.2s; box-shadow:0 2px 4px rgba(0,0,0,0.05); }
.search-tab:hover { background:#f4f4f4; transform:translateY(-2px); }
.search-tab.active-part { border-color:#4a148c; background:#4a148c; color:white; box-shadow:0 4px 10px rgba(74,20,140,0.3); }
.search-tab.active-anc { border-color:#006064; background:#006064; color:white; box-shadow:0 4px 10px rgba(0,96,100,0.3); }
.search-tab.active-id { border-color:#b71c1c; background:#b71c1c; color:white; box-shadow:0 4px 10px rgba(183,28,28,0.3); }
.search-box { display:flex; gap:10px; justify-content:center; margin-bottom:20px; }
.search-box select, .search-box input { padding:15px; width:100%; max-width:400px; border:2px solid #ccc; border-radius:8px; font-size:16px; box-shadow:inset 0 1px 3px rgba(0,0,0,0.05); }
.search-box button { padding:15px 25px; background:#b71c1c; color:white; border:none; border-radius:8px; cursor:pointer; font-weight:bold; font-size:16px; transition:all 0.2s;}
.search-box button:hover { background:#d32f2f; transform:scale(1.05); }
</style></head><body><div class="wrap"><h1 class="centerline no-print" style="margin-top:30px; color:#4a148c;">&#128300; Forensic Proof Engine</h1><div id="nav-slot" class="no-print">__STATS_BAR____NAV_HTML__</div>
<div class="proof-card no-print" style="border-top: 5px solid #4a148c;">
    <div class="search-tabs">
        <button id="tab-part" class="search-tab" onclick="setMode('part')">&#128100; 1. Participant View</button>
        <button id="tab-anc" class="search-tab" onclick="setMode('anc')">&#127795; 2. Ancestor View</button>
        <button id="tab-id" class="search-tab" onclick="setMode('id')">&#128269; 3. Deep Path Dive (ID#)</button>
    </div>
    <div id="search-ui"></div>
</div>
<div class="proof-card" id="proof-result" style="display:none; margin-top:20px; padding:0; border:none; box-shadow:none; background:transparent;"></div>
__LEGAL_FOOTER__</div>
<script>__JS_GLOBALS__;

// Render a lineage string ("A->B->C") as an indented cascade of <div> rows.
// The first entry is bold; every later entry is indented 15px further and
// prefixed with an up-arrow. Returns "" for an empty/missing lineage.
window.makeCascade = function(lin) {
    if (!lin) return "";
    const rows = String(lin).split('->').map((name, depth) => {
        const indent = depth * 15;
        const arrow = depth === 0 ? '' : '&uarr; ';
        const emphasis = depth === 0 ? 'font-weight:bold; color:#000;' : 'color:#444;';
        return `<div style="margin-left:${indent}px; ${emphasis}">${arrow}${name.trim()}</div>`;
    });
    const opening = `<div style="text-align:left; font-size:13px; line-height:1.6; font-family:'Georgia',serif; margin:8px 0;">`;
    return opening + rows.join('') + '</div>';
};

let mode = 'part';
function setMode(newMode) {
    // Switch the active search tab, hide any previous result and rebuild the
    // search control (a <select> for participants/ancestors, a text box for IDs).
    mode = newMode;
    ['part', 'anc', 'id'].forEach(tab => {
        document.getElementById('tab-' + tab).className = 'search-tab' + (mode === tab ? ' active-' + tab : '');
    });
    document.getElementById('proof-result').style.display = 'none';

    let html = '';
    if (mode === 'part') {
        // Participants are ordered by their sort_key, falling back to the map key.
        const people = DATA.participants;
        const keys = Object.keys(people).sort((a, b) => {
            const left = people[a].sort_key || a;
            const right = people[b].sort_key || b;
            return left.toLowerCase().localeCompare(right.toLowerCase());
        });
        const options = keys.map(k => {
            const code = people[k].kit_code || '';
            const label = code ? `${people[k].name} [${code}]` : people[k].name;
            return `<option value="${k}">${label}</option>`;
        });
        html = `<div class="search-box"><select id="querySelect" onchange="runSearch()" style="border-color:#4a148c;"><option value="">-- Select a Study Participant --</option>`
            + options.join('')
            + `</select></div>`;
    } else if (mode === 'anc') {
        // Ancestors are ordered alphabetically by display name.
        const lines = DATA.ancestors;
        const keys = Object.keys(lines).sort((a, b) => lines[a].name.localeCompare(lines[b].name));
        const options = keys.map(k => `<option value="${k}">${lines[k].name} (${lines[k].matches} proper matches)</option>`);
        html = `<div class="search-box"><select id="querySelect" onchange="runSearch()" style="border-color:#006064;"><option value="">-- Select an Ancestral Line --</option>`
            + options.join('')
            + `</select></div>`;
    } else if (mode === 'id') {
        html = `<div class="search-box"><input type="text" id="queryInput" placeholder="Enter GEDCOM ID (e.g. I1234) or Name..." style="border-color:#b71c1c;" onkeypress="if(event.key==='Enter') runSearch()"> <button onclick="runSearch()">Deep Search</button></div>`;
    }
    document.getElementById('search-ui').innerHTML = html;
}

window.runSearch = function() {
    // Render the result panel for the active search mode:
    //   'part' - every match row of the selected participant,
    //   'anc'  - every match row pointing at the selected ancestor,
    //   'id'   - every row whose path IDs, lineage text or match ID hit the query.
    // Rows are always listed by shared cM, largest first.
    let resDiv = document.getElementById('proof-result');
    let html = '';
    const byCmDesc = (x,y) => parseInt(y.cm||0) - parseInt(x.cm||0);

    // The free-text query is typed by the visitor, so it must be escaped before
    // it is written into innerHTML.
    const escHtml = s => String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');

    const MARK_OPEN = '<mark style="background:#ffcdd2; color:#b71c1c; font-weight:bold; padding:2px 4px; border-radius:3px;">';

    // Case-insensitive literal highlight. The query used to be compiled into a
    // RegExp unescaped, so input such as "smith (" threw a SyntaxError and
    // aborted the whole search; plain substring matching cannot fail that way.
    const markLiteral = (text, needle) => {
        const hay = text.toLowerCase();
        // If lower-casing changed the length, offsets no longer line up: skip highlighting.
        if (hay.length !== text.length) return text;
        let out = '';
        let pos = 0;
        let hit = hay.indexOf(needle, pos);
        while (hit !== -1) {
            out += text.slice(pos, hit) + MARK_OPEN + text.slice(hit, hit + needle.length) + '</mark>';
            pos = hit + needle.length;
            hit = hay.indexOf(needle, pos);
        }
        return out + text.slice(pos);
    };

    if(mode === 'part') {
        let k = document.getElementById('querySelect').value;
        if(!k) { resDiv.style.display='none'; return; }
        let p = DATA.participants[k];

        // Anything other than PASS / PARTIAL (including a missing status) is shown in red.
        let cStat = p.css_status ? p.css_status : 'UNSCORED';
        let cssColor = cStat === 'PASS' ? '#2e7d32' : (cStat === 'PARTIAL' ? '#ef6c00' : '#c62828');

        html = `<div style="background:white; padding:30px; border-radius:8px; box-shadow:0 4px 15px rgba(0,0,0,0.1);"><div style="background:#f3e5f5; padding:20px; border-radius:8px; border-left:5px solid #ab47bc; margin-bottom:20px;"><h2 style="margin-top:0; color:#4a148c;">Participant Profile: ${p.name}</h2><p><strong>Status:</strong> <span class="badge" style="background:#4a148c; color:white;">${p.badge || 'N/A'}</span> &nbsp; <strong>CSSv2 Validation:</strong> <span class="badge" style="background:${cssColor}; color:white;">${cStat}</span></p><p><strong>Total Evidence Mass:</strong> ${p.cm} cM corroborating ${p.matches} node connections.</p></div><h3 style="color:#4a148c; border-bottom:2px solid #ccc; padding-bottom:5px;">Confirmed Ancestral Intersections</h3><table class="brief-table"><thead><tr><th>Target Node</th><th>Shared cM</th><th>Participant's Triangulation Path</th></tr></thead><tbody>`;
        let matches = DB.filter(m => m.participant === p.name).sort(byCmDesc);
        matches.forEach(m => { html += `<tr><td style="width:25%;"><strong>${m.ancestor}</strong></td><td style="width:10%; color:#4a148c; font-weight:bold;">${m.cm} cM</td><td>${makeCascade(m.lineage)}</td></tr>`; });
        html += `</tbody></table></div>`;
    } else if(mode === 'anc') {
        let k = document.getElementById('querySelect').value;
        if(!k) { resDiv.style.display='none'; return; }
        let a = DATA.ancestors[k];
        html = `<div style="background:white; padding:30px; border-radius:8px; box-shadow:0 4px 15px rgba(0,0,0,0.1);"><div style="background:#e0f7fa; padding:20px; border-radius:8px; border-left:5px solid #00acc1; margin-bottom:20px;"><h2 style="margin-top:0; color:#006064;">Biological Proof: ${a.name}</h2><p><strong>Forensic Validation:</strong> <span class="badge badge-${a.badge ? a.badge.toLowerCase() : 'none'}">${a.badge || 'N/A'} Standard</span></p><p><strong>Integrity Score:</strong> ${a.integrity || 'N/A'}% (Verified by ${a.testers} independent kits)</p><p><strong>Total Evidence:</strong> ${a.cm} cM shared across ${a.matches} matching paths.</p></div><table class="brief-table"><thead><tr><th>Matching Kit</th><th>Shared cM</th><th>Documented Lineage Path</th></tr></thead><tbody>`;
        let matches = DB.filter(m => m.ancestor === a.name).sort(byCmDesc);
        matches.forEach(m => { html += `<tr><td style="width:25%;"><strong>${m.participant}</strong></td><td style="width:10%; color:#006064; font-weight:bold;">${m.cm} cM</td><td>${makeCascade(m.lineage)}</td></tr>`; });
        html += `</tbody></table></div>`;
    } else if(mode === 'id') {
        let q = document.getElementById('queryInput').value.trim().toLowerCase();
        if(!q) { resDiv.style.display='none'; return; }
        // Digits of the query, used to match GEDCOM IDs regardless of the "I" prefix.
        let qNum = q.replace(/[^0-9]/g, '');
        let matches = DB.filter(m => {
            if(qNum && m.search_ids && m.search_ids.split(',').map(x=>x.replace(/[^0-9]/g,'')).includes(qNum)) return true;
            if(m.lineage && m.lineage.toLowerCase().includes(q)) return true;
            if(m.id && m.id.toLowerCase().includes(q)) return true;
            return false;
        }).sort(byCmDesc);

        html = `<div style="background:white; padding:30px; border-radius:8px; box-shadow:0 4px 15px rgba(0,0,0,0.1);"><div style="background:#ffebee; padding:20px; border-radius:8px; border-left:5px solid #b71c1c; margin-bottom:20px;"><h2 style="margin-top:0; color:#b71c1c;">Deep Path Search: "${escHtml(q)}"</h2><p style="font-size:16px;">Found <strong>${matches.length}</strong> lineage connections passing through this node.</p></div>`;
        if(matches.length > 0) {
            html += `<table class="brief-table"><thead><tr><th style="color:#b71c1c;">Participant</th><th style="color:#b71c1c;">Primary Target</th><th style="color:#b71c1c;">cM</th><th style="color:#b71c1c;">Intersecting Lineage Path</th></tr></thead><tbody>`;
            matches.forEach(m => {
                // A row can match on its ID alone, so the lineage may be empty or missing.
                let hlPath = String(m.lineage || '');
                if(qNum) {
                    // qNum contains digits only, so it is safe inside the pattern.
                    let regex = new RegExp(`\\(I?${qNum}\\)`, 'gi');
                    hlPath = hlPath.replace(regex, match => MARK_OPEN + match + '</mark>');
                } else if (q.length > 3) {
                    hlPath = markLiteral(hlPath, q);
                }
                html += `<tr><td style="width:20%;"><strong>${m.participant}</strong></td><td style="width:20%;"><strong>${m.ancestor}</strong></td><td style="width:5%; color:#b71c1c; font-weight:bold;">${m.cm}</td><td>${makeCascade(hlPath)}</td></tr>`;
            });
            html += `</tbody></table></div>`;
        } else {
            html += `<p style="text-align:center; color:#777; font-style:italic;">No matching kits found passing through this ID or name. Note: Uncorroborated singletons may not appear in this index.</p></div>`;
        }
    }
    resDiv.innerHTML = html;
    resDiv.style.display = 'block';
}

document.addEventListener('DOMContentLoaded', () => setMode(mode));
</script>
"""

# Page template for the Forensic Dossier (written out as dna_dossier.html by
# execute_spa_build). It is a raw string, so backslashes reach the browser
# untouched. The placeholders __CSS_BASE__, __STATS_BAR__, __NAV_HTML__,
# __LEGAL_FOOTER__ and __JS_GLOBALS__ are filled in by plain str.replace().
# The embedded script reads two globals supplied through __JS_GLOBALS__:
# DATA.participants (for the kit selector and the summary box) and DB (the
# match rows, filtered by participant name and listed by cM, largest first).
DOSS_TMPL = r"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Forensic Dossier</title><style>__CSS_BASE__</style></head><body><div class="wrap"><h1 class="centerline no-print" style="margin-top:30px; color:#004d40;">üìÅ Forensic Dossier</h1><div id="nav-slot" class="no-print">__STATS_BAR____NAV_HTML__</div><div class="proof-card"><div id="dos-ui" class="no-print"></div><div id="report-stack"></div></div>__LEGAL_FOOTER__</div>
<script>__JS_GLOBALS__;

window.makeCascade = function(lin) {
    if(!lin) return "";
    let parts = String(lin).split('->');
    let h = '<div style="text-align:left; font-size:13px; line-height:1.6; font-family:\'Georgia\',serif; margin:8px 0;">';
    parts.forEach((p, i) => {
        let pad = i * 15;
        let prfx = i === 0 ? '' : '&uarr; ';
        let fw = (i === 0) ? 'font-weight:bold; color:#000;' : 'color:#444;';
        h += `<div style="margin-left:${pad}px; ${fw}">${prfx}${p.trim()}</div>`;
    });
    h += '</div>';
    return h;
};

document.addEventListener('DOMContentLoaded', function() {
    const dosKeys = Object.keys(DATA.participants).sort((a,b) => {
        let keyA = DATA.participants[a].sort_key || a;
        let keyB = DATA.participants[b].sort_key || b;
        return keyA.toLowerCase().localeCompare(keyB.toLowerCase());
    });
    let sel = '<select id="dosSelect" onchange="renderDossier()" style="padding:12px; width:100%; border:2px solid #004d40; border-radius:4px; font-size:16px; margin-bottom:20px;"><option value="">-- Select Kit to Generate Formal Dossier --</option>'; dosKeys.forEach(k => { let code = DATA.participants[k].kit_code || ''; let displayStr = code ? `${DATA.participants[k].name} [${code}]` : DATA.participants[k].name; sel += `<option value="${k}">${displayStr}</option>`; }); sel += '</select>'; document.getElementById('dos-ui').innerHTML = sel; window.renderDossier = function() { let k = document.getElementById('dosSelect').value; if(!k) { document.getElementById('report-stack').innerHTML = ''; return; } let p = DATA.participants[k]; let matches = DB.filter(m => m.participant === p.name).sort((x,y) => parseInt(y.cm||0) - parseInt(x.cm||0)); let topAnc = matches.length > 0 ? matches[0].ancestor : "None Found";

    let cStat = p.css_status ? p.css_status : 'UNSCORED';
    let cssColor = cStat === 'PASS' ? '#2e7d32' : (cStat === 'PARTIAL' ? '#ef6c00' : '#c62828');

    let html = `<div style="border: 3px double #004d40; padding: 40px; background: white; margin-top:20px;"><div style="text-align:center; border-bottom:2px solid #004d40; padding-bottom:20px; margin-bottom:30px;"><h1 style="color:#004d40; text-transform:uppercase; margin:0; font-size:28px;">Forensic Evidence Dossier</h1><p style="margin:5px 0 0 0; color:#555; font-style:italic;">Yates DNA Study Lineage Reconstruction</p></div><div style="font-size:16px; line-height:1.8; background:#f4f4f4; padding:20px; border:1px solid #ddd; margin-bottom:30px;"><p style="margin:0;"><strong>SUBJECT IDENTIFIER:</strong> ${p.name}</p><p style="margin:0;"><strong>CSSv2 VALIDATION STATUS:</strong> <span style="color:${cssColor}; font-weight:bold;">${cStat}</span></p><p style="margin:0;"><strong>EVIDENCE INTEGRITY SCORE:</strong> ${p.integrity || 'N/A'}%</p><p style="margin:0;"><strong>PRIMARY CORROBORATED NODE:</strong> ${topAnc}</p><hr style="border-top:1px solid #ccc; margin:15px 0;"><p style="margin:0;"><strong>EXECUTIVE SUMMARY:</strong> This subject shares ${p.cm} cM of autosomal DNA across ${p.matches} independently verified ancestral nodes within the Yates study. The empirical data confirms the biological validity of the subject's descent pathway.</p></div><h3 style="color:#004d40; text-transform:uppercase; font-size:18px;">Cross-Referenced Match Index</h3><table class="brief-table"><thead><tr><th style="background:#e0f2f1;">Intersected Study Node</th><th style="background:#e0f2f1;">cM</th><th style="background:#e0f2f1;">Documented Route to Node</th></tr></thead><tbody>`; matches.forEach(m => { html += `<tr><td style="width:30%;"><strong>${m.ancestor}</strong></td><td style="width:10%; color:#004d40; font-weight:bold;">${m.cm}</td><td>${makeCascade(m.lineage)}</td></tr>`; }); html += '</tbody></table></div>'; document.getElementById('report-stack').innerHTML = html; } });</script></body></html>"""

print("‚úÖ Cell 4 (Amnesia-Proof Final + Safe UI) Loaded Successfully.")

      [CELL 4] TEMPLATE LIBRARY LOADING (Vault & UI Fix)...
✅ Cell 4 (Amnesia-Proof Final + Safe UI) Loaded Successfully.


In [50]:
#@title [CELL 5] v1.5 - Master SPA Compiler (Encoding Fix)
import os, json
import pandas as pd

def _spa_text(value):
    """Return a CSV cell as text, mapping missing values (None/NaN) to ''."""
    return "" if pd.isnull(value) else str(value)


def _build_spa_globals(df):
    """Build the ``const DB = ...; const DATA = ...;`` script source from *df*.

    DB holds one record per CSV row; DATA.participants and DATA.ancestors hold
    per-name totals (summed cM, row count and, for ancestors, the number of
    distinct testers).
    """
    db_list = []
    participants = {}
    ancestors = {}

    for _, row in df.iterrows():
        t_name = str(row.get('Tester_Name', '')).strip()
        a_name = str(row.get('Authority_FirstAncestor', '')).strip()
        raw_cm = row.get('cM', 0)
        cm = int(raw_cm) if pd.notnull(raw_cm) else 0

        # Optional columns go through _spa_text: a blank cell must become "",
        # not the literal text "nan", otherwise the pages show "[nan]" kit
        # codes, the JS "sort_key || key" fallback never triggers, and "nan"
        # becomes a searchable lineage/ID.
        db_list.append({
            "participant": t_name,
            "ancestor": a_name,
            "cm": cm,
            "lineage": _spa_text(row.get('Match_Lineage', '')),
            "id": _spa_text(row.get('Match_ID', '')),
            "search_ids": _spa_text(row.get('Match_Path_IDs', ''))
        })

        person = participants.setdefault(t_name, {
            "name": t_name,
            "kit_code": _spa_text(row.get('Tester_Code', '')),
            "cm": 0,
            "matches": 0,
            "sort_key": _spa_text(row.get('Tester_Sort_Key', ''))
        })
        person["cm"] += cm
        person["matches"] += 1

        line = ancestors.setdefault(a_name, {"name": a_name, "cm": 0, "matches": 0, "testers": set()})
        line["cm"] += cm
        line["matches"] += 1
        line["testers"].add(t_name)

    # Sets are not JSON-serialisable; the pages only need the distinct count.
    for line in ancestors.values():
        line["testers"] = len(line["testers"])

    js_globals = f"const DB = {json.dumps(db_list)}; const DATA = {{ participants: {json.dumps(participants)}, ancestors: {json.dumps(ancestors)} }};"
    # The source is embedded inside a <script> element, where a "</script>"
    # occurring in the data would end the element early. "<\/" is the same
    # string to JavaScript but is not seen as a closing tag by the HTML parser.
    return js_globals.replace("</", "<\\/")


def _render_spa_template(template, js_globals):
    """Fill a page template; data goes in last so it is never re-scanned for placeholders."""
    page = template.replace("__CSS_BASE__", CSS_BASE).replace("__NAV_HTML__", NAV_HTML)
    page = page.replace("__STATS_BAR__", "").replace("__LEGAL_FOOTER__", "")
    return page.replace("__JS_GLOBALS__", js_globals)


def execute_spa_build():
    """Compile engine_database.csv into the three single-page HTML reports.

    Reads ``engine_database.csv`` from the working directory and writes
    ``proof_consolidator.html``, ``proof_engine.html`` and ``dna_dossier.html``
    into ``html_output/`` (created if needed). Prints an error and returns
    without writing anything when the CSV is missing. Relies on the template
    constants defined by earlier cells (CONSOLIDATOR_*, PROOF_ENGINE_TMPL,
    DOSS_TMPL, CSS_BASE, NAV_HTML).
    """
    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB):
        print("‚ùå Error: engine_database.csv not found. Please run Cell 3 (Data Engine) first.")
        return

    print("Loading Data Engine outputs...")
    # The CSV is read as iso-8859-15 (per the original note, the encoding
    # Cell 3 writes) to avoid UnicodeDecodeErrors.
    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")

    # 1. Build JSON globals for the frontend JavaScript
    js_globals = _build_spa_globals(df)

    output_dir = "html_output"
    os.makedirs(output_dir, exist_ok=True)

    # 2. Build Proof Consolidator (Academic White Paper)
    print("Generating proof_consolidator.html...")
    consol_content = CONSOLIDATOR_HTML + CONSOLIDATOR_JS.replace("__JS_GLOBALS__", js_globals)
    consol_page = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Academic White Paper</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{CONSOLIDATOR_CSS}</head><body id="top"><div class="wrap"><h1 class="centerline no-print">Academic White Paper</h1><div id="nav-slot" class="no-print">{NAV_HTML}</div>{consol_content}</div></body></html>"""
    with open(os.path.join(output_dir, "proof_consolidator.html"), "w", encoding='utf-8') as f:
        f.write(consol_page)

    # 3. Build Proof Engine
    print("Generating proof_engine.html...")
    engine_content = _render_spa_template(PROOF_ENGINE_TMPL, js_globals)
    with open(os.path.join(output_dir, "proof_engine.html"), "w", encoding='utf-8') as f:
        f.write(engine_content)

    # 4. Build Forensic Dossier
    print("Generating dna_dossier.html...")
    doss_content = _render_spa_template(DOSS_TMPL, js_globals)
    with open(os.path.join(output_dir, "dna_dossier.html"), "w", encoding='utf-8') as f:
        f.write(doss_content)

    print(f"‚úÖ Success! Generated all Master SPA HTML files in /{output_dir}/.")

# Trigger Execution
execute_spa_build()

Loading Data Engine outputs...
Generating proof_consolidator.html...
Generating proof_engine.html...
Generating dna_dossier.html...
✅ Success! Generated all Master SPA HTML files in /html_output/.


In [51]:
# @title [CELL 6] MASTER ORCHESTRATOR (Run This Button)
import os, sys
# Banner for the one-button pipeline run.
print("=" * 60)
print("      MASTER ORCHESTRATOR")
print("      (Running Engine -> Publisher -> Upload)")
print("=" * 60)

# run_engine / run_publisher are defined by the Engine and Publisher setup
# cells; the pipeline only starts when both are already in the namespace.
if globals().keys() >= {'run_engine', 'run_publisher'}:
    print("\n>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...")
    try:
        run_engine()
        print("‚úÖ PHASE 1 COMPLETE.")

        print("\n>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...")
        run_publisher()
        print("‚úÖ PHASE 2 COMPLETE.")

        print("\n" + "=" * 60)
        print("      üèÜ MASTER PIPELINE SUCCESSFUL")
        print("=" * 60)
    except Exception as e:
        # Top-level boundary: report the failure of either phase and stop.
        print(f"\n‚ùå CRITICAL FAILURE: {e}")
else:
    print("‚ùå ERROR: Modules not loaded! Please run the Engine and Publisher setup cells first.")

      MASTER ORCHESTRATOR
      (Running Engine -> Publisher -> Upload)

>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...
      [CELL 3] ENGINE STARTING (V124 - SORT AUTHORITY)...

[STEP 1] Resolving Files (Local Priority)...
    ‚úÖ Found match_to_unmasked.csv locally. Skipping FTP download.
    üëâ Source GEDCOM: yates_study_2025.ged

[STEP 2] Loading Tester Authority CSV...

[STEP 3] Parsing GEDCOM for Study| Tags & Lineages...

[STEP 4] Constructing Database...

[SUCCESS] Engine V124 Complete. Saved 1713 verified matches to engine_database.csv.
‚úÖ PHASE 1 COMPLETE.

>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...
      [CELL 5] PUBLISHER STARTING (Hardwired CSS Status)...
    [+] Parsing GEDCOM Vitals: yates_study_2025.ged
    [+] Calculating ANCHOR Matrices Server-Side...

[LOCAL] Overwriting Database-Driven Pages on disk...

[STEP 3] Uploading via FTP to Live Server...
    [1/19] üì§ Uploaded: anchor_frame.htm
    [2/19] üì§ Uploaded: proof_consolidator.html
    [3/19] üì§ Uploa

In [30]:
# @title [CELL ] The Time Machine (Data Vault & Archiver)
def run_archiver():
    """Snapshot the generated site, zip the working files and push the backups out.

    Steps, each reported on stdout:
      1. Read every .shtml/.html file in the working directory into one
         timestamped ``site_snapshot_*.json`` (stops if none are found).
      2. Zip the data/site files in the working directory into a timestamped
         ``Yates_Study_Backup_*.zip`` (stops if compression fails).
      3. Upload both files over FTPS, preferring ``/ons-study/backups``.
      4. Upload the snapshot JSON to Dropbox using refresh-token credentials.
      5. Trigger a browser download of the zip through Colab.

    Steps 3-5 are best-effort: a failure is printed and the later steps still
    run. Credentials come from environment variables, falling back to Colab
    secrets. Returns None.
    """
    print("="*60)
    print("      [CELL 6] THE TIME MACHINE (Archive & Sync)")
    print("="*60)

    import zipfile
    import os
    import pytz
    import json
    from datetime import datetime
    from ftplib import FTP_TLS, all_errors
    from google.colab import files
    from google.colab import userdata

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M_%S")

    # --- 1. GENERATE SITE SNAPSHOT JSON ---
    print("[STEP 1] Generating Site Snapshot JSON...")
    snapshot_data = {}
    html_files = [f for f in os.listdir('.') if f.lower().endswith(('.shtml', '.html'))]

    if not html_files:
        print("    ‚ùå No generated HTML files found! Run Cell 5 first.")
        return

    for f_name in html_files:
        # Best effort: an unreadable page is reported and left out of the snapshot.
        try:
            with open(f_name, 'r', encoding='utf-8') as fh:
                snapshot_data[f_name] = fh.read()
        except Exception as e:
            print(f"    ‚ö†Ô∏è Could not read {f_name}: {e}")

    snapshot_name = f"site_snapshot_{timestamp}.json"
    with open(snapshot_name, 'w', encoding='utf-8') as f:
        json.dump(snapshot_data, f)
    print(f"    ‚úÖ Created snapshot JSON: {snapshot_name}")

    # --- 2. CREATE MASTER ZIP VAULT ---
    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css', '.ged')
    files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    print(f"\n[STEP 2] Compressing {len(files_to_pack)} files into {zip_name}...")
    try:
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file in files_to_pack:
                zf.write(file)
        print(f"    ‚úÖ Archive Created: {zip_name} ({os.path.getsize(zip_name)/1024/1024:.2f} MB)")
    except Exception as e:
        print(f"    ‚ùå Compression Failed: {e}")
        return

    # --- 3. FTP UPLOAD (BACKUPS FOLDER) ---
    print("\n[STEP 3] Uploading to Web Server Vault (FTP)...")
    try:
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

        # The context manager closes the control connection even when a login
        # or transfer fails; previously quit() ran only on the success path.
        with FTP_TLS() as ftps:
            ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()

            # Where the files end up, so the success message is accurate.
            remote_label = "/backups/"
            try:
                ftps.cwd("/ons-study/backups")
            except all_errors:
                # The folder may simply not exist yet: create it and retry.
                try:
                    ftps.mkd("/ons-study/backups")
                    ftps.cwd("/ons-study/backups")
                except all_errors:
                    remote_label = "/"
                    print("    ‚ö†Ô∏è Could not navigate to /ons-study/backups/. Uploading to root.")

            for local_name in (zip_name, snapshot_name):
                with open(local_name, "rb") as fh:
                    ftps.storbinary(f"STOR {local_name}", fh)
                print(f"    ‚úÖ FTP Success: {remote_label}{local_name}")
    except Exception as e:
        print(f"    ‚ö†Ô∏è FTP Upload skipped/failed: {e}")

    # --- 4. DROPBOX SYNC (REFRESH TOKEN METHOD) ---
    print("\n[STEP 4] Syncing to Dropbox...")
    try:
        # Pull the specific Refresh Token keys from Colab Secrets
        dbx_app_key = os.environ.get("DBX_APP_KEY") or userdata.get("DBX_APP_KEY")
        dbx_app_secret = os.environ.get("DBX_APP_SECRET") or userdata.get("DBX_APP_SECRET")
        dbx_refresh = os.environ.get("DBX_REFRESH_TOKEN") or userdata.get("DBX_REFRESH_TOKEN")

        if not dbx_refresh:
            print("    ‚ùå ERROR: 'DBX_REFRESH_TOKEN' not found in Colab Secrets.")
            print("       Make sure your keys are named exactly DBX_APP_KEY, DBX_APP_SECRET, and DBX_REFRESH_TOKEN.")
        else:
            try:
                import dropbox
            except ImportError:
                os.system('pip install dropbox')
                import dropbox

            # Authenticate using the robust refresh method
            dbx = dropbox.Dropbox(
                app_key=dbx_app_key,
                app_secret=dbx_app_secret,
                oauth2_refresh_token=dbx_refresh
            )

            target_path = f"/Yates_Study_Sync/archives/{snapshot_name}"

            with open(snapshot_name, "rb") as f:
                dbx.files_upload(f.read(), target_path)
            print(f"    ‚úÖ Dropbox Sync Success: {target_path}")

    except Exception as e:
        print(f"    ‚ùå Dropbox Upload Failed: {e}")

    # --- 5. TRIGGER LOCAL DOWNLOAD ---
    print("\n[STEP 5] Triggering Local Download...")
    try:
        files.download(zip_name)
        print("    ‚úÖ Please check your browser downloads for the Archive.")
    except Exception:
        # Not a bare except: Ctrl-C / interpreter exit must still propagate.
        print("    ‚ö†Ô∏è Could not auto-download. You can download the zip manually from the Colab files pane.")
run_archiver()

      [CELL 6] THE TIME MACHINE (Archive & Sync)
[STEP 1] Generating Site Snapshot JSON...
    ‚úÖ Created snapshot JSON: site_snapshot_2026-02-25_1002_28.json

[STEP 2] Compressing 22 files into Yates_Study_Backup_2026-02-25_1002_28.zip...
    ‚úÖ Archive Created: Yates_Study_Backup_2026-02-25_1002_28.zip (30.59 MB)

[STEP 3] Uploading to Web Server Vault (FTP)...
    ‚úÖ FTP Success: /backups/Yates_Study_Backup_2026-02-25_1002_28.zip
    ‚úÖ FTP Success: /backups/site_snapshot_2026-02-25_1002_28.json

[STEP 4] Syncing to Dropbox...
    ‚úÖ Dropbox Sync Success: /Yates_Study_Sync/archives/site_snapshot_2026-02-25_1002_28.json

[STEP 5] Triggering Local Download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

    ‚úÖ Please check your browser downloads for the Archive.


In [None]:
# @title GEDCOM Search: The Gremlin Hunter
import os

def find_errant_participant(search_term="yatesjohnrob"):
    """Print every GEDCOM line that contains *search_term* (case-insensitive).

    Scans the most recently modified ``.ged`` file in the working directory
    whose name does not contain ``_processed``. Each hit is printed with the
    ID and name of the individual (INDI record) it sits in, or marked as
    outside an INDI block when it belongs to any other record.

    Args:
        search_term: Text to look for anywhere in a line.

    Returns:
        None. All results go to stdout.
    """
    print("="*75)
    print(f"      GEDCOM SEARCH: LOOKING FOR '{search_term}'")
    print("="*75)

    # Find the original GEDCOM file
    ged_files = [f for f in os.listdir('.') if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files:
        print("‚ùå No original GEDCOM found.")
        return None

    ged_file = max(ged_files, key=os.path.getmtime)
    print(f"üîç Scanning File: {ged_file}\n")

    needle = search_term.lower()
    current_id = None
    current_name = "Unknown"
    matches_found = 0

    print(f"{'ID#'.ljust(12)} | {'NAME'.ljust(30)} | EXACT LINE FOUND")
    print("-" * 75)

    with open(ged_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            line_clean = line.strip()
            parts = line_clean.split(" ", 2)
            if len(parts) < 2:
                continue

            lvl = parts[0]
            tag = parts[1]
            val = parts[2] if len(parts) > 2 else ""

            if lvl == "0":
                # Every level-0 line opens a new record. Only INDI records set
                # the current individual; any other record (FAM, SOUR, ...)
                # must clear it, otherwise its lines would be reported under
                # the previous person.
                if val.split(" ", 1)[0] == "INDI":
                    current_id = tag.replace("@", "")
                    current_name = "Unknown"
                else:
                    current_id = None

            # Track the Name so we know who we are looking at
            elif lvl == "1" and tag == "NAME":
                current_name = val.replace("/", "").strip()

            # Trigger if the search term is anywhere in this line
            if needle in line_clean.lower():
                if current_id:
                    print(f"{current_id.ljust(12)} | {current_name[:28].ljust(30)} | {line_clean}")
                else:
                    print(f"{'N/A'.ljust(12)} | {'(Outside INDI block)'.ljust(30)} | {line_clean}")
                matches_found += 1

    print("-" * 75)
    print(f"‚úÖ Found {matches_found} total mentions of '{search_term}'.")

find_errant_participant("yatesjohnrob")

      GEDCOM SEARCH: LOOKING FOR 'yatesjohnrob'
üîç Scanning File: yates_study_2025.ged

ID#          | NAME                           | EXACT LINE FOUND
---------------------------------------------------------------------------
I51017       | Terri Ann Yates                | 2 NPFX 361&yatesjohnrob
I51033       | Cynthia Lou Miller             | 2 NPFX 20&yatesjohnrob
I51044       | Rhonda Rowe                    | 2 NPFX 19&yatesjohnrob
---------------------------------------------------------------------------
✅ Found 3 total mentions of 'yatesjohnrob'.


In [31]:
# ==============================================================================
# REPORT PACKAGE: Academic White Paper / Consolidator (Cover Letter Module)
# ==============================================================================

def generate_tldr_cover(tester_row):
    """
    Generates the bottom-line transmittal cover HTML for a specific tester.
    Pulls directly from the engine_database.csv pandas row.

    Args:
        tester_row: Mapping-like row (anything with ``.get(key, default)``,
            e.g. a pandas Series or a dict). Keys read: Tester_Name,
            Tester_ID, Tester_Sort_Key, Most_Distant_Ancestor, MDA_Lifespan,
            ANCHOR_Score. Missing, None or NaN values fall back to defaults.

    Returns:
        The cover letter as an HTML fragment (str). All row values are
        escaped, so names containing quotes, ``&`` or ``<`` cannot break the
        markup or the embedded script call.
    """
    import html
    import json

    def _field(key, default):
        # An empty CSV cell arrives from pandas as float NaN; treat it as missing
        # instead of printing the text "nan".
        value = tester_row.get(key, default)
        if value is None or (isinstance(value, float) and value != value):
            return default
        return str(value)

    def _js_string(text):
        # JSON string literals are valid JavaScript; "<\/" keeps a stray
        # "</script>" in the data from closing the script element.
        return json.dumps(text).replace("</", "<\\/")

    # 1. Map the dynamic variables from the engine database
    # (Note: Update the string keys below if your CSV headers are named differently)
    tester_id = _field('Tester_ID', '')
    tester_sort_key = _field('Tester_Sort_Key', '')
    tester_name = html.escape(_field('Tester_Name', 'Study Participant'))
    mda_name = html.escape(_field('Most_Distant_Ancestor', 'Pending Final Validation'))
    mda_lifespan = html.escape(_field('MDA_Lifespan', ''))
    anchor_score = html.escape(_field('ANCHOR_Score', 'UNSCORED'))
    container_id = html.escape(tester_id)

    # Only show the lifespan line when there is one; otherwise the letter
    # would display an empty "()".
    lifespan_html = ""
    if mda_lifespan:
        lifespan_html = (
            ' <br>\n'
            f'                <span style="font-size: 0.7em; color: #555;">({mda_lifespan})</span>'
        )

    # 2. Build the injected HTML template
    cover_html = f"""
    <div class="transmittal-cover" style="font-family: sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; border: 1px solid #ccc; border-radius: 8px; background-color: #f9f9f9;">

        <h1 style="color: #2c3e50; text-align: center; border-bottom: 2px solid #00838f; padding-bottom: 10px;">
            Yates DNA Study Results: The Bottom Line
        </h1>

        <p style="font-size: 1.1em; line-height: 1.6;">
            Dear <strong>{tester_name}</strong>,
        </p>

        <p style="font-size: 1.1em; line-height: 1.6;">
            Thank you for participating in the study. We know the genetic and genealogical science in the attached report can be incredibly dense, so we have summarized exactly what your results mean below.
        </p>

        <div style="background-color: #ffffff; padding: 20px; border-left: 5px solid #00838f; margin: 20px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <h2 style="margin-top: 0; color: #d32f2f;">Your Confirmed Lineage</h2>
            <p style="font-size: 1.2em;">
                Based on the combined DNA evidence and historical records, here is your proven most distant DNA Yates ancestor:
            </p>
            <p style="font-size: 1.5em; font-weight: bold; color: #2c3e50; text-align: center; margin: 20px 0;">
                {mda_name}{lifespan_html}
            </p>
        </div>

        <div style="background-color: #ffffff; padding: 20px; border-left: 5px solid #00838f; margin: 20px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <h2 style="margin-top: 0; color: #d32f2f;">Your Evidence Score</h2>
            <p style="font-size: 1.2em;">
                Your genetic connection to this ancestor has achieved an <strong>ANCHOR Score of {anchor_score}</strong>.
            </p>
            <p style="font-size: 1em; color: #555;">
                <em>(This score represents the validated strength of your specific DNA proof linking you to the ancestor above.)</em>
            </p>
        </div>

        <h2 style="color: #2c3e50; margin-top: 30px;">Your Direct Line of Descent</h2>
        <p style="font-size: 1.1em; line-height: 1.6;">
            Below is the step-by-step path connecting you directly to your most distant proven ancestor:
        </p>

        <div id="pedigree-container-{container_id}" style="margin: 20px 0; padding: 15px; background: #fff; border: 1px dashed #ccc; min-height: 200px;">
            <script>
                // Triggers the vertical pedigree chart upon load
                makeCascade({_js_string(tester_id)}, {_js_string(tester_sort_key)});
            </script>
        </div>
        <p style="font-size: 1.1em; line-height: 1.6; margin-top: 30px;">
            For the comprehensive breakdown of the DNA matches, source citations, and the rigorous methodology used to prove this connection, please review the complete Academic White Paper that follows.
        </p>

        <p style="font-size: 1.1em; line-height: 1.6; margin-top: 40px;">
            Best regards,<br><br>
            <strong>Ron Yates</strong><br>
            <em>Director, Yates One-Name Study</em>
        </p>

    </div>
    """
    return cover_html

print("Cover letter generator function loaded successfully.")

Cover letter generator function loaded successfully.


In [None]:
import pandas as pd
import csv
import re
import os

input_file = "match_to_unmasked.csv"

# Rewrites match_to_unmasked.csv in place, filling column D (index 3) with a
# sort key of the form "<last name><first name(s)>" derived from the name in
# column B (index 1). The header cell of column D becomes "Authority_Sort_Key".
if not os.path.exists(input_file):
    print("‚ùå match_to_unmasked.csv not found. Please upload it first.")
else:
    rows = []
    with open(input_file, 'r', encoding='utf-8', errors='replace') as f:
        reader = csv.reader(f)
        for i, row in enumerate(reader):
            # Pad short rows so that index 3 always exists.
            row.extend([""] * (4 - len(row)))

            if i == 0:
                row[3] = "Authority_Sort_Key"
                rows.append(row)
                continue

            name = row[1].strip()
            # Drop generational/professional suffixes, then anything from "nee"/"née" on.
            s = re.sub(r'\b(jr\.?|sr\.?|iii|iv|v|md|m\.d\.|esq\.?)\b', '', name, flags=re.IGNORECASE)
            s = re.split(r'\bnee\b|\bn√©e\b', s.lower())[0]
            # Keep only lowercase letters and whitespace (s is already lower-cased).
            s = re.sub(r'[^a-z\s]', '', s)
            parts = s.split()

            # Last word first, followed by the remaining words joined together;
            # a name with no usable letters sorts to the end as "zzz".
            sort_key = parts[-1] + "".join(parts[:-1]) if parts else "zzz"

            row[3] = sort_key
            rows.append(row)

    with open(input_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerows(rows)

    print("‚úÖ Success! Column D (Authority_Sort_Key) has been generated.")
    df_preview = pd.read_csv(input_file)
    display(df_preview.head())

✅ Success! Column D (Authority_Sort_Key) has been generated.


Unnamed: 0,code,unmasked,#id,Authority_Sort_Key,email,Unnamed: 5
0,1200am,Cher Midnight,I56373,midnightcher,,
1,aanya,Anaya Yates,I62027,yatesanaya,,
2,adamssarah,Sara Adams,I40355,adamssara,,
3,"addison,david",Dave Addison,I30853,addisondave,,
4,amanic,Aman Radnage,I61055,radnageaman,,
