<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# @title [CELL 1] Setup + Helper Functions (V90 - Expanded Glossary)
import os
import sys
import re
import csv
import json
import html
import socket
import pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# --- INSTALL TQDM IF MISSING ---
# Install with the interpreter that is running this notebook: a bare `pip` on
# PATH may belong to a different Python environment, and os.system() hides a
# failed install until the second import blows up.
try:
    import tqdm
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tqdm"])
    import tqdm

print("="*60)
print("      [CELL 1] SETUP LOADED (V90)")
print("      (Includes: Expanded Glossary Definitions)")
print("="*60)

# ==============================================================================
# 1. GLOBAL HELPER FUNCTIONS + HTML ASSETS
# ==============================================================================
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

NAV_HTML = """
<style>
nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none}
nav.oldnav li{display:inline-block}
nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px}
nav.oldnav a:hover{background-color:#00838f!important}
@media print { nav.oldnav, #nav-slot, .no-print, .action-btn, .control-panel, .tabs { display: none !important; } }
</style>
<nav class="oldnav"><ul>
<li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li>
<li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li>
<li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li>
<li><a href="/ons-study/just-trees.shtml">Trees</a></li>
<li><a href="/ons-study/dna_network.shtml">DNA Network</a></li>
<li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li>
<li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li>
<li><a href="/ons-study/brick_wall_buster.shtml" style="background:#f57f17;color:black !important;">Brick Wall Buster</a></li>
<li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li>
<li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li>
<li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li>
</ul></nav>"""

# Methodology banner shown above register-style pages (hidden when printing).
SITE_INFO = """<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>—a method that blends genealogical reasoning with data-driven logic.</p></div>"""

# V86: Added Table Filtering Logic
JS_CORE = r"""<script type="text/javascript">
(function(){
    // SORTING LOGIC
    function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();}
    function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);}
    function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}}

    // FILTERING LOGIC
    window.filterTable = function() {
        var input = document.getElementById("tableSearch");
        var filter = input.value.toUpperCase();
        var table = document.getElementById("reg-table") || document.querySelector("table.dataframe");
        var tr = table.getElementsByTagName("tr");
        for (var i = 1; i < tr.length; i++) {
            var tdArr = tr[i].getElementsByTagName("td");
            var found = false;
            for (var j = 0; j < tdArr.length; j++) {
                if (tdArr[j]) {
                    var txtValue = tdArr[j].textContent || tdArr[j].innerText;
                    if (txtValue.toUpperCase().indexOf(filter) > -1) {
                        found = true;
                        break;
                    }
                }
            }
            tr[i].style.display = found ? "" : "none";
        }
    }

    function init(){
        var t=document.getElementsByTagName('table');
        for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]);
    }
    if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init();
})();
</script>"""

# ==============================================================================
# GLOSSARY BUILDER (SCALABLE)
# To add a term: Just add a tuple ("Term", "Definition") to the appropriate list.
# ==============================================================================
# Maps a category heading to a list of (term, definition) tuples.
# Definitions are inserted into the page as raw HTML, so <br> tags are allowed.
GLOSSARY_DATA = {
    "1. Identity Columns": [
        ("Tester-Participant-MASKED (The Trigger)", "The unique privacy code extracted from the user's NPFX tag."),
        ("Tester-Participant-Unmasked", "The real name of the tester.")
    ],
    "2. Analysis Terms": [
        ("Platinum Standard", "Lineages with 30+ matches and 10+ unique sources. Biologically proven."),
        ("Keystone Tester", "A high-value participant (15+ matches) who anchors a specific branch."),
        ("Forensic Handshake", "An informal term in genetic genealogy describing a confirmation pattern in which multiple independent DNA matches support the same ancestral line or family connection, strengthening confidence in an identification or relationship conclusion. Rather than relying on a single match, genealogists look for several matches that converge on the same family network, creating a “handshake” of agreement between genetic evidence and documentary research. This concept is especially valuable in unknown-parentage and forensic cases, where conclusions must be supported by redundant evidence.<br><br>A forensic handshake is often achieved through related methods such as triangulation, where three or more individuals share the same DNA segment from a common ancestor, and cluster analysis, which groups matches who also match one another and often represent a shared ancestral couple or lineage. Together, these approaches help meet an emerging genetic genealogy proof standard, emphasizing that reliable conclusions require multiple corroborating matches, consistency with records, and the exclusion of alternate explanations.")
    ],
    "3. Genealogy Concepts": [
        ("Spanish Naming System", "A traditional Hispanic naming convention in which an individual bears one or more given names followed by two surnames: the first inherited from the father (paternal surname) and the second from the mother (maternal surname). This system is historically rooted in Spain and is especially useful in genealogy because it preserves both parental lineages and improves identification in historical records."),
        ("Née", "A term meaning “born as,” used to indicate a woman’s maiden or birth surname before marriage. In genealogical and historical records, née identifies the surname a woman carried in her natal family line, preserving her connection to her parents and ancestry. For example, “Maria García, née López” shows that María’s birth surname was López, even though she later used García after marriage.<br><br>The use of née is especially important for tracing maternal family lines, since women’s surnames may change across generations in many cultures. By recording a woman’s birth name, genealogists can correctly link her to her original family, distinguish between individuals with similar married names, and maintain continuity in lineage reconstruction, particularly in marriage, probate, and church records.")
    ]
}

def build_glossary_html(data):
    """Render the glossary mapping as one HTML fragment.

    Args:
        data: Mapping of category heading -> iterable of (term, definition)
            pairs. Headings, terms and definitions are inserted as-is, with
            no HTML escaping.

    Returns:
        A single string: a styled <div> holding the page heading followed by
        one <h3> + <ul> section per category, in the mapping's iteration order.
    """
    pieces = [
        '<div style="background:white;padding:25px;border-radius:8px;border:1px solid #ddd;font-family:sans-serif;line-height:1.6;">',
        '<h2 style="color:#006064;border-bottom:2px solid #004d40;padding-bottom:10px;">ONS Yates Study: Data Glossary</h2>',
    ]

    for heading, entries in data.items():
        # One section per category: heading, then an unstyled list of terms.
        pieces.append(
            f'<h3 style="color:#00838f;margin-top:25px;">{heading}</h3><ul style="list-style-type:none;padding-left:0;">'
        )
        pieces.extend(
            f'<li style="margin-bottom:15px;"><strong>{name}:</strong><br>{meaning}</li>'
            for name, meaning in entries
        )
        pieces.append('</ul>')

    pieces.append('</div>')
    return "".join(pieces)

GLOSSARY_CONTENT = build_glossary_html(GLOSSARY_DATA)

# Body markup for the mailing-list subscription page.
SUBSCRIBE_CONTENT = """<div style="background:white;padding:40px;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);max-width:800px;margin:30px auto;text-align:center;font-family:'Segoe UI',sans-serif;"><h1 style="color:#006064;margin-bottom:10px;">Join the Yates Research Community</h1><p style="color:#555;font-size:1.1em;line-height:1.6;margin-bottom:30px;">Stay connected with the latest breakthroughs in the Yates One-Name Study. Get notified about new DNA groups, lineage verifications, and quarterly reports.</p><div style="background:#e0f2f1;padding:25px;border-radius:8px;border:1px solid #b2dfdb;display:inline-block;"><h3 style="margin-top:0;color:#004d40;">📧 One-Click Subscribe</h3><p style="margin-bottom:20px;">Click below to send a subscription request to our Groups.io list.</p><a href="mailto:yates-one-name-study+subscribe@groups.io?subject=Subscribe" style="display:inline-block;padding:15px 30px;background:#00838f;color:white;text-decoration:none;border-radius:5px;font-weight:bold;font-size:1.1em;box-shadow:0 2px 5px rgba(0,0,0,0.2);">Subscribe Now</a></div><p style="margin-top:30px;font-size:0.9em;color:#777;">Powered by Groups.io. You will receive a confirmation email shortly.</p></div>"""

# Body markup for the "Share DNA" instructions page.
SHARE_CONTENT = """<div style="max-width:900px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><div style="text-align:center;margin-bottom:40px;"><h1 style="color:#0277bd;margin-bottom:10px;">Share Your Ancestry DNA Matches</h1><p style="font-size:1.1em;color:#555;">Ancestry provides a built-in sharing feature that allows you to grant limited access to your DNA matches <strong>without sharing your personal account details</strong>. You remain in full control of your account at all times.</p></div><div style="display:grid;grid-template-columns:1fr 1fr;gap:30px;margin-bottom:30px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #0277bd;"><h3 style="color:#0277bd;margin-top:0;">How Sharing Works</h3><p>From your AncestryDNA <strong>Settings</strong> page, you may invite another individual by email and assign one of the following roles:</p><ul style="padding-left:20px;"><li><strong>Viewer</strong> (Read only)</li><li><strong>Collaborator</strong> (Recommended for Study)</li><li><strong>Manager</strong> (Full Control)</li></ul></div><div style="background:#e3f2fd;padding:25px;border-radius:8px;border:1px solid #90caf9;"><h3 style="color:#01579b;margin-top:0;">Privacy & Control</h3><p>This sharing arrangement provides <strong>limited access only</strong>. 
It does not allow changes to your account and does not expose your personal details.</p><p><strong>You may revoke access at any time through Ancestry.</strong></p></div></div><div style="background:white;padding:30px;border-radius:8px;border:1px solid #ddd;box-shadow:0 4px 15px rgba(0,0,0,0.05);"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">How to Share for the Yates One-Name Study</h2><ol style="font-size:1.1em;line-height:1.8;padding-left:25px;"><li>Open your <strong>AncestryDNA Settings</strong>.</li><li>Scroll to the section labeled "DNA Result Sharing" and click <strong>Invite</strong>.</li><li>Send the invitation to <strong>Ron Yates</strong> at: <br><span style="background:#fff3e0;padding:5px 10px;border-radius:4px;font-weight:bold;color:#e65100;font-family:monospace;font-size:1.2em;">yatesvilleron@gmail.com</span></li><li>Assign the role: <strong>Collaborator</strong>.</li></ol><div style="background:#fffde7;border-left:5px solid #fbc02d;padding:15px;margin-top:20px;font-size:0.95em;"><strong>Why Collaborator?</strong> The Collaborator role allows Ron to review shared matches and create small internal groups (colored dots) to identify which matches have been reviewed and which have contributed evidence to the Yates One-Name Study.</div></div><div style="margin-top:40px;"><h3 style="color:#006064;">What Happens Next?</h3><p>After sharing, you will receive an invitation to subscribe to the <strong>Yates One-Name Study Groups.io mailing list</strong>, where DNA proof summaries and study findings are shared.</p><h3 style="color:#006064;">Reciprocal Sharing (Optional)</h3><p>If you are interested in viewing Ron’s DNA matches, simply let him know. When a direct match exists, that relationship will be reflected in the study findings.</p></div></div>"""

# Body markup for the DNA strategy / theory page.
THEORY_CONTENT = """<div style="max-width:1000px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><h1 style="text-align:center;color:#004d40;font-size:2.5em;margin-bottom:10px;">The Yates DNA Strategy</h1><p style="text-align:center;font-size:1.2em;color:#666;margin-bottom:40px;">Moving beyond traditional Y-DNA to solve modern genealogical mysteries.</p><div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:30px;margin-bottom:40px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #00838f;"><h2 style="color:#006064;margin-top:0;">The Autosomal Revolution</h2><p>Traditional one-name studies rely almost exclusively on Y-DNA to trace the paternal surname line. While valuable for deep history, this approach ignores 50% of our ancestors at every generation.</p><p><strong>Our Focus:</strong> We utilize <strong>Autosomal DNA (atDNA)</strong>—inherited from both parents—to verify connections across <em>all</em> branches. This allows us to:</p><ul style="padding-left:20px;color:#444;"><li>Bridge the "Gender Gap" by tracing female descendants.</li><li>Verify paper trails for the last 300 years (Genealogical Time).</li><li>Cluster "Floating" Yates families into their correct lines.</li></ul></div><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #f9a825;"><h2 style="color:#f57f17;margin-top:0;">Collateral Saturation</h2><p>A single DNA match can be luck. Ten matches is a statistic. <strong>Thirty matches is a fact.</strong></p><p>We employ a technique called <strong>Collateral Saturation</strong>. We don't look for one "Golden Match." We analyze groups of matches from independent cousin lines. 
When descendants from four different children of <em>William & Mary Yates</em> all share DNA with you, the relationship is biologically confirmed.</p><div style="text-align:center;margin-top:20px;"><a href="dna_network.shtml" style="display:inline-block;padding:10px 20px;background:#f9a825;color:#333;text-decoration:none;border-radius:4px;font-weight:bold;">View the Network</a></div></div></div><div style="background:#e0f2f1;padding:30px;border-radius:8px;border:1px solid #b2dfdb;margin-bottom:40px;"><h2 style="color:#004d40;margin-top:0;text-align:center;">From Theory to Tools</h2><p style="text-align:center;max-width:700px;margin:0 auto 20px auto;">We have built a suite of forensic tools to visualize this data. Instead of raw spreadsheets, we offer interactive dashboards to prove your connection.</p><div style="display:flex;flex-wrap:wrap;justify-content:center;gap:15px;margin-top:20px;"><a href="ons_yates_dna_register.shtml" style="background:#006064;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">The Register</a><a href="lineage_proof.html" style="background:#00838f;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Proof Engine</a><a href="dna_dossier.html" style="background:#00acc1;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Forensic Dossier</a></div></div><div style="background:#f5f5f5;padding:20px;border-radius:8px;border:1px solid #ddd;"><h3 style="color:#555;margin-top:0;">Legacy Data: Y-DNA Haplogroups</h3><p style="font-size:0.9em;color:#666;margin-bottom:15px;">Y-DNA is the backbone of deep ancestry (27,000 BCE to 1600 AD). 
While not our primary focus for recent genealogy, we maintain a detailed record of the Yates Y-Chromosome mutations (R-M207 through FT266579).</p><a href="https://yates.one-name.net/gengen/dna_proof_y.htm" style="color:#006064;font-weight:bold;text-decoration:none;">&raquo; View Detailed Y-DNA Findings</a></div></div>"""

def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    """Wrap *content* in the study's shared HTML page shell and return the markup.

    Args:
        title: Text used for both the <title> element and the page <h1>.
        content: Pre-rendered HTML for the page body; inserted verbatim.
        count: Accepted for caller compatibility; not used when building the page.
        view_type: Page flavour. Selects the optional widgets:
            the SITE_INFO methodology banner, the table search box and print
            button ('ancestor', 'participant', 'singleton'), and the register
            sort toggle or the tree-order toggle (any value containing 'tree').
        extra: Additional markup placed at the end of <head>.
        stats_bar: Markup placed immediately before the navigation bar.

    Returns:
        The complete HTML document as a single string.

    Note:
        Nothing is HTML-escaped here; every argument is treated as trusted markup.
    """
    # Views that show the methodology banner. 'subscribe', 'share' and 'theory'
    # never show it.
    banner_views = ('ancestor', 'participant', 'tree_az', 'tree_za', 'proof',
                    'network', 'dossier', 'buster', 'singleton')
    nav_blk = SITE_INFO if view_type in banner_views else ""

    toggle = ""
    print_btn = ""
    search_bar = ""

    # V86: Search bar and print button for Registers and Singletons.
    # The input id and onkeyup handler must match filterTable() in JS_CORE.
    if view_type in ('ancestor', 'participant', 'singleton'):
        search_bar = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="🔍 Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""
        view_name = "Singleton List" if view_type == 'singleton' else "Register"
        print_btn = f"""<div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">🖨️ Print {view_name}</button></div>"""

    if view_type == 'ancestor':
        # Current view ("By Ancestral Line") is bold; the other is a plain link.
        toggle = """<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="font-weight:bold;color:#006064;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="color:#00acc1;text-decoration:none;">By Participant Name</a></div>"""
    elif view_type == 'participant':
        toggle = """<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="font-weight:bold;color:#006064;">By Participant Name</a></div>"""
    elif 'tree' in view_type:
        # The active ordering is shown as bold text, the other as a link.
        za = '<span style="font-weight:bold;color:#000;">Z-A</span>' if 'za' in view_type else '<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        az = '<span style="font-weight:bold;color:#000;">A-Z</span>' if 'az' in view_type else '<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}{JS_CORE}</div></body></html>"""

# Confirmation that the setup cell finished executing.
print("✅ Cell 1 Loaded! (Glossary is now Scalable)")

      [CELL 1] SETUP LOADED (V90)
      (Includes: Expanded Glossary Definitions)
✅ Cell 1 Loaded! (Glossary is now Scalable)


In [16]:
# @title [CELL 3] The Data Engine (V82 - Auto-Download Authority)
def run_engine():
    """Build the engine database CSV from the newest raw GEDCOM in the working directory.

    Steps, in order:
      0. Try to download the authority key file (match_to_unmasked.csv) over
         FTPS, using FTP_HOST / FTP_USER / FTP_PASS from the environment and
         falling back to Colab ``userdata`` when FTP_HOST is empty. Any failure
         here is reported and the run continues with the local key file.
      1. Pick the most recently modified ``*.ged`` file whose name does not
         contain "unmasked".
      2. Load the code -> real-name map from the key file.
      3. Write an unmasked copy of the GEDCOM: in every record that carries an
         NPFX code, each "1 NAME" line is replaced by the resolved name.
      4. Parse the unmasked GEDCOM, trace every coded individual to a
         Yates-named line, and write one row per individual to
         engine_database.csv.

    Returns:
        False when no raw GEDCOM file is found; otherwise None.
    """
    print("="*60)
    print("      [CELL 3] ENGINE STARTING...")
    print("="*60)

    import os
    import re
    import csv
    from collections import deque
    from ftplib import FTP_TLS, all_errors

    # --- CONFIGURATION ---
    # We need to grab credentials from the environment (loaded in Cell 2)
    HOST = os.environ.get("FTP_HOST", "").strip()
    USER = os.environ.get("FTP_USER", "").strip()
    PASS = os.environ.get("FTP_PASS", "").strip()
    REMOTE_SUBDIR = "ons-study"

    KEY_FILE       = "match_to_unmasked.csv"
    UNMASKED_FILE  = "yates_study_2025_UNMASKED.ged"
    CSV_DB         = "engine_database.csv"

    # ---------------------------------------------------------
    # STEP 0: DOWNLOAD AUTHORITY FILE FROM SERVER
    # ---------------------------------------------------------
    print("\n[STEP 0] Downloading Authority Key from Server...")
    try:
        if not HOST:
            # Fallback if env vars aren't set (Cell 2 didn't run or failed)
            from google.colab import userdata
            HOST = userdata.get("FTP_HOST")
            USER = userdata.get("FTP_USER")
            PASS = userdata.get("FTP_PASS")

        # Try to find the key file in common locations
        locations = [
            f"/{REMOTE_SUBDIR}/{KEY_FILE}",
            f"/public_html/{REMOTE_SUBDIR}/{KEY_FILE}",
            f"/public_html/yates.one-name.net/{REMOTE_SUBDIR}/{KEY_FILE}",
            f"/{KEY_FILE}" # Sometimes it's in root
        ]

        # Download into a scratch file and only move it over KEY_FILE once a
        # transfer succeeds; writing straight to KEY_FILE would truncate the
        # local copy we promise to fall back on.
        tmp_key = KEY_FILE + ".part"
        found_key = False

        # The context manager closes the connection even when login/RETR fails.
        with FTP_TLS() as ftps:
            ftps.connect(HOST, 21)
            ftps.auth()
            ftps.login(USER, PASS)
            ftps.prot_p()

            try:
                for path in locations:
                    try:
                        with open(tmp_key, "wb") as f:
                            ftps.retrbinary(f"RETR {path}", f.write)
                    except all_errors:
                        # Not at this location (or transfer failed): try the next one.
                        continue
                    os.replace(tmp_key, KEY_FILE)
                    print(f"    ✅ Found and downloaded: {path}")
                    found_key = True
                    break
            finally:
                if os.path.exists(tmp_key):
                    os.remove(tmp_key)

        if not found_key:
            print("    ⚠️ WARNING: Could not find 'match_to_unmasked.csv' on server.")
            print("       Using local version if it exists.")

    except Exception as e:
        # Best effort: the rest of the engine can run from a local key file.
        print(f"    ⚠️ FTP Download Error: {e}")

    # ---------------------------------------------------------
    # STEP 1: FIND GEDCOM
    # ---------------------------------------------------------
    ged_files = [f for f in os.listdir('.') if f.lower().endswith('.ged') and 'unmasked' not in f.lower()]
    if not ged_files:
        print("\n[CRITICAL ERROR] No raw GEDCOM file found!")
        return False
    DEFAULT_GEDCOM = max(ged_files, key=os.path.getmtime)  # newest by mtime
    print(f"[INFO] Using GEDCOM: {DEFAULT_GEDCOM}")

    # ---------------------------------------------------------
    # STEP 2: LOAD KEYS (UNMASKING)
    # ---------------------------------------------------------
    print("\n[STEP 2] Unmasking Participants...")
    unmask_map = {}
    if os.path.exists(KEY_FILE):
        try:
            with open(KEY_FILE, mode='r', encoding='utf-8-sig', errors='replace') as f:
                for row in csv.reader(f):
                    if len(row) < 2: continue
                    code = row[0].strip().lower()
                    name = row[1].strip() # The Real Name
                    # A first column of "code" is the header row.
                    if code and name and code != "code":
                        unmask_map[code] = name
            print(f"    - Loaded {len(unmask_map)} privacy keys.")
        except Exception as e:
            print(f"    - Error reading key file: {e}")
    else:
        print("    [CRITICAL WARNING] No Key File found locally! Unmasking will fail.")

    def resolve_mask_code_greedy(payload):
        """Return the lower-cased token that follows the leading digits (and optional '&'), or None."""
        m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        if m: return m.group(2).lower()
        return None

    def resolve_name(code):
        # Crucial: If code is in map, return Real Name. Else return Code.
        return unmask_map.get(code, code)

    # ---------------------------------------------------------
    # STEP 3: GENERATE UNMASKED GEDCOM STREAM
    # ---------------------------------------------------------
    def flush_record(lines, name, out):
        """Write one buffered record, swapping its "1 NAME" lines for *name* when set."""
        for bl in lines:
            if bl.startswith("1 NAME") and name: out.write(f"1 NAME {name}\n")
            else: out.write(bl)

    with open(DEFAULT_GEDCOM, 'r', encoding='utf-8', errors='replace') as fin, \
         open(UNMASKED_FILE, 'w', encoding='utf-8') as fout:
        buffer_lines = []; real_name = None
        for line in fin:
            if line.startswith("0 @"):
                # A new record starts: emit the previous one first. The whole
                # record is buffered because NPFX may come after NAME.
                flush_record(buffer_lines, real_name, fout)
                buffer_lines = [line]; real_name = None
            else:
                buffer_lines.append(line)
                if line.startswith("1 NPFX"):
                    parts = line.split(" ", 2)
                    if len(parts) > 2:
                        code = resolve_mask_code_greedy(parts[2].strip())
                        if code: real_name = resolve_name(code)
        flush_record(buffer_lines, real_name, fout)
    print(f"    - Generated {UNMASKED_FILE}")

    # ---------------------------------------------------------
    # STEP 4: TRACE LINEAGES
    # ---------------------------------------------------------
    print("\n[STEP 3] Tracing Lineages...")
    individuals = {}; families = {}
    def clean_name(raw): return raw.replace("/", "").strip()
    def is_yates(name_str):
        if not name_str: return False
        n = name_str.lower()
        return any(variant in n for variant in ("yates", "yeates", "yate"))
    def extract_year(date_str):
        if not date_str: return ""
        m = re.search(r'\d{4}', date_str)
        return m.group(0) if m else ""

    current_id = None; current_fam = None; current_tag = None
    with open(UNMASKED_FILE, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            parts = line.strip().split(" ", 2)
            if len(parts) < 2: continue
            level, tag = parts[0], parts[1]
            payload = parts[2] if len(parts) > 2 else ""

            if level == "0":
                # Every level-0 line closes the previous record, so sub-lines
                # of e.g. a REPO/SOUR record are never attributed to the last
                # person or family seen.
                current_id = None; current_fam = None; current_tag = None
                if "INDI" in payload:
                    current_id = tag.replace("@", "")
                    individuals[current_id] = {"name": "Unknown", "parents_fam": None, "spouse_fams": [], "mask_code": "", "cm": 0, "birt": "", "deat": ""}
                elif "FAM" in payload:
                    current_fam = tag.replace("@", "")
                    families[current_fam] = {"husb": None, "wife": None}
                continue

            if current_id:
                person = individuals[current_id]
                # Leaving a BIRT/DEAT event: a later DATE belongs to some other
                # event and must not be recorded as the birth/death year.
                if level == "1" and tag not in ("BIRT", "DEAT"):
                    current_tag = None
                if tag == "NAME": person["name"] = clean_name(payload)
                elif tag == "FAMC": person["parents_fam"] = payload.replace("@", "")
                elif tag == "FAMS": person["spouse_fams"].append(payload.replace("@", ""))
                elif tag == "NPFX":
                    code = resolve_mask_code_greedy(payload)
                    if code: person["mask_code"] = code
                    # cM value: leading digits, or digits directly before "cM".
                    m = re.search(r'^(\d+)|(\d+)\s*cM', payload, re.IGNORECASE)
                    if m: person["cm"] = int(m.group(1) if m.group(1) else m.group(2))
                elif tag == "BIRT": current_tag = "BIRT"
                elif tag == "DEAT": current_tag = "DEAT"
                elif tag == "DATE" and current_tag in ["BIRT", "DEAT"]:
                    year = extract_year(payload)
                    if current_tag == "BIRT": person["birt"] = year
                    if current_tag == "DEAT": person["deat"] = year
                    current_tag = None
            elif current_fam:
                if tag == "HUSB": families[current_fam]["husb"] = payload.replace("@", "")
                elif tag == "WIFE": families[current_fam]["wife"] = payload.replace("@", "")

    print(f"    - Parsed {len(individuals)} individuals.")

    def climb_yates_line_ids(start_id):
        """Walk parent links upward from *start_id*, preferring the Yates-named parent.

        Returns (top_id, names, ids) with names/ids ordered from start to top;
        top_id is the last person actually reached.
        """
        curr = start_id; path_names = []; path_ids = []
        seen = set()  # guards against cyclic parent links in bad data
        while curr and curr not in seen:
            person = individuals.get(curr)
            if not person: break
            seen.add(curr)
            path_names.append(person["name"]); path_ids.append(curr)
            fam_id = person["parents_fam"]
            if not fam_id or fam_id not in families: break
            fam = families[fam_id]
            dad_id, mom_id = fam["husb"], fam["wife"]
            dad_name = individuals.get(dad_id, {}).get("name", ""); mom_name = individuals.get(mom_id, {}).get("name", "")
            if is_yates(dad_name) and not is_yates(mom_name): curr = dad_id
            elif is_yates(mom_name) and not is_yates(dad_name): curr = mom_id
            else: curr = dad_id if dad_id else mom_id
        # Report the last person on the path so the top name and the top
        # dates/spouse always describe the same individual.
        top_id = path_ids[-1] if path_ids else curr
        return top_id, path_names, path_ids

    def format_date_span(uid):
        if not uid or uid not in individuals: return ""
        b = individuals[uid]["birt"]; d = individuals[uid]["deat"]
        if not b and not d: return ""
        return f"({b if b else '?'} - {d if d else '?'})"

    def analyze_lineage_deep(start_id):
        """Breadth-first search up the ancestors of *start_id* for the nearest Yates-named person.

        Returns the 10-tuple consumed by the row builder below; a fixed
        "Disconnected" tuple is returned when no Yates-named ancestor exists.
        """
        queue = deque([(start_id, [], [])]); visited = set()
        while queue:
            curr, path_from_start, ids_from_start = queue.popleft()
            if curr in visited: continue
            visited.add(curr)
            person = individuals.get(curr)
            if not person: continue

            if is_yates(person["name"]):
                top_id, climb_names, climb_ids = climb_yates_line_ids(curr)
                # Oldest ancestor first, the starting individual last.
                full_line_names = list(reversed(climb_names)) + list(reversed(path_from_start))
                full_line_ids = list(reversed(climb_ids)) + list(reversed(ids_from_start))
                top_name = full_line_names[0]; top_dates = format_date_span(top_id)
                spouse_name = "missing"; spouse_dates = ""
                top_person = individuals.get(top_id)
                if top_person:
                    # First spouse family that yields a known partner wins.
                    for fid in top_person["spouse_fams"]:
                        if fid in families:
                            fam_rec = families[fid]; spouse_id = None
                            if fam_rec["husb"] == top_id: spouse_id = fam_rec["wife"]
                            elif fam_rec["wife"] == top_id: spouse_id = fam_rec["husb"]
                            if spouse_id and spouse_id in individuals:
                                spouse_name = individuals[spouse_id]["name"]; spouse_dates = format_date_span(spouse_id); break
                if "unknown" in spouse_name.lower(): spouse_name = "missing"
                pair_dated = f"{top_name} {top_dates} & {spouse_name} {spouse_dates}" if spouse_name != "missing" else f"{top_name} {top_dates}"
                pair_simple = f"{top_name} & {spouse_name}" if spouse_name != "missing" else f"{top_name}"
                rich_lineage_list = list(full_line_names)
                if rich_lineage_list: rich_lineage_list[0] = pair_dated
                lineage = " -> ".join(rich_lineage_list)
                id_path_str = ",".join(full_line_ids)
                clean_top = re.sub(r'[^a-zA-Z0-9]', '', top_name)
                clean_spouse = re.sub(r'[^a-zA-Z0-9]', '', spouse_name) if spouse_name != "missing" else "ZZZ"
                sort_key = f"{clean_top}_{clean_spouse}"
                return pair_simple, pair_dated, sort_key, top_name, top_dates, spouse_name, spouse_dates, lineage, len(full_line_names), id_path_str
            new_path = path_from_start + [person["name"]]; new_ids = ids_from_start + [curr]
            fam_id = person["parents_fam"]
            if fam_id and fam_id in families:
                fam = families[fam_id]
                if fam["husb"]: queue.append((fam["husb"], new_path, new_ids))
                if fam["wife"]: queue.append((fam["wife"], new_path, new_ids))
        fail_p = individuals.get(start_id, {})
        fail_n = fail_p.get("name", "Unknown")
        error_msg = f"Trace Failed at: {fail_n} ({start_id})"
        return "Disconnected", "⚠️ Unlinked / Disconnected Lines", "ZZ_Disconnected", "", "", "", "", error_msg, 0, ""

    # One output row per individual that carries a mask code.
    rows = []
    for uid, data in individuals.items():
        if data["mask_code"]:
            pair_simple, pair_dated, sort_key, fa1, fa1_d, fa2, fa2_d, lineage, gens, id_path = analyze_lineage_deep(uid)
            final_name = resolve_name(data["mask_code"])
            rows.append({
                "Tester-Participant-MASKED": data["mask_code"], "Tester-Participant-Unmasked": final_name, "Found Match": data["name"], "ID#": uid, "cM": data["cm"], "Spacer": "",
                "Yates DNA Ancestral Line": lineage, "Authority_FirstAncestor": pair_simple, "Authority_FirstAncestor_alpha": sort_key, "Authority_FirstAncestor_dated": pair_dated,
                "fa_1 extracted": fa1, "fa_1_Dates": fa1_d, "fa_2 extracted": fa2, "fa_2 Dates": fa2_d, "Gen_Count": gens, "Ancestral_Path_IDs": id_path
            })

    # SUFFIX-AWARE SORTER FOR CSV
    def get_sortable_surname_py(full_name):
        if not full_name: return "zzz"
        # Strip suffixes
        cleaned = re.sub(r'\b(jr\.?|sr\.?|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', str(full_name), flags=re.IGNORECASE)
        cleaned = re.sub(r'[,\.]', '', cleaned)
        parts = cleaned.strip().split()
        if not parts: return "zzz"
        return parts[-1].lower()

    rows.sort(key=lambda x: get_sortable_surname_py(x["Tester-Participant-Unmasked"]))

    fieldnames = ["Tester-Participant-MASKED", "Tester-Participant-Unmasked", "Found Match", "ID#", "cM", "Spacer", "Yates DNA Ancestral Line", "Authority_FirstAncestor", "Authority_FirstAncestor_alpha", "Authority_FirstAncestor_dated", "fa_1 extracted", "fa_1_Dates", "fa_2 extracted", "fa_2 Dates", "Gen_Count", "Ancestral_Path_IDs"]

    # Characters outside ISO-8859-15 are written as '?' (errors="replace").
    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader(); writer.writerows(rows)

    print(f"\n[SUCCESS] Engine Complete. Saved {len(rows)} verified matches to {CSV_DB}.")

# Confirmation that the engine cell finished defining run_engine().
print("✅ Cell 3 (Engine + Auto-Download) Loaded.")

✅ Cell 3 (Engine + Auto-Download) Loaded.


In [26]:
# @title [CELL 4] The Publisher (V93 - Custom Cohorts & Forensic Def)
def run_publisher():
    print("="*60)
    print("      [CELL 4] PUBLISHER STARTING...")
    print("="*60)

    import os
    import sys
    import json
    import pytz
    import pandas as pd
    from datetime import datetime
    import re

    # --- HELPER FUNCTIONS ---
    TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
    TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

    def normalize_id(val):
        """Return *val* as a cleaned person-ID string.

        The value is stringified, every ``@`` is removed and surrounding
        whitespace is trimmed. A result consisting only of digits gets an
        ``I`` prefix; anything else is returned as cleaned.
        """
        cleaned = str(val).replace('@', '').strip()
        return f"I{cleaned}" if cleaned.isdigit() else cleaned

    def build_narrative(row):
        """Compose the one-sentence HTML narrative for a single match row.

        Args:
            row: Mapping-like record offering ``.get(key, default)`` for the
                database columns read below.

        Returns:
            A sentence naming the participant, the shared cM, the found match
            (wrapped in a link built from ``TNG_BASE_URL``/``TNG_SUFFIX`` that
            opens in a new tab), the dated first ancestor and the generation
            count. Missing columns fall back to ``''``, ``'0'``,
            ``'Unknown'`` or ``0`` as coded.
        """
        tester = str(row.get('Tester-Participant-Unmasked', '')).strip()
        shared_cm = str(row.get('cM', '0'))
        ancestor = str(row.get('Authority_FirstAncestor_dated', 'Unknown'))
        match_name = str(row.get('Found Match', 'Unknown')).strip()
        generations = row.get('Gen_Count', 0)
        person_id = normalize_id(row.get('ID#', ''))
        chart_url = f"{TNG_BASE_URL}{person_id}{TNG_SUFFIX}"
        anchor = f'<a href="{chart_url}" target="_blank"><b>{match_name}</b></a>'
        return f"{tester} is a {shared_cm} cM DNA match to {anchor} is related via {ancestor} back {generations} generations."

    def build_linked_lineage(row):
        """Return the row's ancestral line with the found match hyperlinked.

        Args:
            row: Mapping-like record offering ``.get(key, default)``.

        Returns:
            The 'Yates DNA Ancestral Line' text in which every occurrence of
            the 'Found Match' name is replaced by a styled link built from
            ``TNG_BASE_URL``/``TNG_SUFFIX``. The text is returned unchanged
            when the name or the ID is empty, or the name does not occur in
            the line.
        """
        lineage = str(row.get('Yates DNA Ancestral Line', ''))
        match_name = str(row.get('Found Match', ''))
        person_id = normalize_id(row.get('ID#', ''))
        # Guard clause: nothing to link unless the name is actually present.
        if not (match_name and person_id and match_name in lineage):
            return lineage
        chart_url = f"{TNG_BASE_URL}{person_id}{TNG_SUFFIX}"
        anchor = f'<a href="{chart_url}" target="_blank" style="color:#006064;text-decoration:none;font-weight:bold;">{match_name}</a>'
        return lineage.replace(match_name, anchor)

def get_sortable_surname(full_name):
        if not full_name: return "zzz"
        s = str(full_name).lower()

        # 1. HANDLE N√âE: Stop reading at "nee" or "n√©e"
        # "Mary Yates nee Smith" becomes "Mary Yates"
        s = re.split(r'\bnee\b|\bn√©e\b', s)[0]

        # 2. STRIP SUFFIXES
        s = re.sub(r'\b(jr\.?|sr\.?|ii|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', s)
        s = re.sub(r'[\,\.]', '', s)

        parts = s.strip().split()
        if not parts: return "zzz"

        # 3. SPANISH/COMPOUND DETECTION (Simple Heuristic)
        # If the user intentionally hyphenated it (Garcia-Lopez), logic holds.
        # If no hyphen, we generally default to last word for English names.
        # To strictly enforce Spanish logic, we'd need to know the ethnicity,
        # BUT we can check for "y" (e.g. "Ortega y Gasset")
        if len(parts) >= 3 and parts[-2] == 'y':
             return parts[-3] # Sort by "Ortega"

        return parts[-1]

    def format_last_first(full_name):
        """Rearrange "First Middle Last" into "Last, First Middle".

        Name suffixes (Jr, Sr, II, III, IV, Esq, M.D., Ph.D.) and commas or
        periods are dropped first. The surname is title-cased; the given
        names keep their original casing.

        Returns:
            ``""`` for a falsy name, the input unchanged when fewer than two
            words remain after cleaning, otherwise ``"Surname, given names"``.
        """
        if not full_name:
            return ""
        no_suffix = re.sub(r'\b(jr\.?|sr\.?|ii|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', str(full_name), flags=re.IGNORECASE)
        words = re.sub(r'[\,\.]', '', no_suffix).strip().split()
        if len(words) < 2:
            return full_name
        *given, family = words
        given_names = " ".join(given)
        return f"{family.title()}, {given_names}"

    # --- 1. DATA PREP ---
    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB):
        print("‚ùå ERROR: engine_database.csv not found. Run Cell 3 first.")
        return

    pages_to_upload = {}
    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")

    # ADD RAW CSV TO UPLOAD LIST
    pages_to_upload["engine_database.csv"] = df.to_csv(index=False, encoding='iso-8859-15')
    print("    - [STAGED] engine_database.csv for download")

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")

    stats_bar_full = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Total Autosomal matches:</strong> {len(df):,}</div>"""
    stats_bar_html = stats_bar_full

    df['Long_Narrative'] = df.apply(build_narrative, axis=1)
    df['Linked_Tree_Line'] = df.apply(build_linked_lineage, axis=1)

    # --- JSON DATA PREP ---
    # One summary record per first-ancestor sort key for the "ancestors"
    # section of the JSON packet. Groups with fewer than two matches are
    # left out.
    ancestor_data = {}
    for alpha, group_df in df.groupby('Authority_FirstAncestor_alpha'):
        match_count = int(len(group_df))
        if match_count < 2:
            continue

        total_cm = int(group_df['cM'].sum())
        unique_count = int(len(group_df['Tester-Participant-Unmasked'].unique()))
        # Three most frequent testers in the group, as {name: match count}.
        top_testers = group_df['Tester-Participant-Unmasked'].value_counts().head(3).to_dict()

        # Grade the line with ONE exclusive chain. The data-quality check
        # comes first and is final: a "Disconnected"/"ZZ_" bucket must never
        # be promoted to Gold or Platinum merely because it holds many
        # matches. The remaining tiers are tested from strongest to weakest.
        alpha_text = str(alpha)
        if "Disconnected" in alpha_text or "ZZ_" in alpha_text:
            badge = "Action Required"
            integrity = 0
            verdict = "**Data Quality Issue.** Matches linked to disconnected profiles. Fix parent links in TNG."
        elif match_count >= 30 and unique_count >= 10:
            badge = "Platinum"
            integrity = 100
            verdict = "**The Platinum Standard.** Biologically confirmed beyond reasonable doubt."
        elif match_count >= 15 and unique_count >= 3:
            badge = "Gold"
            integrity = 80
            verdict = f"**Strong Genetic Confirmation.** Anchored by {unique_count} independent tester groups."
        elif match_count >= 5:
            badge = "Silver"
            integrity = 50
            verdict = f"**Likely Valid.** Supported by {match_count} matches sharing {total_cm} cM."
        else:
            badge = "Bronze"
            integrity = 25
            verdict = f"This line is an **Emerging Frontier**. With {match_count} matches, the connection is real but fragile."

        ancestor_data[alpha] = {"name": group_df.iloc[0]['Authority_FirstAncestor_dated'], "matches": match_count, "cm": total_cm, "testers": unique_count, "badge": badge, "verdict": verdict, "integrity": integrity, "list_data": top_testers}

    # One summary record per participant for the "participants" section of
    # the JSON packet.
    participant_data = {}
    for p_name, group_df in df.groupby('Tester-Participant-Unmasked'):
        match_count = int(len(group_df))
        total_cm = int(group_df['cM'].sum())
        dated_ancestors = group_df['Authority_FirstAncestor_dated']
        top_anc = dated_ancestors.mode()[0]
        # Three most frequent ancestors for this tester, as {name: count}.
        my_ancestors = dated_ancestors.value_counts().head(3).to_dict()

        # Start at the entry tier; each threshold that is met overrides the
        # one before it.
        badge = "New Tester"
        integrity = 25
        verdict = f"**Growth Opportunity.** New entrant needing cousin recruitment."
        if match_count >= 5:
            badge = "Active Cousin"
            integrity = 50
            verdict = f"**Active Contributor.** Consistent data linking to {top_anc.split('&')[0]}."
        if match_count >= 15:
            badge = "Keystone Tester"
            integrity = 90
            verdict = f"**The Keystone Driver.** Structural pillar connecting disparate branches."

        participant_data[p_name] = {
            "name": p_name,
            "matches": match_count,
            "cm": total_cm,
            "testers": 1,
            "badge": badge,
            "verdict": verdict,
            "integrity": integrity,
            "list_data": my_ancestors,
        }

    # Serialise both summaries, plus a slim per-match table, for embedding in
    # the generated pages.
    smart_packet_json = json.dumps({"ancestors": ancestor_data, "participants": participant_data})
    proof_columns = {
        'Authority_FirstAncestor_dated': 'ancestor',
        'Tester-Participant-Unmasked': 'participant',
        'cM': 'cm',
        'ID#': 'id',
        'Yates DNA Ancestral Line': 'lineage',
    }
    proof_db_json = df[list(proof_columns)].rename(columns=proof_columns).to_json(orient='records')

    # --- 2. GENERATE PAGES ---

    # ⚙️ SHARED JS
    js_tools = """
    function getSortableName(fullname) {
        if (!fullname) return "zzz";
        let clean = fullname.replace(/\\b(jr\\.?|sr\\.?|iii|iv|ii|esq\\.?|m\\.d\\.?|ph\\.d\\.?)\\b/gi, "");
        clean = clean.replace(/[\\,\\.]/g, "");
        let parts = clean.trim().split(/\\s+/);
        let surname = parts.length > 0 ? parts[parts.length - 1].toLowerCase() : "zzz";
        return surname.toLowerCase() + " " + clean.toLowerCase();
    }
    function formatName(fullname) {
        if (!fullname) return "";
        let clean = fullname.replace(/\\b(jr\\.?|sr\\.?|iii|iv|ii|esq\\.?|m\\.d\\.?|ph\\.d\\.?)\\b/gi, "");
        clean = clean.replace(/[\\,\\.]/g, "");
        let parts = clean.trim().split(/\\s+/);
        if (parts.length < 2) return fullname;
        let surname = parts.pop();
        let firstname = parts.join(" ");
        return surname.charAt(0).toUpperCase() + surname.slice(1) + ", " + firstname;
    }
    """

    # 1. Buster (Updated UI)
    buster_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates Brick Wall Buster</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}.dashboard{{max-width:900px;margin:0 auto;background:white;padding:30px;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1)}}.buster-header{{background:#f57f17;color:white;padding:20px;border-radius:8px 8px 0 0;margin:-30px -30px 20px -30px;text-align:center}}.control-panel{{background:#fff3e0;padding:20px;border-radius:8px;border:1px solid #ffcc80;margin-bottom:20px}}.tabs{{display:flex;gap:10px;margin-bottom:15px;border-bottom:2px solid #ddd;}}.tab{{padding:10px 20px;cursor:pointer;background:#eee;border-radius:5px 5px 0 0;font-weight:bold;flex:1;text-align:center;}}.tab.active{{background:#f57f17;color:white;}}select{{width:100%;padding:12px;font-size:16px;border:1px solid #f57c00;border-radius:4px;margin-top:5px}}button{{background:#e65100;color:white;border:none;padding:12px 25px;font-weight:bold;border-radius:4px;cursor:pointer;margin-top:10px;font-size:16px}}button:hover{{background:#ef6c00}}.results-box{{display:none;margin-top:30px}}.finding-box{{background:#e8eaf6;border-left:5px solid #3f51b5;padding:15px;margin-bottom:15px}}.conclusion-box{{background:#e0f2f1;border-left:5px solid #009688;padding:15px;margin-bottom:15px}}.speculation-box{{background:#ffebee;border-left:5px solid #e91e63;padding:15px;margin-bottom:15px}}.checkbox-list{{height:250px;overflow-y:scroll;border:1px solid #f57c00;background:white;border-radius:4px;padding:10px;}}.checkbox-item{{display:block;margin-bottom:5px;padding:5px;border-bottom:1px solid #eee;}}.checkbox-item:hover{{background-color:#ffe0b2;}}.checkbox-item input{{margin-right:10px;transform:scale(1.2);}}.tooltip{{cursor:pointer;font-size:1.2em;margin-left:10px;}}.tooltip:hover{{opacity:0.7;}}.info-bubble{{display:none;background:#e1f5fe;border:1px solid 
#81d4fa;padding:15px;border-radius:8px;margin-top:10px;color:#01579b;font-size:0.95em;}}</style></head><body><div class="wrap"><div class="dashboard"><div class="buster-header"><h1>üß± Brick Wall Buster</h1><p>Predictive Forensic Analysis for Stalled Lineages</p></div>{stats_bar_html}{NAV_HTML}{SITE_INFO}<div class="tabs"><div class="tab active" onclick="setMode('ancestor')">1. Define by Ancestor</div><div class="tab" onclick="setMode('custom')">2. Define by Participants</div><div class="tab" onclick="setMode('virtual')">3. Build Custom Cohort</div></div><div class="control-panel"><div id="panel-ancestor"><label style="font-weight:bold;color:#bf360c;">Select Your "End of Line" Ancestor:</label><select id="wallSelect"><option value="">-- Choose the ancestor you are stuck on --</option></select></div><div id="panel-virtual" style="display:none;"><label style="font-weight:bold;color:#bf360c;">Select Testers for Virtual Cohort (Scroll & Check):</label><div id="virtualList" class="checkbox-list"></div></div><div id="panel-custom" style="display:none;"><label style="font-weight:bold;color:#bf360c;">Select Testers (Scroll & Check): <span class="tooltip" onclick="toggleInfo()" title="Click to learn about Forensic Handshake Strategy">‚ùì</span></label><div id="info-bubble" class="info-bubble"><strong>The Forensic Handshake Strategy:</strong><br>1. <strong>Isolate:</strong> We identify your specific group (the "Cluster").<br>2. <strong>Corroborate:</strong> We look for independent matches that converge on the same ancestral line.<br>3. 
<strong>Verify:</strong> A "Handshake" occurs when multiple testers confirm the same relationship, ruling out coincidence.</div><div id="testerList" class="checkbox-list"></div></div><button onclick="runAnalysis()">üî® Bust This Wall</button></div><div id="results" class="results-box"><div class="no-print" style="float:right;"><button onclick="window.print()" style="background:#455a64;font-size:14px;">üñ®Ô∏è Print Official Report</button></div><h2 style="border-bottom:2px solid #ddd;padding-bottom:10px;margin-top:0;">Forensic Report</h2><div class="finding-box"><strong>üîç Findings:</strong> <span id="txt-finding"></span></div><div class="conclusion-box"><strong>üí° Study Context:</strong> <span id="txt-conclusion"></span></div><div class="speculation-box"><strong>üöÄ Forensic Handshake Prediction:</strong> <span id="txt-speculation"></span></div><div id="bridge-alert" style="display:none;background:#c8e6c9;border:1px solid #4caf50;padding:15px;margin-top:15px;color:#2e7d32;"></div></div></div></div><script>const DATA={smart_packet_json};const FULL_DB={proof_db_json};let MODE='ancestor';
    {js_tools}
    function toggleInfo(){{const b=document.getElementById('info-bubble');b.style.display=(b.style.display==='block')?'none':'block';}}

    // Init Tabs
    const ancSel=document.getElementById('wallSelect');
    Object.keys(DATA.ancestors).sort((a,b)=>DATA.ancestors[a].name.localeCompare(DATA.ancestors[b].name)).forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=DATA.ancestors[k].name;ancSel.appendChild(o);}});

    const testListDiv=document.getElementById('testerList');
    const virtListDiv=document.getElementById('virtualList');
    const allTesters=[...new Set(FULL_DB.map(r=>r.participant))];
    allTesters.sort((a,b)=> getSortableName(a).localeCompare(getSortableName(b)));

    let listHTML="";
    allTesters.forEach(t=>{{
        listHTML+=`<label class="checkbox-item"><input type="checkbox" value="${{t}}"> ${{formatName(t)}}</label>`;
    }});

    testListDiv.innerHTML=listHTML;
    virtListDiv.innerHTML=listHTML; // Duplicate list for Virtual Tab

    function setMode(m){{
        MODE=m;
        document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));event.target.classList.add('active');
        document.getElementById('panel-ancestor').style.display=(m==='ancestor')?'block':'none';
        document.getElementById('panel-custom').style.display=(m==='custom')?'block':'none';
        document.getElementById('panel-virtual').style.display=(m==='virtual')?'block':'none';
        document.getElementById('results').style.display='none';
    }}

    function runAnalysis(){{
        let targetName="",clusterCount=0,clusterMembers=[],clusterMatches=0;

        if(MODE==='custom'){{
            const checkedBoxes=document.querySelectorAll('#testerList input:checked');
            if(checkedBoxes.length===0)return alert("Select at least one tester.");
            clusterMembers=Array.from(checkedBoxes).map(cb=>cb.value);
            targetName="Custom Group ("+clusterMembers.length+" Testers)";
        }} else if(MODE==='virtual'){{
            const checkedBoxes=document.querySelectorAll('#virtualList input:checked');
            if(checkedBoxes.length===0)return alert("Select at least one tester for your cohort.");
            clusterMembers=Array.from(checkedBoxes).map(cb=>cb.value);
            targetName="Virtual Cohort ("+clusterMembers.length+" Testers)";
        }} else {{
            const key=ancSel.value;
            if(!key)return;
            const d=DATA.ancestors[key];
            targetName=d.name;
            clusterMembers=Object.keys(d.list_data);
        }}

        // Common Logic
        clusterCount=clusterMembers.length;
        const groupRows=FULL_DB.filter(r=>clusterMembers.includes(r.participant));
        clusterMatches=groupRows.length;

        const allMatches=FULL_DB.length,outsideMatches=allMatches-clusterMatches,pct=((clusterMatches/allMatches)*100).toFixed(2);let bridgeHTML="";const clusterRows=FULL_DB.filter(r=>clusterMembers.includes(r.participant));const bridges=clusterRows.filter(r=>r.ancestor!==targetName&&r.ancestor!==null);if(bridges.length>0){{const uniqueBridges=[...new Set(bridges.map(b=>b.participant+" -> "+b.ancestor))];if(uniqueBridges.length>0){{bridgeHTML=`<strong>üåâ BRIDGE DETECTED (Forensic Handshake Verified):</strong><br>A "Bridge" is a participant in your cluster who matches YOU but is <em>also</em> confirmed to belong to a known lineage in the database. We found <strong>${{uniqueBridges.length}}</strong> specific bridge connections.<br><br>These testers act as the "Forensic Handshake" linking your group to the following families:<br><ul>`;uniqueBridges.slice(0,5).forEach(b=>bridgeHTML+=`<li>${{b}}</li>`);bridgeHTML+=`</ul>`;document.getElementById('bridge-alert').innerHTML=bridgeHTML;document.getElementById('bridge-alert').style.display='block';}}else{{document.getElementById('bridge-alert').style.display='none';}}}}else{{document.getElementById('bridge-alert').style.display='none';}}const memStr=(clusterMembers.length>5)?clusterMembers.slice(0,5).join(", ")+"...":clusterMembers.join(", ");let findHTML=`<ul style="margin-top:0;padding-left:20px;"><li><strong>Target Group:</strong> ${{targetName}}</li><li><strong>Participants:</strong> ${{clusterCount}} (${{memStr}})</li><li><strong>Data Volume:</strong> ${{clusterMatches}} confirmed matches within this group.</li></ul>`;document.getElementById('txt-finding').innerHTML=findHTML;document.getElementById('txt-conclusion').innerHTML=`This cluster represents <strong>${{pct}}%</strong> of the total Yates database. 
There are <strong>${{outsideMatches.toLocaleString()}} other matches</strong> in the study.`;let otherGroups=[];for(const[k,v]of Object.entries(DATA.ancestors)){{if(v.name!==targetName&&v.matches>=5)otherGroups.push(v);}}otherGroups.sort((a,b)=>b.matches-a.matches);const top3=otherGroups.slice(0,3);let html="";if(top3.length===0){{html+="<p>No strong signals found. The database currently lacks a 'Proven Line' (5+ matches) that connects to your group.</p>";}}else{{html+=`<p><strong>Methodology: The Forensic Handshake.</strong> We analyzed your cluster for "Handshakes"‚Äîpatterns where multiple independent matches converge on the same ancestor to confirm a biological link. These are your most probable lineages:</p><ul>`;top3.forEach(g=>{{html+=`<li><strong>${{g.name}}</strong>: Validated by <strong>${{g.matches}} Handshake Connections</strong> (shared match instances) within your cluster. (Probability: <span style="color:green">High</span>)</li>`;}});html+=`</ul>`;}}document.getElementById('txt-speculation').innerHTML=html;document.getElementById('results').style.display='block';}}</script></body></html>"""
    pages_to_upload["brick_wall_buster.shtml"] = buster_html
    print("    - [GENERATED] Brick Wall Buster (v93 - Custom Cohorts)")

    # 2. Lineage Proof Engine
    proof_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Lineage Proof Engine</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}.proof-card{{background:white;max-width:1000px;margin:20px auto;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);padding:30px}}.badge-large{{font-size:1.2em;padding:10px 20px;border-radius:30px;font-weight:bold;color:white;display:inline-block;margin-bottom:20px}}.badge-platinum{{background:#e5e4e2;color:#333;border:2px solid #ccc}}.badge-gold{{background:#ffd700;color:#b45f06}}.badge-silver{{background:#c0c0c0;color:#333}}.badge-bronze{{background:#cd7f32}}.stats-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:15px;margin-bottom:30px}}.stat-box{{background:#f8f9fa;padding:15px;border-radius:8px;text-align:center;border:1px solid #ddd}}.stat-val{{font-size:1.8em;font-weight:bold;color:#006064}}.stat-lbl{{color:#666;font-size:0.9em;text-transform:uppercase}}table{{width:100%;border-collapse:collapse;margin-top:20px}}th{{background:#004d40;color:white;padding:12px;text-align:left}}td{{padding:10px;border-bottom:1px solid #eee}}tr:hover{{background-color:#f1f8e9}}select{{padding:10px;font-size:16px;width:100%;max-width:600px;margin-bottom:20px;border:1px solid #006064;border-radius:4px}}</style></head><body><div class="wrap"><h1 class="centerline">üß¨ Lineage Proof Engine</h1><div id="nav-slot">{stats_bar_html}{NAV_HTML}</div><div class="proof-card"><h3 style="color:#006064;margin-top:0;">Verify an Ancestral Line</h3><p>Select a couple to audit the forensic evidence supporting them.</p><select id="proofSelect" onchange="runProof()"><option value="">-- Select Ancestor --</option></select><div id="proof-result" style="display:none;"><div style="text-align:center"><span id="p-badge" class="badge-large"></span></div><div class="stats-grid"><div class="stat-box"><div class="stat-val" 
id="p-matches">0</div><div class="stat-lbl">Matches</div></div><div class="stat-box"><div class="stat-val" id="p-testers">0</div><div class="stat-lbl">Unique Testers</div></div><div class="stat-box"><div class="stat-val" id="p-cm">0</div><div class="stat-lbl">Total cM</div></div><div class="stat-box"><div class="stat-val" id="p-integrity">0%</div><div class="stat-lbl">Integrity</div></div></div><div style="background:#e0f2f1;padding:15px;border-left:5px solid #004d40;margin-bottom:20px;font-style:italic;" id="p-verdict"></div><h4 style="border-bottom:2px solid #ddd;padding-bottom:10px;">Evidence Manifest</h4><div style="max-height:500px;overflow-y:auto;"><table id="evidence-table"><thead><tr><th>Participant</th><th>cM</th><th>Lineage Path</th></tr></thead><tbody></tbody></table></div></div></div></div><script>const DATA={smart_packet_json};const DB={proof_db_json};
    const sel=document.getElementById('proofSelect');
    Object.keys(DATA.ancestors).sort((a,b)=>DATA.ancestors[a].name.localeCompare(DATA.ancestors[b].name)).forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=DATA.ancestors[k].name;sel.appendChild(o);}});
    function runProof(){{const key=sel.value;if(!key){{document.getElementById('proof-result').style.display='none';return;}}const d=DATA.ancestors[key];document.getElementById('proof-result').style.display='block';
    document.getElementById('p-badge').className='badge-large badge-'+d.badge.toLowerCase().split(' ')[0];document.getElementById('p-badge').innerText=d.badge;document.getElementById('p-matches').innerText=d.matches;document.getElementById('p-testers').innerText=d.testers;document.getElementById('p-cm').innerText=d.cm.toLocaleString();document.getElementById('p-integrity').innerText=d.integrity+'%';document.getElementById('p-verdict').innerHTML=d.verdict;
    const matches=DB.filter(r=>r.ancestor===d.name).sort((a,b)=>b.cm-a.cm);const tbody=document.querySelector('#evidence-table tbody');tbody.innerHTML='';matches.forEach(m=>{{const tr=document.createElement('tr');tr.innerHTML=`<td>${{m.participant}}</td><td>${{m.cm}}</td><td style="font-size:0.9em;color:#555;">${{m.lineage}}</td>`;tbody.appendChild(tr);}});}}</script></body></html>"""
    pages_to_upload["lineage_proof.html"] = proof_html
    print("    - [GENERATED] Lineage Proof Engine")

    # 3. Dossier
    dossier_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates DNA Forensic Dossier</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}.dossier-card{{background:white;max-width:900px;margin:20px auto;border-radius:8px;box-shadow:0 4px 20px rgba(0,0,0,0.1);overflow:hidden;position:relative}}.dossier-header{{background:#006064;color:white;padding:25px;text-align:center}}.dossier-body{{padding:30px}}.badge{{display:inline-block;padding:8px 15px;border-radius:20px;font-weight:bold;color:white;font-size:0.9em;margin-bottom:15px;text-transform:uppercase}}.badge-platinum, .badge-keystone{{background:#e5e4e2;color:#333;border:2px solid #ccc}}.badge-gold{{background:#ffd700;color:#b45f06}}.badge-silver, .badge-active{{background:#c0c0c0;color:#333}}.badge-bronze, .badge-new{{background:#cd7f32}}.metric-grid{{display:grid;grid-template-columns:1fr 1fr;gap:20px;margin-bottom:30px}}.metric-box{{background:#f8f9fa;padding:20px;border-radius:8px;text-align:center;border:1px solid #e9ecef}}.metric-val{{font-size:2em;font-weight:bold;color:#004d40}}.metric-lbl{{color:#666;font-size:0.9em;text-transform:uppercase}}.progress-container{{margin:20px 0}}.progress-bar{{background:#e9ecef;height:10px;border-radius:5px;overflow:hidden}}.progress-fill{{height:100%;background:#00838f;width:0%;transition:width 1s ease}}.verdict-box{{background:#e0f2f1;border-left:5px solid #004d40;padding:20px;margin:20px 0;font-family:'Georgia',serif;font-style:italic;color:#004d40;line-height:1.5}}.contributors{{margin-top:20px;border-top:1px solid #eee;padding-top:20px}}.contributor-item{{display:flex;justify-content:space-between;padding:8px 0;border-bottom:1px solid #f1f1f1}}select{{padding:10px;font-size:16px;width:100%;max-width:500px;margin:20px auto;display:block}}.switch{{text-align:center;margin:20px}}.switch label{{margin:0 
15px;font-weight:bold;cursor:pointer;color:#006064}}.action-btn{{padding:10px 20px;background:#00838f;color:white;border:none;border-radius:4px;cursor:pointer;font-size:16px;margin:0 5px}}.action-btn:hover{{background:#006064}}#composite-container{{background:white;max-width:900px;margin:20px auto;padding:20px;border-radius:8px;box-shadow:0 4px 20px rgba(0,0,0,0.1);display:none;}}.comp-table{{width:100%;border-collapse:collapse;}}.comp-table th{{background:#004d40;color:white;padding:12px;text-align:left;}}.comp-table td{{padding:10px;border-bottom:1px solid #eee;}}.footer-total{{margin-top:15px;text-align:right;font-weight:bold;color:#004d40;border-top:2px solid #eee;padding-top:10px;}}</style></head><body><div class="wrap"><h1 class="centerline">Yates DNA Forensic Dossier (v80)</h1><div id="nav-slot">{stats_bar_html}{NAV_HTML}</div><div class="switch"><label><input type="radio" name="mode" value="ancestor" checked onchange="switchMode()"> Search by Ancestor</label><label><input type="radio" name="mode" value="participant" onchange="switchMode()"> Search by Participant</label></div><div style="text-align:center;margin:30px;"><select id="dossierSelect"><option value="">-- Select --</option></select><div style="margin-top:15px;"><button class="action-btn" onclick="addReport()">Add Report</button><button class="action-btn" style="background:#d32f2f;" onclick="clearReports()">Clear All</button></div></div><div id="composite-container"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">Comparison Dashboard</h2><table class="comp-table"><thead><tr><th>Name</th><th>Role/Badge</th><th>Matches</th><th>Total cM</th><th>Integrity</th></tr></thead><tbody id="comp-body"></tbody></table><div id="comp-footer" class="footer-total"></div></div><div id="report-stack"></div></div><script>const DATA={smart_packet_json};let currentMode = 'ancestor';
    let compTotalMatches = 0; let compTotalCM = 0; let compCount = 0;
    {js_tools}
    function switchMode(){{currentMode = document.querySelector('input[name="mode"]:checked').value;populateDropdown();clearReports();}}
    function populateDropdown(){{
        const sel = document.getElementById('dossierSelect');
        sel.innerHTML = '<option value="">-- Select --</option>';
        const source = (currentMode === 'ancestor') ? DATA.ancestors : DATA.participants;
        const sortedKeys = Object.keys(source).sort((a,b) => {{
            const nA = source[a].name; const nB = source[b].name;
            if (currentMode === 'participant') {{ return getSortableName(nA).localeCompare(getSortableName(nB)); }}
            return nA.localeCompare(nB);
        }});
        sortedKeys.forEach(key => {{ const opt = document.createElement('option'); opt.value = key; opt.innerText = (currentMode === 'participant') ? formatName(source[key].name) : source[key].name; sel.appendChild(opt); }});
    }}
    function clearReports(){{document.getElementById('report-stack').innerHTML = '';document.getElementById('comp-body').innerHTML = '';document.getElementById('composite-container').style.display = 'none';document.getElementById('dossierSelect').selectedIndex = 0; compTotalMatches=0; compTotalCM=0; compCount=0;}}
    function addReport(){{const key = document.getElementById('dossierSelect').value;if(!key) return;const d = (currentMode === 'ancestor') ? DATA.ancestors[key] : DATA.participants[key];document.getElementById('composite-container').style.display = 'block';
    compTotalMatches += d.matches; compTotalCM += d.cm; compCount++;
    const tr = document.createElement('tr');tr.innerHTML = `<td><b>${{d.name}}</b></td><td>${{d.badge}}</td><td>${{d.matches}}</td><td>${{d.cm.toLocaleString()}}</td><td>${{d.integrity}}%</td>`;document.getElementById('comp-body').appendChild(tr);
    document.getElementById('comp-footer').innerText = `Composite Evidence: ${{compCount}} Profiles, ${{compTotalMatches.toLocaleString()}} Matches, ${{compTotalCM.toLocaleString()}} cM`;
    const badgeClass = d.badge.toLowerCase().split(' ')[0];const lblTesters = (currentMode==='ancestor') ? 'Unique Testers' : 'Study Rank';const listTitle = (currentMode === 'ancestor') ? 'Top Contributors' : 'Ancestral Connections';let listHTML = '';for (const [name, count] of Object.entries(d.list_data)) {{listHTML += `<div class="contributor-item"><span>${{name}}</span><span>${{count}} matches</span></div>`;}}const html = `<div class="dossier-card" style="display:block; animation: fadeIn 0.5s;"><div class="dossier-header"><h2 style="margin:0">${{d.name}}</h2><div style="margin-top:10px;opacity:0.9">${{currentMode.toUpperCase()}} REPORT</div></div><div class="dossier-body"><div style="text-align:center"><span class="badge badge-${{badgeClass}}">${{d.badge}}</span></div><div class="verdict-box"><strong>Forensic Analysis:</strong><br>${{d.verdict}}</div><div class="metric-grid"><div class="metric-box"><div class="metric-val">${{d.matches}}</div><div class="metric-lbl">Matches</div></div><div class="metric-box"><div class="metric-val">${{d.testers}}</div><div class="metric-lbl">${{lblTesters}}</div></div><div class="metric-box"><div class="metric-val">${{d.cm.toLocaleString()}}</div><div class="metric-lbl">Total cM</div></div><div class="metric-box"><div class="metric-val">${{d.integrity}}%</div><div class="metric-lbl">Integrity Score</div></div></div><div class="progress-container"><div style="display:flex;justify-content:space-between;margin-bottom:5px;font-size:0.9em;font-weight:bold;"><span>Score</span><span>${{d.integrity}}/100</span></div><div class="progress-bar"><div class="progress-fill" style="width:${{d.integrity}}%"></div></div></div><div class="contributors"><h3>${{listTitle}}</h3><div>${{listHTML}}</div></div></div></div>`;document.getElementById('report-stack').insertAdjacentHTML('afterbegin', html);}}populateDropdown();</script></body></html>"""
    pages_to_upload["dna_dossier.html"] = dossier_html
    print("    - [GENERATED] Forensic Dossier")

    # Guide
    contents_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates Study User Guide</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f4f7f6;padding:20px}}.guide-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:25px;max-width:1200px;margin:30px auto}}.guide-card{{background:white;padding:25px;border-radius:8px;border-left:5px solid #006064;box-shadow:0 4px 10px rgba(0,0,0,0.05);transition:transform 0.2s}}.guide-card:hover{{transform:translateY(-5px)}}.card-title{{font-size:1.4em;font-weight:bold;color:#004d40;margin-top:0}}.card-why{{color:#b71c1c;font-weight:bold;margin:10px 0 5px 0;font-size:0.9em;text-transform:uppercase}}.card-what{{color:#555;font-size:1em;line-height:1.5;margin-bottom:20px}}.card-btn{{display:inline-block;padding:10px 20px;background:#00838f;color:white;text-decoration:none;border-radius:4px;font-weight:bold}}.card-btn:hover{{background:#006064}}</style></head><body><div class="wrap"><h1 class="centerline">Welcome to the Yates DNA Study Portal</h1><div id="nav-slot">{stats_bar_html}{NAV_HTML}</div><div style="text-align:center;max-width:800px;margin:20px auto;color:#444;font-size:1.1em;">This site transforms raw DNA data into forensic genealogical evidence. Use the tools below to explore your heritage, verify ancestors, and analyze the strength of your genetic connections.</div>
    <div class="guide-grid">
        <div class="guide-card"><h2 class="card-title">1. The DNA Register</h2><div class="card-why">Why View This?</div><div class="card-what">To see the raw evidence. This is the master list of all 1,700+ DNA matches in the study, sorted by ancestral line.</div><a href="ons_yates_dna_register.shtml" class="card-btn">Open Register</a></div>
        <div class="guide-card"><h2 class="card-title">2. DNA Network</h2><div class="card-why">Why View This?</div><div class="card-what">To see the big picture. Visual clusters showing which ancestral lines are genetically "saturated" and proven by multiple testers.</div><a href="dna_network.shtml" class="card-btn">View Network</a></div>
        <div class="guide-card"><h2 class="card-title">3. Lineage Proof Engine</h2><div class="card-why">Why View This?</div><div class="card-what">To verify a connection. An interactive tool that tests if a specific ancestor is biologically confirmed by independent cousins.</div><a href="lineage_proof.html" class="card-btn">Run Proof</a></div>
        <div class="guide-card" style="border-left-color:#f57f17;"><h2 class="card-title" style="color:#e65100;">4. Brick Wall Buster</h2><div class="card-why" style="color:#bf360c;">Why View This?</div><div class="card-what">To break through a dead end. This predictive engine uses "Negative Space Analysis" to suggest which proven family line you likely belong to based on who you match.</div><a href="brick_wall_buster.shtml" class="card-btn" style="background:#ef6c00;">Bust This Wall</a></div>
        <div class="guide-card"><h2 class="card-title">5. Forensic Dossier</h2><div class="card-why">Why View This?</div><div class="card-what">To get your "Scorecard." Generate a one-page forensic report on yourself or an ancestor, grading the strength of the evidence.</div><a href="dna_dossier.html" class="card-btn">Create Dossier</a></div>
        <div class="guide-card"><h2 class="card-title">6. Research Admin Hub</h2><div class="card-why">Why View This?</div><div class="card-what">For study managers. A high-level audit showing participant statistics, masked IDs, and total study metrics.</div><a href="research_admin.html" class="card-btn" style="background:#455a64;">Admin Access</a></div>
        <div class="guide-card"><h2 class="card-title">7. Data Glossary</h2><div class="card-why">Why View This?</div><div class="card-what">To understand the terms. Definitions for "Keystone Tester," "Platinum Standard," and other forensic terms used in this study.</div><a href="data_glossary.shtml" class="card-btn" style="background:#78909c;">Read Glossary</a></div>
        <div class="guide-card" style="border-left-color:#fbc02d;"><h2 class="card-title">8. Corrections & Mysteries</h2><div class="card-why">See Something? Say Something.</div><div class="card-what">We know we aren't 100% right, nor 100% wrong. Genealogy is a collaboration. If you can solve a mystery or correct a detail, tell us. <br><br><strong>Important:</strong> Please copy/paste the <strong>ID# (e.g., I1234)</strong> so we can fix the correct person.</div><a href="mailto:yates@one-name.org?subject=Yates DNA Study: Correction / Mystery&body=I have a correction or potential update regarding Person ID: [PASTE ID HERE]. Here are the details:" class="card-btn" style="background:#f9a825;color:#333;">Email Correction</a></div>
    </div></div></body></html>"""
    pages_to_upload["contents.shtml"] = contents_html
    print("    - [GENERATED] Contents Guide")

    # Subscribe & Theory & Share
    pages_to_upload["subscribe.shtml"] = make_page("Subscribe", SUBSCRIBE_CONTENT, len(df), "subscribe", stats_bar=stats_bar_html)
    pages_to_upload["share_dna.shtml"] = make_page("Share Your DNA", SHARE_CONTENT, len(df), "share", stats_bar=stats_bar_html)
    pages_to_upload["dna_theory_of_the_case.htm"] = make_page("The Yates DNA Strategy", THEORY_CONTENT, len(df), "theory", stats_bar=stats_bar_html)

    # Admin Hub
    part_stats = df.groupby('Tester-Participant-Unmasked').agg({'Tester-Participant-MASKED': 'first', 'ID#': 'count'}).reset_index().rename(columns={'ID#': 'Match_Count'})
    part_stats['Sort_Key'] = part_stats['Tester-Participant-Unmasked'].apply(get_sortable_surname)
    part_stats = part_stats.sort_values(['Sort_Key', 'Tester-Participant-Unmasked'], ascending=[True, True])
    total_matches = part_stats['Match_Count'].sum(); part_stats['Share_Pct'] = (part_stats['Match_Count'] / total_matches) * 100
    admin_rows = []
    for _, r in part_stats.iterrows():
        name = r['Tester-Participant-Unmasked']
        full_sort_val = get_sortable_surname(name)
        display_name = format_last_first(name) # V86: Flip Name in Admin Table
        row = f"<tr><td>{r['Tester-Participant-MASKED']}</td><td data-sort='{full_sort_val}'><b>{display_name}</b></td><td>{r['Match_Count']}</td><td>{r['Share_Pct']:.2f}%</td></tr>"
        admin_rows.append(row)

    # V86: Admin Buttons
    admin_buttons = """<div style="text-align:center;margin:20px 0;"><a href="admin_singletons.shtml" class="action-btn" style="background:#fbc02d;color:#333;margin-right:10px;">üîç View Singleton Lines (Housekeeping)</a><a href="engine_database.csv" class="action-btn" style="background:#455a64;">‚¨áÔ∏è Download Raw Data (CSV)</a></div>"""

    pages_to_upload["research_admin.html"] = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates Research Admin Hub</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}.dashboard-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:20px;margin:30px auto;max-width:1200px}}.dash-card{{background:white;padding:20px;border-radius:8px;text-align:center;box-shadow:0 4px 6px rgba(0,0,0,0.1);transition:transform 0.2s;text-decoration:none;color:#333;border:1px solid #ddd}}.dash-card:hover{{transform:translateY(-5px);border-color:#006064;background:#e0f7fa}}.dash-icon{{font-size:40px;margin-bottom:10px;display:block}}.dash-title{{font-weight:bold;font-size:1.1em;color:#006064}}.audit-table-wrapper{{background:white;padding:25px;border-radius:8px;box-shadow:0 4px 6px rgba(0,0,0,0.1);max-width:1200px;margin:0 auto}}.audit-table{{width:100%;border-collapse:collapse}}.audit-table th{{background:#004d40;color:white;padding:12px;text-align:left;position:sticky;top:0}}.audit-table td{{padding:10px;border-bottom:1px solid #eee}}.audit-table tr:hover{{background-color:#f5f5f5}}.total-row{{background:#e0f2f1;font-weight:bold;border-top:2px solid #004d40}}.action-btn{{padding:10px 20px;text-decoration:none;border-radius:4px;font-weight:bold;display:inline-block;}}</style></head><body><div class="wrap"><h1 class="centerline">Yates Research Admin Hub</h1><div id="nav-slot">{stats_bar_html}{NAV_HTML}</div><div class="dashboard-grid"><a href="ons_yates_dna_register.shtml" class="dash-card"><span class="dash-icon">üìã</span><span class="dash-title">DNA Register</span></a><a href="dna_network.shtml" class="dash-card"><span class="dash-icon">üï∏Ô∏è</span><span class="dash-title">DNA Network</span></a><a href="lineage_proof.html" class="dash-card"><span class="dash-icon">üß¨</span><span class="dash-title">Proof Engine</span></a><a href="dna_dossier.html" 
class="dash-card"><span class="dash-icon">üìÅ</span><span class="dash-title">Forensic Dossier</span></a><a href="just-trees.shtml" class="dash-card"><span class="dash-icon">üå≥</span><span class="dash-title">Trees View</span></a></div><div class="audit-table-wrapper"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">Participant Activity Report (Sorted by Surname)</h2>{admin_buttons}<div style="max-height:600px;overflow-y:auto;"><table class="audit-table sortable"><thead><tr><th>Masked ID</th><th>Unmasked Participant</th><th>Matches</th><th>% Share</th></tr></thead><tbody>{''.join(admin_rows)}</tbody><tfoot><tr class="total-row"><td colspan="2" style="text-align:right;padding-right:20px;">TOTAL DATABASE:</td><td>{total_matches}</td><td>100%</td></tr></tfoot></table></div></div></div><script>{js_tools}</script></body></html>"""
    print("    - [GENERATED] Research Admin Hub (with Download & Singleton Links)")

    # Glossary & Registers
    pages_to_upload["data_glossary.shtml"] = make_page("Data Glossary", GLOSSARY_CONTENT, len(df), "glossary", stats_bar=stats_bar_html)
    df_par = df.copy(); df_par.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["ons_yates_dna_register_participants.shtml"] = make_page("ONS Yates Study DNA Register", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_par.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', len(df), "participant", stats_bar=stats_bar_html)

    # 2. ANCESTOR REGISTER (Show 2+ Matches Only)
    match_counts = df['Authority_FirstAncestor_dated'].value_counts()
    valid_ancestors = match_counts[match_counts >= 2].index
    df_anc = df[df['Authority_FirstAncestor_dated'].isin(valid_ancestors)].copy()
    stats_bar_anc = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Validated Matches (2+):</strong> {len(df_anc):,} <span style="color:#d32f2f;">(Singleton matches hidden)</span></div>"""
    df_anc.sort_values(by=['Authority_FirstAncestor_alpha', 'Yates DNA Ancestral Line'], ascending=[True, True], inplace=True)
    df_anc_disp = df_anc.copy()
    df_anc_disp.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["ons_yates_dna_register.shtml"] = make_page("ONS Yates Study DNA Register", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_anc_disp.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', len(df_anc), "ancestor", stats_bar=stats_bar_anc)
    pages_to_upload["yates_ancestor_register.shtml"] = pages_to_upload["ons_yates_dna_register.shtml"]

    # 3. SINGLETON REGISTER (Housekeeping View)
    singleton_ancestors = match_counts[match_counts == 1].index
    df_single = df[df['Authority_FirstAncestor_dated'].isin(singleton_ancestors)].copy()
    stats_bar_single = f"""<div style="background:#fff3e0;border-top:1px solid #ffcc80;border-bottom:1px solid #ffcc80;font-family:sans-serif;font-size:12px;color:#e65100;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>HOUSEKEEPING VIEW:</strong> Showing {len(df_single):,} singleton matches. These lines need confirmation.</div>"""
    df_single.sort_values(by=['Authority_FirstAncestor_alpha'], ascending=[True])
    df_single_disp = df_single.copy()
    df_single_disp.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    pages_to_upload["admin_singletons.shtml"] = make_page("Singleton Match Register", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_single_disp.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', len(df_single), "singleton", stats_bar=stats_bar_single)
    print("    - [GENERATED] Singleton Register (Housekeeping)")

    # Trees
    df_tree = df_anc[['Linked_Tree_Line', 'Authority_FirstAncestor_alpha']].copy()
    df_tree.rename(columns={'Linked_Tree_Line': 'TEMP'}, inplace=True)
    df_tree.sort_values(by=['Authority_FirstAncestor_alpha'], ascending=[False], inplace=True)
    pages_to_upload["just-trees.shtml"] = make_page("Ancestor Register (Trees View)", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', len(df_anc), "tree_za", stats_bar=stats_bar_anc)
    df_tree.sort_values(by=['Authority_FirstAncestor_alpha'], ascending=[True], inplace=True)
    pages_to_upload["just-trees-az.shtml"] = make_page("Ancestor Register (Trees View)", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', len(df_anc), "tree_az", stats_bar=stats_bar_anc)

    # Network
    network_buffer = []
    for ancestor_name, group_df in sorted(df.groupby('Authority_FirstAncestor_dated'), key=lambda x: len(x[1]), reverse=True):
        if len(group_df) < 2: continue
        total_cm_g = group_df['cM'].sum(); unique_testers_g = len(group_df['Tester-Participant-Unmasked'].unique())
        analyzer_comment = f"""<div style="background:#fffde7;border-left:6px solid #fbc02d;padding:10px;margin-bottom:15px;font-family:sans-serif;color:#333;font-size:0.95em;"><strong>Collateral Saturation Analysis:</strong> Validated by <b>{unique_testers_g} independent testers</b> representing distinct lines. This cluster aggregates <b>{len(group_df)} matches</b> and <b>{total_cm_g:.0f} cM</b> of shared DNA, providing forensic confirmation of the ancestral couple.</div>"""
        group_df = group_df.sort_values('cM', ascending=False)
        network_buffer.append(f"""<details style="background:white;margin-bottom:15px;border:1px solid #ddd;border-radius:5px;overflow:hidden;"><summary style="background:#e0f2f1;padding:15px;cursor:pointer;font-weight:bold;color:#006064;list-style:none;"><span style="font-size:1.1em;">{ancestor_name}</span> <span style="float:right;color:#004d40;font-size:0.9em;">Matches: {len(group_df)} | Total cM: {total_cm_g:.0f}</span></summary><div style="padding:15px;">{analyzer_comment}<table class="dataframe" border="1"><thead><tr style="text-align:left;"><th>Tester</th><th>cM</th><th>Lineage</th></tr></thead><tbody>""")
        for _, r in group_df.iterrows(): network_buffer.append(f"<tr><td>{r['Tester-Participant-Unmasked']}</td><td>{r['cM']}</td><td>{r['Yates DNA Ancestral Line']}</td></tr>")
        network_buffer.append("</tbody></table></div></details>")
    pages_to_upload["dna_network.shtml"] = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Participating DNA Network</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css"><style>summary::-webkit-details-marker{{display:none}}summary{{outline:none}}</style></head><body id="top"><div class="wrap"><h1 class="centerline">Participating DNA Network</h1><div id="nav-slot">{stats_bar_html}{NAV_HTML}</div>{SITE_INFO}<div style="margin:20px auto;width:90%;">{"".join(network_buffer)}</div><script>{js_tools}</script></body></html>"""
    print("    - [GENERATED] DNA Network")

    # --- 3. UPLOAD (With Ghost Page Fix) ---
    print("\n[STEP 3] Uploading to Server...")
    if len(pages_to_upload) > 0:
        try:
            ftps = connect_session()
            target_dirs = [f"/{REMOTE_SUBDIR}", f"/public_html/{REMOTE_SUBDIR}", f"/public_html/yates.one-name.net/{REMOTE_SUBDIR}", f"htdocs/{REMOTE_SUBDIR}", REMOTE_SUBDIR]
            found_dir = False
            for d in target_dirs:
                try:
                    ftps.cwd(d)
                    found_dir = True
                    print(f"[SUCCESS] Locked onto correct web directory: {d}")
                    break
                except: pass

            if not found_dir:
                print(f"\n[CRITICAL WARNING] Could not locate the '{REMOTE_SUBDIR}' folder!")
                print(f"STUCK IN: {ftps.pwd()}")
                print("Aborting upload to avoid ghost files.")
            else:
                for fn, content in pages_to_upload.items():
                    with open(fn, "w", encoding="utf-8") as f: f.write(content)
                    with open(fn, "rb") as fh: ftps.storbinary(f"STOR {fn}", fh)
                    print(f"    - Uploaded: {fn}")
                print(f"\n[SUCCESS] All Files Published Live to {ftps.pwd()}")
            ftps.quit()
        except Exception as e: print(f"\n[ERROR] Upload Failed: {e}")
    else: print("[WARN] No content generated to upload.")

# Confirmation printed when this cell finishes executing; it runs at cell
# level, not inside the publisher function defined above.
print("‚úÖ Cell 4 (Publisher) Loaded.")

‚úÖ Cell 4 (Publisher) Loaded.


In [27]:
# @title [CELL 5] MASTER ORCHESTRATOR (Run This Button)
# Calls run_engine() and then run_publisher() back to back. Both names must
# already exist in the notebook's global namespace; when they do not, the cell
# prints a hint to run Cells 3 and 4 and does nothing else.
import os, sys
import traceback

print("="*60)
print("      MASTER ORCHESTRATOR (V81)")
print("      (Running Engine -> Publisher -> Upload)")
print("="*60)

if 'run_engine' not in globals() or 'run_publisher' not in globals():
    print("‚ùå ERROR: Modules not loaded! Please run Cells 3 and 4 first.")
else:
    # Remember which phase is in flight so a failure can be attributed to it;
    # a single handler covers both phases.
    active_phase = "PHASE 1 (DATA ENGINE)"
    print("\n>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...")
    try:
        run_engine()
        print("‚úÖ PHASE 1 COMPLETE.")

        active_phase = "PHASE 2 (PUBLISHER & UPLOAD)"
        print("\n>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...")
        run_publisher()
        print("‚úÖ PHASE 2 COMPLETE.")

        print("\n" + "="*60)
        print("      üèÜ V81 UPDATE SUCCESSFUL")
        print("="*60)
    except Exception as e:
        print(f"\n‚ùå CRITICAL FAILURE: {e}")
        # The exception text alone rarely pinpoints the cause: name the phase
        # and emit the full traceback so the failing line can be located.
        print(f"    Failed during: {active_phase}")
        traceback.print_exc()

      MASTER ORCHESTRATOR (V81)
      (Running Engine -> Publisher -> Upload)

>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...
      [CELL 3] ENGINE STARTING...

[STEP 0] Downloading Authority Key from Server...
    ‚úÖ Found and downloaded: /ons-study/match_to_unmasked.csv
[INFO] Using GEDCOM: yates_study_2025.ged

[STEP 2] Unmasking Participants...
    - Loaded 94 privacy keys.
    - Generated yates_study_2025_UNMASKED.ged

[STEP 3] Tracing Lineages...
    - Parsed 63676 individuals.

[SUCCESS] Engine Complete. Saved 1711 verified matches to engine_database.csv.
‚úÖ PHASE 1 COMPLETE.

>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...
      [CELL 4] PUBLISHER STARTING...
    - [STAGED] engine_database.csv for download
    - [GENERATED] Brick Wall Buster (v93 - Custom Cohorts)
    - [GENERATED] Lineage Proof Engine
    - [GENERATED] Forensic Dossier
    - [GENERATED] Contents Guide
    - [GENERATED] Research Admin Hub (with Download & Singleton Links)
    - [GENERATED] Singleton Register (H

In [None]:
# @title [CELL 6] The Time Machine (Archiver + Dropbox Sync)
import zipfile
import os
import pytz
import time
from datetime import datetime
from google.colab import files
from google.colab import userdata

# --- 1. INSTALL DROPBOX (IF MISSING) ---
# Import the Dropbox SDK, installing it on the fly when the import fails.
try:
    import dropbox
    from dropbox.exceptions import AuthError
except ImportError:
    import importlib
    import subprocess
    import sys

    # Install through the interpreter running this cell so the package lands
    # in the environment that will import it; check_call raises if pip fails
    # instead of silently continuing to a second ImportError.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "dropbox"])
    # Make the import system notice the freshly installed package.
    importlib.invalidate_caches()
    import dropbox
    from dropbox.exceptions import AuthError

def run_archiver():
    """Zip the generated site files and push the archive to several destinations.

    Steps, in order:
      1. Collect files in the current directory whose names end in one of the
         whitelisted extensions and compress them into a timestamped zip
         (timestamp taken in the US/Eastern zone).
      2. Upload the zip over FTPS into ``/ons-study/backups``, creating the
         folder when it cannot be entered. If the folder is still unavailable
         the FTP step is skipped rather than dropping the archive into
         whatever directory the session happens to be in.
      3. Upload the same zip to Dropbox under ``/Backups/``.
      4. Ask Colab to download the zip to the local machine.

    Steps 2-4 are best-effort: a failure is printed and the next step still
    runs. The function returns ``None``; it returns early when there is
    nothing to pack or when the zip cannot be written.
    """
    print("="*60)
    print("      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC")
    print("="*60)

    # --- 2. CREATE ZIP (SAFE MODE) ---
    # '.zip' is deliberately absent from the whitelist so that earlier backup
    # archives are never packed inside a new one.
    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css')
    files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

    if not files_to_pack:
        print("‚ùå No generated files found! Run the Publisher (Cell 4) first.")
        return

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    print(f"üì¶ Compressing {len(files_to_pack)} files into {zip_name}...")
    try:
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file in files_to_pack:
                zf.write(file)
        print(f"    ‚úÖ Archive Created: {zip_name} ({os.path.getsize(zip_name)/1024:.1f} KB)")
    except Exception as e:
        print(f"    ‚ùå Compression Failed: {e}")
        return

    # --- 3. FTP UPLOAD (BACKUPS FOLDER) ---
    print("\n[STEP 2] Uploading to Web Server (FTP)...")
    backup_dir = "/ons-study/backups"
    try:
        from ftplib import FTP_TLS, all_errors
        # Environment variables take precedence; Colab secrets are the fallback.
        host = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        user = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        password = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

        # The context manager closes the session even when a step below fails.
        with FTP_TLS() as ftps:
            ftps.connect(host, 21); ftps.auth(); ftps.login(user, password); ftps.prot_p()

            try:
                ftps.cwd(backup_dir)
            except all_errors:
                # Folder could not be entered: try to create it. If that fails
                # too, the error propagates and the upload is skipped, so the
                # archive is never stored in an unintended directory.
                ftps.mkd(backup_dir)
                ftps.cwd(backup_dir)

            with open(zip_name, "rb") as fh:
                ftps.storbinary(f"STOR {zip_name}", fh)
            print(f"    ‚úÖ FTP Success: {backup_dir}/{zip_name}")
    except Exception as e:
        print(f"    ‚ö†Ô∏è FTP Upload skipped: {e}")

    # --- 4. DROPBOX SYNC (NEW) ---
    print("\n[STEP 3] Syncing to Dropbox...")
    try:
        # Initialize with Refresh Token (Long-term access)
        dbx = dropbox.Dropbox(
            app_key=userdata.get('DBX_APP_KEY'),
            app_secret=userdata.get('DBX_APP_SECRET'),
            oauth2_refresh_token=userdata.get('DBX_REFRESH_TOKEN')
        )

        # Upload the Zip, replacing any existing file at the same path.
        target_path = f"/Backups/{zip_name}"
        with open(zip_name, "rb") as f:
            dbx.files_upload(f.read(), target_path, mode=dropbox.files.WriteMode.overwrite)

        print(f"    ‚úÖ Dropbox Success: {target_path}")

    except Exception as e:
        print(f"    ‚ùå Dropbox Upload Failed: {e}")
        print("       (Check DBX_APP_KEY, DBX_APP_SECRET, DBX_REFRESH_TOKEN in Colab Secrets)")

    # --- 5. LOCAL DOWNLOAD (SAFETY NET) ---
    print("\n[STEP 4] Triggering Local Download...")
    try:
        files.download(zip_name)
    except Exception as e:
        print(f"    ‚ö†Ô∏è Auto-download blocked: {e}")

    print("‚úÖ Archival Process Complete.")

# Run it: the archiver executes immediately whenever this cell is run.
run_archiver()