<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [98]:
# @title [CELL 1] Setup + Helper Functions (V86 - Search & Filter Power)
import os
import sys
import re
import csv
import json
import html
import socket
import pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# --- INSTALL TQDM IF MISSING ---
# Install through the interpreter that is running this notebook
# (`sys.executable -m pip`) so the package lands in the active environment.
# A bare `pip` resolved by the shell may belong to a different Python, and
# os.system() ignores a failed install until the second import blows up;
# check_call() raises CalledProcessError at the point of failure instead.
try:
    import tqdm
except ImportError:
    import subprocess

    subprocess.check_call([sys.executable, "-m", "pip", "install", "tqdm"])
    import tqdm

# Banner confirming which version of the setup cell was executed.
print("="*60)
print("      [CELL 1] SETUP LOADED (V86)")
print("      (Includes: Search Bars, Singleton Logic, Data Download)")
print("="*60)

# ==============================================================================
# 1. GLOBAL HELPER FUNCTIONS + HTML ASSETS
# ==============================================================================
# Two halves of a TNG vertical-chart URL: the base ends in "personID=" and the
# suffix supplies the remaining query parameters (tree, parent set, display
# mode, 15 generations).
# NOTE(review): presumably joined as TNG_BASE_URL + <person id> + TNG_SUFFIX;
# the call site is not visible in this part of the file.
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

# Site-wide navigation bar: an inline <style> block followed by the
# <nav class="oldnav"> link list. make_page() places it inside #nav-slot on
# every generated page. The @media print rule hides the nav and every
# .no-print / .action-btn / .control-panel / .tabs element when printing.
# NOTE(review): the "DNA Register" link targets yates_ancestor_register.shtml
# while make_page()'s sort toggle links to ons_yates_dna_register.shtml;
# confirm both files are published.
NAV_HTML = """
<style>
nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none}
nav.oldnav li{display:inline-block}
nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px}
nav.oldnav a:hover{background-color:#00838f!important}
@media print { nav.oldnav, #nav-slot, .no-print, .action-btn, .control-panel, .tabs { display: none !important; } }
</style>
<nav class="oldnav"><ul>
<li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li>
<li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li>
<li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li>
<li><a href="/ons-study/just-trees.shtml">Trees</a></li>
<li><a href="/ons-study/dna_network.shtml">DNA Network</a></li>
<li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li>
<li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li>
<li><a href="/ons-study/brick_wall_buster.shtml" style="background:#f57f17;color:black !important;">Brick Wall Buster</a></li>
<li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li>
<li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li>
<li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li>
</ul></nav>"""

# Methodology banner that make_page() shows above the content on register,
# tree, proof, network, dossier, buster and singleton views (hidden in print
# via .no-print). The em dash is written as the &mdash; entity so the text
# renders correctly regardless of how this file is encoded or re-saved.
SITE_INFO = """<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>&mdash;a method that blends genealogical reasoning with data-driven logic.</p></div>"""

# V86: Added Table Filtering Logic
# Client-side script appended to every page by make_page():
#   * tables whose class list contains "sortable" get click-to-sort headers
#     (numeric compare when both cells parse as numbers, text compare otherwise);
#   * window.filterTable() is the onkeyup handler for the #tableSearch box and
#     hides every body row with no cell containing the typed text.
# filterTable() returns early when the search box or the table is absent, so a
# page without one of them no longer throws a TypeError on keyup.
JS_CORE = r"""<script type="text/javascript">
(function(){
    // SORTING LOGIC
    function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();}
    function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);}
    function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}}

    // FILTERING LOGIC
    window.filterTable = function() {
        var input = document.getElementById("tableSearch");
        var table = document.getElementById("reg-table") || document.querySelector("table.dataframe");
        if (!input || !table) return;
        var filter = input.value.toUpperCase();
        var tr = table.getElementsByTagName("tr");
        for (var i = 1; i < tr.length; i++) {
            var tdArr = tr[i].getElementsByTagName("td");
            var found = false;
            for (var j = 0; j < tdArr.length; j++) {
                if (tdArr[j]) {
                    var txtValue = tdArr[j].textContent || tdArr[j].innerText;
                    if (txtValue.toUpperCase().indexOf(filter) > -1) {
                        found = true;
                        break;
                    }
                }
            }
            tr[i].style.display = found ? "" : "none";
        }
    }

    function init(){
        var t=document.getElementsByTagName('table');
        for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]);
    }
    if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init();
})();
</script>"""

# Static body of the Data Glossary page. Adjacent string literals are joined
# by the compiler, so the value is one line of HTML with no added whitespace.
GLOSSARY_CONTENT = (
    """<div style="background:white;padding:25px;border-radius:8px;border:1px solid #ddd;font-family:sans-serif;line-height:1.6;">"""
    """<h2 style="color:#006064;border-bottom:2px solid #004d40;padding-bottom:10px;">ONS Yates Study: Data Glossary</h2>"""
    # Section 1: identity columns
    """<h3 style="color:#00838f;margin-top:25px;">1. Identity Columns</h3>"""
    """<ul style="list-style-type:none;padding-left:0;">"""
    """<li style="margin-bottom:15px;"><strong>Tester-Participant-MASKED (The Trigger):</strong><br>The unique privacy code extracted from the user's NPFX tag.</li>"""
    """<li style="margin-bottom:15px;"><strong>Tester-Participant-Unmasked:</strong><br>The real name of the tester.</li>"""
    """</ul>"""
    # Section 2: analysis terms
    """<h3 style="color:#00838f;margin-top:25px;">2. Analysis Terms</h3>"""
    """<ul style="list-style-type:none;padding-left:0;">"""
    """<li style="margin-bottom:15px;"><strong>Platinum Standard:</strong><br>Lineages with 30+ matches and 10+ unique sources. Biologically proven.</li>"""
    """<li style="margin-bottom:15px;"><strong>Keystone Tester:</strong><br>A high-value participant (15+ matches) who anchors a specific branch.</li>"""
    """</ul></div>"""
)

# Static body of the Subscribe page (mailto link to the Groups.io list).
# The envelope emoji is written as the numeric reference &#128231; (U+1F4E7)
# so it renders correctly regardless of how this file is encoded or re-saved.
SUBSCRIBE_CONTENT = """<div style="background:white;padding:40px;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);max-width:800px;margin:30px auto;text-align:center;font-family:'Segoe UI',sans-serif;"><h1 style="color:#006064;margin-bottom:10px;">Join the Yates Research Community</h1><p style="color:#555;font-size:1.1em;line-height:1.6;margin-bottom:30px;">Stay connected with the latest breakthroughs in the Yates One-Name Study. Get notified about new DNA groups, lineage verifications, and quarterly reports.</p><div style="background:#e0f2f1;padding:25px;border-radius:8px;border:1px solid #b2dfdb;display:inline-block;"><h3 style="margin-top:0;color:#004d40;">&#128231; One-Click Subscribe</h3><p style="margin-bottom:20px;">Click below to send a subscription request to our Groups.io list.</p><a href="mailto:yates-one-name-study+subscribe@groups.io?subject=Subscribe" style="display:inline-block;padding:15px 30px;background:#00838f;color:white;text-decoration:none;border-radius:5px;font-weight:bold;font-size:1.1em;box-shadow:0 2px 5px rgba(0,0,0,0.2);">Subscribe Now</a></div><p style="margin-top:30px;font-size:0.9em;color:#777;">Powered by Groups.io. You will receive a confirmation email shortly.</p></div>"""

# Static body of the Share DNA page (how to invite the study as an AncestryDNA
# collaborator). The apostrophe in "Ron's" is written as &rsquo; so it renders
# correctly regardless of how this file is encoded or re-saved.
SHARE_CONTENT = """<div style="max-width:900px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><div style="text-align:center;margin-bottom:40px;"><h1 style="color:#0277bd;margin-bottom:10px;">Share Your Ancestry DNA Matches</h1><p style="font-size:1.1em;color:#555;">Ancestry provides a built-in sharing feature that allows you to grant limited access to your DNA matches <strong>without sharing your personal account details</strong>. You remain in full control of your account at all times.</p></div><div style="display:grid;grid-template-columns:1fr 1fr;gap:30px;margin-bottom:30px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #0277bd;"><h3 style="color:#0277bd;margin-top:0;">How Sharing Works</h3><p>From your AncestryDNA <strong>Settings</strong> page, you may invite another individual by email and assign one of the following roles:</p><ul style="padding-left:20px;"><li><strong>Viewer</strong> (Read only)</li><li><strong>Collaborator</strong> (Recommended for Study)</li><li><strong>Manager</strong> (Full Control)</li></ul></div><div style="background:#e3f2fd;padding:25px;border-radius:8px;border:1px solid #90caf9;"><h3 style="color:#01579b;margin-top:0;">Privacy & Control</h3><p>This sharing arrangement provides <strong>limited access only</strong>. 
It does not allow changes to your account and does not expose your personal details.</p><p><strong>You may revoke access at any time through Ancestry.</strong></p></div></div><div style="background:white;padding:30px;border-radius:8px;border:1px solid #ddd;box-shadow:0 4px 15px rgba(0,0,0,0.05);"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">How to Share for the Yates One-Name Study</h2><ol style="font-size:1.1em;line-height:1.8;padding-left:25px;"><li>Open your <strong>AncestryDNA Settings</strong>.</li><li>Scroll to the section labeled "DNA Result Sharing" and click <strong>Invite</strong>.</li><li>Send the invitation to <strong>Ron Yates</strong> at: <br><span style="background:#fff3e0;padding:5px 10px;border-radius:4px;font-weight:bold;color:#e65100;font-family:monospace;font-size:1.2em;">yatesvilleron@gmail.com</span></li><li>Assign the role: <strong>Collaborator</strong>.</li></ol><div style="background:#fffde7;border-left:5px solid #fbc02d;padding:15px;margin-top:20px;font-size:0.95em;"><strong>Why Collaborator?</strong> The Collaborator role allows Ron to review shared matches and create small internal groups (colored dots) to identify which matches have been reviewed and which have contributed evidence to the Yates One-Name Study.</div></div><div style="margin-top:40px;"><h3 style="color:#006064;">What Happens Next?</h3><p>After sharing, you will receive an invitation to subscribe to the <strong>Yates One-Name Study Groups.io mailing list</strong>, where DNA proof summaries and study findings are shared.</p><h3 style="color:#006064;">Reciprocal Sharing (Optional)</h3><p>If you are interested in viewing Ron&rsquo;s DNA matches, simply let him know. When a direct match exists, that relationship will be reflected in the study findings.</p></div></div>"""

# Static body of the "Yates DNA Strategy" theory page. Both em dashes are
# written as &mdash; so they render correctly regardless of how this file is
# encoded or re-saved.
THEORY_CONTENT = """<div style="max-width:1000px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><h1 style="text-align:center;color:#004d40;font-size:2.5em;margin-bottom:10px;">The Yates DNA Strategy</h1><p style="text-align:center;font-size:1.2em;color:#666;margin-bottom:40px;">Moving beyond traditional Y-DNA to solve modern genealogical mysteries.</p><div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:30px;margin-bottom:40px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #00838f;"><h2 style="color:#006064;margin-top:0;">The Autosomal Revolution</h2><p>Traditional one-name studies rely almost exclusively on Y-DNA to trace the paternal surname line. While valuable for deep history, this approach ignores 50% of our ancestors at every generation.</p><p><strong>Our Focus:</strong> We utilize <strong>Autosomal DNA (atDNA)</strong>&mdash;inherited from both parents&mdash;to verify connections across <em>all</em> branches. This allows us to:</p><ul style="padding-left:20px;color:#444;"><li>Bridge the "Gender Gap" by tracing female descendants.</li><li>Verify paper trails for the last 300 years (Genealogical Time).</li><li>Cluster "Floating" Yates families into their correct lines.</li></ul></div><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #f9a825;"><h2 style="color:#f57f17;margin-top:0;">Collateral Saturation</h2><p>A single DNA match can be luck. Ten matches is a statistic. <strong>Thirty matches is a fact.</strong></p><p>We employ a technique called <strong>Collateral Saturation</strong>. We don't look for one "Golden Match." We analyze groups of matches from independent cousin lines. 
When descendants from four different children of <em>William & Mary Yates</em> all share DNA with you, the relationship is biologically confirmed.</p><div style="text-align:center;margin-top:20px;"><a href="dna_network.shtml" style="display:inline-block;padding:10px 20px;background:#f9a825;color:#333;text-decoration:none;border-radius:4px;font-weight:bold;">View the Network</a></div></div></div><div style="background:#e0f2f1;padding:30px;border-radius:8px;border:1px solid #b2dfdb;margin-bottom:40px;"><h2 style="color:#004d40;margin-top:0;text-align:center;">From Theory to Tools</h2><p style="text-align:center;max-width:700px;margin:0 auto 20px auto;">We have built a suite of forensic tools to visualize this data. Instead of raw spreadsheets, we offer interactive dashboards to prove your connection.</p><div style="display:flex;flex-wrap:wrap;justify-content:center;gap:15px;margin-top:20px;"><a href="ons_yates_dna_register.shtml" style="background:#006064;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">The Register</a><a href="lineage_proof.html" style="background:#00838f;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Proof Engine</a><a href="dna_dossier.html" style="background:#00acc1;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Forensic Dossier</a></div></div><div style="background:#f5f5f5;padding:20px;border-radius:8px;border:1px solid #ddd;"><h3 style="color:#555;margin-top:0;">Legacy Data: Y-DNA Haplogroups</h3><p style="font-size:0.9em;color:#666;margin-bottom:15px;">Y-DNA is the backbone of deep ancestry (27,000 BCE to 1600 AD). 
While not our primary focus for recent genealogy, we maintain a detailed record of the Yates Y-Chromosome mutations (R-M207 through FT266579).</p><a href="https://yates.one-name.net/gengen/dna_proof_y.htm" style="color:#006064;font-weight:bold;text-decoration:none;">&raquo; View Detailed Y-DNA Findings</a></div></div>"""

def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    """Wrap *content* in the site's standard HTML page shell.

    Args:
        title: Text placed, unescaped, in both ``<title>`` and the page ``<h1>``.
        content: Pre-rendered HTML for the page body.
        count: Not referenced by this function; kept so existing callers
            that pass it keep working.
        view_type: Page kind; selects the optional chrome:
            * ``'ancestor'`` / ``'participant'`` / ``'singleton'`` get a
              search box and a print button;
            * ``'ancestor'`` / ``'participant'`` also get the register sort
              toggle, with the current view shown in bold;
            * any value containing ``'tree'`` gets the Z-A / A-Z toggle;
            * the views listed in ``info_views`` below get the ``SITE_INFO``
              methodology banner.
        extra: Raw HTML appended inside ``<head>``.
        stats_bar: Raw HTML placed in ``#nav-slot`` ahead of the nav bar.

    Returns:
        The complete HTML document as a single string.
    """
    register_views = ('ancestor', 'participant', 'singleton')
    # Views that show the methodology banner. 'subscribe' and 'share' used to
    # be added and then immediately cleared again, so they are simply omitted.
    info_views = register_views + (
        'tree_az', 'tree_za', 'proof', 'network', 'dossier', 'buster',
    )
    nav_blk = SITE_INFO if view_type in info_views else ""

    search_bar = ""
    print_btn = ""
    if view_type in register_views:
        # Emoji are numeric character references (U+1F50D magnifier,
        # U+1F5A8 + U+FE0F printer) so the markup is pure ASCII and cannot be
        # garbled by an encoding mismatch.
        search_bar = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="&#128269; Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""
        view_name = "Singleton List" if view_type == 'singleton' else "Register"
        print_btn = f"""<div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">&#128424;&#65039; Print {view_name}</button></div>"""

    toggle = ""
    if view_type in ('ancestor', 'participant'):
        # Same markup for both register views; only which link carries the
        # "current" style differs.
        current = "font-weight:bold;color:#006064;"
        other = "color:#00acc1;text-decoration:none;"
        if view_type == 'ancestor':
            line_style, name_style = current, other
        else:
            line_style, name_style = other, current
        toggle = f"""<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="{line_style}">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="{name_style}">By Participant Name</a></div>"""
    elif 'tree' in view_type:
        # The active ordering is plain bold text; the other one is a link.
        if 'za' in view_type:
            za = '<span style="font-weight:bold;color:#000;">Z-A</span>'
        else:
            za = '<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        if 'az' in view_type:
            az = '<span style="font-weight:bold;color:#000;">A-Z</span>'
        else:
            az = '<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}{JS_CORE}</div></body></html>"""

# Completion marker for Cell 1. The check mark (U+2705) is written as an
# escape so the source line stays ASCII and cannot be garbled by re-encoding.
print("\u2705 Cell 1 Loaded! (Search/Filter Logic Added)")

      [CELL 1] SETUP LOADED (V86)
      (Includes: Search Bars, Singleton Logic, Data Download)
✅ Cell 1 Loaded! (Search/Filter Logic Added)


In [99]:
# @title [CELL 2] The Asset Manager (V114 - Local Priority)
def fetch_assets():
    """Make sure the study GEDCOM and the privacy key CSV are in the working directory.

    Files already present locally always win. Only the assets that are still
    missing are fetched over FTPS, using the ``FTP_HOST`` / ``FTP_USER`` /
    ``FTP_PASS`` Colab secrets and trying each folder in ``SEARCH_PATHS`` in
    order. Progress and a final readiness summary are printed.

    Returns:
        None. Failures in the server step are reported on stdout rather than
        raised.
    """
    print("="*60)
    print("      [CELL 2] ASSET MANAGER STARTING...")
    print("      Logic: Local Upload > Server Download")
    print("="*60)

    import ftplib
    import os
    from ftplib import FTP_TLS

    # Status glyphs written as escapes so the source stays ASCII and cannot be
    # garbled by an encoding mismatch.
    OK = "\u2705"            # check mark
    DOWN = "\u2b07\ufe0f"    # down arrow
    FIND = "\U0001F50E"      # magnifying glass
    FAIL = "\u274c"          # cross mark
    GREEN = "\U0001F7E2"     # green circle
    RED = "\U0001F534"       # red circle

    # --- CONFIGURATION ---
    SEARCH_PATHS = [
        "/tng/gedcom",           # Priority 1: User specified
        "/public_html/tng/gedcom",
        "/ons-study",
        "/public_html/ons-study",
        "/"
    ]
    KEY_FILENAME = "match_to_unmasked.csv"

    def _is_source_gedcom(name):
        """True for *.ged names that are not a generated UNMASKED copy."""
        lowered = name.lower()
        return lowered.endswith(".ged") and "unmasked" not in lowered

    def _download(ftps, remote_name, local_name):
        """RETR remote_name from the server's current directory into local_name.

        The data goes to a ".part" file that is renamed only after the
        transfer finishes, so an interrupted download never leaves a truncated
        file that a later run would mistake for a complete asset.
        """
        partial = local_name + ".part"
        try:
            with open(partial, "wb") as fh:
                ftps.retrbinary(f"RETR {remote_name}", fh.write)
        except BaseException:
            if os.path.exists(partial):
                os.remove(partial)
            raise
        os.replace(partial, local_name)

    # -------------------------------------------------------
    # STEP 1: CHECK FOR MANUAL UPLOAD (PRIORITY)
    # -------------------------------------------------------
    print("[STEP 1] Checking local storage...")
    local_geds = [f for f in os.listdir('.') if _is_source_gedcom(f)]

    ged_ready = False

    if local_geds:
        # Sort by newest, just in case multiple exist
        local_geds.sort(key=os.path.getmtime, reverse=True)
        print(f"    {OK} FOUND LOCAL GEDCOM: {local_geds[0]}")
        print("    --> Skipping server download. Using manual upload.")
        ged_ready = True
    else:
        print("    - No local GEDCOM found. Proceeding to server...")

    # Check for Key File
    key_ready = False
    if os.path.exists(KEY_FILENAME):
        print(f"    {OK} FOUND LOCAL KEY: {KEY_FILENAME}")
        key_ready = True

    # If we have both, we can exit early!
    if ged_ready and key_ready:
        print(f"\n{GREEN} SYSTEM READY: All assets found locally.")
        return

    # -------------------------------------------------------
    # STEP 2: DOWNLOAD FROM SERVER (FALLBACK)
    # -------------------------------------------------------
    print("\n[STEP 2] Connecting to Server (Fallback)...")
    try:
        # Imported only when the server is actually needed, so a fully local
        # run does not depend on the Colab runtime being importable.
        from google.colab import userdata

        HOST = userdata.get("FTP_HOST")
        USER = userdata.get("FTP_USER")
        PASS = userdata.get("FTP_PASS")

        # The context manager closes the control connection even when one of
        # the steps below raises.
        with FTP_TLS() as ftps:
            ftps.connect(HOST, 21)
            ftps.auth()
            ftps.login(USER, PASS)
            ftps.prot_p()
            print(f"    {OK} Secured Connection Established.")

            # A. Download GEDCOM (If we didn't find one locally)
            if not ged_ready:
                print(f"    {FIND} Hunting for GEDCOM on server...")
                for folder in SEARCH_PATHS:
                    try:
                        ftps.cwd(folder)
                        server_geds = [f for f in ftps.nlst() if _is_source_gedcom(f)]
                        if not server_geds:
                            continue
                        target = server_geds[0]
                        print(f"       Found: {target} in {folder}")
                        # Some servers list full paths; store under the bare
                        # file name so the file lands in the working directory.
                        local_name = os.path.basename(target)
                        _download(ftps, target, local_name)
                        print(f"       {DOWN} DOWNLOAD COMPLETE: {target}")
                        ged_ready = True
                        break
                    except ftplib.all_errors as err:
                        # Missing folder, permission problem or failed
                        # transfer: report it and try the next candidate.
                        print(f"       - Skipping {folder}: {err}")

            # B. Download Key File (If we didn't find one locally)
            if not key_ready:
                print(f"    {FIND} Hunting for {KEY_FILENAME}...")
                for folder in SEARCH_PATHS:
                    try:
                        ftps.cwd(folder)
                        listed = {os.path.basename(n) for n in ftps.nlst()}
                        if KEY_FILENAME not in listed:
                            continue
                        _download(ftps, KEY_FILENAME, KEY_FILENAME)
                        print(f"       {DOWN} DOWNLOAD COMPLETE: {KEY_FILENAME}")
                        key_ready = True
                        break
                    except ftplib.all_errors as err:
                        print(f"       - Skipping {folder}: {err}")

    except Exception as e:
        # Top-level boundary for the whole server step: report and fall
        # through to the status summary instead of aborting the cell.
        print(f"    {FAIL} Connection/Download Error: {e}")

    # -------------------------------------------------------
    # FINAL STATUS
    # -------------------------------------------------------
    print("-" * 60)
    if ged_ready:
        print(f"{GREEN} READY: GEDCOM is available for processing.")
    else:
        print(f"{RED} CRITICAL: No GEDCOM found (Locally or on Server).")
        print("   Action: Please upload a .ged file to the file browser on the left.")
    if not key_ready:
        print(f"   Note: {KEY_FILENAME} was not found; IDs will remain masked.")

# Run the asset check as soon as the cell executes; the result is reported on
# stdout only (fetch_assets returns None).
fetch_assets()

      [CELL 2] ASSET MANAGER STARTING...
      Logic: Local Upload > Server Download
[STEP 1] Checking local storage...
    ✅ FOUND LOCAL GEDCOM: yates_study_2025.ged
    --> Skipping server download. Using manual upload.
    ✅ FOUND LOCAL KEY: match_to_unmasked.csv

🟢 SYSTEM READY: All assets found locally.


In [100]:
# @title [CELL 3] The Data Engine (V116 - neeYates Protocol)
def run_engine():
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V116)")
    print("      (Feature: Bio-Identity + neeYates Logic)")
    print("="*60)

    import os
    import sys
    import re
    import csv
    from ftplib import FTP_TLS

    # --- CONFIGURATION ---
    HOST = os.environ.get("FTP_HOST", "").strip()
    USER = os.environ.get("FTP_USER", "").strip()
    PASS = os.environ.get("FTP_PASS", "").strip()
    REMOTE_SUBDIR = "ons-study"

    KEY_FILE       = "match_to_unmasked.csv"
    UNMASKED_FILE  = "yates_study_2025_UNMASKED.ged"
    CSV_DB         = "engine_database.csv"

    # ---------------------------------------------------------
    # STEP 0: CHECK & DOWNLOAD ASSETS
    # ---------------------------------------------------------
    print("\n[STEP 0] Checking for Required Files...")
    local_geds = [f for f in os.listdir('.') if f.lower().endswith('.ged') and 'unmasked' not in f.lower()]
    local_key_exists = os.path.exists(KEY_FILE)

    if not local_geds:
        print("    ‚ö†Ô∏è No GEDCOM found. Please Run Cell 2 first.")
        return False
    else:
        local_geds.sort(key=lambda x: os.path.getmtime(x), reverse=True)
        DEFAULT_GEDCOM = local_geds[0]
        print(f"    ‚úÖ Using GEDCOM: {DEFAULT_GEDCOM}")

    # ---------------------------------------------------------
    # STEP 1: LOAD KEYS
    # ---------------------------------------------------------
    print("\n[STEP 1] Loading Privacy Keys...")
    unmask_map = {}
    if local_key_exists:
        try:
            with open(KEY_FILE, mode='r', encoding='utf-8-sig', errors='replace') as f:
                reader = csv.reader(f)
                for row in reader:
                    if len(row) < 2: continue
                    code = row[0].strip().lower()
                    name = row[1].strip()
                    if code and name and code != "code": unmask_map[code] = name
            print(f"    - Loaded {len(unmask_map)} privacy keys.")
        except Exception as e: print(f"    - Error reading key file: {e}")
    else: print("    [WARNING] No Key File found. IDs will remain masked.")

    def resolve_mask_code_greedy(payload):
        m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        if m: return m.group(2).lower()
        return None
    def resolve_name(code): return unmask_map.get(code, code)

    # ---------------------------------------------------------
    # STEP 2: GENERATE UNMASKED GEDCOM STREAM
    # ---------------------------------------------------------
    with open(DEFAULT_GEDCOM, 'r', encoding='utf-8', errors='replace') as fin, \
         open(UNMASKED_FILE, 'w', encoding='utf-8') as fout:
        buffer_lines = []; real_name = None
        for line in fin:
            if line.startswith("0 @"):
                if buffer_lines:
                    for bl in buffer_lines:
                        if bl.startswith("1 NAME") and real_name: fout.write(f"1 NAME {real_name}\n")
                        else: fout.write(bl)
                buffer_lines = [line]; real_name = None
            else:
                buffer_lines.append(line)
                if line.startswith("1 NPFX"):
                    parts = line.split(" ", 2)
                    if len(parts) > 2:
                        code = resolve_mask_code_greedy(parts[2].strip())
                        if code: real_name = resolve_name(code)
        if buffer_lines:
            for bl in buffer_lines:
                if bl.startswith("1 NAME") and real_name: fout.write(f"1 NAME {real_name}\n")
                else: fout.write(bl)
    print(f"    - Generated {UNMASKED_FILE}")

    # ---------------------------------------------------------
    # STEP 3: TRACE LINEAGES
    # ---------------------------------------------------------
    print("\n[STEP 3] Tracing Lineages...")
    individuals = {}; families = {}

    def clean_name(raw): return raw.replace("/", "").strip()
    def is_yates(name_str):
        if not name_str: return False
        n = name_str.lower()
        return "yates" in n or "yeates" in n or "yate" in n
    def extract_year(date_str):
        if not date_str: return ""
        m = re.search(r'\d{4}', date_str)
        return m.group(0) if m else ""

    current_id = None; current_fam = None; current_tag = None
    with open(UNMASKED_FILE, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip(); parts = line.split(" ", 2)
            if len(parts) < 2: continue
            level, tag = parts[0], parts[1]
            payload = parts[2] if len(parts) > 2 else ""

            if level == "0" and "INDI" in payload:
                current_id = tag.replace("@", "")
                individuals[current_id] = {"name": "Unknown", "parents_fam": None, "spouse_fams": [], "mask_code": "", "cm": 0, "birt": "", "deat": ""}
                current_fam = None; current_tag = "INDI"
            elif current_id and level != "0":
                if tag == "NAME": individuals[current_id]["name"] = clean_name(payload)
                elif tag == "FAMC": individuals[current_id]["parents_fam"] = payload.replace("@", "")
                elif tag == "FAMS": individuals[current_id]["spouse_fams"].append(payload.replace("@", ""))
                elif tag == "NPFX":
                    code = resolve_mask_code_greedy(payload)
                    if code: individuals[current_id]["mask_code"] = code
                    m = re.search(r'^(\d+)|(\d+)\s*cM', payload, re.IGNORECASE)
                    if m: individuals[current_id]["cm"] = int(m.group(1) if m.group(1) else m.group(2))
                elif tag == "BIRT": current_tag = "BIRT"
                elif tag == "DEAT": current_tag = "DEAT"
                elif tag == "DATE" and current_tag in ["BIRT", "DEAT"]:
                    year = extract_year(payload)
                    if current_tag == "BIRT": individuals[current_id]["birt"] = year
                    if current_tag == "DEAT": individuals[current_id]["deat"] = year
                    current_tag = None

            if level == "0" and "FAM" in payload:
                current_fam = tag.replace("@", "")
                families[current_fam] = {"husb": None, "wife": None}
                current_id = None
            elif current_fam and level != "0":
                if tag == "HUSB": families[current_fam]["husb"] = payload.replace("@", "")
                elif tag == "WIFE": families[current_fam]["wife"] = payload.replace("@", "")

    print(f"    - Parsed {len(individuals)} individuals.")

    # --- V116: BIO-IDENTITY + NEEYATES LOGIC ---
    def get_bio_display_name(uid):
        if not uid or uid not in individuals: return "Unknown"
        p = individuals[uid]
        name = p["name"]

        # Only modify if it's a Yates/Yeates
        if is_yates(name):
            if p["parents_fam"] and p["parents_fam"] in families:
                mom_id = families[p["parents_fam"]]["wife"]
                if mom_id and mom_id in individuals:
                    mom_name = individuals[mom_id]["name"]
                    mom_parts = mom_name.split()
                    if mom_parts:
                        mom_surname = mom_parts[-1]

                        # --- V116 Logic: Handle Endogamy ---
                        if mom_surname.lower() not in ["unknown", "?", "nee"]:
                            if mom_surname.lower() == "yates":
                                name = f"{name}-neeYates"
                            else:
                                name = f"{name}-{mom_surname}"

        # Add Dates
        b = p["birt"]; d = p["deat"]
        if b or d:
            name += f" ({b if b else '?'} - {d if d else '?'})"
        return name

    def climb_yates_line_ids(start_id):
        curr = start_id; path_names = []; path_ids = []
        while curr:
            person = individuals.get(curr)
            if not person: break

            # V116: Use Bio-Identity Name
            full_display = get_bio_display_name(curr)
            path_names.append(full_display)
            path_ids.append(curr)

            fam_id = person["parents_fam"]
            if not fam_id or fam_id not in families: break
            fam = families[fam_id]
            dad_id, mom_id = fam["husb"], fam["wife"]
            dad_name = individuals.get(dad_id, {}).get("name", ""); mom_name = individuals.get(mom_id, {}).get("name", "")
            if is_yates(dad_name) and not is_yates(mom_name): curr = dad_id
            elif is_yates(mom_name) and not is_yates(dad_name): curr = mom_id
            else: curr = dad_id if dad_id else mom_id
        return curr, path_names, path_ids

    def analyze_lineage_deep(start_id):
        """Breadth-first search upward from ``start_id`` for the nearest Yates ancestor.

        Starting with ``start_id`` itself, the search visits both parents of
        every non-Yates person. The first person for which ``is_yates`` is true
        is handed to ``climb_yates_line_ids`` and the combined line (top
        ancestor first, ``start_id`` last) is returned.

        Returns:
            tuple: ten values in this order -- simple ancestor pair, dated
            ancestor pair, sort key, top ancestor name, ``""``, spouse name
            (``"missing"`` when none is found), ``""``, the lineage joined with
            ``" -> "``, the number of people in the lineage, and the
            comma-joined id path. When no Yates person is reachable a fixed
            "Disconnected" tuple is returned instead.
        """
        from collections import deque  # local import: O(1) pops from the left

        queue = deque([(start_id, [], [])])
        visited = set()
        while queue:
            curr, path_from_start, ids_from_start = queue.popleft()
            if curr in visited:
                continue
            visited.add(curr)
            person = individuals.get(curr)
            if not person:
                continue

            if is_yates(person["name"]):
                top_id, climb_names, climb_ids = climb_yates_line_ids(curr)
                # The climb can hand back an empty or unknown id when the last
                # family had no usable parent; fall back to the last person it
                # actually visited so the ancestor is never "Unknown".
                if top_id not in individuals and climb_ids:
                    top_id = climb_ids[-1]

                # Reconstruct full line: top ancestor ... curr ... start_id
                full_line_names = list(reversed(climb_names)) + list(reversed(path_from_start))
                full_line_ids = list(reversed(climb_ids)) + list(reversed(ids_from_start))

                # Get Top Ancestor Name (Pure for Sorting)
                top_p = individuals.get(top_id, {})
                top_name_pure = top_p.get("name", "Unknown")
                top_display = get_bio_display_name(top_id)

                # Find Spouse: first spouse family whose partner is a known individual
                spouse_name = "missing"
                spouse_display = ""
                for fid in top_p.get("spouse_fams", []):
                    if fid not in families:
                        continue
                    f = families[fid]
                    spouse_id = None
                    if f["husb"] == top_id:
                        spouse_id = f["wife"]
                    elif f["wife"] == top_id:
                        spouse_id = f["husb"]
                    if spouse_id and spouse_id in individuals:
                        spouse_name = individuals[spouse_id]["name"]
                        spouse_display = get_bio_display_name(spouse_id)
                        break

                if "unknown" in spouse_name.lower():
                    spouse_name = "missing"

                has_spouse = spouse_name != "missing"
                pair_dated = f"{top_display} & {spouse_display}" if has_spouse else top_display
                pair_simple = f"{top_name_pure} & {spouse_name}" if has_spouse else top_name_pure

                lineage = " -> ".join(full_line_names)
                id_path_str = ",".join(full_line_ids)
                clean_top = re.sub(r'[^a-zA-Z0-9]', '', top_name_pure)
                # "ZZZ" is the sort-key placeholder used when no spouse is known.
                clean_spouse = re.sub(r'[^a-zA-Z0-9]', '', spouse_name) if has_spouse else "ZZZ"
                sort_key = f"{clean_top}_{clean_spouse}"

                return pair_simple, pair_dated, sort_key, top_name_pure, "", spouse_name, "", lineage, len(full_line_names), id_path_str

            # Not a Yates: keep searching upward through both parents.
            rich_name = get_bio_display_name(curr)
            new_path = path_from_start + [rich_name]
            new_ids = ids_from_start + [curr]

            fam_id = person["parents_fam"]
            if fam_id and fam_id in families:
                fam = families[fam_id]
                if fam["husb"]:
                    queue.append((fam["husb"], new_path, new_ids))
                if fam["wife"]:
                    queue.append((fam["wife"], new_path, new_ids))

        return "Disconnected", "‚ö†Ô∏è Unlinked / Disconnected Lines", "ZZ_Disconnected", "", "", "", "", "Trace Failed", 0, ""

    # Build one output record per individual that carries a mask code.
    rows = []
    for uid, data in individuals.items():
        if not data["mask_code"]:
            continue

        # Trace the lineage first, then resolve the participant's real name.
        (pair_simple, pair_dated, sort_key, fa1, fa1_d,
         fa2, fa2_d, lineage, gens, id_path) = analyze_lineage_deep(uid)
        final_name = resolve_name(data["mask_code"])

        record = {
            "Tester-Participant-MASKED": data["mask_code"],
            "Tester-Participant-Unmasked": final_name,
            "Found Match": data["name"],
            "ID#": uid,
            "cM": data["cm"],
            "Spacer": "",
            "Yates DNA Ancestral Line": lineage,
            "Authority_FirstAncestor": pair_simple,
            "Authority_FirstAncestor_alpha": sort_key,
            "Authority_FirstAncestor_dated": pair_dated,
            "fa_1 extracted": fa1,
            "fa_1_Dates": fa1_d,
            "fa_2 extracted": fa2,
            "fa_2 Dates": fa2_d,
            "Gen_Count": gens,
            "Ancestral_Path_IDs": id_path,
        }
        rows.append(record)

    def get_sortable_surname_py(full_name):
        """Return the lowercase last word of ``full_name`` for use as a sort key.

        Suffixes such as Jr/Sr/II/III/IV/Esq/M.D./Ph.D. and any commas or
        periods are removed first. ``"zzz"`` is returned when the name is
        empty or nothing remains after cleaning.
        """
        if full_name:
            # Strip generational / professional suffixes, then punctuation.
            without_suffix = re.sub(r'\b(jr\.?|sr\.?|ii|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', str(full_name), flags=re.IGNORECASE)
            tokens = re.sub(r'[,\.]', '', without_suffix).strip().split()
            if tokens:
                return tokens[-1].lower()
        return "zzz"

    # Order the records by the unmasked participant's surname.
    rows.sort(key=lambda record: get_sortable_surname_py(record["Tester-Participant-Unmasked"]))

    # Column order of the engine database CSV.
    fieldnames = [
        "Tester-Participant-MASKED",
        "Tester-Participant-Unmasked",
        "Found Match",
        "ID#",
        "cM",
        "Spacer",
        "Yates DNA Ancestral Line",
        "Authority_FirstAncestor",
        "Authority_FirstAncestor_alpha",
        "Authority_FirstAncestor_dated",
        "fa_1 extracted",
        "fa_1_Dates",
        "fa_2 extracted",
        "fa_2 Dates",
        "Gen_Count",
        "Ancestral_Path_IDs",
    ]

    # Characters outside ISO-8859-15 are substituted rather than raising.
    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as out_file:
        writer = csv.DictWriter(out_file, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(rows)

    print("\n[SUCCESS] Engine V116 Complete. Bio-Identities with 'neeYates' enabled.")

print("‚úÖ Cell 3 (Engine V116) Loaded.")

✅ Cell 3 (Engine V116) Loaded.


In [101]:
# @title [CELL 4] The Publisher (V133 - Full Guide Integration)
def run_publisher():
    """Build the study's HTML pages from ``engine_database.csv`` and upload them.

    Reads the engine CSV, renders the contents hub, DNA register, DNA network
    and lineage-proof pages into an in-memory queue, then uploads every queued
    page over FTPS to each remote study directory that exists. Each page is
    also written to the current working directory before it is uploaded.

    Returns:
        dict: ``{filename: html}`` for the generated pages when at least one
        remote directory received them; an empty dict when the CSV is missing
        or nothing could be uploaded, so callers can treat a falsy result as
        "nothing published".
    """
    print("="*60)
    print("      [CELL 4] PUBLISHER STARTING (V133)")
    print("      (Status: contents.shtml Hub Fully Integrated)")
    print("="*60)

    import os
    import json
    import pytz
    import pandas as pd
    from datetime import datetime
    import random
    from ftplib import FTP_TLS, all_errors
    from google.colab import userdata

    # --- 1. SETUP & HELPERS ---
    upload_queue = {}
    REMOTE_SUBDIR = "ons-study"

    def connect_session():
        """Open an authenticated FTPS session using the Colab-stored secrets."""
        HOST = userdata.get("FTP_HOST")
        USER = userdata.get("FTP_USER")
        PASS = userdata.get("FTP_PASS")
        ftps = FTP_TLS()
        try:
            ftps.connect(HOST, 21)
            ftps.auth()
            ftps.login(USER, PASS)
            ftps.prot_p()
        except BaseException:
            # Do not leak a half-open socket when the handshake/login fails.
            ftps.close()
            raise
        return ftps

    NAV_HTML = """
    <div class="nav-bar" style="background:#006064; padding:12px; text-align:center; font-family:'Segoe UI', sans-serif; box-shadow:0 2px 5px rgba(0,0,0,0.2); line-height: 1.8;">
        <a href="contents.shtml" style="color:white; margin:0 8px; text-decoration:none; font-weight:bold; font-size:1.05em;">Guide/Contents</a> |
        <a href="ons_yates_dna_register.shtml" style="color:white; margin:0 8px; text-decoration:none; font-weight:bold; font-size:1.05em;">DNA Register</a> |
        <a href="dna_network.shtml" style="color:white; margin:0 8px; text-decoration:none; font-weight:bold; font-size:1.05em;">DNA Network</a> |
        <a href="just-trees.shtml" style="color:white; margin:0 8px; text-decoration:none; font-weight:bold; font-size:1.05em;">Trees</a>
        <br>
        <span style="color:#ffcc80; font-weight:bold;">TOOLS:</span>
        <a href="lineage_proof.html" style="color:#ffcc80; margin:0 8px; text-decoration:none; font-weight:bold;">Proof Engine</a> |
        <a href="brick_wall_buster.shtml" style="color:#ffcc80; margin:0 8px; text-decoration:none; font-weight:bold;">Wall Buster</a> |
        <a href="dna_dossier.html" style="color:#ffcc80; margin:0 8px; text-decoration:none; font-weight:bold;">Forensic Dossier</a> |
        <a href="research_admin.html" style="color:#ccc; margin:0 8px; text-decoration:none;">Admin Hub</a>
        <br>
        <a href="subscribe.shtml" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Subscribe</a> |
        <a href="share_dna.shtml" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Share Matches</a> |
        <a href="gedmatch_kits.shtml" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Gedmatch Kits</a> |
        <a href="print_cousin_list.shtml" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Print Cousin List</a> |
        <a href="/gengen/dna_theory_of_the_case.htm" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Theory in Action</a> |
        <a href="data_glossary.shtml" style="color:#e0f2f1; margin:0 8px; text-decoration:none; font-size:0.9em;">Glossary</a>
    </div>
    """

    TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
    TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

    def normalize_id(val):
        """Strip '@' markers/whitespace; prefix purely numeric ids with 'I'."""
        s = str(val).replace('@', '').strip()
        if s.isdigit(): return f"I{s}"
        return s

    def build_narrative(row):
        """Render one register sentence for a CSV row, linking the found match."""
        part_name = str(row.get('Tester-Participant-Unmasked', '')).strip()
        cm = str(row.get('cM', '0'))
        anc_dated = str(row.get('Authority_FirstAncestor_dated', 'Unknown'))
        found_match = str(row.get('Found Match', 'Unknown')).strip()
        gen_count = row.get('Gen_Count', 0)
        rid = normalize_id(row.get('ID#', ''))
        linked_found_match = f'<a href="{TNG_BASE_URL}{rid}{TNG_SUFFIX}" target="_blank"><b>{found_match}</b></a>'
        return f"{part_name} is a {cm} cM DNA match to {linked_found_match} is related via {anc_dated} back {gen_count} generations."

    def build_linked_lineage(row):
        """Return the lineage text with the found match turned into a chart link."""
        line = str(row.get('Yates DNA Ancestral Line', ''))
        found = str(row.get('Found Match', ''))
        rid = normalize_id(row.get('ID#', ''))
        if found and rid and found in line:
            url = f"{TNG_BASE_URL}{rid}{TNG_SUFFIX}"
            link_html = f'<a href="{url}" target="_blank" style="color:#006064;text-decoration:none;font-weight:bold;">{found}</a>'
            # The engine writes the line top-ancestor-first, so the matched
            # person is the LAST entry. Link only that occurrence; replacing
            # every occurrence would point same-named ancestors at this ID.
            before, _, after = line.rpartition(found)
            return f"{before}{link_html}{after}"
        return line

    BUILD_ID = f"V133-{random.randint(1000,9999)}"

    # --- 2. DEFINE JS (Raw Strings) ---
    JS_RAW = r"""
    function getSortableName(fullname) {
        if (!fullname) return "zzz";
        let clean = fullname.split("(")[0].trim();
        let parts = clean.split(" ");
        let surname = parts[parts.length - 1];
        if (surname.includes("-")) surname = surname.split("-")[0];
        return surname.toLowerCase() + " " + clean.toLowerCase();
    }
    function formatName(fullname) { return fullname; }
    function searchTable() {
      var input, filter, table, tr, td, i, txtValue;
      input = document.getElementById("searchInput");
      filter = input.value.toUpperCase();
      table = document.getElementById("reg-table");
      if (!table) return;
      tr = table.getElementsByTagName("tr");
      for (i = 0; i < tr.length; i++) {
        var found = false;
        var tds = tr[i].getElementsByTagName("td");
        for(var j=0; j<tds.length; j++){
            if(tds[j].textContent.toUpperCase().indexOf(filter) > -1){ found = true; break; }
        }
        if (found || tr[i].getElementsByTagName("th").length > 0) { tr[i].style.display = ""; }
        else { tr[i].style.display = "none"; }
      }
    }
    """
    SHARED_JS = f"<script>{JS_RAW}</script>"

    # --- 3. PAGE MAKER ---
    def make_page(title, body_content, db_len, active_tab, stats_bar):
        """Wrap ``body_content`` in the shared page shell (nav, stats bar, JS).

        ``db_len`` is accepted for call compatibility and is not used.
        """
        search_html = ""
        # Search box for relevant pages
        if active_tab in ["ancestor", "participant", "singleton", "tree_az", "tree_za"]:
            search_html = f"""<div style="text-align:center; margin-bottom:15px;"><input type="text" id="searchInput" onkeyup="searchTable()" placeholder="üîç Search this table..." style="width:100%; max-width:500px; padding:12px; border:2px solid #006064; border-radius:4px; font-size:16px;"></div>"""

        # Center the content card
        return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f4f7f6;padding:20px}}.wrap{{max-width:1200px; margin:0 auto;}} .card{{background:white;padding:30px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.05);margin:20px auto; width:100%; box-sizing:border-box;}} h1.centerline{{text-align:center; color:#006064;}} table.dataframe{{width:100%;}}</style></head><body><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div><div class="card">{search_html}{body_content}</div></div>{SHARED_JS}</body></html>"""

    # --- 4. LOAD DATA ---
    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB):
        print("‚ùå ERROR: engine_database.csv not found.")
        return {}

    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")
    df['Long_Narrative'] = df.apply(build_narrative, axis=1)
    df['Linked_Tree_Line'] = df.apply(build_linked_lineage, axis=1)

    # --- 5. GENERATE DATA PACKETS ---
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est)
    # "%-I" is a glibc-only extension, so strip the zero padding by hand; "%Z"
    # yields EST or EDT as appropriate instead of a hard-coded "EST".
    hour_12 = now.strftime("%I").lstrip("0")
    timestamp = f"{now.strftime('%B %d, %Y')} {hour_12}:{now.strftime('%M %p %Z')}"
    stats_bar_full = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Total Matches:</strong> {len(df):,} &nbsp;|&nbsp; <strong>Ver:</strong> {BUILD_ID}</div>"""

    # Per-ancestor summary; groups with fewer than two matches are left out.
    ancestor_data = {}
    for alpha, group_df in df.groupby('Authority_FirstAncestor_alpha'):
        match_count = int(len(group_df))
        if match_count < 2: continue
        total_cm = int(group_df['cM'].sum())
        unique_count = int(len(group_df['Tester-Participant-Unmasked'].unique()))
        badge = "Bronze" if match_count < 5 else ("Silver" if match_count < 15 else "Gold")
        ancestor_data[alpha] = {"name": group_df.iloc[0]['Authority_FirstAncestor_dated'], "matches": match_count, "cm": total_cm, "testers": unique_count, "badge": badge, "verdict": "Lineage Confirmed", "integrity": 75, "list_data": group_df['Tester-Participant-Unmasked'].value_counts().head(3).to_dict()}

    # Per-participant summary.
    participant_data = {}
    for p_name, group_df in df.groupby('Tester-Participant-Unmasked'):
        participant_data[p_name] = {"name": p_name, "matches": int(len(group_df)), "cm": int(group_df['cM'].sum()), "testers": 1, "badge": "Tester", "verdict": "Participant", "integrity": 100, "list_data": group_df['Authority_FirstAncestor_dated'].value_counts().head(3).to_dict()}

    smart_packet_json = json.dumps({"ancestors": ancestor_data, "participants": participant_data})
    proof_db_json = df[['Authority_FirstAncestor_dated','Tester-Participant-Unmasked','cM','ID#','Yates DNA Ancestral Line', 'Ancestral_Path_IDs']].rename(columns={'Authority_FirstAncestor_dated':'ancestor','Tester-Participant-Unmasked':'participant','cM':'cm','ID#':'id','Yates DNA Ancestral Line':'lineage', 'Ancestral_Path_IDs': 'path_ids'}).to_json(orient='records')

    # --- 6. PAGE DEFINITIONS ---

    # 6a. CONTENTS HUB (The Guide Grid)
    guide_body = """
    <style>
    .guide-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:25px;max-width:1200px;margin:30px auto}
    .guide-card{background:white;padding:25px;border-radius:8px;border-left:5px solid #006064;box-shadow:0 4px 10px rgba(0,0,0,0.05);transition:transform 0.2s; text-align:left;}
    .guide-card:hover{transform:translateY(-5px)}
    .card-title{font-size:1.4em;font-weight:bold;color:#004d40;margin-top:0}
    .card-why{color:#b71c1c;font-weight:bold;margin:10px 0 5px 0;font-size:0.9em;text-transform:uppercase}
    .card-what{color:#555;font-size:1em;line-height:1.5;margin-bottom:20px}
    .card-btn{display:inline-block;padding:10px 20px;background:#00838f;color:white;text-decoration:none;border-radius:4px;font-weight:bold}
    .card-btn:hover{background:#006064}
    </style>
    <div style="text-align:center;max-width:800px;margin:0 auto 20px auto;color:#444;font-size:1.1em;">This site transforms raw DNA data into forensic genealogical evidence. Use the tools below to explore your heritage, verify ancestors, and analyze the strength of your genetic connections.</div>
    <div class="guide-grid">
        <div class="guide-card"><h2 class="card-title">1. The DNA Register</h2><div class="card-why">Why View This?</div><div class="card-what">To see the raw evidence. This is the master list of all matches in the study, sorted by ancestral line.</div><a href="ons_yates_dna_register.shtml" class="card-btn">Open Register</a></div>
        <div class="guide-card"><h2 class="card-title">2. DNA Network</h2><div class="card-why">Why View This?</div><div class="card-what">To see the big picture. Visual clusters showing which ancestral lines are genetically proven by multiple testers.</div><a href="dna_network.shtml" class="card-btn">View Network</a></div>
        <div class="guide-card"><h2 class="card-title">3. Lineage Proof Engine</h2><div class="card-why">Why View This?</div><div class="card-what">To verify a connection. An interactive tool that tests if a specific ancestor is biologically confirmed.</div><a href="lineage_proof.html" class="card-btn">Run Proof</a></div>
        <div class="guide-card" style="border-left-color:#f57f17;"><h2 class="card-title" style="color:#e65100;">4. Brick Wall Buster</h2><div class="card-why" style="color:#bf360c;">Why View This?</div><div class="card-what">Break through a dead end. Suggests which family line you likely belong to based on match dominance.</div><a href="brick_wall_buster.shtml" class="card-btn" style="background:#ef6c00;">Bust This Wall</a></div>
        <div class="guide-card"><h2 class="card-title">5. Forensic Dossier</h2><div class="card-why">Why View This?</div><div class="card-what">Get your "Scorecard." Generate forensic reports on yourself or an ancestor, grading evidence strength.</div><a href="dna_dossier.html" class="card-btn">Create Dossier</a></div>
        <div class="guide-card"><h2 class="card-title">6. Research Admin Hub</h2><div class="card-why">Why View This?</div><div class="card-what">For study managers. High-level audit showing participant statistics, masked IDs, and study metrics.</div><a href="research_admin.html" class="card-btn" style="background:#455a64;">Admin Access</a></div>
        <div class="guide-card"><h2 class="card-title">7. Data Glossary</h2><div class="card-why">Why View This?</div><div class="card-what">Understand the terms. Definitions for forensic terms like "Keystone Tester" and "Spanish naming."</div><a href="data_glossary.shtml" class="card-btn" style="background:#78909c;">Read Glossary</a></div>
        <div class="guide-card" style="border-left-color:#fbc02d;"><h2 class="card-title">8. Corrections</h2><div class="card-why">See Something?</div><div class="card-what">Genealogy is collaboration. If you can solve a mystery, tell us. Include the <strong>Person ID (e.g. I1234)</strong>.</div><a href="mailto:ron@yates.one-name.net" class="card-btn" style="background:#f9a825;color:#333;">Email Correction</a></div>
    </div>
    """
    upload_queue["contents.shtml"] = make_page("Yates DNA Study Guide", guide_body, len(df), "contents", stats_bar_full)

    # 6b. Register, Network, Dossier, Proof (Simplified for Queue)
    upload_queue["ons_yates_dna_register.shtml"] = make_page("DNA Register", df.to_html(columns=["Long_Narrative"], index=False, classes="dataframe", escape=False, table_id="reg-table"), len(df), "ancestor", stats_bar_full)

    # Collect fragments and join once instead of growing a string in the loop.
    network_parts = []
    for anc, group in df.groupby('Authority_FirstAncestor_dated'):
        network_parts.append(f"<details style='margin-bottom:10px;'><summary style='cursor:pointer; font-weight:bold; color:#006064;'>{anc} ({len(group)} matches)</summary><ul>")
        network_parts.extend(f"<li>{r['Tester-Participant-Unmasked']} - {r['cM']} cM</li>" for _, r in group.iterrows())
        network_parts.append("</ul></details>")
    network_html = "".join(network_parts)
    upload_queue["dna_network.shtml"] = make_page("DNA Network", network_html, len(df), "network", stats_bar_full)

    # Injecting placeholders for Buster/Proof/Dossier
    proof_js = f"""<script>const DATA={smart_packet_json}; const DB={proof_db_json}; function runProof(){{}} function runIdSearch(){{}}</script>"""
    proof_body = "<h3>Verify an Ancestral Line</h3><select id='proofSelect' onchange='runProof()'><option>Select...</option></select><div id='proof-result'></div>"
    # The data script goes inside the page body; appending it to the finished
    # page would place it after the closing </html> tag.
    upload_queue["lineage_proof.html"] = make_page("Lineage Proof Engine", proof_body + proof_js, len(df), "proof", stats_bar_full)

    # --- 7. UPLOAD ---
    print("\n[STEP 3] Uploading Manifest...")
    deployed_targets = 0
    try:
        ftps = connect_session()
        try:
            # The study folder may live under either root depending on the
            # FTP account's home; try both and deploy wherever it exists.
            for d in [f"/public_html/{REMOTE_SUBDIR}", f"/{REMOTE_SUBDIR}"]:
                try:
                    ftps.cwd("/"); ftps.cwd(d)
                    print(f"    üéØ Target: {d}")
                    for fn, content in upload_queue.items():
                        with open(fn, "w", encoding="utf-8") as f: f.write(content)
                        with open(fn, "rb") as fh: ftps.storbinary(f"STOR {fn}", fh)
                    print(f"       ‚úÖ Deployed {len(upload_queue)} files.")
                    deployed_targets += 1
                except all_errors as e:
                    # Best effort per target, but say why a target was skipped.
                    print(f"    [skip] {d}: {e}")
        finally:
            try:
                ftps.quit()
            except all_errors:
                ftps.close()
    except Exception as e: print(f"‚ùå Error: {e}")

    # A falsy result tells the caller that nothing reached the server.
    return upload_queue if deployed_targets else {}

print("‚úÖ Cell 4 (Publisher V133) Loaded.")

✅ Cell 4 (Publisher V133) Loaded.


In [102]:
# @title [CELL 5] The Button (V119)
# This cell executes the entire pipeline (Asset Fetch -> Engine -> Publish).

# Run the three pipeline phases in order; any exception is reported with a
# full traceback instead of stopping the notebook silently.
try:
    print(">>> üåê PHASE 1: ASSET CHECK...")
    # fetch_assets() # Uncomment if you want to force download every time

    print("\n>>> ‚öôÔ∏è PHASE 2: RUNNING ENGINE...")
    run_engine()

    print("\n>>> üöÄ PHASE 3: RUNNING PUBLISHER...")
    # The publisher hands back its upload queue; an empty result means
    # nothing was published.
    published_files = run_publisher()

    if not published_files:
        print("\n‚ö†Ô∏è WARNING: Publisher ran but reported 0 files uploaded.")
    else:
        count = len(published_files)
        print(f"\n‚úÖ SUCCESS: {count} files were generated and uploaded.")
        print("   Verify at: https://yates.one-name.net/ons-study/")

except Exception as e:
    import traceback
    print(f"\n‚ùå CRITICAL FAILURE: {e}")
    traceback.print_exc()

>>> 🌐 PHASE 1: ASSET CHECK...

>>> ⚙️ PHASE 2: RUNNING ENGINE...
      [CELL 3] ENGINE STARTING (V116)
      (Feature: Bio-Identity + neeYates Logic)

[STEP 0] Checking for Required Files...
    ✅ Using GEDCOM: yates_study_2025.ged

[STEP 1] Loading Privacy Keys...
    - Loaded 94 privacy keys.
    - Generated yates_study_2025_UNMASKED.ged

[STEP 3] Tracing Lineages...
    - Parsed 63676 individuals.

[SUCCESS] Engine V116 Complete. Bio-Identities with 'neeYates' enabled.

>>> 🚀 PHASE 3: RUNNING PUBLISHER...
      [CELL 4] PUBLISHER STARTING (V133)
      (Status: contents.shtml Hub Fully Integrated)

[STEP 3] Uploading Manifest...
    🎯 Target: /public_html/ons-study
       ✅ Deployed 4 files.
    🎯 Target: /ons-study
       ✅ Deployed 4 files.

✅ SUCCESS: 4 files were generated and uploaded.
   Verify at: https://yates.one-name.net/ons-study/


In [None]:
# @title [TEST BENCH] V114 - Asset Manager Prototype
# This is a STANDALONE test function. It will not break your main pipeline.

def test_fetch_assets():
    """Dry-run the asset lookup: local files first, then the FTP server.

    Looks in the current directory for a GEDCOM (any ``.ged`` file whose name
    does not contain "unmasked") and for the key file. If either is missing it
    connects over FTPS and searches a fixed list of server folders. Nothing is
    downloaded; the function only prints what it found and returns ``None``.
    """
    print("="*60)
    print("      üß™ TEST BENCH: V114 ASSET MANAGER")
    print("      (Testing 'Local Priority' Logic)")
    print("="*60)

    import os
    from ftplib import FTP_TLS, all_errors
    from google.colab import userdata

    # --- CONFIGURATION ---
    # We prioritize your specific path
    SEARCH_PATHS = [
        "/tng/gedcom",           # <--- PRIORITY 1: The user's specific path
        "/public_html/tng/gedcom",
        "/ons-study",
        "/public_html/ons-study",
        "/"
    ]
    KEY_FILENAME = "match_to_unmasked.csv"

    # -------------------------------------------------------
    # STEP 1: CHECK FOR MANUAL UPLOAD (PRIORITY)
    # -------------------------------------------------------
    print("[STEP 1] Scanning local Colab environment...")
    local_files = os.listdir('.')
    local_geds = [f for f in local_files if f.lower().endswith(".ged") and "unmasked" not in f.lower()]

    ged_ready = False

    if local_geds:
        # Sort by newest, just in case multiple exist
        local_geds.sort(key=os.path.getmtime, reverse=True)
        print(f"    ‚úÖ FOUND LOCAL UPLOAD: {local_geds[0]}")
        print("    ‚ú® SUCCESS: Logic detected local file. Skipping Server Download.")
        ged_ready = True
    else:
        print("    ‚ö†Ô∏è No local GEDCOM found. Falling back to Server...")

    # Check for Key File
    key_ready = False
    if os.path.exists(KEY_FILENAME):
        print(f"    ‚úÖ FOUND LOCAL KEY: {KEY_FILENAME}")
        key_ready = True

    # If we have both, we can exit early!
    if ged_ready and key_ready:
        print("\nüü¢ TEST RESULT: Assets ready. (If this were real, Cell 3 would start now).")
        return

    # -------------------------------------------------------
    # STEP 2: DOWNLOAD FROM SERVER (FALLBACK)
    # -------------------------------------------------------
    print("\n[STEP 2] Initiating Server Connection (Fallback Mode)...")
    ftps = None
    connected = False
    try:
        HOST = userdata.get("FTP_HOST")
        USER = userdata.get("FTP_USER")
        PASS = userdata.get("FTP_PASS")

        ftps = FTP_TLS()
        ftps.connect(HOST, 21)
        connected = True
        ftps.auth()
        ftps.login(USER, PASS)
        ftps.prot_p()
        print("    ‚úÖ Connection Established.")

        # A. Download GEDCOM (If we didn't find one locally)
        if not ged_ready:
            print("    üîé Hunting for GEDCOM on server...")
            for folder in SEARCH_PATHS:
                try:
                    ftps.cwd(folder)
                    files = ftps.nlst()
                except all_errors:
                    # Folder missing or unreadable on this server: try the next.
                    continue
                server_geds = [f for f in files if f.lower().endswith(".ged") and "unmasked" not in f.lower()]

                if server_geds:
                    target = server_geds[0]
                    print(f"       Found on Server: {target} in {folder}")
                    # We won't actually download in TEST mode to save time, just prove we found it
                    print(f"       ‚ú® SUCCESS: Logic found the file at {folder}/{target}")
                    ged_ready = True
                    break

            if not ged_ready:
                print("       ‚ùå TEST FAILURE: Could not find GEDCOM on server.")

        # B. Download Key File (If we didn't find one locally)
        if not key_ready:
            print(f"    üîé Hunting for {KEY_FILENAME}...")
            # Reset CWD search or assume root/study folders
            for folder in SEARCH_PATHS:
                try:
                    ftps.cwd(folder)
                    listing = ftps.nlst()
                except all_errors:
                    continue
                if KEY_FILENAME in listing:
                    print(f"       ‚ú® SUCCESS: Logic found Key File at {folder}")
                    key_ready = True
                    break

    except Exception as e:
        print(f"    ‚ùå Connection/Download Error: {e}")
    finally:
        # Always release the session, even when a step above failed.
        if ftps is not None:
            if connected:
                try:
                    ftps.quit()
                except all_errors:
                    ftps.close()
            else:
                ftps.close()

    # -------------------------------------------------------
    # FINAL STATUS
    # -------------------------------------------------------
    # NOTE(review): the verdict looks only at the GEDCOM; a missing key file
    # does not fail the test -- confirm that is intended.
    print("-" * 60)
    if ged_ready:
        print("üü¢ TEST PASSED: The Asset Manager is working correctly.")
    else:
        print("üî¥ TEST FAILED: Could not locate assets locally OR on server.")

# Run the test function
test_fetch_assets()

      🧪 TEST BENCH: V114 ASSET MANAGER
      (Testing 'Local Priority' Logic)
[STEP 1] Scanning local Colab environment...
    ✅ FOUND LOCAL UPLOAD: yates_study_2025.ged
    ✨ SUCCESS: Logic detected local file. Skipping Server Download.
    ✅ FOUND LOCAL KEY: match_to_unmasked.csv

🟢 TEST RESULT: Assets ready. (If this were real, Cell 3 would start now).
