<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title [CELL 1] Setup + Branding (V125 - Inline Logo Fix)
import os
import sys
import re
import csv
import json
import html
import socket
import pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# --- INSTALL TQDM IF MISSING ---
# Install through the interpreter that is actually running this cell
# (sys.executable -m pip) so the package lands in the same environment the
# subsequent import reads from, and fail loudly if pip itself fails instead of
# silently continuing to a confusing second ImportError.
try:
    import tqdm
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tqdm"])
    import tqdm

# Startup banner: one print call, one argument per output line.
print(
    "=" * 60,
    "      [CELL 1] SETUP LOADED (V125)",
    "      BRANDING: THE FORENSIC GENEALOGY PUBLISHER",
    "      (Logo now inline with Page Titles)",
    "=" * 60,
    sep="\n",
)

# ==============================================================================
# 1. BRANDING & ASSETS
# ==============================================================================
# Logo image that make_page() embeds inline, immediately before the page title.
LOGO_URL = "https://yates.one-name.net/gengen/images/yates_modern_logo.png"

# Branding strings interpolated into FOOTER_HTML (generator name, copyright
# holder, and trademark notice respectively).
BRAND_NAME = "The Forensic Genealogy Publisher"
COPYRIGHT_OWNER = "Ronald Eugene Yates"
TRADEMARKS = 'The terms "Forensic Handshake", "Brick Wall Buster", and "Collateral Saturation" are trademarks of the Yates One-Name Study.'

# ==============================================================================
# 2. GLOBAL HELPER FUNCTIONS + HTML ASSETS
# ==============================================================================
# Prefix/suffix of a TNG vertical-chart URL. Presumably a person ID is placed
# between the two (the prefix ends in "personID=") -- the code that builds the
# link is not visible in this part of the file; confirm against the caller.
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

# FOOTER (Visible in Print)
# Legal footer appended to every page by make_page(). The print stylesheet in
# NAV_HTML explicitly keeps #legal-footer displayed when printing.
# NOTE: this is an f-string evaluated once, when this cell runs, so the
# copyright year is the year at load time, not at page-generation time.
FOOTER_HTML = f"""
<div id="legal-footer" style="margin-top:50px;padding:20px;background:#f4f4f4;border-top:1px solid #ddd;text-align:center;color:#666;font-family:sans-serif;font-size:0.85em;">
    <p style="margin-bottom:5px;"><strong>&copy; {datetime.now().year} {COPYRIGHT_OWNER}. All Rights Reserved.</strong></p>
    <p style="margin-bottom:5px;">Generated by <em>{BRAND_NAME}&trade;</em></p>
    <p style="font-style:italic;color:#888;">{TRADEMARKS}</p>
</div>
"""

# Shared <style> block plus the site navigation bar, injected into every page
# by make_page(). The stylesheet covers three things: the nav bar itself, the
# .title-logo class used by the inline title logo, and print rules that hide
# the nav bar and every element marked .no-print-strict while keeping
# #nav-slot and #legal-footer visible.
# This is a plain (non-f) string, so the CSS braces need no escaping.
NAV_HTML = """
<style>
/* NAVIGATION STYLES */
nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none}
nav.oldnav li{display:inline-block}
nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px}
nav.oldnav a:hover{background-color:#00838f!important}

/* LOGO STYLING (New V125) */
.title-logo {
    height: 1.2em; /* Matches the text height mostly */
    width: auto;
    vertical-align: middle; /* Aligns with the middle of the text */
    margin-right: 10px; /* Space between logo and text */
    margin-bottom: 4px; /* Slight optical adjustment */
}

/* PRINT SETTINGS */
@media print {
    nav.oldnav, .action-btn, .control-panel, .tabs, .no-print-strict { display: none !important; }
    #nav-slot, #legal-footer { display: block !important; }
    body { font-size: 12px; }
}
</style>
<nav class="oldnav"><ul>
<li><a href="/ons-study/research_admin.php" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li>
<li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li>
<li><a href="/ons-study/yates_ancestor_register.php">DNA Register</a></li>
<li><a href="/ons-study/just-trees.shtml">Trees</a></li>
<li><a href="/ons-study/dna_network.php">DNA Network</a></li>
<li><a href="/ons-study/lineage_proof.php">Lineage Proof</a></li>
<li><a href="/ons-study/dna_dossier.php">Forensic Dossier</a></li>
<li><a href="/ons-study/brick_wall_buster.php" style="background:#f57f17;color:black !important;">Brick Wall Buster</a></li>
<li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li>
<li><a href="https://yates.one-name.net/gengen/images/cousin-calculator.jpg" target="_blank" style="color:#b2dfdb;">Cousin Calc</a></li>
<li><a href="https://yates.one-name.net/gengen/images/Shared_cM_Project_v4.jpg" target="_blank" style="color:#b2dfdb;">cM Chart</a></li>
<li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li>
<li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li>
</ul></nav>"""

# Methodology banner shown by make_page() above register/tree/proof style views.
# Hidden when printing via the .no-print-strict class.
SITE_INFO = """<div class="no-print-strict" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>—a method that blends genealogical reasoning with data-driven logic.</p></div>"""

# Client-side behaviour appended to every page by make_page().
# Raw string: the JavaScript regex backslashes (\s, \b, \-) must reach the
# browser untouched.
#   * Sorting   - on load, every <table> whose class list contains "sortable"
#                 gets clickable header cells; cells are compared by their
#                 data-sort attribute when present, otherwise by text, and
#                 numerically when both values parse as numbers.
#   * Filtering - window.filterTable() reads #tableSearch and hides every row
#                 (after the first) of #reg-table, or failing that the first
#                 table.dataframe, that has no cell containing the text.
# NOTE(review): the per-header direction starts at 'asc' and is flipped
# *before* sorting, so the first click on a header sorts descending -- confirm
# that this is intended.
# NOTE(review): filterTable() does not guard against a missing table; on a page
# with the search box but no matching table it would throw.
JS_CORE = r"""<script type="text/javascript">
(function(){
    // SORTING LOGIC
    function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();}
    function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);}
    function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}}

    // FILTERING LOGIC
    window.filterTable = function() {
        var input = document.getElementById("tableSearch");
        var filter = input.value.toUpperCase();
        var table = document.getElementById("reg-table") || document.querySelector("table.dataframe");
        var tr = table.getElementsByTagName("tr");
        for (var i = 1; i < tr.length; i++) {
            var tdArr = tr[i].getElementsByTagName("td");
            var found = false;
            for (var j = 0; j < tdArr.length; j++) {
                if (tdArr[j]) {
                    var txtValue = tdArr[j].textContent || tdArr[j].innerText;
                    if (txtValue.toUpperCase().indexOf(filter) > -1) {
                        found = true;
                        break;
                    }
                }
            }
            tr[i].style.display = found ? "" : "none";
        }
    }

    function init(){
        var t=document.getElementsByTagName('table');
        for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]);
    }
    if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init();
})();
</script>"""

# Body HTML for the Data Glossary page. Typographic quotes, apostrophes and
# accented letters are stored as real Unicode characters (the mis-decoded
# byte sequences that rendered as garbage have been repaired).
GLOSSARY_CONTENT = """<div style="background:white;padding:25px;border-radius:8px;border:1px solid #ddd;font-family:sans-serif;line-height:1.6;"><h2 style="color:#006064;border-bottom:2px solid #004d40;padding-bottom:10px;">ONS Yates Study: Data Glossary</h2><h3 style="color:#00838f;margin-top:25px;">1. Identity Columns</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Tester-Participant-MASKED (The Trigger):</strong><br>The unique privacy code extracted from the user's NPFX tag.</li><li style="margin-bottom:15px;"><strong>Tester-Participant-Unmasked:</strong><br>The real name of the tester.</li></ul><h3 style="color:#00838f;margin-top:25px;">2. Analysis Terms</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Platinum Standard:</strong><br>Lineages with 30+ matches and 10+ unique sources. Biologically proven.</li><li style="margin-bottom:15px;"><strong>Keystone Tester:</strong><br>A high-value participant (15+ matches) who anchors a specific branch.</li><li style="margin-bottom:15px;"><strong>Forensic Handshake:</strong><br>An informal term in genetic genealogy describing a confirmation pattern in which multiple independent DNA matches support the same ancestral line or family connection, strengthening confidence in an identification or relationship conclusion. Rather than relying on a single match, genealogists look for several matches that converge on the same family network, creating a “handshake” of agreement between genetic evidence and documentary research. 
This concept is especially valuable in unknown-parentage and forensic cases, where conclusions must be supported by redundant evidence.<br><br>A forensic handshake is often achieved through related methods such as triangulation, where three or more individuals share the same DNA segment from a common ancestor, and cluster analysis, which groups matches who also match one another and often represent a shared ancestral couple or lineage. Together, these approaches help meet an emerging genetic genealogy proof standard, emphasizing that reliable conclusions require multiple corroborating matches, consistency with records, and the exclusion of alternate explanations.</li></ul><h3 style="color:#00838f;margin-top:25px;">3. Genealogy Concepts</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Spanish Naming System:</strong><br>A traditional Hispanic naming convention in which an individual bears one or more given names followed by two surnames: the first inherited from the father (paternal surname) and the second from the mother (maternal surname). This system is historically rooted in Spain and is especially useful in genealogy because it preserves both parental lineages and improves identification in historical records.</li><li style="margin-bottom:15px;"><strong>Née:</strong><br>A term meaning “born as,” used to indicate a woman’s maiden or birth surname before marriage. In genealogical and historical records, née identifies the surname a woman carried in her natal family line, preserving her connection to her parents and ancestry. For example, “Maria García, née López” shows that María’s birth surname was López, even though she later used García after marriage.<br><br>The use of née is especially important for tracing maternal family lines, since women’s surnames may change across generations in many cultures. 
By recording a woman’s birth name, genealogists can correctly link her to her original family, distinguish between individuals with similar married names, and maintain continuity in lineage reconstruction, particularly in marriage, probate, and church records.</li></ul></div>"""

# Body HTML for the Subscribe page (mailto link to the Groups.io list).
# The heading emoji is stored as the real Unicode character rather than its
# mis-decoded byte sequence.
SUBSCRIBE_CONTENT = """<div style="background:white;padding:40px;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);max-width:800px;margin:30px auto;text-align:center;font-family:'Segoe UI',sans-serif;"><h1 style="color:#006064;margin-bottom:10px;">Join the Yates Research Community</h1><p style="color:#555;font-size:1.1em;line-height:1.6;margin-bottom:30px;">Stay connected with the latest breakthroughs in the Yates One-Name Study. Get notified about new DNA groups, lineage verifications, and quarterly reports.</p><div style="background:#e0f2f1;padding:25px;border-radius:8px;border:1px solid #b2dfdb;display:inline-block;"><h3 style="margin-top:0;color:#004d40;">📧 One-Click Subscribe</h3><p style="margin-bottom:20px;">Click below to send a subscription request to our Groups.io list.</p><a href="mailto:yates-one-name-study+subscribe@groups.io?subject=Subscribe" style="display:inline-block;padding:15px 30px;background:#00838f;color:white;text-decoration:none;border-radius:5px;font-weight:bold;font-size:1.1em;box-shadow:0 2px 5px rgba(0,0,0,0.2);">Subscribe Now</a></div><p style="margin-top:30px;font-size:0.9em;color:#777;">Powered by Groups.io. You will receive a confirmation email shortly.</p></div>"""

# Body HTML for the "Share DNA" instructions page. The possessive apostrophe
# in the closing paragraph is stored as a real Unicode character rather than
# its mis-decoded byte sequence.
SHARE_CONTENT = """<div style="max-width:900px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><div style="text-align:center;margin-bottom:40px;"><h1 style="color:#0277bd;margin-bottom:10px;">Share Your Ancestry DNA Matches</h1><p style="font-size:1.1em;color:#555;">Ancestry provides a built-in sharing feature that allows you to grant limited access to your DNA matches <strong>without sharing your personal account details</strong>. You remain in full control of your account at all times.</p></div><div style="display:grid;grid-template-columns:1fr 1fr;gap:30px;margin-bottom:30px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #0277bd;"><h3 style="color:#0277bd;margin-top:0;">How Sharing Works</h3><p>From your AncestryDNA <strong>Settings</strong> page, you may invite another individual by email and assign one of the following roles:</p><ul style="padding-left:20px;"><li><strong>Viewer</strong> (Read only)</li><li><strong>Collaborator</strong> (Recommended for Study)</li><li><strong>Manager</strong> (Full Control)</li></ul></div><div style="background:#e3f2fd;padding:25px;border-radius:8px;border:1px solid #90caf9;"><h3 style="color:#01579b;margin-top:0;">Privacy & Control</h3><p>This sharing arrangement provides <strong>limited access only</strong>. 
It does not allow changes to your account and does not expose your personal details.</p><p><strong>You may revoke access at any time through Ancestry.</strong></p></div></div><div style="background:white;padding:30px;border-radius:8px;border:1px solid #ddd;box-shadow:0 4px 15px rgba(0,0,0,0.05);"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">How to Share for the Yates One-Name Study</h2><ol style="font-size:1.1em;line-height:1.8;padding-left:25px;"><li>Open your <strong>AncestryDNA Settings</strong>.</li><li>Scroll to the section labeled "DNA Result Sharing" and click <strong>Invite</strong>.</li><li>Send the invitation to <strong>Ron Yates</strong> at: <br><span style="background:#fff3e0;padding:5px 10px;border-radius:4px;font-weight:bold;color:#e65100;font-family:monospace;font-size:1.2em;">yatesvilleron@gmail.com</span></li><li>Assign the role: <strong>Collaborator</strong>.</li></ol><div style="background:#fffde7;border-left:5px solid #fbc02d;padding:15px;margin-top:20px;font-size:0.95em;"><strong>Why Collaborator?</strong> The Collaborator role allows Ron to review shared matches and create small internal groups (colored dots) to identify which matches have been reviewed and which have contributed evidence to the Yates One-Name Study.</div></div><div style="margin-top:40px;"><h3 style="color:#006064;">What Happens Next?</h3><p>After sharing, you will receive an invitation to subscribe to the <strong>Yates One-Name Study Groups.io mailing list</strong>, where DNA proof summaries and study findings are shared.</p><h3 style="color:#006064;">Reciprocal Sharing (Optional)</h3><p>If you are interested in viewing Ron’s DNA matches, simply let him know. When a direct match exists, that relationship will be reflected in the study findings.</p></div></div>"""

# Body HTML for the DNA strategy ("theory") page. The em dashes around
# "inherited from both parents" are stored as real Unicode characters rather
# than their mis-decoded byte sequences.
THEORY_CONTENT = """<div style="max-width:1000px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><h1 style="text-align:center;color:#004d40;font-size:2.5em;margin-bottom:10px;">The Yates DNA Strategy</h1><p style="text-align:center;font-size:1.2em;color:#666;margin-bottom:40px;">Moving beyond traditional Y-DNA to solve modern genealogical mysteries.</p><div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:30px;margin-bottom:40px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #00838f;"><h2 style="color:#006064;margin-top:0;">The Autosomal Revolution</h2><p>Traditional one-name studies rely almost exclusively on Y-DNA to trace the paternal surname line. While valuable for deep history, this approach ignores 50% of our ancestors at every generation.</p><p><strong>Our Focus:</strong> We utilize <strong>Autosomal DNA (atDNA)</strong>—inherited from both parents—to verify connections across <em>all</em> branches. This allows us to:</p><ul style="padding-left:20px;color:#444;"><li>Bridge the "Gender Gap" by tracing female descendants.</li><li>Verify paper trails for the last 300 years (Genealogical Time).</li><li>Cluster "Floating" Yates families into their correct lines.</li></ul></div><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #f9a825;"><h2 style="color:#f57f17;margin-top:0;">Collateral Saturation</h2><p>A single DNA match can be luck. Ten matches is a statistic. <strong>Thirty matches is a fact.</strong></p><p>We employ a technique called <strong>Collateral Saturation</strong>. We don't look for one "Golden Match." We analyze groups of matches from independent cousin lines. 
When descendants from four different children of <em>William & Mary Yates</em> all share DNA with you, the relationship is biologically confirmed.</p><div style="text-align:center;margin-top:20px;"><a href="dna_network.php" style="display:inline-block;padding:10px 20px;background:#f9a825;color:#333;text-decoration:none;border-radius:4px;font-weight:bold;">View the Network</a></div></div></div><div style="background:#e0f2f1;padding:30px;border-radius:8px;border:1px solid #b2dfdb;margin-bottom:40px;"><h2 style="color:#004d40;margin-top:0;text-align:center;">From Theory to Tools</h2><p style="text-align:center;max-width:700px;margin:0 auto 20px auto;">We have built a suite of forensic tools to visualize this data. Instead of raw spreadsheets, we offer interactive dashboards to prove your connection.</p><div style="display:flex;flex-wrap:wrap;justify-content:center;gap:15px;margin-top:20px;"><a href="ons_yates_dna_register.php" style="background:#006064;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">The Register</a><a href="lineage_proof.php" style="background:#00838f;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Proof Engine</a><a href="dna_dossier.php" style="background:#00acc1;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Forensic Dossier</a></div></div><div style="background:#f5f5f5;padding:20px;border-radius:8px;border:1px solid #ddd;"><h3 style="color:#555;margin-top:0;">Legacy Data: Y-DNA Haplogroups</h3><p style="font-size:0.9em;color:#666;margin-bottom:15px;">Y-DNA is the backbone of deep ancestry (27,000 BCE to 1600 AD). While not our primary focus for recent genealogy, we maintain a detailed record of the Yates Y-Chromosome mutations (R-M207 through FT266579).</p><a href="https://yates.one-name.net/gengen/dna_proof_y.htm" style="color:#006064;font-weight:bold;text-decoration:none;">&raquo; View Detailed Y-DNA Findings</a></div></div>"""

def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    """Assemble a complete HTML document around ``content``.

    Every page gets the inline title logo, the shared navigation bar
    (``NAV_HTML``), the legal footer (``FOOTER_HTML``) and the sorting /
    filtering script (``JS_CORE``). ``view_type`` selects the optional extras:

    * methodology banner (``SITE_INFO``): 'ancestor', 'participant',
      'tree_az', 'tree_za', 'proof', 'network', 'dossier', 'buster',
      'singleton';
    * search box and print button: 'ancestor', 'participant', 'singleton';
    * register sort toggle: 'ancestor' / 'participant';
    * tree order toggle: any ``view_type`` containing 'tree'.

    Args:
        title: Text used for both ``<title>`` and the ``<h1>`` heading.
        content: Main body HTML.
        count: Currently unused; kept so existing callers keep working.
        view_type: Page flavour, see above.
        extra: Markup appended inside ``<head>``.
        stats_bar: Markup placed in ``#nav-slot`` ahead of the navigation bar.

    Returns:
        The full HTML page as a single string.

    NOTE(review): ``title``, ``content``, ``extra`` and ``stats_bar`` are
    interpolated verbatim (no HTML escaping); callers must pass trusted markup.
    """
    # 'subscribe' and 'share' used to be listed here and were then immediately
    # reset to "" further down; they are simply left out now (same result).
    banner_views = ('ancestor', 'participant', 'tree_az', 'tree_za', 'proof', 'network', 'dossier', 'buster', 'singleton')
    list_views = ('ancestor', 'participant', 'singleton')

    nav_blk = SITE_INFO if view_type in banner_views else ""

    toggle = ""
    print_btn = ""
    search_bar = ""

    # Inline logo image placed directly in front of the heading text.
    inline_logo = f'<img src="{LOGO_URL}" class="title-logo" alt="Logo"> '

    if view_type in list_views:
        # Emoji are written as real Unicode characters; the page declares
        # charset UTF-8, and the previous mis-decoded bytes rendered as garbage.
        search_bar = """<div class="no-print-strict" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="🔍 Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""

        view_name = "Singleton List" if view_type == 'singleton' else "Register"
        print_btn = f"""<div class="no-print-strict" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">🖨️ Print {view_name}</button></div>"""

    if view_type == 'ancestor':
        toggle = """<div class="no-print-strict" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.php" style="font-weight:bold;color:#006064;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.php" style="color:#00acc1;text-decoration:none;">By Participant Name</a></div>"""
    elif view_type == 'participant':
        toggle = """<div class="no-print-strict" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.php" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.php" style="font-weight:bold;color:#006064;">By Participant Name</a></div>"""
    elif 'tree' in view_type:
        # The ordering currently shown is rendered as bold text, the other as a link.
        za = '<span style="font-weight:bold;color:#000;">Z-A</span>' if 'za' in view_type else '<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        az = '<span style="font-weight:bold;color:#000;">A-Z</span>' if 'az' in view_type else '<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print-strict" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{inline_logo}{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}{FOOTER_HTML}{JS_CORE}</div></body></html>"""

# Completion marker for this cell; the check mark is written as a real Unicode
# character instead of its mis-decoded byte sequence.
print("✅ Cell 1 Loaded! (Logo now inline with Page Titles)")

      [CELL 1] SETUP LOADED (V125)
      BRANDING: THE FORENSIC GENEALOGY PUBLISHER
      (Logo now inline with Page Titles)
‚úÖ Cell 1 Loaded! (Logo now inline with Page Titles)


In [2]:
# @title [CELL 3] The Data Engine (V113 - Force Fresh & Hyphen Debug)
def run_engine():
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V113 - FORCE REFRESH)...")
    print("="*60)

    import os
    import sys
    import re
    import csv
    from ftplib import FTP_TLS
    from google.colab import userdata
    from datetime import datetime

    # --- 1. FORCE CLEANUP ---
    CSV_DB = "engine_database.csv"
    if os.path.exists(CSV_DB):
        os.remove(CSV_DB)
        print("    üóëÔ∏è Deleted old database (Forcing fresh build)")

    # --- CONFIGURATION ---
    HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
    USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
    PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

    REMOTE_SUBDIR = "ons-study"
    KEY_FILE       = "match_to_unmasked.csv"
    PROCESSED_GED  = "_processed_unmasked.ged"

    # --- HELPER: CLEANING ---
    def clean_and_standardize(raw_name):
        """Normalise a GEDCOM name value, mapping unusable names to "findme".

        Slashes (GEDCOM surname delimiters) are removed and the result is
        trimmed. Empty values, known placeholder words, anything containing
        "?" and anything containing "unknown" all become "findme".
        """
        placeholder = "findme"
        if not raw_name:
            return placeholder

        cleaned = raw_name.replace("/", "").strip()
        lowered = cleaned.lower()
        blocked = ("unknown", "missing", "searching", "still searching", "living", "private", "nee", "nee ?", "wife", "mrs")

        unusable = (
            cleaned == ""
            or lowered in blocked
            or "?" in cleaned
            or "unknown" in lowered
        )
        return placeholder if unusable else cleaned

    def get_surname(full_name):
        """Return the last name token of ``full_name``, ignoring suffixes.

        Generational and professional suffixes (Jr, Sr, II, III, IV, Esq,
        M.D, Ph.D - each with or without a trailing dot) and commas are
        removed before the final whitespace-separated token is taken.

        Returns "" for empty input, for "findme" placeholders, and when
        nothing is left after removing suffixes.
        """
        if not full_name or "findme" in full_name.lower(): return ""
        # The optional trailing dot sits *after* the closing word boundary.
        # With the dot inside the group ("jr\.?" followed by "\b") a suffix at
        # the end of the name, e.g. "John Yates Jr.", only lost its letters
        # and the leftover "." was returned as the surname.
        without_suffix = re.sub(
            r'\b(?:jr|sr|iii|ii|iv|esq|m\.d|ph\.d)\b\.?',
            '',
            full_name,
            flags=re.IGNORECASE,
        )
        tokens = without_suffix.replace(',', '').split()
        return tokens[-1] if tokens else ""

    def make_directory_label(name, dates):
        """Build a "Surname, Given-names dates" directory label.

        ``name`` is returned unchanged when it is a "findme" placeholder or
        when no surname can be derived from it.

        The surname is removed from wherever it last appears as a whole token,
        not only from the very end of the name, so a trailing suffix no longer
        causes the surname to be duplicated
        ("John Yates Jr" -> "Yates, John Jr ..." rather than
        "Yates, John Yates Jr ...").
        """
        if "findme" in name.lower(): return name
        sur = get_surname(name)
        if not sur: return name

        # Lookarounds instead of \b so surnames that start or end with a
        # non-word character still match as whole tokens.
        hits = list(re.finditer(rf"(?<!\w){re.escape(sur)}(?!\w)", name))
        if hits:
            last = hits[-1]
            before = name[:last.start()].rstrip()
            after = name[last.end():].lstrip()
            firsts = f"{before} {after}".strip()
        else:
            # Surname not found verbatim: keep the whole name, as before.
            firsts = name.strip()
        return f"{sur}, {firsts} {dates}"

    # ---------------------------------------------------------
    # STEP 1 & 2: SETUP
    # ---------------------------------------------------------
    print("\n[STEP 1] Setup...")
    try:
        ftps = FTP_TLS()
        ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
        try:
            with open(KEY_FILE, "wb") as f: ftps.retrbinary(f"RETR /{REMOTE_SUBDIR}/{KEY_FILE}", f.write)
        except: pass
        ftps.quit()
    except: pass

    all_files = os.listdir('.')
    ged_files = [f for f in all_files if f.lower().endswith('.ged') and "_processed" not in f and "unmasked" not in f.lower()]
    if not ged_files: return print("‚ùå No GEDCOM found.")
    ged_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
    DEFAULT_GEDCOM = ged_files[0]
    print(f"    üëâ Source: {DEFAULT_GEDCOM}")

    # ---------------------------------------------------------
    # STEP 3: UNMASKING
    # ---------------------------------------------------------
    unmask_map = {}
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            for row in csv.reader(f):
                if len(row)>1: unmask_map[row[0].strip().lower()] = row[1].strip()

    def resolve_code(payload):
        """Extract the lower-cased participant code from an NPFX payload.

        The payload is searched for a run of digits, an optional "&", and then
        a run of non-whitespace characters; that last run is the code.
        Returns None when the pattern is not present.
        """
        found = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', payload)
        if found is None:
            return None
        return found.group(2).lower()

    with open(DEFAULT_GEDCOM, 'r', encoding='utf-8', errors='replace') as fin, \
         open(PROCESSED_GED, 'w', encoding='utf-8') as fout:
        buffer = []; real_name = None
        for line in fin:
            if line.startswith("0 @"):
                if buffer:
                    for b in buffer:
                        if b.startswith("1 NAME") and real_name: fout.write(f"1 NAME {real_name}\n")
                        else: fout.write(b)
                buffer = [line]; real_name = None
            else:
                buffer.append(line)
                if line.startswith("1 NPFX"):
                    parts = line.split(" ", 2)
                    if len(parts) > 2:
                        code = resolve_code(parts[2].strip())
                        if code: real_name = unmask_map.get(code, code)
        if buffer:
            for b in buffer:
                if b.startswith("1 NAME") and real_name: fout.write(f"1 NAME {real_name}\n")
                else: fout.write(b)

    # ---------------------------------------------------------
    # STEP 4: TRACE
    # ---------------------------------------------------------
    print("\n[STEP 4] Tracing Lineages...")
    individuals = {}; families = {}

    def is_yates(name_str):
        """Return True when the name contains a Yates spelling variant.

        Matching is a case-insensitive substring test; None or empty input
        yields False.
        """
        lowered = (name_str or "").lower()
        return any(variant in lowered for variant in ("yates", "yeates", "yate"))

    current_id = None; current_fam = None; current_tag = None
    with open(PROCESSED_GED, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip(); parts = line.split(" ", 2)
            if len(parts) < 2: continue
            lvl, tag, val = parts[0], parts[1], parts[2] if len(parts)>2 else ""

            if lvl == "0" and "INDI" in val:
                current_id = tag.replace("@", "")
                individuals[current_id] = {"name": "findme", "famc": None, "fams": [], "code": "", "cm": 0, "birt": "", "deat": ""}
                current_fam = None; current_tag = "INDI"
            elif current_id and lvl != "0":
                if tag == "NAME": individuals[current_id]["name"] = clean_and_standardize(val)
                elif tag == "FAMC": individuals[current_id]["famc"] = val.replace("@", "")
                elif tag == "FAMS": individuals[current_id]["fams"].append(val.replace("@", ""))
                elif tag == "NPFX":
                    code = resolve_code(val)
                    if code: individuals[current_id]["code"] = code
                    m = re.search(r'^(\d+)|(\d+)\s*cM', val, re.IGNORECASE)
                    if m: individuals[current_id]["cm"] = int(m.group(1) or m.group(2))
                elif tag == "BIRT": current_tag = "BIRT"
                elif tag == "DEAT": current_tag = "DEAT"
                elif tag == "DATE" and current_tag:
                    m = re.search(r'\d{4}', val)
                    if m: individuals[current_id][current_tag.lower()] = m.group(0)
                    current_tag = None

            if lvl == "0" and "FAM" in val:
                current_fam = tag.replace("@", "")
                families[current_fam] = {"husb": None, "wife": None}
                current_id = None
            elif current_fam and lvl != "0":
                if tag == "HUSB": families[current_fam]["husb"] = val.replace("@", "")
                elif tag == "WIFE": families[current_fam]["wife"] = val.replace("@", "")

    def get_parents(pid):
        """Return ``(father_id, mother_id)`` for ``pid``.

        Both entries are None when the person is unknown, has no FAMC link, or
        the linked family record was not parsed. Either entry may also be None
        when the family record lacks that spouse.
        """
        nobody = (None, None)
        if not pid:
            return nobody

        person = individuals.get(pid)
        if person is None:
            return nobody

        child_family = person["famc"]
        if not child_family:
            return nobody

        family = families.get(child_family)
        if family is None:
            return nobody
        return family["husb"], family["wife"]

    def get_mother_surname(pid):
        """Return the surname of ``pid``'s mother, or "" when unavailable."""
        if pid:
            mother_id = get_parents(pid)[1]
            if mother_id and mother_id in individuals:
                return get_surname(individuals[mother_id]["name"])
        return ""

    def to_spanish_name(pid, current_name):
        """Append the mother's surname as "Name-MotherSurname".

        The name is returned unchanged when it is a "findme" placeholder, when
        no usable maternal surname exists, or when the maternal surname already
        occurs (case-insensitively) somewhere in the name.
        """
        name_lower = current_name.lower()
        if "findme" in name_lower:
            return current_name

        maternal = get_mother_surname(pid)
        if not maternal:
            return current_name

        maternal_lower = maternal.lower()
        if "findme" in maternal_lower or maternal_lower in name_lower:
            return current_name

        # SUCCESSFUL SPANISH NAMING
        return f"{current_name}-{maternal}"

    def climb_full_line(start_id):
        """Walk from ``start_id`` up through the parents and record each step.

        At each generation the climb prefers the parent whose name looks like
        a Yates variant when exactly one parent does; otherwise it follows the
        father if one is recorded, else the mother.

        Returns:
            A list of dicts ordered from ``start_id`` upward, each with the
            keys "name" (Spanish-style name), "raw_name", "id", "spouse"
            (Spanish-style), "spouse_raw" and "spouse_id". The spouse is taken
            from the person's first FAMS family only.

        The walk stops at an unknown ID, at a person with no recorded parents,
        or as soon as an ID is seen a second time. The last rule guards
        against circular parent links in the GEDCOM, which would otherwise
        loop forever while the result list grows without bound.
        """
        curr = start_id
        lineage_data = []
        visited = set()

        while curr and curr not in visited:
            visited.add(curr)
            p = individuals.get(curr)
            if not p:
                break

            # SPANISH LOGIC APPLIED HERE
            spanish_name = to_spanish_name(curr, p["name"])

            # Debug specific failure point
            if "Levi Yates" in p["name"]:
                ms = get_mother_surname(curr)
                print(f"    [DEBUG] Found Levi Yates. Mother Surname found: '{ms}'. Result: {spanish_name}")

            spouse_name = "findme"
            spouse_id = None
            if p["fams"]:
                fid = p["fams"][0]
                if fid in families:
                    fam = families[fid]
                    # The spouse is whichever partner of the family is not this person.
                    sid = fam["wife"] if fam["husb"] == curr else fam["husb"]
                    if sid and sid in individuals:
                        spouse_name = individuals[sid]["name"]
                        spouse_id = sid

            spouse_spanish = to_spanish_name(spouse_id, spouse_name) if spouse_id else spouse_name

            lineage_data.append({
                "name": spanish_name,
                "raw_name": p["name"],
                "id": curr,
                "spouse": spouse_spanish,
                "spouse_raw": spouse_name,
                "spouse_id": spouse_id
            })

            dad_id, mom_id = get_parents(curr)
            if not dad_id and not mom_id:
                break

            dad_n = individuals.get(dad_id, {}).get("name", "findme") if dad_id else "findme"
            mom_n = individuals.get(mom_id, {}).get("name", "findme") if mom_id else "findme"

            if is_yates(dad_n) and not is_yates(mom_n):
                curr = dad_id
            elif is_yates(mom_n) and not is_yates(dad_n):
                curr = mom_id
            else:
                curr = dad_id if dad_id else mom_id

        return lineage_data

    def format_dates(uid):
        """Return "(birth - death)" for ``uid``, or "findme" when nothing is known.

        Missing dates become "findme", and every "?" inside a date is replaced
        by "findme" as well. An unknown ``uid`` also yields "findme".
        """
        if not (uid and uid in individuals):
            return "findme"

        record = individuals[uid]
        birth, death = (
            (record[field] or "findme").replace("?", "findme")
            for field in ("birt", "deat")
        )
        if (birth, death) == ("findme", "findme"):
            return "findme"
        return f"({birth} - {death})"

    # --- Build one CSV row per tester, then write the database ---
    # A tester is any individual with a non-empty "code". Each row records the
    # tester's match, the oldest ancestor reached by climb_full_line ("gen1"),
    # that ancestor's first-listed spouse, and the mothers of both.
    rows = []
    for uid, p in individuals.items():
        if not p["code"]:
            continue  # not a tester

        lineage_data = climb_full_line(uid)
        if not lineage_data:
            continue

        # climb_full_line returns tester-first; flip so the oldest comes first.
        full_line = list(reversed(lineage_data))
        gen1 = full_line[0]

        top_name = gen1["raw_name"]
        top_dates = format_dates(gen1["id"])
        spouse_name = gen1["spouse_raw"]
        spouse_id = gen1["spouse_id"]
        spouse_dates = format_dates(spouse_id)
        has_spouse = spouse_name != "findme"

        if has_spouse:
            husb_sur = get_surname(top_name)
            wife_sur = get_surname(spouse_name)
            if husb_sur.lower() == wife_sur.lower():
                # NOTE(review): the literal below looks like a mis-decoded
                # "née" — confirm the file's encoding before changing it.
                spouse_name += f" (n√©e {wife_sur})"

        pair_dated = f"{top_name} {top_dates}"
        dir_label = make_directory_label(top_name, top_dates)
        if has_spouse:
            dir_label += f" & {spouse_name}"
            pair_dated += f" & {spouse_name} {spouse_dates}"
        pair_simple = f"{top_name} & {spouse_name}" if has_spouse else top_name

        # Alphanumeric-only key; "ZZZ" stands in for a missing spouse.
        clean_top = re.sub(r'[^a-zA-Z0-9]', '', top_name)
        clean_sp = re.sub(r'[^a-zA-Z0-9]', '', spouse_name.split('(')[0]) if has_spouse else "ZZZ"
        sort_key = f"{clean_top}_{clean_sp}"

        # First hop shows the dated couple; later hops use the hyphenated name.
        path_names = [pair_dated] + [x["name"] for x in full_line[1:]]
        lineage_str = " -> ".join(path_names)
        path_ids = ",".join([x["id"] for x in full_line])

        # A parent reference may point at a record that is not in
        # `individuals`; treat that as unknown instead of raising KeyError.
        _, fa1_mom_id = get_parents(gen1["id"])
        fa1_mother = "findme"
        if fa1_mom_id and fa1_mom_id in individuals:
            fa1_mother = to_spanish_name(fa1_mom_id, individuals[fa1_mom_id]["name"])

        fa2_mother = "findme"
        if spouse_id:
            _, fa2_mom_id = get_parents(spouse_id)
            if fa2_mom_id and fa2_mom_id in individuals:
                fa2_mother = to_spanish_name(fa2_mom_id, individuals[fa2_mom_id]["name"])

        rows.append({
            "Tester-Participant-MASKED": p["code"],
            "Tester-Participant-Unmasked": unmask_map.get(p["code"], p["code"]),
            "Found Match": p["name"],
            "ID#": uid, "cM": p["cm"], "Spacer": "",
            "Yates DNA Ancestral Line": lineage_str,
            "Authority_Directory_Label": dir_label,
            "Authority_FirstAncestor": pair_simple,
            "Authority_FirstAncestor_alpha": sort_key,
            "Authority_FirstAncestor_dated": pair_dated,
            "fa_1 extracted": top_name, "fa_1_Dates": top_dates, "fa_1_Mother": fa1_mother,
            "fa_2 extracted": spouse_name, "fa_2 Dates": spouse_dates, "fa_2_Mother": fa2_mother,
            "Gen_Count": len(full_line), "Ancestral_Path_IDs": path_ids
        })

    rows.sort(key=lambda r: r["Authority_Directory_Label"])

    fieldnames = [
        "Tester-Participant-MASKED", "Tester-Participant-Unmasked", "Found Match", "ID#", "cM", "Spacer",
        "Yates DNA Ancestral Line", "Authority_Directory_Label",
        "Authority_FirstAncestor", "Authority_FirstAncestor_alpha", "Authority_FirstAncestor_dated",
        "fa_1 extracted", "fa_1_Dates", "fa_1_Mother",
        "fa_2 extracted", "fa_2 Dates", "fa_2_Mother",
        "Gen_Count", "Ancestral_Path_IDs"
    ]

    # Characters that iso-8859-15 cannot represent are written as "?".
    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(rows)

    print(f"\n[SUCCESS] Engine V113 Complete. Saved {len(rows)} verified matches to {CSV_DB}.")

print("‚úÖ Cell 3 (Engine V113) Loaded.")

✅ Cell 3 (Engine V113) Loaded.


In [3]:
# @title [CELL 4] The Publisher (V131 - Standalone & Inline Logo)
def run_publisher():
    """Build the standalone study pages from the engine CSV and upload them.

    Reads ``engine_database.csv`` (returns early with a message if it is
    missing), generates the Brick Wall Buster, Lineage Proof, Dossier,
    Network and Admin Hub pages plus a register stub, then uploads every
    generated file over FTPS into the ``REMOTE_SUBDIR`` directory.
    FTP credentials are taken from environment variables, falling back to
    Colab ``userdata``. Progress and errors are reported with ``print``;
    nothing is returned.
    """
    print("="*60)
    print("      [CELL 4] PUBLISHER STARTING (V131 - STANDALONE)...")
    print("      (Generating independent pages with Inline Logo)")
    print("="*60)

    # Imports are repeated locally so this cell does not depend on Cell 1's imports.
    import os
    import sys
    import json
    import pytz
    import pandas as pd
    from datetime import datetime
    import re
    from google.colab import userdata

    # --- CONFIGURATION ---
    # Remote directory name the upload step looks for on the server.
    REMOTE_SUBDIR = "ons-study"

    # --- HELPER FUNCTIONS ---
    # A person's chart link is TNG_BASE_URL + <person id> + TNG_SUFFIX.
    TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
    TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

    def normalize_id(val):
        """Return ``val`` as a person ID: "@" removed, whitespace trimmed.

        A value that is all digits after cleaning is prefixed with "I".
        """
        bare = str(val).replace('@', '').strip()
        return f"I{bare}" if bare.isdigit() else bare

    def build_narrative(row):
        """Return a one-sentence HTML summary of a match row.

        The found match's name is wrapped in a link to that person's chart
        page; missing columns fall back to '', '0', 'Unknown' or 0.
        """
        def field(column, fallback):
            # Every text field is coerced to str so NaN/ints format cleanly.
            return str(row.get(column, fallback))

        tester = field('Tester-Participant-Unmasked', '').strip()
        shared_cm = field('cM', '0')
        ancestor = field('Authority_FirstAncestor_dated', 'Unknown')
        match_name = field('Found Match', 'Unknown').strip()
        generations = row.get('Gen_Count', 0)

        person_id = normalize_id(row.get('ID#', ''))
        anchor = f'<a href="{TNG_BASE_URL}{person_id}{TNG_SUFFIX}" target="_blank"><b>{match_name}</b></a>'
        return f"{tester} is a {shared_cm} cM DNA match to {anchor} is related via {ancestor} back {generations} generations."

    def build_linked_lineage(row):
        """Return the row's ancestral line with the found match turned into a link.

        Only the LAST occurrence of the match's name is linked. The engine
        writes the line oldest-ancestor-first, so the match is the final
        entry; linking every occurrence would also hit earlier ancestors
        whose names merely contain the same text (e.g. "Levi Yates" inside
        "Willis Levi Yates-Anderson"). The line is returned unchanged when
        the name or ID is empty or the name does not occur in it.
        """
        line = str(row.get('Yates DNA Ancestral Line', ''))
        found = str(row.get('Found Match', ''))
        rid = normalize_id(row.get('ID#', ''))
        if not (found and rid):
            return line

        before, hit, after = line.rpartition(found)
        if not hit:
            return line  # name not present in the line

        url = f"{TNG_BASE_URL}{rid}{TNG_SUFFIX}"
        link_html = f'<a href="{url}" target="_blank" style="color:#006064;text-decoration:none;font-weight:bold;">{found}</a>'
        return f"{before}{link_html}{after}"

    def get_sortable_surname(full_name):
        """Return the lowercase surname used to sort ``full_name``.

        Steps: drop anything from "nee"/"née" onward, strip generational and
        professional suffixes (Jr, Sr, II, III, IV, Esq, M.D., Ph.D.), remove
        commas, periods and parentheses, then take the last word. For
        Spanish-style "A y B" surnames the word before "y" is used.
        Returns "zzz" (sorts last) when no surname can be determined.
        """
        if not full_name:
            return "zzz"
        s = str(full_name).lower()
        # Accept the correctly encoded "née" as well as the mis-decoded
        # spelling the original pattern matched, so either form is cut off.
        s = re.split(r'\bnee\b|\bnée\b|\bn√©e\b', s)[0]
        # "ii" is included so "John Smith II" sorts under "smith", matching
        # the suffix list used by the page's JavaScript sorter.
        s = re.sub(r'\b(?:jr\.?|sr\.?|iii|iv|ii|esq\.?|m\.d\.?|ph\.d\.?)\b', '', s)
        # Parentheses are removed too: "(née X)" would otherwise leave "(".
        s = re.sub(r'[,.()]', '', s)
        parts = s.split()
        if not parts:
            return "zzz"
        if len(parts) >= 3 and parts[-2] == 'y':
            return parts[-3]
        return parts[-1]

    def make_participant_key(full_name):
        """Return a title-cased "Surname, Rest Of Name" key for sorting participants.

        The surname comes from ``get_sortable_surname``. Only its last
        whole-word occurrence is removed from the name, so a given name that
        merely contains the surname ("Leeann Lee") is left intact.
        Returns "zzz" for an empty name.
        """
        if not full_name:
            return "zzz"
        sur = get_sortable_surname(full_name)
        lowered = str(full_name).lower()

        # Whole-word matches only; the surname is normally the last one.
        hits = list(re.finditer(rf"(?<!\w){re.escape(sur)}(?!\w)", lowered))
        if hits:
            last = hits[-1]
            remainder = lowered[:last.start()] + lowered[last.end():]
        else:
            # No standalone occurrence: fall back to plain removal.
            remainder = lowered.replace(sur, "")
        return f"{sur}, {remainder.strip()}".title()

    def format_last_first(full_name):
        """Return ``full_name`` rearranged as "Surname, Given names".

        Suffixes (Jr, Sr, II, III, IV, Esq, M.D., Ph.D.), commas and periods
        are dropped first. Spanish-style "A y B" surnames are kept together.
        A surname written entirely in one case is title-cased; a mixed-case
        surname ("McDonald") keeps its inner capitals. Names with fewer than
        two words are returned unchanged; an empty name yields "".
        """
        if not full_name:
            return ""
        s = str(full_name)
        cleaned = re.sub(r'\b(?:jr\.?|sr\.?|iii|iv|ii|esq\.?|m\.d\.?|ph\.d\.?)\b', '', s, flags=re.IGNORECASE)
        cleaned = re.sub(r'[,.]', '', cleaned)
        parts = cleaned.split()
        if len(parts) < 2:
            return full_name

        if len(parts) >= 3 and parts[-2].lower() == 'y':
            surname = " ".join(parts[-3:])
            firstname = " ".join(parts[:-3])
            # Avoid a dangling ", " when the name is only the compound surname.
            return f"{surname}, {firstname}" if firstname else surname

        *given, surname = parts
        if surname.islower() or surname.isupper():
            surname = surname.title()
        else:
            # str.title() would flatten "McDonald" to "Mcdonald".
            surname = surname[:1].upper() + surname[1:]
        return f"{surname}, {' '.join(given)}"

    # --- 1. DATA PREP ---
    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB):
        print("‚ùå ERROR: engine_database.csv not found. Run Cell 3 first.")
        return

    pages_to_upload = {}
    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")

    pages_to_upload["engine_database.csv"] = df.to_csv(index=False, encoding='iso-8859-15')

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")
    stats_bar_full = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Total Autosomal matches:</strong> {len(df):,}</div>"""

    df['Long_Narrative'] = df.apply(build_narrative, axis=1)
    df['Linked_Tree_Line'] = df.apply(build_linked_lineage, axis=1)

    # --- JSON PREP ---
    # Builds the two JSON payloads embedded in the interactive pages:
    #   smart_packet_json - per-ancestor and per-participant summaries plus
    #                       badge-based "virtual" cohorts
    #   proof_db_json     - one record per match row (ancestor, participant,
    #                       cm, id, lineage)
    ancestor_data = {}
    participant_data = {}
    # Participants seen under at least one ancestor of each badge level.
    badge_collections = {"Platinum": set(), "Gold": set(), "Silver": set(), "Bronze": set()}

    # Fall back to the dated ancestor label when the CSV has no directory label column.
    if 'Authority_Directory_Label' not in df.columns:
        df['Authority_Directory_Label'] = df['Authority_FirstAncestor_dated']

    # Ancestors
    # One summary per directory label; groups with a single match are skipped.
    for auth_label, group_df in df.groupby('Authority_Directory_Label'):
        match_count = int(len(group_df))
        if match_count < 2: continue
        total_cm = int(group_df['cM'].sum())
        unique_count = int(len(group_df['Tester-Participant-Unmasked'].unique()))
        # The three testers with the most matches in this group (name -> count).
        top_testers = group_df['Tester-Participant-Unmasked'].value_counts().head(3).to_dict()
        # The summary is keyed by the first row's alpha key, not by the label.
        # NOTE(review): two labels sharing an alpha key would overwrite each
        # other in ancestor_data — confirm alpha keys are unique per label.
        alpha_key = group_df.iloc[0]['Authority_FirstAncestor_alpha']

        # Badge ladder: start at Bronze, then upgrade by match count / tester count.
        badge = "Bronze"; integrity = 25; verdict = f"This line is an **Emerging Frontier**. With {match_count} matches, the connection is real but fragile."
        if "Disconnected" in alpha_key or "ZZ_" in alpha_key: badge = "Action Required"; integrity = 0; verdict = "**Data Quality Issue.**"
        elif match_count >= 5: badge = "Silver"; integrity = 50; verdict = f"**Likely Valid.** Supported by {match_count} matches."
        # NOTE(review): the next two checks are plain `if`s, so a group flagged
        # "Action Required" above is still promoted to Gold/Platinum when it
        # meets the thresholds — confirm that is intended.
        if match_count >= 15 and unique_count >= 3: badge = "Gold"; integrity = 80; verdict = f"**Strong Genetic Confirmation.**"
        if match_count >= 30 and unique_count >= 10: badge = "Platinum"; integrity = 100; verdict = f"**The Platinum Standard.**"

        ancestor_data[alpha_key] = {"name": auth_label, "matches": match_count, "cm": total_cm, "testers": unique_count, "badge": badge, "verdict": verdict, "integrity": integrity, "list_data": top_testers}
        # "Action Required" is not a key of badge_collections, so those groups add nobody.
        if badge in badge_collections:
            for p in group_df['Tester-Participant-Unmasked'].unique(): badge_collections[badge].add(p)

    # Only non-empty badge sets become virtual cohorts.
    virtual_groups = {}
    for b_name, p_set in badge_collections.items():
        if len(p_set) > 0: virtual_groups[f"Cohort: {b_name} Standard"] = list(p_set)

    # Participants
    # One summary per tester; testers with a single match are skipped.
    for p_name, group_df in df.groupby('Tester-Participant-Unmasked'):
        match_count = int(len(group_df))
        if match_count < 2: continue
        total_cm = int(group_df['cM'].sum())
        # Most frequent dated ancestor for this tester (computed but not stored below).
        top_anc = group_df['Authority_FirstAncestor_dated'].mode()[0]
        my_ancestors = group_df['Authority_FirstAncestor_dated'].value_counts().head(3).to_dict()
        badge = "Active Cousin"; integrity = 50; verdict = f"**Active Contributor.**"
        if match_count >= 15: badge = "Keystone Tester"; integrity = 90; verdict = f"**The Keystone Driver.**"
        sort_key = make_participant_key(p_name)
        participant_data[p_name] = {"name": p_name, "sort_key": sort_key, "matches": match_count, "cm": total_cm, "testers": 1, "badge": badge, "verdict": verdict, "integrity": integrity, "list_data": my_ancestors}

    smart_packet_json = json.dumps({"ancestors": ancestor_data, "participants": participant_data, "virtual": virtual_groups})
    # Column subset renamed to the short field names the page scripts read.
    proof_db_json = df[['Authority_Directory_Label','Tester-Participant-Unmasked','cM','ID#','Yates DNA Ancestral Line']].rename(columns={'Authority_Directory_Label':'ancestor','Tester-Participant-Unmasked':'participant','cM':'cm','ID#':'id','Yates DNA Ancestral Line':'lineage'}).to_json(orient='records')

    # --- 3. PAGE GENERATOR (STANDALONE - NO TNG WRAPPER) ---
    js_tools = """function getSortableName(fullname) {if (!fullname) return "zzz";let clean = fullname.replace(/\\b(jr\\.?|sr\\.?|iii|iv|ii|esq\\.?|m\\.d\\.?|ph\\.d\\.?)\\b/gi, "");clean = clean.replace(/[\\,\\.]/g, "");clean = clean.split(/\\bnee\\b|\\bn√©e\\b/i)[0];let parts = clean.trim().split(/\\s+/);if (parts.length >= 3 && parts[parts.length-2].toLowerCase() === 'y') {return parts[parts.length-3].toLowerCase() + " " + clean.toLowerCase();}let surname = parts.length > 0 ? parts[parts.length - 1].toLowerCase() : "zzz";return surname.toLowerCase() + " " + clean.toLowerCase();}function formatName(fullname) {if (!fullname) return "";let clean = fullname.replace(/\\b(jr\\.?|sr\\.?|iii|iv|ii|esq\\.?|m\\.d\\.?|ph\\.d\\.?)\\b/gi, "");clean = clean.replace(/[\\,\\.]/g, "");let parts = clean.trim().split(/\\s+/);if (parts.length < 2) return fullname;let surname = parts.pop();let firstname = parts.join(" ");return surname.charAt(0).toUpperCase() + surname.slice(1) + ", " + firstname;}"""

    def wrap_in_standalone(title, content):
        LOGO_URL = "https://yates.one-name.net/gengen/images/yates_modern_logo.png"

        # INLINE LOGO: 1em height to match text size
        inline_logo = f'<img src="{LOGO_URL}" class="title-logo" style="height:1.2em;width:auto;vertical-align:middle;margin-right:10px;margin-bottom:4px;" alt="Logo">'

        # Inject logo into the first H1
        if '<h1 class="centerline">' in content:
            content = content.replace('<h1 class="centerline">', f'<h1 class="centerline">{inline_logo} ')

        # STANDARD HTML STRUCTURE (No PHP Includes)
        return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<link rel="stylesheet" href="partials_unified.css">
<link rel="stylesheet" href="dna_tree_styles.css">
<style>
    body {{ font-family: sans-serif; background-color: #f9f9f9; margin: 0; padding: 0; }}
    .wrap {{ max-width: 1200px; margin: 20px auto; background: white; padding: 20px; box-shadow: 0 0 10px rgba(0,0,0,0.1); border-radius: 8px; }}
    h1.centerline {{ color: #006064; text-align: center; font-size: 2em; }}
    .title-logo {{ height: 1.2em; width: auto; vertical-align: middle; margin-right: 10px; }}
</style>
</head>
<body>
<div class="wrap">
    {stats_bar_full}
    {NAV_HTML}
    {content}
    <div style="text-align:center; padding: 20px; color: #777; font-size: 0.9em; border-top: 1px solid #eee; margin-top: 30px;">
        &copy; {datetime.now().year} Yates One-Name Study
    </div>
</div>
<script>{js_tools}</script>
</body>
</html>"""

    # 1. Buster
    buster_core = f"""<div class="dashboard"><div class="buster-header"><h1 class="centerline">üß± Brick Wall Buster</h1><p style="text-align:center;">Predictive Forensic Analysis</p></div>{SITE_INFO}<div class="tabs"><div class="tab active" onclick="setMode('ancestor')">1. Define by Ancestor</div><div class="tab" onclick="setMode('custom')">2. Define by Participants</div><div class="tab" onclick="setMode('virtual')">3. Build Custom Cohort</div></div><div class="control-panel"><div id="panel-ancestor"><label>Select Your "End of Line" Ancestor:</label><select id="wallSelect"><option value="">-- Choose --</option></select></div><div id="panel-virtual" style="display:none;"><label>Select Testers for Virtual Cohort:</label><div id="virtualList" class="checkbox-list"></div></div><div id="panel-custom" style="display:none;"><label>Select Testers:</label><div id="testerList" class="checkbox-list"></div></div><button onclick="runAnalysis()">üî® Bust This Wall</button></div><div id="results" class="results-box"><h2 style="border-bottom:2px solid #ddd;">Forensic Report</h2><div class="finding-box"><strong>üîç Findings:</strong> <span id="txt-finding"></span></div><div class="conclusion-box"><strong>üí° Study Context:</strong> <span id="txt-conclusion"></span></div><div class="speculation-box"><strong>üöÄ Prediction:</strong> <span id="txt-speculation"></span></div><div id="bridge-alert" style="display:none;"></div><h3 style="margin-top:30px;">Cluster Register</h3><div id="cluster-table-div"></div></div></div><script>const DATA={smart_packet_json};const FULL_DB={proof_db_json};let MODE='ancestor';const ancSel=document.getElementById('wallSelect');Object.keys(DATA.ancestors).sort((a,b)=>DATA.ancestors[b].name.localeCompare(DATA.ancestors[a].name)).forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=DATA.ancestors[k].name;ancSel.appendChild(o);}});const testListDiv=document.getElementById('testerList');const 
virtListDiv=document.getElementById('virtualList');const allTesters=Object.keys(DATA.participants);allTesters.sort((a,b)=>DATA.participants[a].sort_key.localeCompare(DATA.participants[b].sort_key));let listHTML="";allTesters.forEach(t=>{{listHTML+=`<label class="checkbox-item"><input type="checkbox" value="${{t}}"> ${{t}}</label>`;}});testListDiv.innerHTML=listHTML;virtListDiv.innerHTML=listHTML;function setMode(m){{MODE=m;document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));event.target.classList.add('active');document.getElementById('panel-ancestor').style.display=(m==='ancestor')?'block':'none';document.getElementById('panel-custom').style.display=(m==='custom')?'block':'none';document.getElementById('panel-virtual').style.display=(m==='virtual')?'block':'none';document.getElementById('results').style.display='none';}}function runAnalysis(){{let targetName="",clusterCount=0,clusterMembers=[],clusterMatches=0;if(MODE==='custom'){{const checkedBoxes=document.querySelectorAll('#testerList input:checked');if(checkedBoxes.length===0)return alert("Select at least one tester.");clusterMembers=Array.from(checkedBoxes).map(cb=>cb.value);targetName="Custom Group ("+clusterMembers.length+" Testers)";}}else if(MODE==='virtual'){{const checkedBoxes=document.querySelectorAll('#virtualList input:checked');if(checkedBoxes.length===0)return alert("Select at least one tester.");clusterMembers=Array.from(checkedBoxes).map(cb=>cb.value);targetName="Virtual Cohort";}}else{{const key=ancSel.value;if(!key)return;const d=DATA.ancestors[key];targetName=d.name;clusterMembers=Object.keys(d.list_data);}}clusterCount=clusterMembers.length;const groupRows=FULL_DB.filter(r=>clusterMembers.includes(r.participant));clusterMatches=groupRows.length;groupRows.sort((a,b)=>b.cm-a.cm);let tbl='<table class="dataframe"><thead><tr><th>Participant</th><th>cM</th><th>Lineage</th></tr></thead><tbody>';groupRows.forEach(r=>{{tbl+=`<tr><td>${{r.participant}}</td><td>${{r.cm}}</td><td 
style="font-size:0.9em;">${{r.lineage}}</td></tr>`;}});tbl+='</tbody></table>';document.getElementById('cluster-table-div').innerHTML=tbl;const allMatches=FULL_DB.length,outsideMatches=allMatches-clusterMatches,pct=((clusterMatches/allMatches)*100).toFixed(2);let findHTML=`<ul><li><strong>Target:</strong> ${{targetName}}</li><li><strong>Volume:</strong> ${{clusterMatches}} matches.</li></ul>`;document.getElementById('txt-finding').innerHTML=findHTML;let html="";const top3=[];for(const[k,v]of Object.entries(DATA.ancestors)){{if(v.name!==targetName&&v.matches>=5)top3.push(v);}}top3.sort((a,b)=>b.matches-a.matches);if(top3.length>0){{html+="<p>Most probable lineages:</p><ul>";top3.slice(0,3).forEach(g=>{{html+=`<li><strong>${{g.name}}</strong> (${{g.matches}} Handshakes)</li>`;}});html+="</ul>";}}else{{html="No strong signals found.";}}document.getElementById('txt-speculation').innerHTML=html;document.getElementById('results').style.display='block';}}</script>"""
    pages_to_upload["brick_wall_buster.php"] = wrap_in_standalone("Brick Wall Buster", buster_core)
    print("    - [GENERATED] Brick Wall Buster (Standalone)")

    # 2. Lineage Proof
    proof_core = f"""<h1 class="centerline">üß¨ Lineage Proof Engine</h1>{SITE_INFO}<div class="proof-card"><h3>Verify an Ancestral Line</h3><select id="proofSelect" onchange="runProof()"><option value="">-- Select Ancestor --</option></select><div id="proof-result" style="display:none;"><div style="text-align:center"><span id="p-badge" class="badge-large"></span></div><div class="stats-grid"><div class="stat-box"><div class="stat-val" id="p-matches">0</div><div class="stat-lbl">Matches</div></div><div class="stat-box"><div class="stat-val" id="p-cm">0</div><div class="stat-lbl">Total cM</div></div><div class="stat-box"><div class="stat-val" id="p-integrity">0%</div><div class="stat-lbl">Integrity</div></div></div><div class="verdict-box" id="p-verdict"></div><h4>Evidence Manifest</h4><div style="max-height:500px;overflow-y:auto;"><table id="evidence-table"><thead><tr><th>Participant</th><th>cM</th><th>Lineage Path</th></tr></thead><tbody></tbody></table></div></div></div><script>const DATA={smart_packet_json};const DB={proof_db_json};const sel=document.getElementById('proofSelect');Object.keys(DATA.ancestors).sort((a,b)=>DATA.ancestors[b].name.localeCompare(DATA.ancestors[a].name)).forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=DATA.ancestors[k].name;sel.appendChild(o);}});function runProof(){{const key=sel.value;if(!key){{document.getElementById('proof-result').style.display='none';return;}}const d=DATA.ancestors[key];document.getElementById('proof-result').style.display='block';document.getElementById('p-badge').className='badge-large badge-'+d.badge.toLowerCase().split(' ')[0];document.getElementById('p-badge').innerText=d.badge;document.getElementById('p-matches').innerText=d.matches;document.getElementById('p-cm').innerText=d.cm.toLocaleString();document.getElementById('p-integrity').innerText=d.integrity+'%';document.getElementById('p-verdict').innerHTML=d.verdict;const 
matches=DB.filter(r=>r.ancestor===d.name).sort((a,b)=>b.cm-a.cm);const tbody=document.querySelector('#evidence-table tbody');tbody.innerHTML='';matches.forEach(m=>{{const tr=document.createElement('tr');tr.innerHTML=`<td>${{m.participant}}</td><td>${{m.cm}}</td><td style="font-size:0.9em;color:#555;">${{m.lineage}}</td>`;tbody.appendChild(tr);}});}}</script>"""
    pages_to_upload["lineage_proof.php"] = wrap_in_standalone("Lineage Proof Engine", proof_core)

    # 3. Dossier
    dossier_core = f"""<h1 class="centerline">Forensic Dossier</h1>{SITE_INFO}<div class="switch"><label><input type="radio" name="mode" value="ancestor" checked onchange="switchMode()"> Search by Ancestor</label><label><input type="radio" name="mode" value="participant" onchange="switchMode()"> Search by Participant</label></div><div style="text-align:center;margin:30px;"><select id="dossierSelect"><option value="">-- Select --</option></select><div style="margin-top:15px;"><button class="action-btn" onclick="addReport()">Add Report</button><button class="action-btn" style="background:#d32f2f;" onclick="clearReports()">Clear All</button></div></div><div id="composite-container"><h2 style="border-bottom:2px solid #004d40;">Comparison</h2><table class="comp-table"><thead><tr><th>Name</th><th>Role/Badge</th><th>Matches</th><th>Total cM</th></tr></thead><tbody id="comp-body"></tbody></table></div><div id="report-stack"></div><script>const DATA={smart_packet_json};let currentMode='ancestor';let compTotalMatches=0;let compTotalCM=0;let compCount=0;function switchMode(){{currentMode=document.querySelector('input[name="mode"]:checked').value;populateDropdown();clearReports();}}function populateDropdown(){{const sel=document.getElementById('dossierSelect');sel.innerHTML='<option value="">-- Select --</option>';const source=(currentMode==='ancestor')?DATA.ancestors:DATA.participants;const sortedKeys=Object.keys(source).sort((a,b)=>{{if(currentMode==='ancestor')return source[b].name.localeCompare(source[a].name);return source[a].sort_key.localeCompare(source[b].sort_key);}});sortedKeys.forEach(key=>{{const opt=document.createElement('option');opt.value=key;opt.innerText=source[key].name;sel.appendChild(opt);}});}}function clearReports(){{document.getElementById('report-stack').innerHTML='';document.getElementById('comp-body').innerHTML='';document.getElementById('composite-container').style.display='none';}}function addReport(){{const 
key=document.getElementById('dossierSelect').value;if(!key)return;const d=(currentMode==='ancestor')?DATA.ancestors[key]:DATA.participants[key];document.getElementById('composite-container').style.display='block';compTotalMatches+=d.matches;compTotalCM+=d.cm;compCount++;const tr=document.createElement('tr');tr.innerHTML=`<td><b>${{d.name}}</b></td><td>${{d.badge}}</td><td>${{d.matches}}</td><td>${{d.cm.toLocaleString()}}</td>`;document.getElementById('comp-body').appendChild(tr);const html=`<div class="dossier-card" style="display:block;"><div class="dossier-header"><h2>${{d.name}}</h2></div><div class="dossier-body"><div style="text-align:center"><span class="badge">${{d.badge}}</span></div><div class="verdict-box">${{d.verdict}}</div></div></div>`;document.getElementById('report-stack').insertAdjacentHTML('afterbegin',html);}}populateDropdown();</script>"""
    pages_to_upload["dna_dossier.php"] = wrap_in_standalone("Forensic Dossier", dossier_core)

    # 4. Network
    # One collapsible panel per ancestor group with at least two matches,
    # largest groups first; rows inside a panel are ordered by cM, highest first.
    network_buffer = []
    groups_largest_first = sorted(
        df.groupby('Authority_Directory_Label'),
        key=lambda pair: len(pair[1]),
        reverse=True,
    )
    for ancestor_label, members in groups_largest_first:
        if len(members) < 2:
            continue
        total_cm_g = members['cM'].sum()
        unique_testers_g = len(members['Tester-Participant-Unmasked'].unique())
        analyzer_comment = f"""<div style="background:#fffde7;border-left:6px solid #fbc02d;padding:10px;margin-bottom:15px;font-family:sans-serif;color:#333;font-size:0.95em;"><strong>Collateral Saturation Analysis:</strong> Validated by <b>{unique_testers_g} independent testers</b>.</div>"""
        ranked = members.sort_values('cM', ascending=False)
        network_buffer.append(f"""<details style="background:white;margin-bottom:15px;border:1px solid #ddd;border-radius:5px;"><summary style="background:#e0f2f1;padding:15px;cursor:pointer;"><span>{ancestor_label}</span> <span style="float:right;">Matches: {len(ranked)} | cM: {total_cm_g:.0f}</span></summary><div style="padding:15px;">{analyzer_comment}<table class="dataframe" border="1"><thead><tr><th>Tester</th><th>cM</th><th>Lineage</th></tr></thead><tbody>""")
        network_buffer.extend(
            f"<tr><td>{match['Tester-Participant-Unmasked']}</td><td>{match['cM']}</td><td>{match['Yates DNA Ancestral Line']}</td></tr>"
            for _, match in ranked.iterrows()
        )
        network_buffer.append("</tbody></table></div></details>")
    pages_to_upload["dna_network.php"] = wrap_in_standalone("Participating DNA Network", "".join(network_buffer))

    # 5. Admin Hub
    # Per-participant match counts and share of the whole database, listed
    # by sortable surname and then by full name.
    part_stats = (
        df.groupby('Tester-Participant-Unmasked')
        .agg({'Tester-Participant-MASKED': 'first', 'ID#': 'count'})
        .reset_index()
        .rename(columns={'ID#': 'Match_Count'})
    )
    part_stats['Sort_Key'] = part_stats['Tester-Participant-Unmasked'].apply(get_sortable_surname)
    part_stats = part_stats.sort_values(['Sort_Key', 'Tester-Participant-Unmasked'], ascending=[True, True])
    total_matches = part_stats['Match_Count'].sum()
    part_stats['Share_Pct'] = (part_stats['Match_Count'] / total_matches) * 100
    admin_rows = [
        f"<tr><td>{entry['Tester-Participant-MASKED']}</td><td><b>{format_last_first(entry['Tester-Participant-Unmasked'])}</b></td><td>{entry['Match_Count']}</td><td>{entry['Share_Pct']:.2f}%</td></tr>"
        for _, entry in part_stats.iterrows()
    ]
    admin_core = f"""<h1 class="centerline">Research Admin Hub</h1><div class="audit-table-wrapper"><table class="audit-table sortable"><thead><tr><th>Masked ID</th><th>Unmasked Participant</th><th>Matches</th><th>% Share</th></tr></thead><tbody>{''.join(admin_rows)}</tbody><tfoot><tr class="total-row"><td colspan="2" style="text-align:right;">TOTAL DATABASE:</td><td>{total_matches}</td><td>100%</td></tr></tfoot></table></div>"""
    pages_to_upload["research_admin.php"] = wrap_in_standalone("Research Admin Hub", admin_core)

    # Placeholder landing page pointing visitors at the menu.
    pages_to_upload["ons_yates_dna_register.php"] = wrap_in_standalone("DNA Register", "<h3>Please use the main menu to select a visualization.</h3>")

    # --- 4. UPLOAD ---
    # Every generated file is written to the working directory and then sent
    # over explicit-TLS FTP into the first candidate directory that exists.
    print("\n[STEP 3] Uploading PHP Files to Server...")
    try:
        import ftplib
        from ftplib import FTP_TLS
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")
        ftps = FTP_TLS()
        try:
            ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
            target_dirs = [f"/{REMOTE_SUBDIR}", f"/public_html/{REMOTE_SUBDIR}", f"htdocs/{REMOTE_SUBDIR}", REMOTE_SUBDIR]
            found_dir = False
            for d in target_dirs:
                try:
                    ftps.cwd(d)
                except ftplib.Error:
                    # Server rejected this path; try the next candidate.
                    # Connection-level failures are not swallowed here and
                    # reach the outer handler.
                    continue
                found_dir = True
                print(f"[SUCCESS] Locked onto: {d}")
                break
            if not found_dir:
                print(f"\n[CRITICAL] Could not locate '{REMOTE_SUBDIR}'!")
            else:
                for fn, content in pages_to_upload.items():
                    # The CSV database is read back as iso-8859-15, so it must
                    # be written the same way; writing it as UTF-8 garbles
                    # accented characters on the next run. Pages declare
                    # UTF-8 in their <meta charset>.
                    is_csv = fn.lower().endswith(".csv")
                    enc = "iso-8859-15" if is_csv else "utf-8"
                    with open(fn, "w", encoding=enc, errors="replace" if is_csv else "strict") as f:
                        f.write(content)
                    with open(fn, "rb") as fh:
                        ftps.storbinary(f"STOR {fn}", fh)
                    print(f"    - Uploaded: {fn}")
                print(f"\n[SUCCESS] Independent Pages Published.")
        finally:
            # Always release the connection, including after a failed upload.
            try:
                ftps.quit()
            except Exception:
                # quit() needs a live session; close() is safe in any state.
                ftps.close()
    except Exception as e: print(f"\n[ERROR] Upload Failed: {e}")

print("‚úÖ Cell 4 (Publisher V131 - Standalone & Inline Logo) Loaded.")

✅ Cell 4 (Publisher V131 - Standalone & Inline Logo) Loaded.


In [4]:
# @title [CELL 5] MASTER ORCHESTRATOR (Run All)
def run_master_pipeline():
    """Run the engine, then the publisher, then the archiver if one is defined.

    A failure in the engine or publisher phase is printed and stops the
    pipeline. The archiver phase runs only when ``run_archiver`` exists in
    the module globals; its failure is printed and does not stop the run.
    """
    rule = "="*60
    for banner_line in (
        rule,
        "      MASTER ORCHESTRATOR (V117)",
        "      (Running Engine -> Publisher -> Upload)",
        "      Brand: The Forensic Genealogy Publisher",
        rule,
    ):
        print(banner_line)

    # Mandatory phases: (announcement, action, success line, failure prefix).
    # The lambdas defer the name lookup into the try block, so a phase
    # function that has not been defined is reported as a phase failure.
    mandatory_phases = (
        (
            "\n>>> üöÄ PHASE 1: EXECUTING DATA ENGINE...",
            lambda: run_engine(),
            "‚úÖ PHASE 1 COMPLETE.",
            "‚ùå PHASE 1 FAILED: ",
        ),
        (
            "\n>>> üåê PHASE 2: EXECUTING PUBLISHER & UPLOAD...",
            lambda: run_publisher(),
            "‚úÖ PHASE 2 COMPLETE.",
            "‚ùå PHASE 2 FAILED: ",
        ),
    )
    for announcement, action, success_line, failure_prefix in mandatory_phases:
        print(announcement)
        try:
            action()
            print(success_line)
        except Exception as e:
            print(f"{failure_prefix}{e}")
            return  # later phases depend on this one

    # Optional archiver: present only once its cell has been executed.
    if 'run_archiver' in globals():
        print("\n>>> üì¶ PHASE 3: ARCHIVING...")
        try:
            run_archiver()
            print("‚úÖ PHASE 3 COMPLETE.")
        except Exception as e:
            print(f"‚ö†Ô∏è PHASE 3 SKIPPED: {e}")

    print("\n" + rule)
    print("      üèÜ PUBLISHING COMPLETE")
    print(rule)

# EXECUTE
run_master_pipeline()

      MASTER ORCHESTRATOR (V117)
      (Running Engine -> Publisher -> Upload)
      Brand: The Forensic Genealogy Publisher

>>> 🚀 PHASE 1: EXECUTING DATA ENGINE...
      [CELL 3] ENGINE STARTING (V113 - FORCE REFRESH)...

[STEP 1] Setup...
    👉 Source: yates_study_2025.ged

[STEP 4] Tracing Lineages...
    [DEBUG] Found Levi Yates. Mother Surname found: 'Anderson'. Result: Willis Levi Yates-Anderson
    [DEBUG] Found Levi Yates. Mother Surname found: 'Collins'. Result: Levi Yates-Collins
    [DEBUG] Found Levi Yates. Mother Surname found: 'Collins'. Result: Levi Yates-Collins
    [DEBUG] Found Levi Yates. Mother Surname found: 'Parker'. Result: Levi Yates-Parker
    [DEBUG] Found Levi Yates. Mother Surname found: 'Parker'. Result: Levi Yates-Parker
    [DEBUG] Found Levi Yates. Mother Surname found: 'Parker'. Result: Levi Yates-Parker
    [DEBUG] Found Levi Yates. Mother Surname found: 'Parker'. Result: Levi Yates-Parker
    [DEBUG] Found Levi Yates. Mother Surname found: 'Par

In [None]:
# @title [CELL 6] The Time Machine (Archiver + Dropbox Sync)
import zipfile
import os
import pytz
import time
from datetime import datetime
from google.colab import files
from google.colab import userdata

# --- 1. INSTALL DROPBOX (IF MISSING) ---
# Try the import first; only shell out to pip when the package is absent,
# then repeat the import so `dropbox` and `AuthError` are bound either way.
try:
    import dropbox
    from dropbox.exceptions import AuthError
except ImportError:
    # The exit status of os.system is not checked here; if the install did
    # not succeed, the retry below raises ImportError and the cell stops.
    os.system('pip install dropbox')
    import dropbox
    from dropbox.exceptions import AuthError

def _enter_backup_dir(ftps, remote_dir):
    """Switch the FTP session into ``remote_dir``, creating it when necessary.

    Args:
        ftps: A connected, logged-in ``ftplib.FTP``/``FTP_TLS`` session.
        remote_dir: Absolute remote path the upload should land in.

    Returns:
        The directory the session is actually in afterwards: ``remote_dir``
        on success, otherwise whatever ``ftps.pwd()`` reports. The caller
        uses this to print where the file really went.
    """
    from ftplib import Error as FTPError

    try:
        ftps.cwd(remote_dir)
        return remote_dir
    except FTPError:
        # Directory could not be entered; fall through and try to create it.
        pass

    try:
        ftps.mkd(remote_dir)
        ftps.cwd(remote_dir)
        return remote_dir
    except FTPError as e:
        # Stay best-effort (upload anyway), but say where the file will go
        # instead of silently pretending the target directory was used.
        actual_dir = ftps.pwd()
        print(f"    [WARN] Could not use {remote_dir} ({e}); uploading to {actual_dir} instead.")
        return actual_dir


def run_archiver():
    """Zip the generated output files and copy the archive off the runtime.

    Steps, in order:
      1. Pack every regular file in the current directory whose name ends in
         .csv/.shtml/.html/.json/.js/.css (and does not contain
         "sample_data") into ``Yates_Study_Backup_<YYYY-MM-DD_HHMM>.zip``,
         timestamped in US/Eastern.
      2. Upload the zip over FTPS to ``/ons-study/backups``. Credentials come
         from the FTP_HOST / FTP_USER / FTP_PASS environment variables,
         falling back to Colab ``userdata``.
      3. Upload the zip to Dropbox under ``/Backups/`` (overwrite mode).
      4. Trigger a browser download of the zip via ``google.colab.files``.

    If there is nothing to pack, or the zip cannot be written, the function
    prints the reason and returns early. Steps 2-4 are each best-effort: a
    failure is printed and the remaining steps still run.

    Returns:
        None.
    """
    print("="*60)
    print("      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC")
    print("="*60)

    # --- 2. CREATE ZIP (SAFE MODE) ---
    # We explicitly exclude .zip to prevent "Zip Bombs"
    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css')
    files_to_pack = [
        f for f in os.listdir('.')
        if f.lower().endswith(extensions)
        and "sample_data" not in f
        # A directory that happens to be named like "assets.css" would only
        # be stored as an empty entry and inflate the file count; skip it.
        and os.path.isfile(f)
    ]

    if not files_to_pack:
        print("‚ùå No generated files found! Run the Publisher (Cell 4) first.")
        return

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    print(f"üì¶ Compressing {len(files_to_pack)} files into {zip_name}...")
    try:
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file in files_to_pack:
                zf.write(file)
        print(f"    ‚úÖ Archive Created: {zip_name} ({os.path.getsize(zip_name)/1024:.1f} KB)")
    except Exception as e:
        print(f"    ‚ùå Compression Failed: {e}")
        return

    # --- 3. FTP UPLOAD (BACKUPS FOLDER) ---
    print("\n[STEP 2] Uploading to Web Server (FTP)...")
    remote_dir = "/ons-study/backups"
    try:
        from ftplib import FTP_TLS
        ftp_host = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        ftp_user = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        ftp_pass = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

        # The context manager sends QUIT / closes the socket on every exit
        # path, so a failed login or upload no longer leaks the connection.
        with FTP_TLS() as ftps:
            ftps.connect(ftp_host, 21)
            ftps.auth()
            ftps.login(ftp_user, ftp_pass)
            ftps.prot_p()  # encrypt the data channel as well

            landed_in = _enter_backup_dir(ftps, remote_dir)

            with open(zip_name, "rb") as fh:
                ftps.storbinary(f"STOR {zip_name}", fh)
            print(f"    ‚úÖ FTP Success: {landed_in.rstrip('/')}/{zip_name}")
    except Exception as e:
        # Top-level boundary for this step: report and carry on to Dropbox.
        print(f"    ‚ö†Ô∏è FTP Upload skipped: {e}")

    # --- 4. DROPBOX SYNC (NEW) ---
    print("\n[STEP 3] Syncing to Dropbox...")
    try:
        # Initialize with Refresh Token (Long-term access)
        dbx = dropbox.Dropbox(
            app_key=userdata.get('DBX_APP_KEY'),
            app_secret=userdata.get('DBX_APP_SECRET'),
            oauth2_refresh_token=userdata.get('DBX_REFRESH_TOKEN')
        )

        # Upload the Zip
        # NOTE(review): the whole archive is read into memory and sent in one
        # call; the Dropbox SDK documents a size cap for files_upload, so very
        # large backups would presumably need an upload session - confirm.
        target_path = f"/Backups/{zip_name}"
        with open(zip_name, "rb") as f:
            dbx.files_upload(f.read(), target_path, mode=dropbox.files.WriteMode.overwrite)

        print(f"    ‚úÖ Dropbox Success: {target_path}")

    except Exception as e:
        print(f"    ‚ùå Dropbox Upload Failed: {e}")
        print("       (Check DBX_APP_KEY, DBX_APP_SECRET, DBX_REFRESH_TOKEN in Colab Secrets)")

    # --- 5. LOCAL DOWNLOAD (SAFETY NET) ---
    print("\n[STEP 4] Triggering Local Download...")
    try:
        files.download(zip_name)
    except Exception as e:
        print(f"    ‚ö†Ô∏è Auto-download blocked: {e}")

    print("‚úÖ Archival Process Complete.")

# Run it
# Top-level call: besides defining run_archiver, running this cell performs
# one archive pass immediately.
run_archiver()

      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC
üì¶ Compressing 1 files into Yates_Study_Backup_2026-02-17_1947.zip...
    ‚úÖ Archive Created: Yates_Study_Backup_2026-02-17_1947.zip (186.5 KB)

[STEP 2] Uploading to Web Server (FTP)...
    ‚ö†Ô∏è FTP Upload skipped: 530 Login authentication failed

[STEP 3] Syncing to Dropbox...
    ‚úÖ Dropbox Success: /Backups/Yates_Study_Backup_2026-02-17_1947.zip

[STEP 4] Triggering Local Download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Archival Process Complete.
