<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [71]:
# @title [CELL 1] Setup + Helper Functions (V107 - Hot Paths Nav)
import os
import sys
import re
import csv
import json
import html
import socket
import pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# --- INSTALL TQDM IF MISSING ---
try:
    import tqdm
except ImportError:
    os.system('pip install tqdm')
    import tqdm

print("="*60)
print("      [CELL 1] SETUP LOADED (V107)")
print("      (Includes: Lineage Hot Paths Nav Link)")
print("="*60)

# ==============================================================================
# 1. GLOBAL HELPER FUNCTIONS + HTML ASSETS
# ==============================================================================
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

NAV_HTML = """
<style>
nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none}
nav.oldnav li{display:inline-block}
nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px}
nav.oldnav a:hover{background-color:#00838f!important}
@media print { nav.oldnav, #nav-slot, .no-print, .action-btn, .control-panel, .tabs { display: none !important; } }
</style>
<nav class="oldnav"><ul>
<li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li>
<li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li>
<li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li>
<li><a href="/ons-study/just-trees.shtml">Trees</a></li>
<li><a href="/ons-study/dna_network.shtml">DNA Network</a></li>
<li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li>
<li><a href="/ons-study/lineage_hot_paths.html" style="color:#fff !important; font-weight:bold;">Hot Paths</a></li>
<li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li>
<li><a href="/ons-study/brick_wall_buster.shtml" style="background:#f57f17;color:black !important;">Brick Wall Buster</a></li>
<li><a href="/ons-study/data_glossary.shtml">Data Glossary</a></li>
<li><a href="https://yates.one-name.net/gengen/images/cousin-calculator.jpg" target="_blank" style="color:#b2dfdb;">Cousin Calc</a></li>
<li><a href="https://yates.one-name.net/gengen/images/Shared_cM_Project_v4.jpg" target="_blank" style="color:#b2dfdb;">cM Chart</a></li>
<li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li>
<li><a href="/ons-study/subscribe.shtml" style="background-color:#004d40;">Subscribe</a></li>
</ul></nav>"""

SITE_INFO = """<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register moves beyond the reliance on single "golden matches" to prove kinship. Instead, it employs <em>Collateral DNA Saturation</em>‚Äîa method that blends genealogical reasoning with data-driven logic.</p></div>"""

# V86: Added Table Filtering Logic
JS_CORE = r"""<script type="text/javascript">
(function(){
    // SORTING LOGIC
    function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();}
    function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);}
    function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}}

    // FILTERING LOGIC
    window.filterTable = function() {
        var input = document.getElementById("tableSearch");
        var filter = input.value.toUpperCase();
        var table = document.getElementById("reg-table") || document.querySelector("table.dataframe");
        var tr = table.getElementsByTagName("tr");
        for (var i = 1; i < tr.length; i++) {
            var tdArr = tr[i].getElementsByTagName("td");
            var found = false;
            for (var j = 0; j < tdArr.length; j++) {
                if (tdArr[j]) {
                    var txtValue = tdArr[j].textContent || tdArr[j].innerText;
                    if (txtValue.toUpperCase().indexOf(filter) > -1) {
                        found = true;
                        break;
                    }
                }
            }
            tr[i].style.display = found ? "" : "none";
        }
    }

    function init(){
        var t=document.getElementsByTagName('table');
        for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]);
    }
    if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init();
})();
</script>"""

GLOSSARY_CONTENT = """<div style="background:white;padding:25px;border-radius:8px;border:1px solid #ddd;font-family:sans-serif;line-height:1.6;"><h2 style="color:#006064;border-bottom:2px solid #004d40;padding-bottom:10px;">ONS Yates Study: Data Glossary</h2><h3 style="color:#00838f;margin-top:25px;">1. Identity Columns</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Tester-Participant-MASKED (The Trigger):</strong><br>The unique privacy code extracted from the user's NPFX tag.</li><li style="margin-bottom:15px;"><strong>Tester-Participant-Unmasked:</strong><br>The real name of the tester.</li></ul><h3 style="color:#00838f;margin-top:25px;">2. Analysis Terms</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Platinum Standard:</strong><br>Lineages with 30+ matches and 10+ unique sources. Biologically proven.</li><li style="margin-bottom:15px;"><strong>Keystone Tester:</strong><br>A high-value participant (15+ matches) who anchors a specific branch.</li><li style="margin-bottom:15px;"><strong>Forensic Handshake:</strong><br>An informal term in genetic genealogy describing a confirmation pattern in which multiple independent DNA matches support the same ancestral line or family connection, strengthening confidence in an identification or relationship conclusion. Rather than relying on a single match, genealogists look for several matches that converge on the same family network, creating a ‚Äúhandshake‚Äù of agreement between genetic evidence and documentary research. This concept is especially valuable in unknown-parentage and forensic cases, where conclusions must be supported by redundant evidence.<br><br>A forensic handshake is often achieved through related methods such as triangulation, where three or more individuals share the same DNA segment from a common ancestor, and cluster analysis, which groups matches who also match one another and often represent a shared ancestral couple or lineage. Together, these approaches help meet an emerging genetic genealogy proof standard, emphasizing that reliable conclusions require multiple corroborating matches, consistency with records, and the exclusion of alternate explanations.</li></ul><h3 style="color:#00838f;margin-top:25px;">3. Genealogy Concepts</h3><ul style="list-style-type:none;padding-left:0;"><li style="margin-bottom:15px;"><strong>Spanish Naming System:</strong><br>A traditional Hispanic naming convention in which an individual bears one or more given names followed by two surnames: the first inherited from the father (paternal surname) and the second from the mother (maternal surname). This system is historically rooted in Spain and is especially useful in genealogy because it preserves both parental lineages and improves identification in historical records.</li><li style="margin-bottom:15px;"><strong>N√©e:</strong><br>A term meaning ‚Äúborn as,‚Äù used to indicate a woman‚Äôs maiden or birth surname before marriage. In genealogical and historical records, n√©e identifies the surname a woman carried in her natal family line, preserving her connection to her parents and ancestry. For example, ‚ÄúMaria Garc√≠a, n√©e L√≥pez‚Äù shows that Mar√≠a‚Äôs birth surname was L√≥pez, even though she later used Garc√≠a after marriage.<br><br>The use of n√©e is especially important for tracing maternal family lines, since women‚Äôs surnames may change across generations in many cultures. By recording a woman‚Äôs birth name, genealogists can correctly link her to her original family, distinguish between individuals with similar married names, and maintain continuity in lineage reconstruction, particularly in marriage, probate, and church records.</li></ul></div>"""

SUBSCRIBE_CONTENT = """<div style="background:white;padding:40px;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);max-width:800px;margin:30px auto;text-align:center;font-family:'Segoe UI',sans-serif;"><h1 style="color:#006064;margin-bottom:10px;">Join the Yates Research Community</h1><p style="color:#555;font-size:1.1em;line-height:1.6;margin-bottom:30px;">Stay connected with the latest breakthroughs in the Yates One-Name Study. Get notified about new DNA groups, lineage verifications, and quarterly reports.</p><div style="background:#e0f2f1;padding:25px;border-radius:8px;border:1px solid #b2dfdb;display:inline-block;"><h3 style="margin-top:0;color:#004d40;">üìß One-Click Subscribe</h3><p style="margin-bottom:20px;">Click below to send a subscription request to our Groups.io list.</p><a href="mailto:yates-one-name-study+subscribe@groups.io?subject=Subscribe" style="display:inline-block;padding:15px 30px;background:#00838f;color:white;text-decoration:none;border-radius:5px;font-weight:bold;font-size:1.1em;box-shadow:0 2px 5px rgba(0,0,0,0.2);">Subscribe Now</a></div><p style="margin-top:30px;font-size:0.9em;color:#777;">Powered by Groups.io. You will receive a confirmation email shortly.</p></div>"""

SHARE_CONTENT = """<div style="max-width:900px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><div style="text-align:center;margin-bottom:40px;"><h1 style="color:#0277bd;margin-bottom:10px;">Share Your Ancestry DNA Matches</h1><p style="font-size:1.1em;color:#555;">Ancestry provides a built-in sharing feature that allows you to grant limited access to your DNA matches <strong>without sharing your personal account details</strong>. You remain in full control of your account at all times.</p></div><div style="display:grid;grid-template-columns:1fr 1fr;gap:30px;margin-bottom:30px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #0277bd;"><h3 style="color:#0277bd;margin-top:0;">How Sharing Works</h3><p>From your AncestryDNA <strong>Settings</strong> page, you may invite another individual by email and assign one of the following roles:</p><ul style="padding-left:20px;"><li><strong>Viewer</strong> (Read only)</li><li><strong>Collaborator</strong> (Recommended for Study)</li><li><strong>Manager</strong> (Full Control)</li></ul></div><div style="background:#e3f2fd;padding:25px;border-radius:8px;border:1px solid #90caf9;"><h3 style="color:#01579b;margin-top:0;">Privacy & Control</h3><p>This sharing arrangement provides <strong>limited access only</strong>. It does not allow changes to your account and does not expose your personal details.</p><p><strong>You may revoke access at any time through Ancestry.</strong></p></div></div><div style="background:white;padding:30px;border-radius:8px;border:1px solid #ddd;box-shadow:0 4px 15px rgba(0,0,0,0.05);"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">How to Share for the Yates One-Name Study</h2><ol style="font-size:1.1em;line-height:1.8;padding-left:25px;"><li>Open your <strong>AncestryDNA Settings</strong>.</li><li>Scroll to the section labeled "DNA Result Sharing" and click <strong>Invite</strong>.</li><li>Send the invitation to <strong>Ron Yates</strong> at: <br><span style="background:#fff3e0;padding:5px 10px;border-radius:4px;font-weight:bold;color:#e65100;font-family:monospace;font-size:1.2em;">yatesvilleron@gmail.com</span></li><li>Assign the role: <strong>Collaborator</strong>.</li></ol><div style="background:#fffde7;border-left:5px solid #fbc02d;padding:15px;margin-top:20px;font-size:0.95em;"><strong>Why Collaborator?</strong> The Collaborator role allows Ron to review shared matches and create small internal groups (colored dots) to identify which matches have been reviewed and which have contributed evidence to the Yates One-Name Study.</div></div><div style="margin-top:40px;"><h3 style="color:#006064;">What Happens Next?</h3><p>After sharing, you will receive an invitation to subscribe to the <strong>Yates One-Name Study Groups.io mailing list</strong>, where DNA proof summaries and study findings are shared.</p><h3 style="color:#006064;">Reciprocal Sharing (Optional)</h3><p>If you are interested in viewing Ron‚Äôs DNA matches, simply let him know. When a direct match exists, that relationship will be reflected in the study findings.</p></div></div>"""

THEORY_CONTENT = """<div style="max-width:1000px;margin:30px auto;font-family:'Segoe UI',sans-serif;line-height:1.6;color:#333;"><h1 style="text-align:center;color:#004d40;font-size:2.5em;margin-bottom:10px;">The Yates DNA Strategy</h1><p style="text-align:center;font-size:1.2em;color:#666;margin-bottom:40px;">Moving beyond traditional Y-DNA to solve modern genealogical mysteries.</p><div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:30px;margin-bottom:40px;"><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #00838f;"><h2 style="color:#006064;margin-top:0;">The Autosomal Revolution</h2><p>Traditional one-name studies rely almost exclusively on Y-DNA to trace the paternal surname line. While valuable for deep history, this approach ignores 50% of our ancestors at every generation.</p><p><strong>Our Focus:</strong> We utilize <strong>Autosomal DNA (atDNA)</strong>‚Äîinherited from both parents‚Äîto verify connections across <em>all</em> branches. This allows us to:</p><ul style="padding-left:20px;color:#444;"><li>Bridge the "Gender Gap" by tracing female descendants.</li><li>Verify paper trails for the last 300 years (Genealogical Time).</li><li>Cluster "Floating" Yates families into their correct lines.</li></ul></div><div style="background:white;padding:25px;border-radius:8px;box-shadow:0 4px 10px rgba(0,0,0,0.1);border-top:5px solid #f9a825;"><h2 style="color:#f57f17;margin-top:0;">Collateral Saturation</h2><p>A single DNA match can be luck. Ten matches is a statistic. <strong>Thirty matches is a fact.</strong></p><p>We employ a technique called <strong>Collateral Saturation</strong>. We don't look for one "Golden Match." We analyze groups of matches from independent cousin lines. When descendants from four different children of <em>William & Mary Yates</em> all share DNA with you, the relationship is biologically confirmed.</p><div style="text-align:center;margin-top:20px;"><a href="dna_network.shtml" style="display:inline-block;padding:10px 20px;background:#f9a825;color:#333;text-decoration:none;border-radius:4px;font-weight:bold;">View the Network</a></div></div></div><div style="background:#e0f2f1;padding:30px;border-radius:8px;border:1px solid #b2dfdb;margin-bottom:40px;"><h2 style="color:#004d40;margin-top:0;text-align:center;">From Theory to Tools</h2><p style="text-align:center;max-width:700px;margin:0 auto 20px auto;">We have built a suite of forensic tools to visualize this data. Instead of raw spreadsheets, we offer interactive dashboards to prove your connection.</p><div style="display:flex;flex-wrap:wrap;justify-content:center;gap:15px;margin-top:20px;"><a href="ons_yates_dna_register.shtml" style="background:#006064;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">The Register</a><a href="lineage_proof.html" style="background:#00838f;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Proof Engine</a><a href="dna_dossier.html" style="background:#00acc1;color:white;padding:12px 20px;text-decoration:none;border-radius:4px;font-weight:bold;">Forensic Dossier</a></div></div><div style="background:#f5f5f5;padding:20px;border-radius:8px;border:1px solid #ddd;"><h3 style="color:#555;margin-top:0;">Legacy Data: Y-DNA Haplogroups</h3><p style="font-size:0.9em;color:#666;margin-bottom:15px;">Y-DNA is the backbone of deep ancestry (27,000 BCE to 1600 AD). While not our primary focus for recent genealogy, we maintain a detailed record of the Yates Y-Chromosome mutations (R-M207 through FT266579).</p><a href="https://yates.one-name.net/gengen/dna_proof_y.htm" style="color:#006064;font-weight:bold;text-decoration:none;">&raquo; View Detailed Y-DNA Findings</a></div></div>"""

def make_page(title, content, count, view_type="", extra="", stats_bar=""):
    nav_blk = ""
    if view_type in ['ancestor', 'participant', 'tree_az', 'tree_za', 'proof', 'hot_paths', 'network', 'dossier', 'subscribe', 'share', 'buster', 'singleton']:
        nav_blk = SITE_INFO
    if view_type == 'subscribe' or view_type == 'theory' or view_type == 'share':
        nav_blk = ""

    toggle = ""
    print_btn = ""
    search_bar = ""

    # V86: Add Search Bar for Registers and Singletons
    if view_type in ['ancestor', 'participant', 'singleton']:
        search_bar = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="üîç Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div>"""

    if view_type in ['ancestor', 'participant', 'singleton']:
        view_name = "Register"
        if view_type == 'singleton': view_name = "Singleton List"
        print_btn = f"""<div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">üñ®Ô∏è Print {view_name}</button></div>"""

    if view_type == 'ancestor':
        toggle = f"""<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="font-weight:bold;color:#006064;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="color:#00acc1;text-decoration:none;">By Participant Name</a></div>"""
    elif view_type == 'participant':
        toggle = f"""<div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="font-weight:bold;color:#006064;">By Participant Name</a></div>"""
    elif 'tree' in view_type:
        za = f'<span style="font-weight:bold;color:#000;">Z-A</span>' if 'za' in view_type else f'<a href="just-trees.shtml" style="color:#006064;text-decoration:underline;">Z-A</a>'
        az = f'<span style="font-weight:bold;color:#000;">A-Z</span>' if 'az' in view_type else f'<a href="just-trees-az.shtml" style="color:#006064;text-decoration:underline;">A-Z</a>'
        toggle = f"""<div class="no-print" style="text-align:center;font-family:sans-serif;font-size:16px;margin:15px 0 10px 0;">Individual Yates Family trees: &nbsp; {za} &nbsp;|&nbsp; {az}</div>"""

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css">{extra}</head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{stats_bar}{NAV_HTML}</div>{nav_blk}{search_bar}{print_btn}{toggle}{content}{JS_CORE}</div></body></html>"""

print("‚úÖ Cell 1 Loaded! (Includes Hot Paths Nav Link)")

      [CELL 1] SETUP LOADED (V107)
      (Includes: Lineage Hot Paths Nav Link)
‚úÖ Cell 1 Loaded! (Includes Hot Paths Nav Link)


In [72]:
# @title [CELL 3] The Data Engine (V147 - Authority Keys & True Spine)
def run_engine():
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V147)...")
    print("="*60)

    import os, re, csv, pytz
    from ftplib import FTP_TLS
    from google.colab import userdata
    from datetime import datetime

    # --- HARDCODED AUTHORITY KEYS ---
    # Map a tester's Unmasked Name to their exact GEDCOM ID to stop the engine from guessing.
    TESTER_ID_MAP = {
        "Ron Yates": "I1",
        "Ronald Eugene Yates": "I1"
    }

    CSV_DB = "engine_database.csv"
    if os.path.exists(CSV_DB): os.remove(CSV_DB)

    HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
    USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
    PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")
    REMOTE_SUBDIR = "ons-study"
    KEY_FILE = "match_to_unmasked.csv"
    PROCESSED_GED = "_processed_unmasked.ged"

    try:
        ftps = FTP_TLS(); ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
        with open(KEY_FILE, "wb") as f: ftps.retrbinary(f"RETR /{REMOTE_SUBDIR}/{KEY_FILE}", f.write)
        ftps.quit()
    except: pass

    ged_files = [f for f in os.listdir('.') if f.lower().endswith('.ged') and "_processed" not in f and "unmasked" not in f.lower()]
    if not ged_files: return print("‚ùå No GEDCOM found. Please upload your .ged file.")
    DEFAULT_GEDCOM = sorted(ged_files, key=lambda x: os.path.getmtime(x), reverse=True)[0]

    unmask_map = {}
    unmask_id_map = {}
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            for r in csv.reader(f):
                if len(r) >= 2:
                    u_name = r[1].strip()
                    unmask_map[r[0].strip().lower()] = u_name
                    if len(r) >= 3 and r[2].strip():
                        raw_id = r[2].strip().replace('@', '')
                        if raw_id.isdigit(): raw_id = f"I{raw_id}"
                        unmask_id_map[u_name] = raw_id

    with open(DEFAULT_GEDCOM, 'r', encoding='utf-8', errors='replace') as fin, open(PROCESSED_GED, 'w', encoding='utf-8') as fout:
        buf = []; r_name = None
        for line in fin:
            if line.startswith("0 @"):
                if buf:
                    for b in buf: fout.write(f"1 NAME {r_name}\n" if b.startswith("1 NAME") and r_name else b)
                buf = [line]; r_name = None
            else:
                buf.append(line)
                if line.startswith("1 NPFX"):
                    m = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', line.split(" ", 2)[2].strip() if len(line.split(" ", 2))>2 else "")
                    if m: r_name = unmask_map.get(m.group(2).lower(), m.group(2).lower())
        if buf:
            for b in buf: fout.write(f"1 NAME {r_name}\n" if b.startswith("1 NAME") and r_name else b)

    def clean_name(n): return n.replace("/", "").strip() if n else "findme"
    def get_surname(n): return re.sub(r'\b(jr\.?|sr\.?|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', n, flags=re.IGNORECASE).replace(',', '').split()[-1] if n and "findme" not in n.lower() else ""

    inds = {}; fams = {}
    cid = None; cfam = None; ctag = None
    with open(PROCESSED_GED, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            parts = line.strip().split(" ", 2)
            if len(parts)<2: continue
            lvl, tag, val = parts[0], parts[1], parts[2] if len(parts)>2 else ""
            if lvl=="0" and "INDI" in val:
                cid = tag.replace("@", "")
                inds[cid] = {"name": "findme", "famc": None, "fams": [], "code": "", "cm": 0, "birt": "", "deat": ""}
                cfam = None; ctag = "INDI"
            elif cid and lvl!="0":
                if tag=="NAME": inds[cid]["name"] = clean_name(val)
                elif tag=="FAMC": inds[cid]["famc"] = val.replace("@", "")
                elif tag=="FAMS": inds[cid]["fams"].append(val.replace("@", ""))
                elif tag=="NPFX":
                    m_code = re.search(r'(\d+)\s*&?\s*([^ \t\n\r\f\v]+)', val)
                    if m_code: inds[cid]["code"] = m_code.group(2).lower()
                    m_cm = re.search(r'^(\d+)|(\d+)\s*cM', val, re.IGNORECASE)
                    if m_cm: inds[cid]["cm"] = int(m_cm.group(1) or m_cm.group(2))
                elif tag in ["BIRT", "DEAT"]: ctag = tag
                elif tag=="DATE" and ctag:
                    m_yr = re.search(r'\d{4}', val)
                    if m_yr: inds[cid][ctag.lower()] = m_yr.group(0)
                    ctag = None
            if lvl=="0" and "FAM" in val:
                cfam = tag.replace("@", ""); fams[cfam] = {"husb": None, "wife": None}; cid = None
            elif cfam and lvl!="0":
                if tag=="HUSB": fams[cfam]["husb"] = val.replace("@", "")
                elif tag=="WIFE": fams[cfam]["wife"] = val.replace("@", "")

    def get_parents(pid):
        if pid and pid in inds and inds[pid]["famc"] and inds[pid]["famc"] in fams: return fams[inds[pid]["famc"]]["husb"], fams[inds[pid]["famc"]]["wife"]
        return None, None

    def to_spanish(pid, name):
        if "findme" in name.lower(): return name
        _, mid = get_parents(pid)
        msur = get_surname(inds[mid]["name"]) if mid and mid in inds else ""
        if msur and "findme" not in msur.lower() and msur.lower() not in name.lower(): return f"{name}-{msur}"
        return name

    def climb(sid):
        if not sid or sid not in inds: return []
        c = sid; path = []; pfamc = inds[sid]["famc"]
        while c:
            p = inds.get(c)
            if not p: break
            s_name = "findme"; s_id = None
            afam = pfamc if pfamc in p["fams"] else (p["fams"][0] if p["fams"] else None)
            if afam and afam in fams:
                s_id = fams[afam]["wife"] if fams[afam]["husb"]==c else fams[afam]["husb"]
                if s_id and s_id in inds: s_name = inds[s_id]["name"]
            path.append({"name": to_spanish(c, p["name"]), "raw": p["name"], "id": c, "s_raw": s_name, "s_id": s_id})
            did, mid = get_parents(c)
            pfamc = p["famc"]
            if not did and not mid: break
            dy = "yates" in (inds.get(did,{}).get("name","").lower() if did else "")
            my = "yates" in (inds.get(mid,{}).get("name","").lower() if mid else "")
            if dy and not my: c = did
            elif my and not dy: c = mid
            else: c = did
        return path

    rows = []
    for uid, p in inds.items():
        if p["code"]:
            path = climb(uid)
            if not path: continue
            f_line = list(reversed(path))
            gen1 = f_line[0]
            t_name = gen1["raw"]; s_name = gen1["s_raw"]
            b1 = inds[gen1["id"]]["birt"] or "findme"; d1 = inds[gen1["id"]]["deat"] or "findme"
            tdates = f"({b1} - {d1})" if b1!="findme" or d1!="findme" else "findme"
            dir_label = f"{get_surname(t_name)}, {t_name.replace(get_surname(t_name), '').strip()} {tdates}" + (f" & {s_name}" if s_name!="findme" else "")

            pnames = []; sids = []; snames = []; b_names = []; b_ids = []
            for i, x in enumerate(f_line):
                pnames.append(f"{t_name} {tdates}" if i==0 else x["name"])
                if x["id"]: sids.append(x["id"]); snames.append(x["name"])
                if x["s_id"]: sids.append(x["s_id"]); snames.append(x["s_raw"])
                if x["s_raw"] != "findme": b_names.append(f"{x['name']} & {x['s_raw']}"); b_ids.append(f"{x['id']}+{x['s_id']}")
                else: b_names.append(x["name"]); b_ids.append(x["id"])

            unmasked = unmask_map.get(p["code"], p["code"])
            rows.append({"MASKED": p["code"], "Unmasked": unmasked, "Match": p["name"], "ID": uid, "cM": p["cm"], "Lineage": " -> ".join(pnames), "Dir_Label": dir_label, "Alpha_Key": re.sub(r'[^a-zA-Z0-9]', '', t_name), "s_ids": ",".join(sids), "s_names": "|".join(snames), "b_ids": "|".join(b_ids), "b_names": "|".join(b_names), "Tester_Path_Names": "", "Tester_Path_IDs": ""})

    def get_exact_tester_id(tname):
        for k, v in TESTER_ID_MAP.items():
            if k.lower() == tname.lower(): return v
        if tname in unmask_id_map: return unmask_id_map[tname]
        cl_t = re.sub(r'[^a-zA-Z0-9 ]', '', tname).lower().strip()
        tid = next((u for u, p in inds.items() if re.sub(r'[^a-zA-Z0-9 ]', '', p["name"]).lower().strip() == cl_t), None)
        if not tid and len(cl_t.split())>=2:
            tid = next((u for u, p in inds.items() if p["name"] and p["name"].lower().startswith(cl_t.split()[0][0]) and p["name"].lower().endswith(cl_t.split()[-1])), None)
        return tid

    import pandas as pd
    tester_cache = {}
    for r in rows:
        tname = r["Unmasked"]
        if tname not in tester_cache:
            tid = get_exact_tester_id(tname)
            if tid:
                t_path = list(reversed(climb(tid)))
                tn = []; ti = []
                for x in t_path:
                    if x["s_raw"] != "findme": tn.append(f"{x['name']} & {x['s_raw']}"); ti.append(f"{x['id']}+{x['s_id']}")
                    else: tn.append(x["name"]); ti.append(x["id"])
                tester_cache[tname] = {"n": "|".join(tn), "i": "|".join(ti)}
            else: tester_cache[tname] = {"n": "", "i": ""}
        r["Tester_Path_Names"] = tester_cache[tname]["n"]
        r["Tester_Path_IDs"] = tester_cache[tname]["i"]

    df = pd.DataFrame(rows)
    df.to_csv(CSV_DB, index=False)
    print(f"    ‚úÖ Engine complete. {len(df)} rows generated.")

print("‚úÖ Cell 3 Loaded.")

‚úÖ Cell 3 Loaded.


In [73]:
# @title [CELL 4] Forensic Tools Builder (V147)
def run_forensic_tools():
    print("="*60)
    print("      [CELL 4] BUILDING FORENSIC TOOLS (V147)...")
    print("="*60)

    import os, json, pytz
    import pandas as pd
    from datetime import datetime

    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB): return print("‚ùå ERROR: engine_database.csv not found.")
    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")
    print_stats_text = f"Study Data Current As Of: {timestamp} | Total DNA Participants Analyzed: {len(df):,}"

    anc_data = {}; part_data = {}
    for lbl, grp in df.groupby('Dir_Label'):
        if len(grp)<2: continue
        anc_data[grp.iloc[0]['Alpha_Key']] = {"name": lbl, "matches": len(grp), "cm": int(grp['cM'].sum()), "badge": "Platinum" if len(grp)>=30 else "Gold" if len(grp)>=15 else "Silver" if len(grp)>=5 else "Bronze"}
    for p, grp in df.groupby('Unmasked'):
        if len(grp)<2: continue
        part_data[p] = {"name": p, "sort_key": p.split()[-1], "matches": len(grp), "cm": int(grp['cM'].sum())}

    smart_json = json.dumps({"ancestors": anc_data, "participants": part_data})
    db_json = df[['Dir_Label', 'Unmasked', 'cM', 'ID', 'Lineage', 's_ids', 's_names', 'b_ids', 'b_names', 'Tester_Path_Names', 'Tester_Path_IDs']].rename(columns={'Dir_Label':'ancestor', 'Unmasked':'participant', 'cM':'cm', 'ID':'id', 'Lineage':'lineage', 's_ids':'search_ids', 's_names':'search_names', 'b_ids':'gen_ids', 'b_names':'gen_names', 'Tester_Path_Names':'t_names', 'Tester_Path_IDs':'t_ids'}).to_json(orient='records')
    JS_GLOBALS = f"const DATA={smart_json};\nconst DB={db_json};"

    # --- BIOLOGICAL PROOF (V147 UI) ---
    bio_proof_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Biological Proof Register</title><link rel="stylesheet" href="partials_unified.css"><style>
    body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}
    .proof-card{{background:white;max-width:1100px;margin:20px auto;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);padding:40px}}
    .formal-brief{{font-family:'Georgia',serif;color:#000}}
    .formal-header{{text-align:center;border-bottom:3px solid #000;padding-bottom:20px;margin-bottom:30px}}
    .formal-header h1{{margin:0;text-transform:uppercase;font-size:26px;letter-spacing:1px}}
    .formal-header p{{margin:5px 0 0 0;font-style:italic;font-size:16px;color:#444}}
    .methodology-block{{background:#f9f9f9;padding:20px;border-left:4px solid #455a64;margin-bottom:30px;font-size:15px;line-height:1.6}}
    .badge{{padding:5px 10px;border-radius:4px;font-weight:bold;font-size:0.85em;text-transform:uppercase;border:1px solid #ccc}}
    .badge-platinum{{background:#eceff1;color:#263238;border-color:#cfd8dc}}
    .badge-gold{{background:#fff8e1;color:#f57f17;border-color:#ffe082}}
    .badge-silver{{background:#f5f5f5;color:#616161;border-color:#e0e0e0}}
    .badge-bronze{{background:#efebe9;color:#5d4037;border-color:#d7ccc8}}
    .badge-descendant{{background:#e3f2fd;color:#0d47a1;border-color:#90caf9}}
    .badge-terminal{{background:#fff;color:#000;border-color:#000;font-style:italic}}
    table{{width:100%;border-collapse:collapse;margin-top:15px;margin-bottom:40px;font-family:'Georgia',serif;font-size:15px}}
    th{{background:#eceff1;color:#263238;padding:12px;text-align:left;border-bottom:2px solid #000}}
    td{{padding:12px;border-bottom:1px solid #ddd;vertical-align:top}}
    select{{padding:12px;font-size:16px;width:100%;max-width:600px;margin-bottom:20px;border:1px solid #455a64;border-radius:4px}}
    .control-panel{{background:#eceff1;padding:25px;border-radius:8px;border:1px solid #cfd8dc;margin-bottom:20px;text-align:center;font-family:'Segoe UI',sans-serif}}
    .tabs{{display:flex;gap:10px;margin-bottom:15px;border-bottom:2px solid #ddd;font-family:'Segoe UI',sans-serif}}
    .tab{{padding:10px 20px;cursor:pointer;background:#eee;border-radius:5px 5px 0 0;font-weight:bold;flex:1;text-align:center;color:#555}}
    .tab.active{{background:#455a64;color:white}}
    @media print{{ .no-print{{display:none !important}} .only-print{{display:block !important}} .proof-card{{box-shadow:none;border:none;padding:0;margin:0}} body{{background:white;padding:0}} th{{background:#f0f0f0 !important;color:#000 !important;-webkit-print-color-adjust:exact;print-color-adjust:exact}} .methodology-block{{border-left:4px solid #000;background:transparent}} .badge{{border:1px solid #000;color:#000;background:transparent !important}} }}
    .only-print{{display:none}}
    </style></head><body><div class="wrap"><h1 class="centerline no-print">üìú Biological Proof Register</h1><div class="proof-card"><div class="no-print" style="float:right;margin-bottom:20px;"><button onclick="window.print()" style="background:#455a64;color:white;border:none;padding:10px 20px;border-radius:4px;cursor:pointer;font-weight:bold;">üñ®Ô∏è Print Formal Brief</button></div><div class="no-print tabs"><div class="tab active" onclick="setMode('participant', event)">1. Generate Formal Brief (Tester)</div><div class="tab" onclick="setMode('ancestor', event)">2. View Ancestral Cohort</div></div><div class="no-print control-panel"><div id="panel-participant"><label style="font-weight:bold;display:block;margin-bottom:10px;">Select Subject Tester for Official Report:</label><select id="hpParticipantSelect" onchange="runApexPath()"><option value="">-- Choose Participant --</option></select></div><div id="panel-ancestor" style="display:none;"><label style="font-weight:bold;display:block;margin-bottom:10px;">Select Target Ancestor:</label><select id="hpAncestorSelect" onchange="runAncestorMap()"><option value="">-- Select Ancestor --</option></select></div></div><div id="proof-result"></div></div></div><script>{JS_GLOBALS}
    const pSel=document.getElementById('hpParticipantSelect'); Object.keys(DATA.participants).sort().forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=k;pSel.appendChild(o);}});
    const aSel=document.getElementById('hpAncestorSelect'); Object.keys(DATA.ancestors).sort().forEach(k=>{{const o=document.createElement('option');o.value=k;o.innerText=DATA.ancestors[k].name;aSel.appendChild(o);}});
    function setMode(m, e){{ document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active')); e.target.classList.add('active'); document.getElementById('panel-ancestor').style.display=(m==='ancestor')?'block':'none'; document.getElementById('panel-participant').style.display=(m==='participant')?'block':'none'; document.getElementById('proof-result').innerHTML=''; }}
    function runAncestorMap(){{ const d=DATA.ancestors[aSel.value]; if(!d) return; const rows=DB.filter(r=>r.ancestor===d.name); let h=`<div class="formal-brief"><div class="formal-header"><h1>Descendant Cohort Validation</h1><p>Subject: ${{d.name}}</p></div><div class="methodology-block"><strong>Status:</strong> Validated by <strong>${{rows.length}} independent matches</strong> sharing <strong>${{d.cm}} total cM</strong> of DNA.</div><table><thead><tr><th>Participant</th><th>Shared cM</th><th>Documented Lineage Path</th></tr></thead><tbody>`; rows.sort((a,b)=>b.cm-a.cm).forEach(m=>{{ h+=`<tr><td><strong>${{m.participant}}</strong></td><td>${{m.cm}} cM</td><td style="font-size:0.9em;color:#444;">${{m.lineage.replace(/ -> /g, ' &rarr; ')}}</td></tr>`; }}); document.getElementById('proof-result').innerHTML=h+`</tbody></table></div>`; }}
    function runApexPath(){{
        const pName=pSel.value; if(!pName) return; const myMatches=DB.filter(r=>r.participant===pName);
        if(myMatches.length===0) return alert("No matches found.");
        const tNames=myMatches[0].t_names; const tIDs=myMatches[0].t_ids;
        let h=`<div class="formal-brief"><div class="only-print" style="text-align:right;font-size:12px;color:#666;margin-bottom:20px;">{print_stats_text}</div><div class="formal-header"><h1>Forensic DNA Lineage Report</h1><p>Biological Verification via Collateral Saturation</p></div>`;
        if(tNames && tIDs) {{
            const nArr=tNames.split('|'); const iArr=tIDs.split('|'); const tGens=nArr.length;
            h+=`<div class="methodology-block"><strong>SUBJECT TESTER:</strong> ${{pName}}<br><strong>METHODOLOGY:</strong> This report validates the documented paper trail of the subject using <em>Collateral DNA Saturation</em>. The algorithm traces the subject's exact claimed lineage backwards from the present day. A node is considered biologically proven when it is corroborated by redundant, independent lines of descent.</div>`;
            h+=`<h3 style="margin-top:30px;text-transform:uppercase;font-size:16px;">Claimed Documentary Lineage</h3><div style="padding:15px;border:1px solid #ddd;background:#fbfbfb;margin-bottom:40px;font-family:monospace;font-size:14px;color:#333;">${{nArr.join(' <br>&nbsp;&nbsp;&rdsh;&nbsp; ')}}</div>`;
            let trs="", highHeat=0;
            for(let i=0; i<tGens; i++){{
                const idc=iArr[i]; if(!idc) continue; const pid=idc.split('+')[0]; let heat=0;
                myMatches.forEach(m=>{{ if(m.search_ids && m.search_ids.split(',').includes(pid)) heat++; }});
                if(heat>highHeat) highHeat=heat;
                let bc="terminal", st="Private Line";
                if(heat>=30){{bc="platinum";st="Confirmed Standard (30+)";}} else if(heat>=15){{bc="gold";st="Confirmed Validation (15+)";}} else if(heat>=5){{bc="silver";st="Verified Node (5+)";}} else if(heat>=2){{bc="bronze";st="Emerging Node (2+)";}}
                if(i===tGens-1){{bc="descendant";st="Subject Tester";}}
                const dID=idc.split('+').map(x=>"I"+x.replace(/[^0-9]/g,'')).join(' & ');
                trs+=`<tr><td style="text-align:center;font-weight:bold;">${{tGens-i}}</td><td><strong>${{nArr[i]}}</strong><br><span style="font-size:0.85em;color:#666;">ID: ${{dID}}</span></td><td style="text-align:center;font-weight:bold;font-size:1.1em;">${{heat}}</td><td><span class="badge badge-${{bc}}">${{st}}</span></td></tr>`;
            }}
            let oStatus="Insufficient Data"; if(highHeat>=30) oStatus="Fully Validated (Platinum Standard)"; else if(highHeat>=15) oStatus="Strongly Validated (Gold Standard)"; else if(highHeat>=5) oStatus="Verified (Silver Standard)";
            h+=`<h3 style="margin-top:40px;text-transform:uppercase;font-size:16px;">Biological Corroboration Manifest</h3><p style="font-size:14px;color:#444;"><strong>Conclusion:</strong> The deep ancestry of this line is <strong>${{oStatus}}</strong> based on ${{myMatches.length}} total collateral matches aggregated across the lineage.</p><table><thead><tr><th style="text-align:center;width:60px;">Gen</th><th>Ancestor Node (Biological Couple)</th><th style="text-align:center;">Independent<br>DNA Corroborators</th><th>Forensic Status</th></tr></thead><tbody>${{trs}}</tbody></table></div>`;
        }} else {{ h+=`<p>Error: Could not locate documented spine for this tester.</p></div>`; }}
        document.getElementById('proof-result').innerHTML=h; document.getElementById('proof-result').style.display='block';
    }}</script></body></html>"""

    with open("biological_proof.html", "w", encoding="utf-8") as f: f.write(bio_proof_html)
    print("    ‚úÖ Biological Proof built.")

    # (We will build Buster and Dossier in a future step, skipping to save memory right now)

print("‚úÖ Cell 4 Loaded.")

‚úÖ Cell 4 Loaded.


In [80]:
# @title [CELL 5] Core Publisher & Uploader (V150 - Scope Fix)
def run_publisher():
    print("="*60)
    print("      [CELL 5] CORE PUBLISHER & FTP UPLOADER (V150)...")
    print("="*60)

    import os, re, pytz, json
    import pandas as pd
    from datetime import datetime
    from google.colab import userdata
    from ftplib import FTP_TLS

    # --- SECURE CREDENTIAL FETCH ---
    try:
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")
    except Exception as e:
        return print(f"‚ùå Credential Error: {e}")

    REMOTE_SUBDIR = "ons-study"
    CSV_DB = "engine_database.csv"
    if not os.path.exists(CSV_DB): return print("‚ùå ERROR: engine_database.csv not found.")
    df = pd.read_csv(CSV_DB, encoding="iso-8859-15")

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%B %d, %Y %-I:%M %p EST")
    stats_bar_full = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Study Data Current As Of:</strong> {timestamp} | <strong>Total Autosomal matches:</strong> {len(df):,}</div>"""

    # --- HTML VARIABLES ---
    NAV_HTML = """<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li><li><a href="/ons-study/just-trees.shtml">Trees</a></li><li><a href="/ons-study/dna_network.shtml">DNA Network</a></li><li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li><li><a href="/ons-study/biological_proof.html" style="color:#fff !important; font-weight:bold;">Biological Proof</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/brick_wall_buster.shtml" style="background:#f57f17;color:black !important;">Brick Wall Buster</a></li></ul></nav>"""
    SITE_INFO = """<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;margin-bottom:0;">This register employs <em>Collateral DNA Saturation</em>‚Äîa method blending genealogical reasoning with data-driven logic to prove connections using multiple independent DNA cousins.</p></div>"""
    JS_CORE = r"""<script type="text/javascript">(function(){ function textOf(c){var val = c.getAttribute('data-sort') || c.textContent || c.innerText;return (val || '').replace(/\s+/g,' ').trim().toLowerCase();} function sortTable(t,i,d){if(!(t&&t.tBodies&&t.tBodies[0]))return;var tb=t.tBodies[0],r=Array.prototype.slice.call(tb.rows||[]),asc=(d==='asc');r.sort(function(a,b){var A=textOf(a.cells[i]),B=textOf(b.cells[i]),nA=parseFloat(A.replace(/[^0-9.\-]/g,'')),nB=parseFloat(B.replace(/[^0-9.\-]/g,''));if(!isNaN(nA)&&!isNaN(nB))return asc?(nA-nB):(nB-nA);return(A<B)?(asc?-1:1):(A>B)?(asc?1:-1):0;});var f=document.createDocumentFragment();for(var k=0;k<r.length;k++)f.appendChild(r[k]);tb.appendChild(f);} function makeSortable(t){if(!(t&&t.tHead&&t.tHead.rows.length))return;var th=t.tHead.rows[0].cells;for(var i=0;i<th.length;i++){(function(idx){var h=th[idx],d='asc';h.style.cursor='pointer';h.onclick=function(){d=(d==='asc')?'desc':'asc';for(var j=0;j<th.length;j++)th[j].innerHTML=th[j].innerHTML.replace(/\s+\(asc\)|\s+\(desc\)/,'');h.innerHTML+=(d==='asc'?' (asc)':' (desc)');sortTable(t,idx,d);};})(i);}} window.filterTable = function() { var input = document.getElementById("tableSearch"); var filter = input.value.toUpperCase(); var table = document.getElementById("reg-table") || document.querySelector("table.dataframe"); var tr = table.getElementsByTagName("tr"); for (var i = 1; i < tr.length; i++) { var tdArr = tr[i].getElementsByTagName("td"); var found = false; for (var j = 0; j < tdArr.length; j++) { if (tdArr[j]) { var txtValue = tdArr[j].textContent || tdArr[j].innerText; if (txtValue.toUpperCase().indexOf(filter) > -1) { found = true; break; } } } tr[i].style.display = found ? "" : "none"; } } function init(){ var t=document.getElementsByTagName('table'); for(var i=0;i<t.length;i++) if(/\bsortable\b/.test(t[i].className)) makeSortable(t[i]); } if(document.readyState==='loading')document.addEventListener('DOMContentLoaded',init,false);else init(); })();</script>"""

    def normalize_id(val): return f"I{str(val).replace('@', '').strip()}" if str(val).replace('@', '').strip().isdigit() else str(val).replace('@', '').strip()

    def make_page(title, content, nav_b, bar):
        return f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css"></head><body id="top"><div class="wrap"><h1 class="centerline">{title}</h1><div id="nav-slot">{bar}{NAV_HTML}</div>{SITE_INFO if nav_b else ""}{content}{JS_CORE}</div></body></html>"""

    def format_last_first(full_name):
        if not full_name: return ""
        s = str(full_name)
        cleaned = re.sub(r'\b(jr\.?|sr\.?|iii|iv|esq\.?|m\.d\.?|ph\.d\.?)\b', '', s, flags=re.IGNORECASE)
        parts = cleaned.replace(',', '').replace('.', '').strip().split()
        if len(parts) < 2: return full_name
        if len(parts) >= 3 and parts[-2].lower() == 'y': return f"{parts[-3]} {parts[-2]} {parts[-1]}, " + " ".join(parts[:-3])
        return f"{parts.pop().title()}, " + " ".join(parts)

    # REBUILT: Linked Tree Line Generator
    def build_linked_lineage(row):
        line = str(row.get('Lineage', ''))
        found = str(row.get('Match', ''))
        rid = normalize_id(row.get('ID', ''))
        if found and rid and found in line:
            return line.replace(found, f'<a href="https://yates.one-name.net/tng/verticalchart.php?personID={rid}&tree=tree1&parentset=0&display=vertical&generations=15" target="_blank" style="color:#006064;text-decoration:none;font-weight:bold;">{found}</a>')
        return line

    df['Linked_Tree_Line'] = df.apply(build_linked_lineage, axis=1)

    pages_to_upload = {}

    # --- ADMIN HUB ---
    part_stats = df.groupby('Unmasked').agg({'MASKED': 'first', 'ID': 'count'}).reset_index().rename(columns={'ID': 'Match_Count'})
    part_stats['Sort_Key'] = part_stats['Unmasked'].apply(lambda n: re.split(r'\bnee\b|\bn√©e\b', str(n).lower())[0].replace(',', '').replace('.', '').strip().split()[-1] if n else "zzz")
    part_stats = part_stats.sort_values(['Sort_Key', 'Unmasked'], ascending=[True, True])
    total_m = part_stats['Match_Count'].sum()

    admin_rows = [f"<tr><td>{r['MASKED']}</td><td data-sort='{r['Sort_Key']}'><b>{format_last_first(r['Unmasked'])}</b></td><td>{r['Match_Count']}</td><td>{(r['Match_Count']/total_m)*100:.2f}%</td></tr>" for _, r in part_stats.iterrows()]
    admin_buttons = """<div style="text-align:center;margin:20px 0;"><a href="admin_singletons.shtml" class="action-btn" style="background:#fbc02d;color:#333;margin-right:10px;">üîç View Singleton Lines</a><a href="engine_database.csv" class="action-btn" style="background:#455a64;">‚¨áÔ∏è Download CSV</a></div>"""

    pages_to_upload["research_admin.html"] = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates Research Admin Hub</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px}}.dashboard-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:20px;margin:30px auto;max-width:1200px}}.dash-card{{background:white;padding:20px;border-radius:8px;text-align:center;box-shadow:0 4px 6px rgba(0,0,0,0.1);transition:transform 0.2s;text-decoration:none;color:#333;border:1px solid #ddd}}.dash-card:hover{{transform:translateY(-5px);border-color:#006064;background:#e0f7fa}}.dash-icon{{font-size:40px;margin-bottom:10px;display:block}}.dash-title{{font-weight:bold;font-size:1.1em;color:#006064}}.audit-table-wrapper{{background:white;padding:25px;border-radius:8px;box-shadow:0 4px 6px rgba(0,0,0,0.1);max-width:1200px;margin:0 auto}}.audit-table{{width:100%;border-collapse:collapse}}.audit-table th{{background:#004d40;color:white;padding:12px;text-align:left;position:sticky;top:0}}.audit-table td{{padding:10px;border-bottom:1px solid #eee}}.audit-table tr:hover{{background-color:#f5f5f5}}.total-row{{background:#e0f2f1;font-weight:bold;border-top:2px solid #004d40}}.action-btn{{padding:10px 20px;text-decoration:none;border-radius:4px;font-weight:bold;display:inline-block;}}</style></head><body><div class="wrap"><h1 class="centerline">Yates Research Admin Hub</h1><div id="nav-slot">{stats_bar_full}{NAV_HTML}</div><div class="dashboard-grid"><a href="ons_yates_dna_register.shtml" class="dash-card"><span class="dash-icon">üìã</span><span class="dash-title">DNA Register</span></a><a href="dna_network.shtml" class="dash-card"><span class="dash-icon">üï∏Ô∏è</span><span class="dash-title">DNA Network</span></a><a href="biological_proof.html" class="dash-card"><span class="dash-icon">üìú</span><span class="dash-title">Bio Proof</span></a><a href="lineage_proof.html" class="dash-card"><span class="dash-icon">üß¨</span><span class="dash-title">Proof Engine</span></a><a href="dna_dossier.html" class="dash-card"><span class="dash-icon">üìÅ</span><span class="dash-title">Forensic Dossier</span></a><a href="just-trees.shtml" class="dash-card"><span class="dash-icon">üå≥</span><span class="dash-title">Trees View</span></a></div><div class="audit-table-wrapper"><h2 style="color:#004d40;border-bottom:2px solid #004d40;padding-bottom:10px;margin-top:0;">Participant Activity Report (Sorted by Surname)</h2>{admin_buttons}<div style="max-height:600px;overflow-y:auto;"><table class="audit-table sortable"><thead><tr><th>Masked ID</th><th>Unmasked Participant</th><th>Matches</th><th>% Share</th></tr></thead><tbody>{''.join(admin_rows)}</tbody><tfoot><tr class="total-row"><td colspan="2" style="text-align:right;padding-right:20px;">TOTAL DATABASE:</td><td>{total_m}</td><td>100%</td></tr></tfoot></table></div></div></div>{JS_CORE}</body></html>"""

    # --- GUIDE / CONTENTS ---
    contents_html = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Yates Study User Guide</title><link rel="stylesheet" href="partials_unified.css"><style>body{{font-family:'Segoe UI',sans-serif;background:#f4f7f6;padding:20px}}.guide-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:25px;max-width:1200px;margin:30px auto}}.guide-card{{background:white;padding:25px;border-radius:8px;border-left:5px solid #006064;box-shadow:0 4px 10px rgba(0,0,0,0.05);transition:transform 0.2s}}.guide-card:hover{{transform:translateY(-5px)}}.card-title{{font-size:1.4em;font-weight:bold;color:#004d40;margin-top:0}}.card-why{{color:#b71c1c;font-weight:bold;margin:10px 0 5px 0;font-size:0.9em;text-transform:uppercase}}.card-what{{color:#555;font-size:1em;line-height:1.5;margin-bottom:20px}}.card-btn{{display:inline-block;padding:10px 20px;background:#00838f;color:white;text-decoration:none;border-radius:4px;font-weight:bold}}.card-btn:hover{{background:#006064}}</style></head><body><div class="wrap"><h1 class="centerline">Welcome to the Yates DNA Study Portal</h1><div id="nav-slot">{stats_bar_full}{NAV_HTML}</div><div style="text-align:center;max-width:800px;margin:20px auto;color:#444;font-size:1.1em;">This site transforms raw DNA data into forensic genealogical evidence. Use the tools below to explore your heritage, verify ancestors, and analyze the strength of your genetic connections.</div>
    <div class="guide-grid">
        <div class="guide-card"><h2 class="card-title">1. The DNA Register</h2><div class="card-why">Why View This?</div><div class="card-what">To see the raw evidence. This is the master list of all DNA matches in the study, sorted by ancestral line.</div><a href="ons_yates_dna_register.shtml" class="card-btn">Open Register</a></div>
        <div class="guide-card"><h2 class="card-title">2. DNA Network</h2><div class="card-why">Why View This?</div><div class="card-what">To see the big picture. Visual clusters showing which ancestral lines are genetically "saturated" and proven by multiple testers.</div><a href="dna_network.shtml" class="card-btn">View Network</a></div>
        <div class="guide-card" style="border-left-color:#3f51b5;"><h2 class="card-title" style="color:#1a237e;">3. Biological Proof</h2><div class="card-why" style="color:#3f51b5;">Why View This?</div><div class="card-what">To generate a formal, printable forensic brief annotating a documented paper trail with empirical DNA corroboration.</div><a href="biological_proof.html" class="card-btn" style="background:#3f51b5;">Open Proof Register</a></div>
        <div class="guide-card" style="border-left-color:#f57f17;"><h2 class="card-title" style="color:#e65100;">4. Brick Wall Buster</h2><div class="card-why" style="color:#bf360c;">Why View This?</div><div class="card-what">To break through a dead end. This predictive engine uses "Negative Space Analysis" to suggest which proven family line you likely belong to based on who you match.</div><a href="brick_wall_buster.shtml" class="card-btn" style="background:#ef6c00;">Bust This Wall</a></div>
        <div class="guide-card"><h2 class="card-title">5. Lineage Proof Engine</h2><div class="card-why">Why View This?</div><div class="card-what">To verify a connection. An interactive tool that tests if a specific ancestor is biologically confirmed by independent cousins.</div><a href="lineage_proof.html" class="card-btn">Run Proof</a></div>
        <div class="guide-card"><h2 class="card-title">6. Forensic Dossier</h2><div class="card-why">Why View This?</div><div class="card-what">To get your "Scorecard." Generate a one-page forensic report on yourself or an ancestor, grading the strength of the evidence.</div><a href="dna_dossier.html" class="card-btn">Create Dossier</a></div>
        <div class="guide-card"><h2 class="card-title">7. Research Admin Hub</h2><div class="card-why">Why View This?</div><div class="card-what">For study managers. A high-level audit showing participant statistics, masked IDs, and total study metrics.</div><a href="research_admin.html" class="card-btn" style="background:#455a64;">Admin Access</a></div>
    </div></div></body></html>"""
    pages_to_upload["contents.shtml"] = contents_html

    # --- REGISTERS ---
    df_p = df.copy()
    df_p['sort_key'] = df_p['Unmasked'].apply(lambda n: re.split(r'\bnee\b|\bn√©e\b', str(n).lower())[0].replace(',', '').replace('.', '').strip().split()[-1] if n else "zzz")
    df_p.sort_values(by=['sort_key', 'Match'], ascending=[True, True], inplace=True)

    df_p['Long_Narrative'] = df_p.apply(lambda r: f"{r['Unmasked']} is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match']}</b></a> via {r['Dir_Label'].split('(')[0]} back {len(r['Lineage'].split('->'))} generations.", axis=1)
    df_p.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    sb_str = """<div class="no-print" style="margin:20px auto;max-width:600px;text-align:center;"><input type="text" id="tableSearch" onkeyup="filterTable()" placeholder="üîç Type a name to filter list..." style="width:100%;padding:12px;font-size:16px;border:2px solid #006064;border-radius:4px;"></div><div class="no-print" style="text-align:center;margin-bottom:15px;"><button onclick="window.print()" style="background:#0277bd;color:white;border:none;padding:10px 20px;border-radius:4px;font-weight:bold;cursor:pointer;font-size:14px;">üñ®Ô∏è Print Register</button></div><div class="no-print" style="text-align:center;padding:10px;margin-bottom:10px;font-family:sans-serif;font-size:14px;background:#e0f7fa;border:1px solid #b2ebf2;"><strong>Sort Register:</strong> &nbsp;<a href="ons_yates_dna_register.shtml" style="color:#00acc1;text-decoration:none;">By Ancestral Line</a> &nbsp;|&nbsp; <a href="ons_yates_dna_register_participants.shtml" style="font-weight:bold;color:#006064;">By Participant Name</a></div>"""
    pages_to_upload["ons_yates_dna_register_participants.shtml"] = make_page("ONS Yates Study DNA Register", sb_str + f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_p.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, stats_bar_full)

    mc = df['Dir_Label'].value_counts()
    df_a = df[df['Dir_Label'].isin(mc[mc >= 2].index)].copy().sort_values(by=['Dir_Label', 'Lineage'], ascending=[True, True])
    df_a['Long_Narrative'] = df_a.apply(lambda r: f"{r['Unmasked']} is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match']}</b></a> via {r['Dir_Label'].split('(')[0]} back {len(r['Lineage'].split('->'))} generations.", axis=1)
    df_a.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    sb_str_anc = sb_str.replace("color:#00acc1;text-decoration:none;", "font-weight:bold;color:#006064;").replace("font-weight:bold;color:#006064;", "color:#00acc1;text-decoration:none;")
    sbar_a = f"""<div style="background:#f4f4f4;border-top:1px solid #ddd;border-bottom:1px solid #ddd;font-family:sans-serif;font-size:12px;color:#555;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>Last updated:</strong> {timestamp} &nbsp;|&nbsp; <strong>Validated Matches (2+):</strong> {len(df_a):,} <span style="color:#d32f2f;">(Singleton matches hidden)</span></div>"""
    pages_to_upload["ons_yates_dna_register.shtml"] = make_page("ONS Yates Study DNA Register", sb_str_anc + f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_a.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, sbar_a)
    pages_to_upload["yates_ancestor_register.shtml"] = pages_to_upload["ons_yates_dna_register.shtml"]

    # --- SINGLETONS ---
    df_s = df[df['Dir_Label'].isin(mc[mc == 1].index)].copy().sort_values(by=['Dir_Label'], ascending=[True])
    df_s['Long_Narrative'] = df_s.apply(lambda r: f"{r['Unmasked']} is a {r['cM']} cM match to <a href='https://yates.one-name.net/tng/verticalchart.php?personID={normalize_id(r['ID'])}&tree=tree1&parentset=0&display=vertical&generations=15' target='_blank'><b>{r['Match']}</b></a> via {r['Dir_Label'].split('(')[0]} back {len(r['Lineage'].split('->'))} generations.", axis=1)
    df_s.rename(columns={'Long_Narrative': 'Participants who tested-Who they matched-Oldest known Yates ancestor'}, inplace=True)
    stats_bar_single = f"""<div style="background:#fff3e0;border-top:1px solid #ffcc80;border-bottom:1px solid #ffcc80;font-family:sans-serif;font-size:12px;color:#e65100;padding:8px 15px;text-align:center;margin-bottom:0;"><strong>HOUSEKEEPING VIEW:</strong> Showing {len(df_s):,} singleton matches.</div>"""
    pages_to_upload["admin_singletons.shtml"] = make_page("Singleton Match Register", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_s.to_html(columns=["Participants who tested-Who they matched-Oldest known Yates ancestor"], index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table")}</div>', True, stats_bar_single)

    # --- TREES ---
    df_tree = df_a[['Linked_Tree_Line', 'Dir_Label']].copy()
    df_tree.rename(columns={'Linked_Tree_Line': 'TEMP'}, inplace=True)
    df_tree.sort_values(by=['Dir_Label'], ascending=[False], inplace=True)
    pages_to_upload["just-trees.shtml"] = make_page("Ancestor Register (Trees View)", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', True, sbar_a)
    df_tree.sort_values(by=['Dir_Label'], ascending=[True], inplace=True)
    pages_to_upload["just-trees-az.shtml"] = make_page("Ancestor Register (Trees View)", f'<div class="table-scroll-wrapper" style="margin:0 auto;width:90%;">{df_tree[["TEMP"]].to_html(index=False, border=1, classes="dataframe sortable", escape=False, table_id="reg-table").replace("<th>TEMP</th>", "<th>&nbsp;</th>")}</div>', True, sbar_a)

    # --- NETWORK ---
    net_buf = []
    for anc, g in sorted(df.groupby('Dir_Label'), key=lambda x: len(x[1]), reverse=True):
        if len(g) < 2: continue
        net_buf.append(f"""<details style="background:white;margin-bottom:15px;border:1px solid #ddd;border-radius:5px;overflow:hidden;"><summary style="background:#e0f2f1;padding:15px;cursor:pointer;font-weight:bold;color:#006064;list-style:none;"><span style="font-size:1.1em;">{anc}</span> <span style="float:right;color:#004d40;font-size:0.9em;">Matches: {len(g)} | Total cM: {g['cM'].sum()}</span></summary><div style="padding:15px;"><div style="background:#fffde7;border-left:6px solid #fbc02d;padding:10px;margin-bottom:15px;font-family:sans-serif;color:#333;font-size:0.95em;"><strong>Collateral Saturation Analysis:</strong> Validated by <b>{len(g['Unmasked'].unique())} independent testers</b>.</div><table class="dataframe" border="1"><thead><tr style="text-align:left;"><th>Tester</th><th>cM</th><th>Lineage</th></tr></thead><tbody>""")
        for _, r in g.sort_values('cM', ascending=False).iterrows(): net_buf.append(f"<tr><td>{r['Unmasked']}</td><td>{r['cM']}</td><td>{r['Lineage']}</td></tr>")
        net_buf.append("</tbody></table></div></details>")
    pages_to_upload["dna_network.shtml"] = f"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Participating DNA Network</title><link rel="stylesheet" href="partials_unified.css"><link rel="stylesheet" href="dna_tree_styles.css"><style>summary::-webkit-details-marker{{display:none}}summary{{outline:none}}</style></head><body id="top"><div class="wrap"><h1 class="centerline">Participating DNA Network</h1><div id="nav-slot">{stats_bar_full}{NAV_HTML}</div>{SITE_INFO}<div style="margin:20px auto;width:90%;">{"".join(net_buf)}</div>{JS_CORE}</body></html>"""

    # --- WRITE STATIC FILES TO DISK ---
    for fn, content in pages_to_upload.items():
        with open(fn, "w", encoding="utf-8") as f: f.write(content)
    print("    ‚úÖ Core Registers and Static Pages Built Locally.")

    # =====================================================================
    # PHASE 3: FTP UPLOAD (Pushes EVERYTHING to the site)
    # =====================================================================
    print("\n[PHASE 3] Uploading via FTP to Live Server...")
    try:
        ftps = FTP_TLS()
        ftps.connect(HOST, 21)
        ftps.auth()
        ftps.login(USER, PASS)
        ftps.prot_p()

        found_dir = False
        for d in [f"/{REMOTE_SUBDIR}", f"/public_html/{REMOTE_SUBDIR}", f"htdocs/{REMOTE_SUBDIR}", REMOTE_SUBDIR]:
            try:
                ftps.cwd(d)
                found_dir = True
                break
            except: pass

        if not found_dir:
            print("‚ùå FTP Directory Not Found.")
        else:
            files_to_upload = [f for f in os.listdir('.') if f.endswith('.html') or f.endswith('.shtml') or f.endswith('.htm') or f == CSV_DB]
            for fn in files_to_upload:
                if os.path.exists(fn):
                    with open(fn, "rb") as fh:
                        ftps.storbinary(f"STOR {fn}", fh)
                    print(f"    üì§ Uploaded: {fn}")
            print("\nüéâ MASTER PIPELINE COMPLETE. Check your live site.")
        ftps.quit()
    except Exception as e:
        print(f"‚ùå Upload Failed: {e}")

print("‚úÖ Cell 5 Loaded.")

‚úÖ Cell 5 Loaded.


In [81]:
# @title [CELL 6] Master Orchestrator (Run Button)
print("="*60)
print("      MASTER ORCHESTRATOR (V147)")
print("="*60)

# Run the three modules in sequence
run_engine()
run_forensic_tools()
run_publisher()

      MASTER ORCHESTRATOR (V147)
      [CELL 3] ENGINE STARTING (V147)...
    ‚úÖ Engine complete. 1712 rows generated.
      [CELL 4] BUILDING FORENSIC TOOLS (V147)...
    ‚úÖ Biological Proof built.
      [CELL 5] CORE PUBLISHER & FTP UPLOADER (V150)...
    ‚úÖ Core Registers and Static Pages Built Locally.

[PHASE 3] Uploading via FTP to Live Server...
    üì§ Uploaded: just-trees-az.shtml
    üì§ Uploaded: engine_database.csv
    üì§ Uploaded: data_glossary.shtml
    üì§ Uploaded: dna_network.shtml
    üì§ Uploaded: admin_singletons.shtml
    üì§ Uploaded: dna_theory_of_the_case.htm
    üì§ Uploaded: share_dna.shtml
    üì§ Uploaded: research_admin.html
    üì§ Uploaded: lineage_proof.html
    üì§ Uploaded: subscribe.shtml
    üì§ Uploaded: lineage_hot_paths.html
    üì§ Uploaded: yates_ancestor_register.shtml
    üì§ Uploaded: dna_dossier.html
    üì§ Uploaded: just-trees.shtml
    üì§ Uploaded: brick_wall_buster.shtml
    üì§ Uploaded: biological_proof.html
    ü

In [None]:
# @title [CELL 7] The Time Machine (Archiver + Dropbox Sync)
import zipfile
import os
import pytz
import time
from datetime import datetime
from google.colab import files
from google.colab import userdata

# --- 1. INSTALL DROPBOX (IF MISSING) ---
try:
    import dropbox
    from dropbox.exceptions import AuthError
except ImportError:
    os.system('pip install dropbox')
    import dropbox
    from dropbox.exceptions import AuthError

def run_archiver():
    print("="*60)
    print("      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC")
    print("="*60)

    # --- 2. CREATE ZIP (SAFE MODE) ---
    # We explicitly exclude .zip to prevent "Zip Bombs"
    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css')
    files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

    if not files_to_pack:
        print("‚ùå No generated files found! Run the Publisher (Cell 4) first.")
        return

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    print(f"üì¶ Compressing {len(files_to_pack)} files into {zip_name}...")
    try:
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file in files_to_pack:
                zf.write(file)
        print(f"    ‚úÖ Archive Created: {zip_name} ({os.path.getsize(zip_name)/1024:.1f} KB)")
    except Exception as e:
        print(f"    ‚ùå Compression Failed: {e}")
        return

    # --- 3. FTP UPLOAD (BACKUPS FOLDER) ---
    print("\n[STEP 2] Uploading to Web Server (FTP)...")
    try:
        from ftplib import FTP_TLS
        HOST = os.environ.get("FTP_HOST") or userdata.get("FTP_HOST")
        USER = os.environ.get("FTP_USER") or userdata.get("FTP_USER")
        PASS = os.environ.get("FTP_PASS") or userdata.get("FTP_PASS")

        ftps = FTP_TLS()
        ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()

        try:
            ftps.cwd("/ons-study/backups")
        except:
            try:
                ftps.mkd("/ons-study/backups")
                ftps.cwd("/ons-study/backups")
            except:
                pass

        with open(zip_name, "rb") as fh:
            ftps.storbinary(f"STOR {zip_name}", fh)
        print(f"    ‚úÖ FTP Success: /ons-study/backups/{zip_name}")
        ftps.quit()
    except Exception as e:
        print(f"    ‚ö†Ô∏è FTP Upload skipped: {e}")

    # --- 4. DROPBOX SYNC (NEW) ---
    print("\n[STEP 3] Syncing to Dropbox...")
    try:
        # Initialize with Refresh Token (Long-term access)
        dbx = dropbox.Dropbox(
            app_key=userdata.get('DBX_APP_KEY'),
            app_secret=userdata.get('DBX_APP_SECRET'),
            oauth2_refresh_token=userdata.get('DBX_REFRESH_TOKEN')
        )

        # Upload the Zip
        target_path = f"/Backups/{zip_name}"
        with open(zip_name, "rb") as f:
            dbx.files_upload(f.read(), target_path, mode=dropbox.files.WriteMode.overwrite)

        print(f"    ‚úÖ Dropbox Success: {target_path}")

    except Exception as e:
        print(f"    ‚ùå Dropbox Upload Failed: {e}")
        print("       (Check DBX_APP_KEY, DBX_APP_SECRET, DBX_REFRESH_TOKEN in Colab Secrets)")

    # --- 5. LOCAL DOWNLOAD (SAFETY NET) ---
    print("\n[STEP 4] Triggering Local Download...")
    try:
        files.download(zip_name)
    except Exception as e:
        print(f"    ‚ö†Ô∏è Auto-download blocked: {e}")

    print("‚úÖ Archival Process Complete.")

# Run it
run_archiver()

      [CELL 6] MANUAL ARCHIVER + DROPBOX SYNC
üì¶ Compressing 17 files into Yates_Study_Backup_2026-02-16_2133.zip...
    ‚úÖ Archive Created: Yates_Study_Backup_2026-02-16_2133.zip (744.1 KB)

[STEP 2] Uploading to Web Server (FTP)...
    ‚úÖ FTP Success: /ons-study/backups/Yates_Study_Backup_2026-02-16_2133.zip

[STEP 3] Syncing to Dropbox...
    ‚úÖ Dropbox Success: /Backups/Yates_Study_Backup_2026-02-16_2133.zip

[STEP 4] Triggering Local Download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Archival Process Complete.
