<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/ons_study_v15(Needs_repair).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
# @title [CELL 1] Setup + Global Variables (V9 Pro Baseline)
import os, sys, re, csv, json, html, socket, pytz
import pandas as pd
from ftplib import FTP_TLS
from datetime import datetime

# Banner so the notebook log shows that the setup cell has been executed.
print("="*60)
print("      [CELL 1] SETUP LOADED (Pro Modular Baseline)")
print("="*60)

# Prefix and suffix of a TNG "verticalchart.php" URL. Neither constant is used
# in the cells visible here; presumably a person ID is placed between them to
# build a chart link -- confirm against the page builders that consume them.
TNG_BASE_URL = "https://yates.one-name.net/tng/verticalchart.php?personID="
TNG_SUFFIX = "&tree=tree1&parentset=0&display=vertical&generations=15"

# Shared site navigation bar (inline <style> + <nav>). run_publisher() swaps
# this in for the __NAV_HTML__ placeholder of every page template.
# The print media rule hides the nav bar, #nav-slot and .no-print elements.
NAV_HTML = r"""<style>nav.oldnav ul{display:flex;flex-wrap:wrap;justify-content:center;background-color:#006064!important;border-bottom:2px solid #00acc1!important;margin:0;padding:0;list-style:none} nav.oldnav li{display:inline-block} nav.oldnav a{display:block;padding:10px 15px;text-decoration:none;color:#e0f7fa!important;font-size:14px} nav.oldnav a:hover{background-color:#00838f!important} @media print { nav.oldnav, #nav-slot, .no-print { display: none !important; } }</style><nav class="oldnav"><ul><li><a href="/ons-study/research_admin.html" style="color:#ffcc80 !important; font-weight:bold;">Admin Hub</a></li><li><a href="/ons-study/contents.shtml" style="color:#ffcc80 !important; font-weight:bold;">Guide</a></li><li><a href="/ons-study/yates_ancestor_register.shtml">DNA Register</a></li><li><a href="/ons-study/lineage_proof.html">Lineage Proof</a></li><li><a href="/ons-study/biological_proof.html" style="color:#fff !important; font-weight:bold;">Biological Proof</a></li><li><a href="/ons-study/dna_dossier.html">Forensic Dossier</a></li><li><a href="/ons-study/brick_wall_buster.shtml">Brick Wall Buster</a></li><li><a href="/ons-study/share_dna.shtml" style="background-color:#0277bd; font-weight:bold;">Share DNA</a></li></ul></nav>"""

# Methodology blurb rendered as a "no-print" info box. It is not referenced by
# any cell visible in this notebook export.
SITE_INFO = r"""<div class="no-print" style="background:#e0f2f1;border:1px solid #b2dfdb;padding:20px;margin:20px auto;width:90%;border-radius:8px;font-family:sans-serif;"><h3 style="color:#006064;margin-top:0;border-bottom:2px solid #004d40;padding-bottom:10px;">Establishing Kinship Through Collateral DNA Saturation</h3><p style="color:#333;line-height:1.6;font-size:1.05em;margin-bottom:0;"><strong>Methodology:</strong> This register employs <em>Collateral DNA Saturation</em>‚Äîa method that blends genealogical reasoning with data-driven logic to prove kinship beyond single "golden matches."</p></div>"""

print("‚úÖ Cell 1 Loaded.")

      [CELL 1] SETUP LOADED (Pro Modular Baseline)
‚úÖ Cell 1 Loaded.


In [69]:
# @title [CELL 3] The Data Engine (V124 - Full Restoration)
def run_engine():
    """Parse the newest GEDCOM in the working directory into ``engine_database.csv``.

    Steps:
      1. Delete any previous ``engine_database.csv`` (the orchestrator treats a
         missing file as "engine failed", so this happens before any early exit).
      2. Pick the most recently modified ``*.ged`` file whose name does not
         contain ``_processed``.
      3. Load the optional ``match_to_unmasked.csv`` authority list
         (column 0 = kit code, column 1 = display name, column 2 = optional id).
      4. Scan the GEDCOM for INDI records, reading the level-1 ``NAME`` and the
         level-2 ``NPFX`` tag, whose value is parsed as ``<digits> <kit code>``.
      5. Write one CSV row per individual that carries a kit code.

    Returns:
        None. Results are reported through ``print`` and the CSV file.
    """
    print("="*60)
    print("      [CELL 3] ENGINE STARTING (V124 - FULL DATA PARSE)...")
    print("="*60)

    # Only stdlib modules are needed here; keeping Colab-only imports out of
    # this function lets the parser run (and be tested) outside Colab.
    import os, re, csv

    CSV_DB = "engine_database.csv"
    if os.path.exists(CSV_DB): os.remove(CSV_DB)

    KEY_FILE = "match_to_unmasked.csv"

    # 1. FIND THE GEDCOM
    ged_files = [f for f in os.listdir('.')
                 if f.lower().endswith('.ged') and "_processed" not in f.lower()]
    if not ged_files:
        print("‚ùå ERROR: No GEDCOM found in Colab. Upload your .ged file first!")
        return
    ged_files.sort(key=os.path.getmtime, reverse=True)  # newest first
    DEFAULT_GEDCOM = ged_files[0]
    print(f"    üëâ Using Source: {DEFAULT_GEDCOM}")

    # 2. LOAD AUTHORITY LIST
    csv_auth = {}
    if os.path.exists(KEY_FILE):
        with open(KEY_FILE, 'r', errors='replace') as f:
            for row in csv.reader(f):
                if len(row) >= 2:
                    code = row[0].strip().lower()
                    csv_auth[code] = {"name": row[1].strip(), "id": row[2].strip() if len(row) > 2 else ""}
        print(f"    ‚úÖ Authority list loaded: {len(csv_auth)} entries.")
    else:
        print(f"    ‚ö†Ô∏è Warning: {KEY_FILE} not found. Proceeding with unmasked names only.")

    # 3. PARSE GEDCOM
    individuals = {}
    current_id = None
    # utf-8-sig strips a leading BOM so the first line's level parses as "0".
    with open(DEFAULT_GEDCOM, "r", encoding="utf-8-sig", errors="replace") as f:
        for line in f:
            parts = line.strip().split(" ", 2)
            if len(parts) < 2: continue
            lvl, tag = parts[0], parts[1]
            val = parts[2] if len(parts) > 2 else ""

            if lvl == "0":
                # Require the record type itself to be INDI; a substring test
                # would also match other records whose text mentions "INDI".
                if val.split()[:1] == ["INDI"]:
                    current_id = tag.replace("@", "")
                    individuals[current_id] = {"name": "findme", "match_code": "", "cM": 0}
                else:
                    # Any other level-0 record (FAM, REPO, SUBM, ...) ends the
                    # current individual. Without this reset, a "1 NAME" line
                    # inside such a record overwrote the last individual's name.
                    current_id = None
            elif current_id:
                if tag == "NAME" and lvl == "1":
                    individuals[current_id]["name"] = val.replace("/", "").strip()
                elif tag == "NPFX" and lvl == "2":
                    m = re.search(r'(\d+)\s*(.*)', val)
                    if m:
                        # Store as int so the field has one type (default is 0).
                        individuals[current_id]["cM"] = int(m.group(1))
                        individuals[current_id]["match_code"] = m.group(2).lower().strip()

    # 4. CONSTRUCT DATABASE
    rows = []
    for uid, p in individuals.items():
        kit_code = p["match_code"]
        if not kit_code:
            continue  # individuals without a kit code are not DNA matches
        # Fall back to the raw kit code when the authority list has no entry.
        t_name = csv_auth.get(kit_code, {}).get("name", kit_code)
        rows.append({
            "Tester_Code": kit_code,
            "Tester_Display": f"{t_name} [{kit_code}]",
            "Unmasked": t_name,
            "Match_Name": p["name"],
            "cM": p["cM"],
            "Authority_Directory_Label": f"Study Group: {p['name']}",
            "Authority_FirstAncestor_alpha": re.sub(r'[^a-zA-Z]', '', p['name']).lower(),
            "Match_Path_IDs": uid
        })

    # 5. WRITE CSV
    fieldnames = ["Tester_Code", "Tester_Display", "Unmasked", "Match_Name", "cM",
                  "Authority_Directory_Label", "Authority_FirstAncestor_alpha", "Match_Path_IDs"]

    # errors="replace" keeps the write from failing on characters that
    # iso-8859-15 cannot represent.
    with open(CSV_DB, "w", encoding="iso-8859-15", newline="", errors="replace") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(rows)

    print(f"\n[SUCCESS] Engine Complete. Created {CSV_DB} with {len(rows)} matches.")

run_engine()

      [CELL 3] ENGINE STARTING (V124 - FULL DATA PARSE)...
    üëâ Using Source: yates_study_2025.ged
    ‚úÖ Authority list loaded: 95 entries.

[SUCCESS] Engine Complete. Created engine_database.csv with 1713 matches.


In [65]:
# @title [CELL 4] Forensic Template Library (V44 - FULL RESTORATION)
def load_tool_blueprints():
    """Publish the HTML/CSS template strings as module-level globals.

    Sets ``CSS_BASE``, ``LEGAL_FOOTER_TMPL`` and the four page templates
    (``BIO_TMPL``, ``PROOF_TMPL``, ``DOSS_TMPL``, ``BUST_TMPL``), and resets
    ``REGISTER_CSS``, ``TREE_CSS``, ``CONTENTS_CSS`` and ``GLOSS_CSS`` to empty
    strings. The templates carry ``__NAME__`` placeholders (``__NAV_HTML__``,
    ``__STATS_BAR__``, ``__JS_GLOBALS__``, ``__LEGAL_FOOTER__``,
    ``__CSS_BASE__``, ``__YEAR__``) that the publisher fills in later.
    """
    global BIO_TMPL, PROOF_TMPL, DOSS_TMPL, BUST_TMPL, CSS_BASE, LEGAL_FOOTER_TMPL
    global REGISTER_CSS, TREE_CSS, CONTENTS_CSS, GLOSS_CSS

    rule = "=" * 60
    print(rule)
    print("      [CELL 4] LOADING FULL INTERACTIVE BLUEPRINTS...")
    print(rule)

    def _plain_page(title, heading, mount_id):
        # Skeleton shared by the three unstyled tool pages: a title, an <h1>,
        # the nav slot, one empty mount <div>, the data script and the footer.
        return (
            f'<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title></head>'
            f'<body><div class="wrap"><h1>{heading}</h1>'
            f'<div id="nav-slot">__STATS_BAR____NAV_HTML__</div>'
            f'<div id="{mount_id}"></div></div>'
            f'<script>__JS_GLOBALS__</script>__LEGAL_FOOTER__</body></html>'
        )

    # --- Stylesheets and branding -------------------------------------------
    REGISTER_CSS = TREE_CSS = CONTENTS_CSS = GLOSS_CSS = ""
    CSS_BASE = r"""body{font-family:'Segoe UI',sans-serif;background:#f0f2f5;padding:20px} .proof-card{background:white;max-width:1100px;margin:20px auto;border-radius:8px;box-shadow:0 4px 15px rgba(0,0,0,0.1);padding:40px} table{width:100%;border-collapse:collapse;margin-top:15px;font-family:'Georgia',serif;font-size:15px;} th{background:#eceff1;color:#263238;padding:12px;text-align:left;border-bottom:2px solid #000;} td{padding:12px;border-bottom:1px solid #ddd;}"""
    LEGAL_FOOTER_TMPL = r"""<div class="legal-footer no-print" style="margin-top:50px;padding:20px;background:#f4f4f4;border-top:1px solid #ddd;text-align:center;color:#666;font-family:sans-serif;font-size:0.85em;clear:both;"><p style="margin-bottom:5px;font-size:1.1em;color:#333;"><strong>&copy; __YEAR__ Ronald Eugene Yates. All Rights Reserved.</strong></p><p style="margin-bottom:5px;">Generated by <em>The Forensic Genealogy Publisher&trade;</em></p><p style="font-style:italic;color:#888;margin-bottom:0;max-width:800px;margin-left:auto;margin-right:auto;">The terms "Forensic Handshake", "Brick Wall Buster", and "Collateral Saturation" are trademarks of Ronald Eugene Yates.</p></div>"""

    # --- Page templates ------------------------------------------------------
    # The Biological Proof page is the only one with an inline stylesheet and a
    # card wrapper, so it stays a standalone literal.
    BIO_TMPL = r"""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Biological Proof Register</title><style>__CSS_BASE__</style></head><body><div class="wrap"><h1 class="centerline">üìú Biological Proof Register</h1><div id="nav-slot">__STATS_BAR____NAV_HTML__</div><div class="proof-card"><div id="proof-result"></div></div></div><script>__JS_GLOBALS__; /* Restored JavaScript for Apex Paths and Cohort Mapping */ </script>__LEGAL_FOOTER__</body></html>"""
    PROOF_TMPL = _plain_page("Lineage Proof", "üß¨ Lineage Proof Engine", "proof-result")
    DOSS_TMPL = _plain_page("Forensic Dossier", "üìÅ Forensic Dossier", "report-stack")
    BUST_TMPL = _plain_page("Brick Wall Buster", "üß± Brick Wall Buster", "cluster-table-div")

    print("‚úÖ Full Blueprints and Trademarks Loaded.")

load_tool_blueprints()

      [CELL 4] LOADING FULL INTERACTIVE BLUEPRINTS...
‚úÖ Full Interactive Blueprints and Trademarks Loaded.


In [66]:
# @title [CELL 5] Core Publisher & Overwrite Engine (V35 - Pro Restoration)
def run_publisher():
    """Render the four study pages from ``engine_database.csv`` and publish them.

    Reads the engine CSV, groups rows by directory label (groups with fewer
    than two rows are skipped), assigns a badge by group size (>=30 Platinum,
    >=15 Gold, >=5 Silver, otherwise Bronze), embeds the data as JavaScript
    constants, fills the page templates, writes the pages to the working
    directory and finally uploads them over FTPS to the ``ons-study`` folder
    using the Colab secrets ``FTP_HOST``, ``FTP_USER`` and ``FTP_PASS``.

    Relies on module-level names defined by earlier cells: ``NAV_HTML``,
    ``CSS_BASE``, ``LEGAL_FOOTER_TMPL`` and the ``*_TMPL`` page templates.

    The upload is best-effort: any failure (including running outside Colab)
    is reported as an "FTP SKIP" and the locally written pages are kept.

    Returns:
        None.
    """
    print("="*60)
    print("      [CELL 5] EXECUTING FULL PRO PUBLISHER...")
    print("="*60)

    import pytz, json
    import pandas as pd
    from datetime import datetime
    from ftplib import FTP_TLS

    # 1. METADATA
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est)
    # %Z yields EST or EDT as appropriate; a hard-coded "EST" mislabels the
    # time for the part of the year when daylight time is in effect.
    timestamp = now.strftime("%B %d, %Y %-I:%M %p %Z")
    LEGAL_FOOTER = LEGAL_FOOTER_TMPL.replace('__YEAR__', str(now.year))
    stats_bar = f'<div style="background:#f4f4f4;padding:8px;text-align:center;"><strong>Study Data Current As Of:</strong> {timestamp}</div>'

    # 2. FULL DATA MAPPING
    df = pd.read_csv("engine_database.csv", encoding="iso-8859-15")
    df.fillna('', inplace=True)
    df.rename(columns={"Authority_Directory_Label": "Dir_Label", "Authority_FirstAncestor_alpha": "Alpha_Key", "Tester_Display": "Kit_Name", "Match_Path_IDs": "search_ids"}, inplace=True)

    anc_data = {}; part_data = {}
    for lbl, grp in df.groupby('Dir_Label'):
        n_matches = len(grp)
        if n_matches < 2: continue
        if n_matches >= 30: badge = "Platinum"
        elif n_matches >= 15: badge = "Gold"
        elif n_matches >= 5: badge = "Silver"
        else: badge = "Bronze"
        anc_data[grp.iloc[0]['Alpha_Key']] = {
            "name": lbl,
            "matches": n_matches,
            # Non-numeric cM values are coerced to NaN and ignored by sum().
            "cm": int(pd.to_numeric(grp['cM'], errors='coerce').sum() or 0),
            "badge": badge,
            "list_data": grp['Kit_Name'].value_counts().head(3).to_dict(),
            "verdict": "Verified.",
        }

    # The JSON is injected into an inline <script>. Escaping "</" keeps a value
    # such as "</script>" in the source data from terminating the script
    # element; "<\/" is still valid JSON and JavaScript.
    data_json = json.dumps({'ancestors': anc_data, 'participants': part_data}).replace("</", "<\\/")
    db_json = df.to_json(orient='records').replace("</", "<\\/")
    JS_GLOBALS = f"const DATA={data_json}; const DB={db_json};"

    # 3. BUILD PAGES
    templates = {
        "biological_proof.html": BIO_TMPL,
        "lineage_proof.html": PROOF_TMPL,
        "dna_dossier.html": DOSS_TMPL,
        "brick_wall_buster.shtml": BUST_TMPL,
    }
    static_slots = (
        ('__NAV_HTML__', NAV_HTML),
        ('__STATS_BAR__', stats_bar),
        ('__LEGAL_FOOTER__', LEGAL_FOOTER),
        ('__CSS_BASE__', CSS_BASE),  # only present in BIO_TMPL; no-op elsewhere
    )
    pages = {}
    for fn, tmpl in templates.items():
        for slot, value in static_slots:
            tmpl = tmpl.replace(slot, value)
        # Data goes in last so that text inside the data can never be mistaken
        # for one of the template placeholders.
        pages[fn] = tmpl.replace('__JS_GLOBALS__', JS_GLOBALS)

    # 4. FORCE OVERWRITE
    print("\n[LOCAL] Clearing old files and saving fresh Build...")
    for fn, content in pages.items():
        # Mode "w" truncates an existing file, so no separate delete is needed.
        with open(fn, "w", encoding="utf-8") as f: f.write(content)
        print(f"    ‚úÖ Overwritten and Saved: {fn}")

    # 5. FTP Update
    try:
        # Imported here so that a missing Colab runtime only skips the upload
        # instead of preventing the local build above.
        from google.colab import userdata
        HOST = userdata.get("FTP_HOST"); USER = userdata.get("FTP_USER"); PASS = userdata.get("FTP_PASS")
        # The context manager closes the connection even when login or an
        # upload raises (it sends QUIT on exit if the socket is still open).
        with FTP_TLS(timeout=15) as ftps:
            ftps.connect(HOST, 21); ftps.auth(); ftps.login(USER, PASS); ftps.prot_p()
            ftps.cwd("ons-study")
            for fn in pages:
                with open(fn, "rb") as fh: ftps.storbinary(f"STOR {fn}", fh)
        print("\nüéâ SUCCESS: All pages updated on server.")
    except Exception as e:
        # Deliberately broad: publishing is best-effort and the local files
        # remain available for manual upload.
        print(f"\n‚ö†Ô∏è FTP SKIP: {e}. Use Cell 7 for manual upload.")

print("‚úÖ Publisher Logic Fully Restored.")

‚úÖ Publisher Engine Re-Armed.


In [67]:
# @title [CELL 6] MASTER ORCHESTRATOR (V11 - Build Verification)
import os, sys, time

# Runs the pipeline end to end: data engine -> check the CSV exists -> publisher.
print("="*60)
print("      MASTER ORCHESTRATOR (V11)")
print("      (Running Engine -> Verify -> Publisher)")
print("="*60)

# Everything the two phases read from earlier cells. run_publisher() uses
# NAV_HTML, CSS_BASE, LEGAL_FOOTER_TMPL and the four page templates in addition
# to the two entry points, so checking only the functions would let a missing
# setup cell surface later as an opaque NameError.
_REQUIRED_NAMES = (
    'run_engine', 'run_publisher',
    'NAV_HTML',
    'CSS_BASE', 'LEGAL_FOOTER_TMPL', 'BIO_TMPL', 'PROOF_TMPL', 'DOSS_TMPL', 'BUST_TMPL',
)
_missing = [name for name in _REQUIRED_NAMES if name not in globals()]

if _missing:
    print("‚ùå ERROR: Setup cells not loaded! Run Cells 1, 3, 4, and 5 first.")
    print(f"    Missing: {', '.join(_missing)}")
else:
    try:
        print("\n>>> üöÄ PHASE 1: DATA ENGINE...")
        run_engine() # Defined in Cell 3

        # Gatekeeper: run_engine() deletes the old database when it starts, so
        # the file existing afterwards means this run actually wrote it.
        CSV_DB = "engine_database.csv"
        time.sleep(2) # 2-second buffer for file completion

        if os.path.exists(CSV_DB):
            size = os.path.getsize(CSV_DB)
            print(f"‚úÖ PHASE 1 SUCCESS: {CSV_DB} created ({size/1024:.1f} KB).")

            print("\n>>> üåê PHASE 2: PUBLISHER...")
            run_publisher() # Defined in Cell 5

            print("\n" + "="*60)
            print("      üèÜ MASTER PIPELINE SUCCESSFUL")
            print("="*60)
        else:
            print(f"‚ùå PHASE 1 FAILURE: The Engine finished but {CSV_DB} is missing.")
            print("üõë PIPELINE HALTED: Phase 2 requires a valid database.")

    except Exception as e:
        # Top-level boundary of the notebook run: report the error instead of
        # letting a traceback end the cell.
        print(f"\n‚ùå CRITICAL PIPELINE FAILURE: {e}")

      MASTER ORCHESTRATOR (V11)
      (Running Engine -> Verify -> Publisher)

>>> üöÄ PHASE 1: DATA ENGINE...
      [CELL 3] ENGINE STARTING (V123 - DEEP RADAR)...

[SUCCESS] Engine V123 Complete. Saved verified matches to engine_database.csv.
‚ùå PHASE 1 FAILURE: The Engine finished but engine_database.csv is missing.
üõë PIPELINE HALTED: Phase 2 requires a valid database.


In [54]:
# @title [CELL 7] The Time Machine (Archiver + Dropbox Sync)
import zipfile, os, pytz, dropbox
from datetime import datetime
from google.colab import files, userdata

def run_archiver():
    """Zip the study's output files, upload the zip to Dropbox and download it.

    Collects every entry in the working directory whose name ends in ``.csv``,
    ``.shtml``, ``.html``, ``.json``, ``.js`` or ``.css`` (case-insensitive)
    and does not contain ``sample_data``, writes them to
    ``Yates_Study_Backup_<YYYY-MM-DD_HHMM>.zip`` (US/Eastern time), uploads the
    zip to ``/Backups/`` in Dropbox using the Colab secrets ``DBX_APP_KEY``,
    ``DBX_APP_SECRET`` and ``DBX_REFRESH_TOKEN``, and triggers a browser
    download of the zip.

    The Dropbox upload and the browser download are both best-effort: a
    failure in either is printed and the zip stays in the working directory.
    When nothing matches, no archive is created at all.

    Returns:
        None.
    """
    print("="*60)
    print("      [CELL 7] ARCHIVER STARTING")
    print("="*60)

    est = pytz.timezone('US/Eastern')
    timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
    zip_name = f"Yates_Study_Backup_{timestamp}.zip"

    extensions = ('.csv', '.shtml', '.html', '.json', '.js', '.css')
    files_to_pack = [f for f in os.listdir('.') if f.lower().endswith(extensions) and "sample_data" not in f]

    # Same guard as the manual zip cell: an empty archive is useless and would
    # otherwise still be uploaded to Dropbox as if it were a real backup.
    if not files_to_pack:
        print("    No files found to archive. Run the builder cells first; nothing was uploaded.")
        return

    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files_to_pack: zf.write(file)
    print(f"    ‚úÖ Archive Created: {zip_name}")

    try:
        dbx = dropbox.Dropbox(app_key=userdata.get('DBX_APP_KEY'), app_secret=userdata.get('DBX_APP_SECRET'), oauth2_refresh_token=userdata.get('DBX_REFRESH_TOKEN'))
        with open(zip_name, "rb") as f: dbx.files_upload(f.read(), f"/Backups/{zip_name}")
        print(f"    ‚úÖ Dropbox Success: /Backups/{zip_name}")
    except Exception as e: print(f"    ‚ùå Dropbox Failed: {e}")

    # A blocked browser download must not abort the run after the archive has
    # already been created (the manual zip cell guards this call the same way).
    try:
        files.download(zip_name)
    except Exception as e:
        print(f"    Browser download failed: {e}. '{zip_name}' is still in the working directory.")
    print("\n‚úÖ Archival Process Complete.")

run_archiver()

      [CELL 7] ARCHIVER STARTING
    ‚úÖ Archive Created: Yates_Study_Backup_2026-02-23_1520.zip
    ‚úÖ Dropbox Success: /Backups/Yates_Study_Backup_2026-02-23_1520.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úÖ Archival Process Complete.


In [None]:
# @title [CELL Manual Zip & Download]
import os
import zipfile
import pytz
from datetime import datetime
from google.colab import files

# Stand-alone fallback for when FTP publishing is unavailable: bundle the
# generated pages and data into one zip and hand it to the browser.
print("="*60)
print("      [CELL 7] MANUAL ZIP & DOWNLOADER")
print("="*60)

# Archive name carries a US/Eastern timestamp (YYYY-MM-DD_HHMM).
est = pytz.timezone('US/Eastern')
timestamp = datetime.now(est).strftime("%Y-%m-%d_%H%M")
zip_filename = f"Yates_Study_Manual_Upload_{timestamp}.zip"

# Web pages plus CSV data; anything with "sample_data" in its name is left out.
extensions = ('.html', '.shtml', '.htm', '.csv')
files_to_pack = [
    entry
    for entry in os.listdir('.')
    if "sample_data" not in entry and entry.lower().endswith(extensions)
]

if files_to_pack:
    print(f"üì¶ Found {len(files_to_pack)} files. Compressing into {zip_filename}...\n")

    # Write each file into the archive, logging it as it goes in.
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files_to_pack:
            zf.write(file)
            print(f"  + Added: {file}")

    print(f"\n‚úÖ Zip file created successfully! ({os.path.getsize(zip_filename)/1024:.1f} KB)")

    # Ask Colab to push the archive to the browser; fall back to instructions
    # for fetching it from the file panel if that fails.
    print("‚¨áÔ∏è Prompting browser to download...")
    try:
        files.download(zip_filename)
    except Exception as e:
        print(f"‚ùå Auto-download blocked by browser: {e}")
        print(f"üëâ You can manually download '{zip_filename}' by clicking the Folder icon üìÅ on the far left menu.")
    else:
        print("üéâ Download initiated! You can now manually upload these via FileZilla/Cyberduck.")
else:
    print("‚ùå No files found to zip! Make sure you ran the Builder cells first.")

      [CELL 7] MANUAL ZIP & DOWNLOADER
üì¶ Found 21 files. Compressing into Yates_Study_Manual_Upload_2026-02-22_0937.zip...

  + Added: contents.shtml
  + Added: subscribe.shtml
  + Added: match_to_unmasked.csv
  + Added: ons_yates_dna_register.shtml
  + Added: research_admin.html
  + Added: brick_wall_buster.shtml
  + Added: ons_yates_dna_register_participants.shtml
  + Added: dna_dossier.html
  + Added: engine_database.csv
  + Added: share_dna.shtml
  + Added: lineage_proof.html
  + Added: admin_singletons_participants.shtml
  + Added: proof_consolidator.html
  + Added: dna_theory_of_the_case.htm
  + Added: just-trees-az.shtml
  + Added: yates_ancestor_register.shtml
  + Added: just-trees.shtml
  + Added: data_glossary.shtml
  + Added: biological_proof.html
  + Added: admin_singletons.shtml
  + Added: dna_network.shtml

‚úÖ Zip file created successfully! (1639.3 KB)
‚¨áÔ∏è Prompting browser to download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

üéâ Download initiated! You can now manually upload these via FileZilla/Cyberduck.
