<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/Gold__1_%26_2_%26_3_20251029.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter
!pip install mlxtend
!pip caas_jupyter_tools

ERROR: unknown command "caas_jupyter_tools"


In [3]:
#credentials
#
# SECURITY: passwords must never be stored in the notebook source. The values that
# were previously hard-coded in this cell have been published with the notebook and
# should be treated as compromised: rotate the Gmail app password and the FTP password.
#
# Secrets are now read from (in order): an already-set environment variable, the
# Colab "Secrets" panel, or a hidden interactive prompt.

import os
from getpass import getpass


def _load_secret(name, prompt):
    """Ensure os.environ[name] is set without ever writing the value in the notebook.

    Args:
        name: environment variable / Colab secret name.
        prompt: text shown if the value has to be typed in (input is hidden).
    """
    if os.environ.get(name):
        return
    value = None
    try:
        # google.colab only exists inside Colab; any failure falls back to the prompt.
        from google.colab import userdata
        value = userdata.get(name)
    except Exception:
        value = None
    if not value:
        value = getpass(prompt)
    os.environ[name] = value


# Gmail SMTP creds
_load_secret('GMAIL_USER', 'Gmail address: ')
_load_secret('GMAIL_APP_PASSWORD', 'Gmail app password: ')

# FTPS upload creds (host and port are not secret and keep their previous values)
os.environ['FTP_HOST']       = 'ftp.one-name.net'
os.environ['FTP_PORT']       = '21'
_load_secret('FTP_USER', 'FTP user: ')
_load_secret('FTP_PASS', 'FTP password: ')


In [4]:
# Cell 1 20250513-cell2 is good to use; adding more lineage functionality next
#!/usr/bin/env python
"""
GEDCOM Composite Score Script using:
 - Chunk-based Parallel Processing for Speed (Stage 1: genealogical line creation)
 - A Trie-based approach, then final "Value" = 5 * (number of couples with node.count >=2) + (total couples)

For ancestral lines where none of the couples are repeated (a one-off line), the Value is still computed.
Now, instead of composite scoring, two new columns are added:
  - Value Range (the numeric bracket)
  - Value Label (a descriptive label)

Exports final CSV/HTML sorted by "Yates DNA Ancestral Line", including a 'haplogroup' column.
"""
import csv
import glob
import logging
import functools
import os
from datetime import datetime
from collections import defaultdict, Counter
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from IPython.display import display, Javascript

# Root logging setup for this cell: INFO level, timestamped messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

###############################################################################
# Global Variables
###############################################################################
# Overwritten by GedcomDataset.get_gen_person() with "<Surname><FirstName>" of the
# record it was last called on.
anchor_gen1 = None
# Scratch state for find_parents(); process_record_wrapper() resets both before
# each individual is processed.
visited_pairs = set()       # (father_id, mother_id) couples already recorded
generation_table = []       # (generation, (father_id, mother_id)) in discovery order

###############################################################################
# Trie Data Structure
###############################################################################
class TrieNode:
    """One couple in the prefix tree: a pass-through counter plus child couples."""
    def __init__(self):
        self.children = {}
        self.count = 0


class Trie:
    """Prefix tree over ancestral lines, where each line is a list of couple strings."""
    def __init__(self):
        self.root = TrieNode()

    def insert_line(self, couples_list):
        """Walk/create the path for couples_list, incrementing each visited node's count."""
        node = self.root
        for key in couples_list:
            node = node.children.setdefault(key, TrieNode())
            node.count += 1

    def get_couple_count(self, couples_list):
        """Return the stored count for each couple along the path.

        Stops at the first couple that is not in the trie, appending a single 0
        for it; couples after that point are not reported.
        """
        tally = []
        node = self.root
        for key in couples_list:
            child = node.children.get(key)
            if child is None:
                tally.append(0)
                return tally
            tally.append(child.count)
            node = child
        return tally

###############################################################################
# Utility: chunk generator
###############################################################################
def chunks(lst, n):
    """Yield consecutive slices of lst, each holding at most n items."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

###############################################################################
# GedcomDataset
###############################################################################
class GedcomDataset:
    """Holds the tags extracted for one GEDCOM individual (NAME, FAMC, NPFX).

    The NPFX value is read as ``<cM>&<match-to code>**<haplogroup>``, where the
    ``&...`` and ``**...`` parts are optional (see the get_extractable_* methods).
    A tag stored with value ``None`` is treated the same as a missing tag.
    """

    def __init__(self, gen_person):
        self.gen_person = gen_person          # record pointer, e.g. "@I123@"
        self.extractable_detail = {}          # tag -> raw value
        self.anchor_gen1 = None               # set by get_gen_person()

    def add_extractable_detail(self, key, value):
        """Store the raw value for a tag, replacing any earlier value."""
        self.extractable_detail[key] = value

    def _detail(self, key):
        """Return the stored value for key, or '' when it is missing or None."""
        value = self.extractable_detail.get(key)
        return '' if value is None else value

    def get_gen_person(self):
        """Return the record id without '@' delimiters.

        Side effect: recomputes ``self.anchor_gen1`` ("<Surname><FirstName>" with
        spaces removed) from NAME and copies it to the module-level ``anchor_gen1``.
        """
        global anchor_gen1
        name = self._detail('NAME')
        parts = name.split('/', 1)
        first_name = parts[0].split(' ')[0]
        last_name = parts[1].rstrip('/') if len(parts) > 1 else ""
        self.anchor_gen1 = last_name.replace(" ", "") + first_name.replace(" ", "")
        anchor_gen1 = self.anchor_gen1
        return self.gen_person.strip('@')

    def get_extractable_NPFX(self):
        """Return the raw NPFX value ('' when absent)."""
        return self._detail('NPFX')

    def get_extractable_cm(self):
        """Return the leading cM figure as a string, or '' if it is not an integer."""
        npfx_value = self._detail('NPFX')
        if '&' in npfx_value:
            cm_value = npfx_value.split('&')[0].strip()
        elif '**' in npfx_value:
            cm_value = npfx_value.split('**')[0].strip()
        else:
            cm_value = npfx_value.strip()
        try:
            int(cm_value)
        except ValueError:
            return ''
        return cm_value

    def get_extractable_sort(self):
        """Return the text between '&' and '**' (the "Match to" code), or ''."""
        npfx_value = self._detail('NPFX')
        if '&' not in npfx_value:
            return ''
        sort_part = npfx_value.split('&')[1]
        if '**' in sort_part:
            return sort_part.split('**')[0].strip()
        return sort_part.strip()

    def get_extractable_YDNA(self):
        """Return the text after the first '**' (the haplogroup), or ''."""
        npfx_value = self._detail('NPFX')
        if '**' in npfx_value:
            return npfx_value.split('**')[1].strip()
        return ''

    def get_extractable_FAMC(self):
        """Return the FAMC family pointer without '@' delimiters ('' when absent)."""
        return self._detail('FAMC').strip('@')

###############################################################################
# Gedcom Class
###############################################################################
class Gedcom:
    """Parses a GEDCOM file into GedcomDataset objects and builds the filter pool."""

    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []   # one GedcomDataset per INDI record
        self.filter_pool = []       # datasets that carry an NPFX value (then manually filtered)

    def parse_gedcom(self):
        """Read the file, collect NAME/FAMC/NPFX per individual, and fill filter_pool.

        Prints summary counts. If ``filtered_ids.xlsx`` exists in the working
        directory, filter_pool is further restricted to the ids in its ``ID`` column.

        Returns:
            int: number of NPFX-tagged records without a ``**`` (Y-DNA) marker.
        """
        current_dataset = None
        npfx_count = 0
        ydna_count = 0
        total_count = 0

        with open(self.file_name, 'r', encoding='utf-8-sig') as f:
            for line in f:
                parts = line.strip().split(' ', 2)
                # Blank lines and lines without a numeric level are not GEDCOM
                # records; skip them instead of crashing on int()/parts[1].
                if len(parts) < 2:
                    continue
                try:
                    level = int(parts[0])
                except ValueError:
                    continue
                tag = parts[1]
                value = parts[2] if len(parts) > 2 else None

                if level == 0:
                    if tag.startswith('@') and tag.endswith('@') and value == 'INDI':
                        total_count += 1
                        current_dataset = GedcomDataset(tag)
                        self.gedcom_datasets.append(current_dataset)
                    else:
                        # Any other level-0 record (FAM, SUBM, SOUR, ...) ends the
                        # current individual, so its sub-tags (e.g. a submitter's
                        # "1 NAME") cannot overwrite that individual's details.
                        current_dataset = None
                elif current_dataset is not None:
                    if level == 1 and tag in ('NAME', 'FAMC'):
                        # A tag with no value is stored as '' rather than None.
                        current_dataset.add_extractable_detail(tag, value or '')
                    elif level == 2 and tag == 'NPFX' and value:
                        npfx_count += 1
                        current_dataset.add_extractable_detail(tag, value)
                        if '**' in value:
                            ydna_count += 1

        autosomal_count = npfx_count - ydna_count
        print(f"GEDCOM contained {total_count} total records")
        print(f"Records tagged and filtered by NPFX: {npfx_count}")
        print(f"Records with YDNA information: {ydna_count}")
        print(f"Autosomal matches: {autosomal_count}")

        for ds in self.gedcom_datasets:
            if ds.get_extractable_NPFX():
                self.filter_pool.append(ds)

        # Optional second-level filter driven by a spreadsheet of record ids.
        manual_filter_activated = True
        if manual_filter_activated:
            try:
                df = pd.read_excel('filtered_ids.xlsx')
            except FileNotFoundError:
                logger.warning("filtered_ids.xlsx not found. Skipping second-level manual filter.")
            else:
                manual_filtered_ids = set(df['ID'])
                self.filter_pool = [d for d in self.filter_pool if d.get_gen_person() in manual_filtered_ids]
                print(f"After manual filter, total records: {len(self.filter_pool)}")
                logger.info(f"After manual filter, total records: {len(self.filter_pool)}")

        return autosomal_count

###############################################################################
# quick_extract_name
###############################################################################
def quick_extract_name(full_text):
    """Build a compact "<Surname><Given>" label from a record's first ``1 NAME`` line.

    Args:
        full_text: raw text of one GEDCOM record.

    Returns:
        ``"UnknownName"`` when there is no ``1 NAME`` line. Otherwise the surname
        (text after the first '/') and the given part (text before it), each cut to
        10 characters with spaces removed, concatenated surname first. If the name
        has no '/', the first 10 characters of the line with spaces removed.
    """
    name_marker = "\n1 NAME "
    idx = full_text.find(name_marker)
    if idx != -1:
        start = idx + len(name_marker)
    elif full_text.startswith("1 NAME "):
        # No leading newline here, so the offset is the length of the bare tag;
        # using len(name_marker) would drop the first character of the name.
        start = len("1 NAME ")
    else:
        return "UnknownName"
    end = full_text.find('\n', start)
    if end == -1:
        end = len(full_text)
    name_line = full_text[start:end].strip()
    if '/' not in name_line:
        return name_line[:10].replace(" ", "")
    first_name, last_name = name_line.split('/', 1)
    last_name = last_name.replace("/", "").strip()
    return last_name[:10].replace(" ", "") + first_name[:10].replace(" ", "")

###############################################################################
# Parents & Ancestors
###############################################################################
def find_parents(individual_id, generation, parents_map):
    """Record every ancestor couple of individual_id into the module-level tables.

    Appends ``(generation, (father_id, mother_id))`` to ``generation_table`` the
    first time a couple is seen and tracks seen couples in ``visited_pairs``.

    Args:
        individual_id: id whose parents are looked up.
        generation: generation number to record for this individual's parents.
        parents_map: dict of child id -> (father_id, mother_id); ids may be None.
    """
    global visited_pairs, generation_table
    if individual_id not in parents_map:
        return
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return
    pair = (father_id, mother_id)
    if pair in visited_pairs:
        # This couple's ancestors were already walked when the couple was first
        # recorded. Walking them again adds nothing, costs exponential time when
        # the same ancestors are reached by several routes, and never terminates
        # if the data contains a parent/child loop.
        return
    visited_pairs.add(pair)
    generation_table.append((generation, pair))
    if father_id:
        find_parents(father_id, generation + 1, parents_map)
    if mother_id:
        find_parents(mother_id, generation + 1, parents_map)

def find_distant_ancestors(individual_id, parents_map, path=None):
    """Enumerate every root-ward path from individual_id through parents_map.

    Args:
        individual_id: id to start from.
        parents_map: dict of child id -> (father_id, mother_id); ids may be None.
        path: ids already walked (used by the recursion; this list is extended
            in place).

    Returns:
        list[list]: one id list per path, each starting at the original individual
        and ending at an ancestor with no recorded parents. If the data loops back
        onto an id that is already on the path, that path ends just before the
        repeat instead of recursing forever.
    """
    if path is None:
        path = []
    if individual_id in path:
        # Cycle guard: corrupt data with a parent/child loop must not recurse endlessly.
        return [path]
    path.append(individual_id)
    if individual_id not in parents_map:
        return [path]
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return [path]
    paths = []
    if father_id:
        paths.extend(find_distant_ancestors(father_id, parents_map, path[:]))
    if mother_id:
        paths.extend(find_distant_ancestors(mother_id, parents_map, path[:]))
    return paths if paths else [path]

###############################################################################
# filter_ancestral_line
###############################################################################
def filter_ancestral_line(winning_path_ids, generation_table_local, names_map):
    """Render the couples that lie on the chosen path as one '~~~'-joined string.

    A couple is kept when either partner's id is in winning_path_ids. Couples are
    ordered by generation number (ties keep table order) and then emitted in
    reverse, i.e. highest generation number first. Each couple is written as
    "<name1>&<name2>"; ids missing from names_map appear as "UnknownName".
    """
    on_line = [
        (generation, (id1, id2))
        for generation, (id1, id2) in generation_table_local
        if id1 in winning_path_ids or id2 in winning_path_ids
    ]
    on_line.sort(key=lambda entry: entry[0])  # list.sort is stable

    labels = []
    for _, couple in on_line:
        first, second = (names_map.get(pid, "UnknownName") for pid in couple)
        labels.append(f"{first}&{second}")
    return "~~~".join(reversed(labels))

###############################################################################
# process_record_wrapper (parallel) - STAGE 1
###############################################################################
def process_record_wrapper(individual_id, gedcom_instance, parents_map, names_map):
    """Stage 1 worker: build one output row for a single individual.

    Resets the module-level ``generation_table`` / ``visited_pairs``, collects the
    individual's ancestor couples, picks the ancestor path that scores highest for
    names containing 'Yates' (a match at 1-based position k adds k; the first path
    wins ties), and renders the couples on that path.

    Returns:
        list: [ID#, Match to, Name, cM, Yates DNA Ancestral Line, haplogroup]
    """
    global generation_table, visited_pairs, anchor_gen1
    generation_table, visited_pairs = [], set()

    find_parents(individual_id, 1, parents_map)
    candidate_paths = find_distant_ancestors(individual_id, parents_map)

    def yates_weight(path):
        return sum(
            position
            for position, pid in enumerate(path, start=1)
            if 'Yates' in names_map.get(pid, "UnknownName")
        )

    # max() keeps the first of equally scored paths, like a strict ">" scan.
    winner = max(candidate_paths, key=yates_weight, default=None) or []

    ancestor_ids = {pid for pid in winner if pid != individual_id}
    line_str = filter_ancestral_line(ancestor_ids, generation_table, names_map)

    # First dataset in the filter pool whose id matches supplies the NPFX-derived fields.
    dataset = next(
        (ds for ds in gedcom_instance.filter_pool if ds.get_gen_person() == individual_id),
        None,
    )
    if dataset is None:
        cm_value = sort_value = ydna_value = ''
    else:
        cm_value = dataset.get_extractable_cm()
        sort_value = dataset.get_extractable_sort()
        ydna_value = dataset.get_extractable_YDNA()

    # Return columns: ID#, Match to, Name, cM, Yates DNA Ancestral Line, haplogroup
    return [
        individual_id,
        sort_value,
        names_map.get(individual_id, "UnknownName"),
        cm_value,
        line_str,
        ydna_value,
    ]

###############################################################################
# main()
###############################################################################
def main():
    """Run Stage 1 end to end: parse the GEDCOM, build ancestral lines, score, export.

    Files written to the current working directory:
      * ``autosomal_count.txt`` - the autosomal count returned by the parser
      * ``final_combined_df_with_value_labels.csv``
      * ``HTML_combined_df_with_value_labels.html``

    Returns None. Prints a message and returns early when no ``*.ged`` file exists.
    """
    line_col = "Yates DNA Ancestral Line"

    def select_gedcom():
        # sorted() makes "the first file" deterministic; glob order depends on the filesystem.
        files = sorted(glob.glob("*.ged"))
        if not files:
            print("No GEDCOM files found.")
            return None
        print("Automatically selecting the first GEDCOM file.")
        return files[0]

    def pointer_after(text, marker):
        """Return the id following marker (up to the next '@'), or None if marker is absent."""
        pos = text.find(marker)
        if pos == -1:
            return None
        begin = pos + len(marker)
        return text[begin:text.find('@', begin)]

    def split_line(line_str):
        """Split a '~~~'-joined line into stripped, non-empty couple strings."""
        if pd.isna(line_str) or not line_str.strip():
            return []
        return [x.strip() for x in line_str.split("~~~") if x.strip()]

    def assign_value_range_label(val):
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "", ""
        if v >= 60: return ">=60", "1-likely correct"
        if 47 <= v <= 59: return "59~47", "2-lines forming"
        if 34 <= v <= 46: return "46~34", "3-patterns emerging"
        if 21 <= v <= 33: return "33~21", "4-notable patterns"
        if 8 <= v <= 20: return "20~8", "5-patterns stable"
        if 1 <= v <= 7:  return f"{v:.0f}", "6-need research"
        return f"{v:.0f}", "0-uncategorized"

    gedcom_file_path = select_gedcom()
    if not gedcom_file_path:
        print("No GEDCOM file selected; exiting.")
        return

    ged = Gedcom(gedcom_file_path)
    autosomal_count = ged.parse_gedcom()
    filter_count = len(ged.filter_pool)

    with open("autosomal_count.txt", "w") as f:
        f.write(str(autosomal_count))

    print("Records tagged and filtered by NPFX:", filter_count)

    # Same encoding as Gedcom.parse_gedcom, so a leading BOM is stripped here too.
    with open(gedcom_file_path, 'r', encoding='utf-8-sig') as f:
        raw_data = f.read()

    # ---- Index raw records by id -------------------------------------------
    all_records = {}
    for blk in raw_data.split('\n0 '):
        blk = blk.strip()
        if not blk:
            continue
        first_line = blk.split('\n', 1)[0]
        if '@' in first_line:
            start = first_line.find('@') + 1
            end = first_line.find('@', start)
            rec_id = first_line[start:end].strip()
            all_records[rec_id] = blk

    names_map = {
        rec_id: quick_extract_name("\n" + txt)
        for rec_id, txt in all_records.items()
    }

    # ---- child id -> (father id, mother id) ---------------------------------
    parents_map = {}
    for txt in all_records.values():
        # Only records whose header line ends in "FAM" are families. A substring
        # test on the first characters also matched individuals (FAMC/FAMS lines).
        header = txt.split('\n', 1)[0].split()
        if not header or header[-1] != 'FAM':
            continue
        father_id = pointer_after(txt, '1 HUSB @')
        mother_id = pointer_after(txt, '1 WIFE @')
        for ln in txt.split('\n'):
            if ln.strip().startswith('1 CHIL @'):
                parents_map[ln.split('@')[1]] = (father_id, mother_id)

    individual_ids = [d.get_gen_person() for d in ged.filter_pool]
    print(f"Processing {len(individual_ids)} individuals with chunk-based parallel...")

    # ---- Stage 1: one row per individual, computed in worker processes ------
    combined_rows = []
    chunk_size = 50
    max_workers = os.cpu_count() or 4
    logger.info("Starting chunk-based parallel processing with %d workers.", max_workers)

    # The bound worker does not change between chunks, so build it once.
    func = functools.partial(process_record_wrapper, gedcom_instance=ged, parents_map=parents_map, names_map=names_map)
    with ProcessPoolExecutor(max_workers=max_workers) as executor, tqdm(total=len(individual_ids), desc="Building Yates Lines (Stage 1)") as pbar:
        for chunk in chunks(individual_ids, chunk_size):
            combined_rows.extend(executor.map(func, chunk))
            pbar.update(len(chunk))

    columns = ["ID#", "Match to", "Name", "cM", "Yates DNA Ancestral Line", "haplogroup"]
    df = pd.DataFrame(combined_rows, columns=columns)
    df.index += 1

    # Drop this fixed leading run of couples from any line that starts with it.
    prefix = "YatesJohn&SearchingStill~~~YatesWilliam&SearchingStill~~~YatesWilliam&SearchingStill~~~YatesEdmund&CornellMargaret~~~YatesRichard&AshendonJoan~~~YatesJohn&HydeAlice~~~YatesThomas&FauconerElizabeth~~~"
    df[line_col] = df[line_col].map(
        lambda line: line[len(prefix):] if line.startswith(prefix) else line
    )

    # ---- Scoring ------------------------------------------------------------
    logger.info("Building Trie from reversed lines...")
    couples_per_row = [split_line(line) for line in df[line_col]]
    trie = Trie()
    for couples in couples_per_row:
        if couples:
            trie.insert_line(couples)

    logger.info("Computing 'Value' = 5*(#couples with node.count >=2) + (total couples) ...")
    values = []
    for couples in couples_per_row:
        shared = sum(1 for c in trie.get_couple_count(couples) if c >= 2)
        values.append(5 * shared + len(couples))
    df["Value"] = values

    # Built column by column so an empty result set still exports (unpacking
    # zip(*...) raises ValueError when there are no rows).
    labelled = [assign_value_range_label(v) for v in df["Value"]]
    df["Value Range"] = [value_range for value_range, _ in labelled]
    df["Value Label"] = [value_label for _, value_label in labelled]

    df = df.sort_values(by=[line_col])

    # ---- Export -------------------------------------------------------------
    csv_name = "final_combined_df_with_value_labels.csv"
    df.to_csv(csv_name, index=False)
    logger.info("Exported final DataFrame to '%s'.", csv_name)

    html_name = "HTML_combined_df_with_value_labels.html"
    css_style = """
    <style>
    table { width: 100%; border-collapse: collapse; margin: 20px 0; }
    table, th, td { border: 1px solid #333; }
    th, td { padding: 8px 12px; text-align: center; }
    th { background-color: #f2f2f2; }
    /* Left-align the last column */
    td:nth-child(7) { text-align: left; }
    </style>
    """
    final_cols = ["ID#", "cM", "haplogroup", "Match to", "Value Range", "Value Label", "Yates DNA Ancestral Line"]
    html_content = css_style + df.to_html(index=False, columns=final_cols, escape=False)
    with open(html_name, "w", encoding="utf-8") as f:
        f.write(html_content)
    logger.info("Exported HTML to '%s'.", html_name)

if __name__ == '__main__':
    main()
    try:
        display(Javascript('alert("✅ GEDCOM processing (and HTML export) is complete!");'))
    except Exception:
        # The browser alert is purely cosmetic; ignore failures to show it, but let
        # KeyboardInterrupt / SystemExit through (a bare "except:" swallowed those).
        pass

import os
import pandas as pd
import smtplib, ssl
from email.mime.text import MIMEText

def send_email(subject, body, to_addr):
    """Send a plain-text email through Gmail over implicit TLS (port 465).

    The sender address and app password are read from the GMAIL_USER and
    GMAIL_APP_PASSWORD environment variables (KeyError if either is unset).
    """
    gmail_user = os.environ['GMAIL_USER']
    app_password = os.environ['GMAIL_APP_PASSWORD']

    message = MIMEText(body)
    message['Subject'] = subject
    message['From'] = gmail_user
    message['To'] = to_addr

    tls_context = ssl.create_default_context()
    with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=tls_context) as smtp:
        smtp.login(gmail_user, app_password)
        smtp.send_message(message)

# Email summary (only total lines)
# Re-read the CSV that main() just exported and report its row count.
df_summary = pd.read_csv("final_combined_df_with_value_labels.csv")
total = len(df_summary)
summary = f"GEDCOM processing complete!\n\nTotal lines: {total}"

# The report is mailed to the sending account itself (GMAIL_USER).
send_email(
    subject="✅ Cell #1 Report Ready",
    body=summary,
    to_addr=os.environ['GMAIL_USER']
)





Automatically selecting the first GEDCOM file.
GEDCOM contained 62184 total records
Records tagged and filtered by NPFX: 1556
Records with YDNA information: 1
Autosomal matches: 1555
After manual filter, total records: 7
Records tagged and filtered by NPFX: 7
Processing 7 individuals with chunk-based parallel...


Building Yates Lines (Stage 1): 100%|██████████| 7/7 [00:04<00:00,  1.57it/s]


<IPython.core.display.Javascript object>

In [10]:
#Cell2

# ==============================================================================
# == Gold 2 Refactored - Single Cell Version (COMPLETE & FINAL v2)
# ==============================================================================
#
# == Instructions:
# 1. Add your FTP credentials to the Colab "Secrets" tab (🔑 icon):
#    - FTP_HOST
#    - FTP_USER
#    - FTP_PASS
#    - FTP_DIR (e.g., "gengen" or leave blank for root - this is optional)
# 2. Upload your "final_combined_df_with_value_labels.csv" file.
# 3. Upload your "autosomal_count.txt" file.
# 4. Run this single cell.
#
# ==============================================================================

# ------------------------------------------------------------------------------
# -- [0/7] SECURE CREDENTIAL LOADING
# ------------------------------------------------------------------------------
import os
from google.colab import userdata

# --- This is the new, secure way ---
# It loads secrets from the 🔑 menu and puts them into os.environ.
# Each secret is loaded on its own, so one missing entry no longer prevents the
# others from being loaded.
_missing_secrets = []
for _secret_name in ('FTP_HOST', 'FTP_USER', 'FTP_PASS'):
    try:
        os.environ[_secret_name] = userdata.get(_secret_name)
    except userdata.SecretNotFoundError:
        # Only a problem if nothing else has put the value in os.environ already.
        if not os.environ.get(_secret_name):
            _missing_secrets.append(_secret_name)

if _missing_secrets:
    print("❌ ERROR: Critical secrets (FTP_HOST, FTP_USER, FTP_PASS) were not found.")
    print("Please check the '🔑 Secrets' panel on the left and ensure they are spelled correctly.")
    print(f"Missing: {', '.join(_missing_secrets)}")
    # We don't raise here, to allow 'local save only' mode

# Load optional FTP_DIR secret
try:
    os.environ['FTP_DIR'] = userdata.get('FTP_DIR')
except userdata.SecretNotFoundError:
    os.environ['FTP_DIR'] = '' # Default to empty string if not set

# ------------------------------------------------------------------------------
# -- [1/7] IMPORTS (All modules loaded at the top)
# ------------------------------------------------------------------------------
import re, json, time, io, posixpath, socket
import pandas as pd
from ftplib import FTP_TLS
import urllib.parse as _u
import html as _html
import traceback

# ------------------------------------------------------------------------------
# -- [2/7] CONFIG + FTP + CONSTANTS + RULES
# ------------------------------------------------------------------------------

# Column name defaults used across sections
subject_code_col = "Match to"
path_col = "Yates DNA Ancestral Line"

# Layout
TABLE_WIDTH_PX = 3150
COL_A_PX = 1100  # width for "Match Summary" column in HTML

# Data / Output
CSV_PATH = "final_combined_df_with_value_labels.csv"  # default input
LOCAL_NAME = "ons_yates_dna_register.htm"             # main page filename (local)
REMOTE_NAME = "ons_yates_dna_register.htm"            # remote filename (same name)
# Single definition of the lineage column header. An earlier assignment
# (LINEAGE_HEADER = path_col) was immediately overwritten by this one, so it was
# removed to avoid two conflicting definitions.
LINEAGE_HEADER = "Lineage (Starting with oldest ancestor, the line is:)"
ARROW_ENTITY = "&rarr;"
REMOVE_PERIOD_AT_END = True

# Local exports
MATCH_COUSINS_CSV = "the_match_cousins.csv"  # Column A (what’s shown)

# Remote I/O toggles (same creds/dir as HTML upload)
REMOTE_READ = True      # Pull resolver CSV from server before processing
UPLOAD_COLUMN_A = True  # Push the_match_cousins.csv to server after build

# TNG link pieces for matchee hotlink
TNG_BASE = "https://yates.one-name.net/tng"
TNG_TREE = "tree1"
HOME_URL = "https://yates.one-name.net/ons_yates_dna_register.htm"

# FTP settings (hard timeouts)
FTP_DIR = os.environ.get("FTP_DIR", "").strip()  # e.g., "gengen" or ""
FTP_TIMEOUT = int(os.environ.get("FTP_TIMEOUT", "30"))  # connect + socket timeout
FTP_PASSIVE = True  # passive safest behind NAT

# ---------- Autosomal Count (Colab → Website) ----------
LOCAL_COUNT_FILE = "/content/autosomal_count.txt"  # produced in Colab
REMOTE_COUNT_NAME = "autosomal_count.txt"          # destination on host
COUNT_PUBLIC_URL = f"/{FTP_DIR}/{REMOTE_COUNT_NAME}" if FTP_DIR else f"/{REMOTE_COUNT_NAME}"

# ---------- Resolver (single source: website) ----------
SERVER_PARTIALS_DIR = "partials"
SERVER_MAPPING_BASENAME = "match_to_unmasked.csv"
SERVER_MAPPING_REMOTE = posixpath.join(SERVER_PARTIALS_DIR, SERVER_MAPPING_BASENAME)
SERVER_MAPPING_LOCAL_CACHE = "match_to_unmasked.server.csv"  # temp file we download to

# ---------- FTP helpers (with timeouts & cleanup) ----------
def ftp_connect():
    """Open an explicit-TLS FTP session and change into FTP_DIR if one is set.

    Reads FTP_HOST, FTP_USER, FTP_PASS (required) and FTP_PORT, FTP_DIR (optional)
    from os.environ. If changing into FTP_DIR fails, each path component is
    created (best effort) and entered in turn.

    Returns:
        FTP_TLS: a logged-in connection; the caller is responsible for closing it.

    Raises:
        Whatever connect/login/cwd raise. The socket is closed before the error
        propagates, so a failed attempt does not leak a connection.
    """
    # Secrets are already loaded in os.environ from the top of the script
    ftp_dir = os.environ.get("FTP_DIR", "").strip()
    ftps = FTP_TLS(timeout=FTP_TIMEOUT)
    # Kept from the original: also sets the process-wide default socket timeout.
    socket.setdefaulttimeout(FTP_TIMEOUT)
    try:
        ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT', 21)))
        ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
        try:
            ftps.prot_p()  # secure data channel if supported
        except Exception as exc:
            # Still best effort, but no longer silent: without PROT P the file
            # contents travel unencrypted.
            print(f"⚠️ Could not secure the FTP data channel (PROT P failed): {exc}")
        try:
            ftps.set_pasv(FTP_PASSIVE)
        except Exception:
            pass
        if ftp_dir:
            try:
                ftps.cwd(ftp_dir)
            except Exception:
                for part in (p for p in ftp_dir.split("/") if p):
                    try:
                        ftps.mkd(part)
                    except Exception:
                        pass  # most likely the directory already exists
                    ftps.cwd(part)
    except Exception:
        ftps.close()
        raise
    return ftps

def _remote_path(name: str) -> str:
    FTP_DIR = os.environ.get("FTP_DIR", "").strip()
    return posixpath.join(FTP_DIR, name) if FTP_DIR else name

def ftp_download_if_exists(ftps: FTP_TLS, remote_name: str, local_name: str) -> bool:
    """Fetch remote_name into local_name via RETR.

    Returns True on success. On any error prints a notice, removes local_name if
    it exists (it is opened for writing before the transfer starts), and returns
    False.
    """
    try:
        with open(local_name, "wb") as sink:
            ftps.retrbinary("RETR " + remote_name, sink.write)
        print(f"⬇️ Pulled remote file: {remote_name} → {os.path.abspath(local_name)}")
    except Exception as err:
        print(f"ℹ️ Remote not found or unreadable: {remote_name} ({err})")
        # Best-effort cleanup of the empty or partial local file.
        try:
            if os.path.exists(local_name):
                os.remove(local_name)
        except Exception:
            pass
        return False
    return True

def ftp_upload_overwrite(ftps: FTP_TLS, local_path: str, remote_name: str):
    """Send local_path to the server as remote_name via STOR.

    Raises:
        RuntimeError: wrapping any failure to open the file or transfer it.
    """
    command = f"STOR {remote_name}"
    try:
        with open(local_path, "rb") as source:
            ftps.storbinary(command, source)
        print(f"⬆️ Uploaded: {local_path} → {remote_name}")
    except Exception as err:
        raise RuntimeError(f"Upload failed for {local_path} → {remote_name}: {err}")

# ------------------------------------------------------------------------------
# -- [3/7] RESOLVER + NAME HELPERS
# ------------------------------------------------------------------------------
def _read_mapping_csv(path: str) -> pd.DataFrame:
    encs = ("iso-8859-15", "utf-8-sig", "utf-8", "cp1252", "latin1")
    last = None
    for enc in encs:
        try:
            df = pd.read_csv(path, encoding=enc, dtype=str, keep_default_na=False)
            break
        except Exception as e:
            last = e
            df = None
    if df is None:
        raise RuntimeError(f"Unable to read mapping CSV {path}: {last}")
    if df.shape[1] < 2:
        raise RuntimeError("Mapping CSV must have at least two columns: code, unmasked")

    df = df.iloc[:, :2].copy()
    df.columns = ["code", "unmasked"]
    df["code"] = df["code"].astype(str).str.strip().str.lower()
    df["unmasked"] = df["unmasked"].astype(str).str.strip()
    df = df[df["code"] != ""].drop_duplicates(subset=["code"], keep="first")
    if df.empty:
        raise RuntimeError("Mapping CSV is empty after normalization.")
    return df

def load_resolver_from_server() -> dict:
    """Download the resolver CSV from the server and return it as {code: unmasked}.

    Raises:
        RuntimeError: if the FTP session fails, the file is not on the server, or
            the downloaded CSV cannot be parsed (see _read_mapping_csv).
    """
    try:
        with ftp_connect() as ftps:
            # ftp_connect() has already changed into FTP_DIR, so the path is given
            # relative to that directory. Prefixing FTP_DIR again (via
            # _remote_path) would look in FTP_DIR/FTP_DIR/partials/...
            ok = ftp_download_if_exists(
                ftps,
                SERVER_MAPPING_REMOTE,
                SERVER_MAPPING_LOCAL_CACHE
            )
            # Quit explicitly and ignore errors, so a failing QUIT on leaving the
            # "with" block is not reported as a connection failure after a
            # successful download.
            try:
                ftps.quit()
            except Exception:
                pass
    except Exception as e:
         raise RuntimeError(
            f"FTP connection failed while trying to load resolver. Check FTP_HOST/USER/PASS env vars. Error: {e}"
         ) from e

    if not ok:
        raise RuntimeError(
            f"Resolver not found on server: /{_remote_path(SERVER_MAPPING_REMOTE)}. "
            f"Upload {SERVER_MAPPING_BASENAME} to /{SERVER_PARTIALS_DIR}/ and re-run."
        )

    df_map = _read_mapping_csv(SERVER_MAPPING_LOCAL_CACHE)
    print(f"✅ Loaded resolver from server: {len(df_map)} codes")
    return dict(zip(df_map["code"], df_map["unmasked"]))  # single source of truth

# Global resolver dict (loaded once, when this cell runs; requires FTP access)
MATCH_TO_UNMASKED = load_resolver_from_server()

# ---------- Helpers ----------
def find_col(df, patterns, prefer_exact=None):
    """Locate a column name in df.

    Names in prefer_exact are tried first, in order: an exact match, then a
    case-insensitive match. After that each regex in patterns is tried in order
    (case-insensitive search) against the columns in frame order.

    Returns:
        The matching column name, or None if nothing matches.
    """
    columns = list(df.columns)
    by_lower = {col.lower(): col for col in columns}

    for wanted in (prefer_exact or ()):
        if wanted in df.columns:
            return wanted
        if wanted and wanted.lower() in by_lower:
            return by_lower[wanted.lower()]

    for pattern in patterns:
        matcher = re.compile(pattern, re.I)
        hit = next((col for col in columns if matcher.search(col)), None)
        if hit is not None:
            return hit
    return None

# Separators between lineage tokens: arrows (literal or &rarr;), ';', '>', ',',
# or runs of two or more '~', '/' or '|' - with any surrounding whitespace.
SEP_RE = re.compile(r"\s*(?:→|&rarr;|\u2192|;|>|,|~{2,}|/{2,}|\|{2,})\s*")
def split_tokens(s):
    """Split a lineage string on SEP_RE; returns stripped, non-empty pieces ([] for NA)."""
    if pd.isna(s):
        return []
    text = s if isinstance(s, str) else str(s)
    trimmed = (piece.strip() for piece in SEP_RE.split(text))
    return [piece for piece in trimmed if piece]

def _clean_piece(text: str) -> str:
    """Replace runs of '~' with a space, collapse whitespace, and trim."""
    t = re.sub(r'~+', ' ', str(text))
    t = re.sub(r'\s+', ' ', t)
    return t.strip()

# Name particles kept lower-case when they are not the first word of a name.
_PARTICLES = {"de","del","della","der","van","von","da","dos","das","di","la","le","du","of"}

def _smart_title(token: str) -> str:
    """Title-case one name word, with O'Brien / McDonald / MacDonald style handling.

    Each hyphen-separated part is capitalized first; the special cases are applied
    afterwards. Previously capitalize() ran after the apostrophe rule (undoing it)
    and before the lower-case-only "mc"/"mac" patterns (which then never matched),
    so "o'brien" came out as "O'brien" and "mcdonald" as "Mcdonald".
    """
    if not token:
        return token
    token = "-".join(part.capitalize() for part in token.split("-"))
    # Single letter + apostrophe + letter at a word start: O'brien -> O'Brien.
    token = re.sub(
        r"\b([A-Za-z])(['’])([a-z])",
        lambda m: m.group(1).upper() + m.group(2) + m.group(3).upper(),
        token,
    )
    token = re.sub(r"\bMc([a-z])", lambda m: "Mc" + m.group(1).upper(), token)
    # "Mac" is only treated as a prefix when at least three letters follow, so
    # short names such as Mack, Macy or Macon are left alone.
    token = re.sub(r"\bMac([a-z])(?=[a-z]{2})", lambda m: "Mac" + m.group(1).upper(), token)
    return token

def smart_titlecase(name: str) -> str:
    """Title-case a full name.

    "Last, First" input is reordered to "First Last". Words listed in _PARTICLES
    stay lower-case unless they are the first word; every other word goes through
    _smart_title.
    """
    name = _clean_piece(name)
    if not name:
        return name
    if "," in name:
        last, first = [p.strip() for p in name.split(",", 1)]
        pieces = (first + " " + last).split()
    else:
        pieces = name.split()
    out = []
    for i, w in enumerate(pieces):
        out.append(w.lower() if (i > 0 and w.lower() in _PARTICLES) else _smart_title(w))
    return " ".join(out)

def surname_given_from_token(token):
    """Turn a fused "SurnameGiven" token into "Given Surname".

    The split point is the first lower-case to upper-case transition or, failing
    that, the first upper-case letter after position 0. Additional capitals inside
    the given part get a space in front of them.

    Returns:
        A 1-tuple with the rearranged name, or with the stripped token unchanged
        when it is empty or has no split point.
    """
    token = token.strip()
    if not token:
        return (token,)

    positions = range(1, len(token))
    split_at = next(
        (i for i in positions if token[i - 1].islower() and token[i].isupper()),
        None,
    )
    if split_at is None:
        split_at = next((i for i in positions if token[i].isupper()), None)
    if split_at is None:
        return (token,)

    surname, given = token[:split_at], token[split_at:]
    given_spaced = re.sub(r'(?<!^)([A-Z])', r' \1', given)
    return (f"{given_spaced.strip()} {surname.strip()}",)

def normalize_person_name(s: str) -> str:
    """Return a display form of a person name ("" for NA).

    The text is cleaned, "Last, First" is reordered, a single purely alphabetic
    word is treated as a fused "SurnameGiven" token and split, and the result is
    title-cased.
    """
    if pd.isna(s):
        return ""
    cleaned = _clean_piece(str(s))
    if "," in cleaned:
        surname, _, given = cleaned.partition(",")
        cleaned = f"{given.strip()} {surname.strip()}"
    is_fused_token = " " not in cleaned and cleaned.isalpha()
    if is_fused_token:
        cleaned = surname_given_from_token(cleaned)[0]
    return smart_titlecase(cleaned)

def truncate_first(name: str, n: int = 4) -> str:
    """Shorten the first word of name to n characters, keeping only the last word after it.

    A single-word name returns just the shortened word; a blank name returns "".
    """
    name = name.strip()
    words = name.split()
    if not words:
        return name
    head = words[0][:n]
    if len(words) == 1:
        return head
    return f"{head} {words[-1]}"

def derive_common_from_first_token(tokens):
    """Derive the two names of a couple from the first lineage token.

    The first token is cleaned with `_clean_piece` and split once on "&" or on
    the word " and " (case-insensitive). Each side is title-cased; a side with
    no space is first expanded via `surname_given_from_token`.

    Returns:
        A 2-tuple of names in token order, or ("", "") when `tokens` is empty
        or the first token does not split into exactly two sides.
    """
    if not tokens:
        return ("", "")
    couple = re.split(
        r"\s*(?:&| and )\s*", _clean_piece(tokens[0]), maxsplit=1, flags=re.I
    )
    if len(couple) != 2:
        return ("", "")

    def _norm(s):
        if " " in s:
            return smart_titlecase(s)
        return smart_titlecase(surname_given_from_token(s)[0])

    first_name, second_name = (_norm(side) for side in couple)
    return (first_name, second_name)

def degree_label_from_generations(g):
    """Return the ancestor-degree label for a number of generations back.

    Below 1 -> "self", 1 -> "parents", 2 -> "grandparents",
    3 -> "great-grandparents", and beyond that "<g-2>×-great-grandparents".
    """
    fixed_labels = {1: "parents", 2: "grandparents", 3: "great-grandparents"}
    if g < 1:
        return "self"
    if g in fixed_labels:
        return fixed_labels[g]
    return f"{g - 2}\u00d7-great-grandparents"

def build_header(subject_name, cm_val, matchee_name_html, gens, husband, wife):
    """Compose the one-sentence "Match Summary" text for a row.

    Args:
        subject_name: Name (may contain HTML) of the person matched against.
        cm_val: Shared cM value; rounded to an integer when numeric, otherwise
            its stripped text is used ("0" when that is empty).
        matchee_name_html: Matchee name, plain or already wrapped in a link.
        gens: Generation count; also selects the degree label.
        husband, wife: Names of the ancestral couple.

    Returns:
        The sentence, with the trailing period removed when the module-level
        REMOVE_PERIOD_AT_END flag is truthy.
    """
    try:
        cm_str = str(int(round(float(cm_val))))
    except Exception:
        cm_str = str(cm_val).strip() or "0"
    degree_label = degree_label_from_generations(gens)
    sentence = (
        f"{subject_name} is a {cm_str} cM cousin match to {matchee_name_html}, whose "
        f"{degree_label} (back {gens} Gens) "
        f"are {husband} & {wife}."
    )
    if REMOVE_PERIOD_AT_END:
        sentence = re.sub(r'\.\s*$', '', sentence)
    return sentence

def resolve_match_to(code: str) -> str:
    """Translate a "Match to" code through MATCH_TO_UNMASKED.

    The lookup key is the code stripped and lowercased. Unknown codes are
    returned unchanged; non-string input yields "".
    """
    if not isinstance(code, str):
        return ""
    text = str(code)
    lookup_key = text.strip().lower()
    return MATCH_TO_UNMASKED.get(lookup_key, text)

# ------------------------------------------------------------------------------
# -- [4/7] CSV LOAD + COLUMN DETECTION + ROW HELPERS
# ------------------------------------------------------------------------------
# Input path: the CSV_IN environment variable wins over the configured CSV_PATH.
CSV_IN = os.environ.get("CSV_IN", CSV_PATH)

# Candidate encodings, tried in order; the first successful read is kept.
_encs = ("utf-8-sig", "utf-8", "cp1252", "iso-8859-15", "latin1")
_last_err = None
df = None
for _e in _encs:
    try:
        # Everything is read as text and blanks stay "" rather than NaN.
        df = pd.read_csv(CSV_IN, encoding=_e, dtype=str, keep_default_na=False)
    except Exception as _ex:
        _last_err = _ex
        df = None
    else:
        break
if df is None:
    raise RuntimeError(f"Unable to read CSV: {CSV_IN} ({_last_err})")
print(f"✅ Loaded CSV — {len(df)} rows, {len(df.columns)} columns from {os.path.abspath(CSV_IN)}")

# Locate the required columns by regex pattern and/or known header spellings.
id_col        = find_col(df, [r'^(id#|personid)$'], ["ID#", "ID", "PersonID", "personID"])
match_to_col  = find_col(df, [r'^match\s*to$'], ["Match to","Match"])
name_col      = find_col(df, [r'^name$'], ["Name"])
cm_col        = find_col(df, [r'^(c\s*:?m|cm)$', r'centi.?morgan'], ["cM","cm"])
path_col      = find_col(df, [r'(yates\s*dna\s*ancestral\s*line|ancestral\s*line|lineage)'],
                         ["Yates DNA Ancestral Line","Ancestral Line","Lineage"])

# Fail fast, in a fixed order, on the first required column that was not found.
for _found_col, _complaint in (
    (id_col,       "CSV missing an ID#/PersonID column."),
    (match_to_col, "CSV missing 'Match to' column."),
    (name_col,     "CSV missing 'Name' column."),
    (cm_col,       "CSV missing 'cM' column."),
    (path_col,     "CSV missing lineage/path column."),
):
    if not _found_col:
        raise ValueError(_complaint)

# Person identifier: the letter "I" (either case, via re.I) followed by one or
# more digits, delimited by word boundaries.
ID_PAT = re.compile(r"\bI\d+\b", re.I)
# Word tokenizer for run-together names: a capital followed by lowercase
# letters, a run of capitals not followed by a lowercase letter, or a run of
# lowercase letters.
_CAMEL_WORDS = re.compile(r"[A-Z][a-z]*|[A-Z]+(?![a-z])|[a-z]+")

def extract_person_id(s: str) -> str:
    """Return the first ID_PAT match found in `s`, upper-cased, or "" if none.

    Falsy input is treated as the empty string.
    """
    found = ID_PAT.search(str(s or ""))
    if not found:
        return ""
    return found.group(0).upper()

def _truncate_alpha(s: str, n: int) -> str:
    return re.sub(r"[^A-Za-z]", "", s)[:n]

def norm_matchee_name(raw: str) -> str:
    """Build the short display name ("Given Surname") for a matchee.

    Two input shapes are handled:
      * Names containing a space or comma are title-cased with
        `smart_titlecase`; the first word (letters only, at most 7) and the
        last word are kept.
      * Run-together tokens are split with `_CAMEL_WORDS`; leading one-letter
        words are dropped, the first remaining word is taken as the surname
        and the next word that differs from it as the given name.

    Returns "" for blank input. A name that normalizes to zero or one word is
    returned as-is instead of being indexed, so degenerate cells such as ","
    cannot raise IndexError.
    """
    raw = str(raw or "").strip()
    if not raw:
        return ""

    def _short(full_name: str) -> str:
        # Collapse a multi-word name to "<first word, <=7 letters> <last word>".
        pieces = full_name.split()
        if len(pieces) <= 1:
            return full_name
        return f"{_truncate_alpha(pieces[0], 7)} {pieces[-1]}".strip()

    if " " in raw or "," in raw:
        return _short(smart_titlecase(raw))

    words = _CAMEL_WORDS.findall(raw)
    while words and len(words[0]) == 1:
        words.pop(0)  # drop leading initials
    if not words:
        # Nothing usable from the tokenizer; fall back to the generic splitter.
        return _short(smart_titlecase(surname_given_from_token(raw)[0]))

    surname = smart_titlecase(words[0])
    given_candidates = [w for w in words[1:] if w.lower() != surname.lower()]
    if not given_candidates:
        return surname
    given = smart_titlecase(given_candidates[0])
    return f"{_truncate_alpha(given, 7)} {surname}".strip()

# Report which CSV header was resolved for each required logical column.
print("✅ Columns:", {"ID": id_col, "Match to": match_to_col, "Name": name_col, "cM": cm_col, "Lineage": path_col})

# ------------------------------------------------------------------------------
# -- [5/7] TRANSFORM & COLUMN A
# ------------------------------------------------------------------------------
# Per-row outputs, appended in row order and attached to `df` as columns below.
headers  = []
lineages = []
findcol  = []

REMOTE_NAME_ABS = "/" + REMOTE_NAME

# Loop-invariant values, computed once rather than once per row.
_has_common_cols = "common_husband" in df.columns and "common_wife" in df.columns
sep = f" {ARROW_ENTITY} "

for _, row in df.iterrows():
    # Subject: the "Match to" code, unmasked via resolve_match_to and normalized.
    subject_raw  = row.get(match_to_col, "")
    subject_name = normalize_person_name(resolve_match_to(subject_raw))
    subject_name_b = f"<strong>{subject_name}</strong>" if subject_name else subject_name

    # Matchee: short display name, linked to the vertical chart when an ID is present.
    pid = extract_person_id(row.get(id_col, ""))
    matchee_name = norm_matchee_name(row.get(name_col, "")) or subject_name
    if pid:
        matchee_html = (
            f'<a href="{TNG_BASE}/verticalchart.php?personID={pid}&tree={TNG_TREE}&parentset=0&display=vertical&generations=15" '
            f'target="_blank">{matchee_name}</a>'
        )
    else:
        matchee_html = matchee_name

    cm_val = row.get(cm_col, "0")

    tokens     = split_tokens(row.get(path_col, ""))
    gens_total = len(tokens)
    tokens_disp = tokens[:7]  # show first 7 pairs

    # Prefer explicit common_husband/common_wife columns; fall back to the
    # first lineage token when they are absent or both blank.
    if _has_common_cols:
        husband_raw = str(row.get("common_husband", "")).strip()
        wife_raw    = str(row.get("common_wife", "")).strip()
        if not husband_raw and not wife_raw:
            husband_raw, wife_raw = derive_common_from_first_token(tokens)
    else:
        husband_raw, wife_raw = derive_common_from_first_token(tokens)

    header_html = build_header(
        subject_name_b,
        cm_val,
        matchee_html,
        gens_total,
        truncate_first(husband_raw, 7) if husband_raw else "",
        truncate_first(wife_raw, 7) if wife_raw else ""
    )

    if tokens_disp:
        tokens_disp[0] = f"<strong>{tokens_disp[0]}</strong>"
    lineage_text = sep.join(tokens_disp) if tokens_disp else ""

    # "Find" link: opens the published page filtered on the subject's name.
    q = _u.quote(subject_name)
    quick = (f'<a class="find-btn" href="{REMOTE_NAME_ABS}?q={q}" target="_blank" rel="noopener" '
             f'title="Open a filtered view for {subject_name}">Find</a>')

    headers.append(header_html)
    lineages.append(lineage_text)
    findcol.append(quick)

LINEAGE_HEADER_SAFE = LINEAGE_HEADER

df["Match Summary"]             = headers
df[LINEAGE_HEADER_SAFE]         = lineages
df["Find"]                      = findcol
display_df = df[["Find", "Match Summary", LINEAGE_HEADER_SAFE]]

# The cells are HTML fragments, so characters that ISO-8859-15 cannot encode
# (e.g. a typographic apostrophe in a name) are written as numeric character
# references instead of aborting the export with UnicodeEncodeError.
display_df[["Match Summary"]].to_csv(
    MATCH_COUSINS_CSV, index=False, encoding="iso-8859-15", errors="xmlcharrefreplace"
)
print("✅ Wrote local CSV (Column A):", os.path.abspath(MATCH_COUSINS_CSV))

# ------------------------------------------------------------------------------
# -- [6/7] HTML (TABLE + CSS + JS)
# ------------------------------------------------------------------------------
display_for_html = display_df

# Cells already hold HTML (links, <strong>), hence escape=False.
html_table = display_for_html[["Find", "Match Summary", LINEAGE_HEADER_SAFE]].to_html(
    index=False, escape=False, classes="sortable"
)

# Give the table the id the page script looks up.
html_table = html_table.replace(
    '<table border="1" class="dataframe sortable">',
    '<table border="1" class="dataframe sortable" id="refactor-table">', 1
)
# Tag the first body row for the "#first-row" CSS rule. to_html() indents its
# markup, so <tbody> and the first <tr> are separated by a newline plus spaces;
# a whitespace-tolerant pattern is required for the substitution to apply.
html_table = re.sub(r'(<tbody>\s*<tr)>', r'\1 id="first-row">', html_table, count=1)

html_table = html_table.replace(
    "<th>Match Summary</th>",
    "<th>Match Summary&ndash;click to sort</th>",
    1
)

html_table = html_table.replace(
    f"<th>{_html.escape(LINEAGE_HEADER_SAFE)}</th>",
    "<th>Lineage (Starting with oldest ancestor&ndash;click to sort)</th>",
    1
)

FIND_PX = 118  # width of Find column (checkbox + Email + Find)
# Fixed widths for the first two columns; the lineage column takes the rest.
colgroup_html = (
    "<colgroup>\n"
    f"  <col style=\"width:{FIND_PX}px;\" />\n"
    f"  <col style=\"width:{COL_A_PX}px;\" />\n"
    "  <col />\n"
    "</colgroup>\n"
)
html_table = html_table.replace(
    '<table border="1" class="dataframe sortable" id="refactor-table">',
    '<table border="1" class="dataframe sortable" id="refactor-table">\n' + colgroup_html, 1
)

# Add the select-all checkbox to the Find header.
html_table = html_table.replace(
    "<th>Find</th>",
    "<th>Find&nbsp;<input type=\"checkbox\" id=\"sel-all\" title=\"Select all visible\" /></th>",
    1
)

html_table_scrolling = '<div class="table-scroll">\n' + html_table + '\n</div>'

# Inline <style> block embedded in the <head> of the generated page.
# TABLE_WIDTH_PX sets both the .wrap max-width and the fixed table width.
# NOTE: generate_partial_files() derives its own stylesheet by exact-string
# .replace() calls on this text (the .wrap, .table-scroll and table.sortable
# rules), so any edit to those rules must be mirrored there.
TABLE_CSS = (
    "<style type=\"text/css\">\n"
    "  html { scroll-behavior: smooth; }\n"
    "  body { font-family: 'Times New Roman', Georgia, serif; font-size:100%; background:#ffffff; color:#222; margin:0; padding:0; line-height:1.5; }\n"
    f"  .wrap {{ max-width:{TABLE_WIDTH_PX}px; margin:0 auto; background:#ffffff; padding:20px; padding-bottom:48px; }}\n"
    "  a { color:#154b8b; text-decoration:none; } a:hover { text-decoration:underline; }\n"
    "  h1 { margin:0 0 6px 0; font-size:26px; line-height:1.2; text-align:center; }\n"
    "  .updated { font-size:12px; color:#555; text-align:center; margin:2px 0 10px 0; }\n"
    "  .sortbar { margin:6px 0 10px 0; font-size:13px; background:#ffffff; padding:6px 8px; border-radius:6px;\n"
    "             display:flex; flex-wrap:wrap; gap:5px; align-items:center; border:1px solid #ddd; }\n"
    "  .btn { display:inline-block; border:1px solid #5b79b8; background:#5b79b8; color:#fff; padding:4px 9px;\n"
    "         text-decoration:none; cursor:pointer; border-radius:5px; line-height:1.2; transition:background 0.2s, transform 0.1s; user-select:none; }\n"
    "  .btn:hover { background:#4668aa; transform:translateY(-1px); }\n"
    "  input.btn.search { background:#fff; color:#111; border-color:#bbb; }\n"
    "  .btn-mini { font-size:12px; padding:2px 6px; line-height:1.1; margin-left:6px; }\n"
    "  .find-cell { white-space:nowrap; }\n"
    "  .selbox { margin-right:6px; vertical-align:middle; }\n"
    "  .table-scroll { max-height:70vh; overflow-y:auto; overflow-x:auto; border:1px solid #ddd; }\n"
    f"  table.sortable {{ border-collapse:collapse; width:{TABLE_WIDTH_PX}px; table-layout:fixed; }}\n"
    "  table.sortable th, table.sortable td { border:1px solid #ddd; padding:6px 8px; vertical-align:top; }\n"
    "  table.sortable th { background:#e3eaf8; text-align:left; position:sticky; top:0; z-index:2; box-shadow:0 1px 0 #ccc; cursor:pointer; }\n"
    "  table.sortable td { word-wrap:break-word; overflow-wrap:break-word; }\n"
    "  #first-row td { border-top:2px solid #999; }\n"
    "  .back-to-top { position:fixed; right:16px; bottom:16px; padding:6px 10px; border:1px solid #3e5a97; background:#5b79b8; color:#fff;\n"
    "                 cursor:pointer; border-radius:6px; font-size:12px; display:none; z-index:9999; }\n"
    "  .back-to-top:hover { background:#4668aa; }\n"
    "  #dynamicContent { margin:10px 0 14px 0; }\n"
    "  @media screen and (max-width: 820px) { .wrap { padding:12px; } h1 { font-size:22px; } }\n"
    "</style>\n"
)

# Toolbar markup for the main page: navigation buttons, the "Email Selected"
# and "Clear" actions (wired up by id in the page script), and the search box.
# The empty #dynamicContent div is the target the script fills when a button
# carrying a data-load-partial attribute is clicked.
DYNAMIC_BLOCK = (
    "<div class=\"sortbar\">\n"
    "  <a class=\"btn\" href=\"https://yates.one-name.net/gengen/dna_cousin_surname_study.htm\" target=\"_blank\">Study Details</a>\n"
    "  <a class=\"btn\" href=\"https://yates.one-name.net/gengen/dna_theory_of_the_case.htm\" target=\"_blank\">Theory in Action</a>\n"
    "  <a class=\"btn\" href=\"gengen/images/cousin-calculator.jpg\" target=\"_blank\">Cousin Connection</a>\n"
    "  <a class=\"btn\" href=\"gengen/images/Shared_cM_Project_v4.jpg\" target=\"_blank\">Cousin by DNA</a>\n"
    "  <a class=\"btn\" href=\"partials/match_count.htm\" target=\"_blank\" rel=\"noopener\">Match Count</a>\n"
    "  <a class=\"btn\" href=\"partials/lineage_count.htm\" target=\"_blank\" rel=\"noopener\">Lineage Count</a>\n"
    "  <a class=\"btn\" href=\"/partials/cousin_list_print.htm\" target=\"_blank\">Cousin List</a>\n"
    "  <span class=\"btn\" id=\"email-selected\">Email Selected</span>\n"
    "  <span class=\"btn\" id=\"clear-selected\">Clear</span>\n"
    "  <input type=\"text\" id=\"search-box\" class=\"btn search\" size=\"24\" value=\"\" placeholder=\"Search&hellip;\" "
    "         autocomplete=\"off\" autocapitalize=\"off\" spellcheck=\"false\" inputmode=\"search\" enterkeyhint=\"search\" />\n"
    "</div>\n"
    "<div id=\"dynamicContent\"></div>\n"
)

# URL of the match-count resource, made safe for embedding inside a
# single-quoted JavaScript string literal (apostrophes are percent-encoded).
JS_COUNT_URL = COUNT_PUBLIC_URL.replace("'", "%27")

# Status line under the page title. The #last-updated and #auto-count spans
# are emitted empty and are filled in by the page script at load time.
UPDATED_BLOCK = (
    "<div class=\"updated\">"
    f"<a href=\"{HOME_URL}\" target=\"_blank\" rel=\"noopener\">Home</a>"
    " &nbsp;|&nbsp; Last updated: <span id=\"last-updated\"></span>"
    " &nbsp;|&nbsp; Autosomal matches: <span id=\"auto-count\" class=\"js-count\"></span>"
    "</div>"
)

# ==============================================================================
# ==  🛑 SYNTAX ERROR FIX:
# ==  The entire <script> block below now uses {{ and }} (double braces)
# ==  to correctly escape the f-string.
# ==============================================================================
# Full page for the main register: CSS, toolbar, data table and the page script.
# This is an f-string, so every literal JavaScript brace is doubled ({{ / }})
# and every JavaScript backslash is written as \\ (for example \\s, \\n).
# Keep the <script> body pure ASCII: the page is saved as ISO-8859-15 with
# errors="xmlcharrefreplace", which would turn a literal arrow character into
# "&#8593;" text that browsers do not decode inside <script>. The sort-arrow
# regex therefore uses JavaScript \\uXXXX escapes.
template_html = f"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ONS Yates Study Autosomal DNA Register</title>
{TABLE_CSS}
</head>
<body id="top">
<div class="wrap">
  <h1>ONS Yates Study Autosomal DNA Register</h1>
  {UPDATED_BLOCK}
  {DYNAMIC_BLOCK}
{html_table_scrolling}
</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>
<script type="text/javascript">
//<![CDATA[
(function(){{
  function textOf(cell){{ return (cell && (cell.textContent || cell.innerText) || '').replace(/\\s+/g,' ').trim().toLowerCase(); }}
  function sortTable(tbl, colIndex, dir){{
    var tb=tbl && tbl.tBodies ? tbl.tBodies[0] : null; if(!tb) return;
    var rows=[].slice.call(tb.rows||[]);
    var asc=(dir==='asc');
    rows.sort(function(a,b){{
      var A=textOf(a.cells[colIndex]), B=textOf(b.cells[colIndex]);
      if(A<B) return asc?-1:1; if(A>B) return asc?1:-1; return 0;
    }});
    var frag=document.createDocumentFragment();
    for(var i=0;i<rows.length;i++) frag.appendChild(rows[i]);
    tb.appendChild(frag);
    updateSelAll();
  }}

  function bindHeaderSort(){{
    var tbl=document.getElementById('refactor-table'); if(!(tbl && tbl.tHead && tbl.tHead.rows.length)) return; var ths=tbl.tHead.rows[0].cells;
    if(!ths) return;
    for(var i=0;i<ths.length;i++)(function(idx){{
      var th = ths[idx];
      // Skip checkbox header
      if (th.querySelector('input[type=checkbox]')) return;
      var dir='asc';
      th.addEventListener('click',function(){{
        dir=(dir==='asc')?'desc':'asc';
        // Clear arrows
        for (var j = 0; j < ths.length; j++) {{
          ths[j].innerHTML = ths[j].innerHTML.replace(/\\s+[\\u2191\\u2193]/, '');
        }}
        // Add new arrow
        th.innerHTML += (dir === 'asc' ? ' &uarr;' : ' &darr;');
        sortTable(tbl,idx,dir);
      }},false);
    }})(i);
  }}

  var PARTIAL_BASES=['/partials/','partials/','gengen/partials/','/gengen/partials/'];
  function tryFetchSequential(urls,onOK,onFail){{
    if(!urls.length) return onFail('No valid locations'); var url=urls.shift();
    fetch(url,{{cache:'no-store'}}).then(function(r){{ if(!r.ok) throw new Error('HTTP '+r.status); return r.text();}})
      .then(onOK).catch(function(){{ tryFetchSequential(urls,onOK,onFail);}});
  }}
  function bindPartials(){{
    var bar=document.querySelector('.sortbar'); if(!bar) return;
    bar.addEventListener('click',function(e){{
      var btn=e.target && e.target.closest ? e.target.closest('.btn') : null; if(!btn) return;
      var rel=btn.getAttribute('data-load-partial'); if(!rel) return;
      var c=document.getElementById('dynamicContent'); if(!c) return; c.innerHTML='<p><em>Loading latest data&hellip;</em></p>';
      var bust=encodeURIComponent(document.lastModified||(new Date()).toUTCString());
      var bases=PARTIAL_BASES.slice(); var candidates=bases.map(function(b){{return b+rel+'?v='+bust;}});
      tryFetchSequential(candidates.slice(), function(html){{ c.innerHTML=html; }}, function(){{ c.innerHTML='<p style=\\"color:#a00;\\">Could not load content.</p>'; }});
    }});
  }}

  function stampLastUpdated(){{
    var el=document.getElementById('last-updated'); if(!el) return;
    var d=new Date(document.lastModified||new Date());
    function z(n){{return(n<10?'0':'')+n;}}
    el.innerHTML=d.getFullYear()+'-'+z(d.getMonth()+1)+'-'+z(d.getDate())+' '+z(d.getHours())+':'+z(d.getMinutes());
  }}
  function formatWithCommas(n){{
    try{{ var x=parseInt(String(n||'').replace(/[^0-9\\-]/g,''),10); if(isNaN(x)) return ''; return x.toLocaleString('en-US'); }}catch(e){{ return String(n||''); }}
  }}
  function loadAutoCount(){{
    var el=document.getElementById('auto-count'); if(!el) return;
    var url='{JS_COUNT_URL}';
    try{{
      var xhr=new XMLHttpRequest(); xhr.open('GET', url+(url.indexOf('?')>-1?'':'?v='+(new Date()).getTime()), true);
      xhr.onreadystatechange=function(){{ if(xhr.readyState===4){{ if(xhr.status>=200&&xhr.status<300){{
        var m=(xhr.responseText||'').match(/(\\d+)/); var num=m?m[1]:'';
        el.textContent = formatWithCommas(num) || '(unavailable)';
      }} else {{ el.textContent='(unavailable)'; }} }} }};
      xhr.send(null);
    }}catch(e){{ el.textContent='(unavailable)'; }}
  }}

  function getParam(name){{ var m=location.search.match(new RegExp('[?&]'+name+'=([^&]+)')); return m?decodeURIComponent(m[1].replace(/\\+/g,' ')):''; }}
  function bindSearch(){{
    var box=document.getElementById('search-box'); var tbl=document.getElementById('refactor-table'); if(!(box && tbl && tbl.tBodies && tbl.tBodies[0])) return; var tb=tbl.tBodies[0];
    function norm(s){{ return String(s||'').replace(/\\s+/g,' ').toLowerCase(); }}
    function rowText(tr){{ var t=''; for(var i=1;i<tr.cells.length;i++){{ t+= ' ' + (tr.cells[i].textContent||tr.cells[i].innerText||''); }} return norm(t); }} // Start at 1 to skip 'Find'
    var cached=[]; (function seed(){{ var rows=tb.rows; cached=[]; for(var i=0;i<rows.length;i++){{ cached.push({{el:rows[i], txt:rowText(rows[i])}}); }} }})();
    function apply(q){{
      q=norm(q);
      for(var i=0;i<cached.length;i++){{
        var hit = !q || cached[i].txt.indexOf(q)>-1;
        cached[i].el.style.display = hit? '' : 'none';
      }}
      updateSelAll();
    }}
    var to=null; function onInput(){{ if(to) clearTimeout(to); to=setTimeout(function(){{ apply(box.value); }}, 60); }}
    box.addEventListener('input', onInput, false);
    box.addEventListener('search', onInput, false); // for 'x' clear button
    var q0=getParam('q');
    if(q0){{ box.value=q0; apply(q0); try{{ history.replaceState(null, '', location.pathname); }}catch(e){{}} }}
    else {{ box.value=''; apply(''); setTimeout(function(){{ if(!getParam('q')){{ box.value=''; apply(''); }} }}, 0); }}
  }}

  function visibleRowCheckboxes(){{
    var tbl=document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return [];
    var tb=tbl.tBodies[0], out=[];
    for(var i=0;i<tb.rows.length;i++){{
      var tr=tb.rows[i];
      if(tr.style.display !== 'none'){{
        var cb=tr.querySelector('.selbox');
        if(cb) out.push(cb);
      }}
    }}
    return out;
  }}
  function updateSelAll(){{
    var sa=document.getElementById('sel-all'); if(!sa) return;
    var cbs=visibleRowCheckboxes();
    var all_vis=cbs.length > 0;
    var all_checked=all_vis;
    for(var i=0;i<cbs.length;i++){{
      if(!cbs[i].checked) all_checked=false;
    }}
    sa.checked = all_vis && all_checked;
    sa.indeterminate = all_vis && !all_checked && cbs.some(function(cb){{return cb.checked;}});
  }}
  function bindSelectAll(){{
    var sa=document.getElementById('sel-all'); if(!sa) return;
    sa.addEventListener('click', function(){{
      var cbs=visibleRowCheckboxes();
      for(var i=0;i<cbs.length;i++) cbs[i].checked = sa.checked;
    }}, false);
    var tb=document.getElementById('refactor-table'); if(!tb) return;
    tb.addEventListener('click', function(e){{
      if(e.target && e.target.classList.contains('selbox')){{
        updateSelAll();
      }}
    }}, false);
  }}

  function bindEmail(){{
    var btn=document.getElementById('email-selected'); if(!btn) return;
    btn.addEventListener('click', function(){{
      var cbs=visibleRowCheckboxes();
      var names=[];
      for(var i=0;i<cbs.length;i++){{
        if(cbs[i].checked){{
          var name=cbs[i].getAttribute('data-name');
          if(name) names.push(name);
        }}
      }}
      if(!names.length) return alert('No rows selected.');
      var subj='ONS Yates DNA Register';
      var body = 'Selected cousins:\\n\\n' + names.join('\\n') + '\\n\\n';
      var href='mailto:?subject='+encodeURIComponent(subj)+'&body='+encodeURIComponent(body);
      if(href.length > 2000) href = 'mailto:?subject='+encodeURIComponent(subj);
      window.location.href = href;
    }}, false);
  }}
  function bindClear(){{
    var btn=document.getElementById('clear-selected'); if(!btn) return;
    btn.addEventListener('click', function(){{
      var cbs=visibleRowCheckboxes();
      for(var i=0;i<cbs.length;i++) cbs[i].checked = false;
      updateSelAll();
    }}, false);
  }}

  function bindBackToTop(){{
    var btn=document.getElementById('back-to-top'); if(!btn) return;
    window.addEventListener('scroll', function(){{
      btn.style.display = (window.scrollY > 200) ? 'block' : 'none';
    }}, {{passive:true}});
    btn.addEventListener('click', function(){{
      window.scrollTo(0,0);
    }}, false);
  }}

  function addCheckboxes(){{
    var tbl=document.getElementById('refactor-table');
    if(!(tbl && tbl.tBodies && tbl.tBodies[0])) return;
    var tb=tbl.tBodies[0];
    for(var i=0;i<tb.rows.length;i++){{
      var tr=tb.rows[i];
      var cell=tr.cells[0]; // 'Find' column
      var findBtn=cell ? cell.querySelector('.find-btn') : null;
      var name = findBtn ? (findBtn.getAttribute('title')||'').replace('Open a filtered view for ','') : ('Row '+(i+1));
      if(cell){{
        cell.classList.add('find-cell');
        cell.innerHTML = '<input type=\"checkbox\" class=\"selbox\" title=\"Select this row\" data-name=\"'+name.replace(/"/g,'&quot;')+'\" />' + cell.innerHTML;
      }}
    }}
  }}

  document.addEventListener('DOMContentLoaded', function(){{
    addCheckboxes();
    stampLastUpdated();
    loadAutoCount();
    bindHeaderSort();
    bindPartials();
    bindSearch();
    bindSelectAll();
    bindEmail();
    bindClear();
    bindBackToTop();
  }});

}})();
//]]>
</script>
</body>
</html>
"""

print("✅ HTML template created.")

# ------------------------------------------------------------------------------
# -- [7/7] GENERATE PARTIALS, SAVE, & UPLOAD
# ------------------------------------------------------------------------------

# ====== [NEW FUNCTION: GENERATE PARTIALS - v9 (Ancestor Pair Count)] =====
#
#    ===> REPLACE your old 'generate_partial_files' function with this one <===
#
# ==============================================================================
def generate_partial_files():
    """
    Generates STYLED match_count.htm (3 columns) and lineage_count.htm
    (counts oldest ancestor pair, 3 columns: Count, %, Name).
    Saves them to /content/

    Reads the module-level `df`, `match_to_col` and `path_col`. Returns None;
    if `df` is missing or empty it prints a message and does nothing. Any
    exception raised while building or writing the pages is caught, reported
    with a traceback, and not re-raised.

    Both files are written as ISO-8859-15 (matching the charset declared in
    the page <head>), with unencodable characters emitted as numeric
    character references.
    """
    print("--- 📊 Generating partial count files (with styling) ---")

    if 'df' not in globals() or df.empty:
        print("❌ Could not generate partials: Main 'df' is not loaded.")
        return

    # --- Define a *simpler* nav bar for the partial pages ---
    PARTIAL_DYNAMIC_BLOCK = (
        "<div class=\"sortbar\">\n"
        "  <a class=\"btn\" href=\"https://yates.one-name.net/gengen/dna_cousin_surname_study.htm\" target=\"_blank\">Study Details</a>\n"
        "  <a class=\"btn\" href=\"https://yates.one-name.net/gengen/dna_theory_of_the_case.htm\" target=\"_blank\">Theory in Action</a>\n"
        "  <a class=\"btn\" href=\"gengen/images/cousin-calculator.jpg\" target=\"_blank\">Cousin Connection</a>\n"
        "  <a class=\"btn\" href=\"gengen/images/Shared_cM_Project_v4.jpg\" target=\"_blank\">Cousin by DNA</a>\n"
        "  <a class=\"btn\" href=\"/partials/cousin_list_print.htm\" target=\"_blank\">Cousin List</a>\n"
        f"  <a class=\"btn\" href=\"{HOME_URL}\" style=\"background:#3a6b3a; border-color:#3a6b3a;\">&larr; Back to Main Register</a>\n"
        "</div>\n"
    )

    # --- Define a *simpler* updated block for partials ---
    PARTIAL_UPDATED_BLOCK = (
        "<div class=\"updated\">"
        f"<a href=\"{HOME_URL}\" target=\"_blank\" rel=\"noopener\">Home</a>"
        " &nbsp;|&nbsp; Last updated: <span id=\"last-updated\"></span>"
        "</div>"
    )

    # --- Derive the partial-page CSS from TABLE_CSS ---
    # These are exact-string replacements: each one silently does nothing if
    # the corresponding rule in TABLE_CSS is reworded.
    PARTIAL_CSS = TABLE_CSS.replace(
        f"table.sortable {{ border-collapse:collapse; width:{TABLE_WIDTH_PX}px; table-layout:fixed; }}",
        "table.sortable { border-collapse:collapse; width:100%; table-layout:auto; margin:0; }"
    ).replace(
        f"  .wrap {{ max-width:{TABLE_WIDTH_PX}px; margin:0 auto; background:#ffffff; padding:20px; padding-bottom:48px; }}\n",
        "  .wrap { max-width:1200px; margin:0 auto; background:#ffffff; padding:20px; padding-bottom:48px; }\n"
    ).replace(
        ".table-scroll { max-height:70vh; overflow-y:auto; overflow-x:auto; border:1px solid #ddd; }",
        ".table-scroll { max-height:none; overflow-y:visible; overflow-x:auto; border:1px solid #ddd; }"
    )
    PARTIAL_CSS = PARTIAL_CSS.replace(
        "</style>",
        "  table.sortable th, table.sortable td { padding: 4px 6px; } /* Narrower padding */\n</style>"
    )

    def create_partial_page(page_title, table_html):
        """Wrap `table_html` in the shared partial-page shell.

        This is a plain f-string, so each literal JavaScript brace is doubled
        exactly once ({{ / }}). Doubling twice emits "{{" into the page, which
        turns `{passive:true}` into a syntax error and unbalances the script.
        """
        return f"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>{page_title}</title>
{PARTIAL_CSS}
</head>
<body id="top">
<div class="wrap">
  <h1>{page_title}</h1>
  {PARTIAL_UPDATED_BLOCK}
  {PARTIAL_DYNAMIC_BLOCK}

  <div class="table-scroll">
    {table_html}
    <a href="{HOME_URL}" style="margin-top:12px; display:inline-block; font-weight:bold;">&larr; Back to Main Register</a>
  </div>

</div>
<button id="back-to-top" class="back-to-top">&#9650; Top</button>
<script type="text/javascript">
//<![CDATA[
(function(){{
  function stampLastUpdated(){{
    var el=document.getElementById('last-updated'); if(!el) return;
    var d=new Date(document.lastModified||new Date());
    function z(n){{return(n<10?'0':'')+n;}}
    el.innerHTML=d.getFullYear()+'-'+z(d.getMonth()+1)+'-'+z(d.getDate())+' '+z(d.getHours())+':'+z(d.getMinutes());
  }}

  function bindBackToTop(){{
    var btn=document.getElementById('back-to-top');
    if (btn) {{
        window.addEventListener('scroll', function(){{
        btn.style.display = (window.scrollY > 200) ? 'block' : 'none';
        }}, {{passive:true}});
        btn.addEventListener('click', function(){{
        window.scrollTo(0,0);
        }}, false);
    }}
  }}

  document.addEventListener('DOMContentLoaded', function(){{
    stampLastUpdated();
    bindBackToTop();
  }});
}})();
//]]>
</script>
</body>
</html>
"""

    def _write_page(path, page_html):
        # Encode to match the charset declared in the page's <meta> tag.
        with open(path, 'w', encoding='iso-8859-15', errors='xmlcharrefreplace') as f:
            f.write(page_html)

    def _first_token(path_value):
        # Oldest pair = first lineage token; '' when the lineage is empty.
        tokens = split_tokens(path_value)
        return tokens[0] if tokens else ''

    def _pair_label(first_token):
        # "A and B" for a parsed couple; "Unknown" when the token is empty or
        # cannot be split into names (instead of a bare " and " label).
        if not first_token:
            return "Unknown"
        names = [n for n in derive_common_from_first_token([first_token]) if n]
        return " and ".join(names) if names else "Unknown"

    try:
        # --- 1. Match Count (3 Columns - Unchanged) ---
        match_data = df[[match_to_col]].copy()
        match_data['Unmasked Name'] = df[match_to_col].apply(resolve_match_to).apply(normalize_person_name)
        match_data.rename(columns={match_to_col: 'Masked Name'}, inplace=True)
        match_counts_df = match_data.groupby(['Masked Name', 'Unmasked Name']).size().reset_index(name='Count')
        match_counts_df = match_counts_df.sort_values(by=['Count', 'Unmasked Name'], ascending=[False, True])

        match_table_html = match_counts_df.to_html(index=False, border=1, classes="sortable match-count-table")
        colgroup_match = """
<colgroup>
    <col style="width: 150px;">
    <col style="width: 300px;">
    <col style="width: 80px; text-align: right;">
</colgroup>
"""
        match_table_html = match_table_html.replace(
            '<table border="1" class="dataframe sortable match-count-table">',
            '<table border="1" class="dataframe sortable match-count-table">' + colgroup_match, 1
        )
        match_html_full = create_partial_page("ONS Yates Study - Match Count", match_table_html)

        _write_page('/content/match_count.htm', match_html_full)
        print(f"✅ Generated local file: /content/match_count.htm ({len(match_counts_df)} rows)")


        # --- 2. Lineage Count (Oldest Ancestor Pair - Modified) ---
        total_rows = len(df)
        if total_rows > 0:
            # Get the first token (oldest pair) from each lineage
            first_pairs_raw = df[path_col].apply(_first_token)

            # Normalize the names in the first pair
            normalized_pairs = first_pairs_raw.apply(_pair_label)

            # Count the occurrences of each normalized pair
            ancestor_counts = normalized_pairs.value_counts().reset_index()
            ancestor_counts.columns = ['Ancestor Pair', 'Count']

            # Calculate Percentage
            ancestor_counts['Percentage'] = (ancestor_counts['Count'] / total_rows * 100)

            # Format Percentage column
            ancestor_counts['Percentage'] = ancestor_counts['Percentage'].map('{:.2f}%'.format)

            # Reorder and rename columns for final table
            ancestor_counts_final = ancestor_counts[['Count', 'Percentage', 'Ancestor Pair']]

            # Sort by Count descending, then Ancestor Pair ascending
            ancestor_counts_final = ancestor_counts_final.sort_values(by=['Count', 'Ancestor Pair'], ascending=[False, True])

            # Generate HTML table
            lineage_table_html = ancestor_counts_final.to_html(index=False, border=1, classes="sortable lineage-count-table")

            # Inject <colgroup> for styling (right-align count and percentage)
            colgroup_lineage = """
<colgroup>
    <col style="width: 80px; text-align: right;">
    <col style="width: 100px; text-align: right;">
    <col style="width: auto;">
</colgroup>
"""
            lineage_table_html = lineage_table_html.replace(
                 '<table border="1" class="dataframe sortable lineage-count-table">',
                 '<table border="1" class="dataframe sortable lineage-count-table">' + colgroup_lineage, 1
            )
            lineage_row_count = len(ancestor_counts_final)
        else:
            lineage_table_html = "<p>No lineage data found to generate counts.</p>"
            lineage_row_count = 0

        # Create full page HTML
        lineage_html_full = create_partial_page("ONS Yates Study - Oldest Ancestor Pair Count", lineage_table_html) # Updated title

        _write_page('/content/lineage_count.htm', lineage_html_full)
        print(f"✅ Generated local file: /content/lineage_count.htm ({lineage_row_count} rows)")

    except Exception as e:
        # Imported here so the handler works even if the module never imported it.
        import traceback
        print(f"❌ Error while generating partial files: {e}")
        traceback.print_exc()

# ====== [END NEW FUNCTION] ==============================================

# ====== [FUNCTION: SAVE] =======================================================
def _upload_if_present(ftps, local_path, remote_path, label, missing_msg=None):
    """
    Upload one optional artifact, reporting (never raising) a failure.

    Args:
        ftps: the open connection returned by ftp_connect().
        local_path: file to send; nothing is uploaded when it does not exist.
        remote_path: destination name handed to ftp_upload_overwrite().
        label: short name used in the failure message.
        missing_msg: text printed when local_path is absent (None = stay quiet).
    """
    if not os.path.exists(local_path):
        if missing_msg:
            print(missing_msg)
        return
    try:
        ftp_upload_overwrite(ftps, local_path, remote_path)
    except Exception as e:
        # One failed file must not stop the remaining uploads.
        print(f"❌ {label} upload failed: {e}")


def save_and_upload_all():
    """
    Generates partials, saves HTML locally, then uploads all 5 files.

    The five uploads are: the main register HTML, the Column A CSV (only when
    UPLOAD_COLUMN_A is truthy), the autosomal count file, match_count.htm and
    lineage_count.htm. Upload failures are printed and skipped.

    Raises:
        RuntimeError: if the main HTML file cannot be written locally
            (the original exception is chained as the cause).
    """
    print("--- 💾 Starting Save & Upload ---")

    # 1. Generate partial files
    generate_partial_files()

    # 2. Save main HTML file locally
    try:
        with open(LOCAL_NAME, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as f:
            f.write(template_html)
        print(f"✅ Saved main HTML locally: {os.path.abspath(LOCAL_NAME)}")
    except Exception as e:
        raise RuntimeError(f"Failed to save local HTML file {LOCAL_NAME}: {e}") from e

    # 3. Connect to FTP
    try:
        ftps = ftp_connect()
        print("✅ Connected to FTP host.")
    except Exception as e:
        print(f"❌ FTP connect failed: {e}")
        print("Skipping all uploads. Check FTP_HOST/USER/PASS environment variables.")
        return

    try:
        # 4. Upload main HTML (it was written just above, so no existence check)
        try:
            ftp_upload_overwrite(ftps, LOCAL_NAME, REMOTE_NAME)
        except Exception as e:
            print(f"❌ Main HTML upload failed: {e}")

        # 5. Upload Column A CSV
        if UPLOAD_COLUMN_A:
            _upload_if_present(
                ftps, MATCH_COUSINS_CSV, MATCH_COUSINS_CSV, "Column A CSV",
                f"ℹ️ Skipping Column A upload: File not found at {MATCH_COUSINS_CSV}",
            )

        # 6. Upload Autosomal Count (now reports a missing file like the other steps)
        _upload_if_present(
            ftps, LOCAL_COUNT_FILE, REMOTE_COUNT_NAME, "Autosomal count",
            f"ℹ️ Skipping autosomal count upload: File not found at {LOCAL_COUNT_FILE}",
        )

        # 7. Upload match_count.htm
        _upload_if_present(
            ftps, '/content/match_count.htm', 'partials/match_count.htm', "match_count.htm",
            "ℹ️ Skipping match_count.htm upload: File not found.",
        )

        # 8. Upload lineage_count.htm
        _upload_if_present(
            ftps, '/content/lineage_count.htm', 'partials/lineage_count.htm', "lineage_count.htm",
            "ℹ️ Skipping lineage_count.htm upload: File not found.",
        )
    finally:
        # 9. Disconnect — attempted even if the upload phase was interrupted.
        try:
            ftps.quit()
            print("✅ Disconnected from FTP.")
        except Exception:
            # Best-effort goodbye; a failed QUIT is not worth failing the build.
            pass

    print("--- 🎉 Process Complete ---")

# ------------------------------------------------------------------------------
# -- 🚀 EXECUTION BLOCK
# ------------------------------------------------------------------------------
print("--- 🚀 Starting Gold 2 Refactor Build (Single Cell) ---")
try:
    # Uploading needs all three FTP settings; otherwise only local files are produced.
    ftp_configured = all(
        os.environ.get(var_name) for var_name in ('FTP_HOST', 'FTP_USER', 'FTP_PASS')
    )

    if ftp_configured:
        # Full pipeline: partials, local save, uploads.
        save_and_upload_all()
    else:
        print("❌ Critical FTP environment variables (FTP_HOST, FTP_USER, FTP_PASS) are not set.")
        print("Running in 'local save only' mode.")

        # Partials are still generated so the local copies are complete.
        generate_partial_files()

        # Same local save as save_and_upload_all() performs.
        with open(LOCAL_NAME, "w", encoding="iso-8859-15", errors="xmlcharrefreplace") as local_file:
            local_file.write(template_html)
        print(f"✅ Saved main HTML locally: {os.path.abspath(LOCAL_NAME)}")

except Exception:
    # Report the failure with a full traceback instead of aborting the cell.
    print("\n--- ❌ BUILD FAILED ---")
    print("An error occurred during the build process:")
    traceback.print_exc()

⬇️ Pulled remote file: partials/match_to_unmasked.csv → /content/match_to_unmasked.server.csv
✅ Loaded resolver from server: 79 codes
✅ Loaded CSV — 7 rows, 9 columns from /content/final_combined_df_with_value_labels.csv
✅ Columns: {'ID': 'ID#', 'Match to': 'Match to', 'Name': 'Name', 'cM': 'cM', 'Lineage': 'Yates DNA Ancestral Line'}
✅ Wrote local CSV (Column A): /content/the_match_cousins.csv
✅ HTML template created.
--- 🚀 Starting Gold 2 Refactor Build (Single Cell) ---
--- 💾 Starting Save & Upload ---
--- 📊 Generating partial count files (with styling) ---
✅ Generated local file: /content/match_count.htm (1 rows)
✅ Generated local file: /content/lineage_count.htm (2 rows)
✅ Saved main HTML locally: /content/ons_yates_dna_register.htm
✅ Connected to FTP host.
⬆️ Uploaded: ons_yates_dna_register.htm → ons_yates_dna_register.htm
⬆️ Uploaded: the_match_cousins.csv → the_match_cousins.csv
⬆️ Uploaded: /content/autosomal_count.txt → autosomal_count.txt
⬆️ Uploaded: /content/match_count.h

In [13]:
# Gold 3 Ancestor Register (mobile-friendly, sortable)

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load the combined lineage table; missing haplogroups become empty strings —————
df = pd.read_csv("final_combined_df_with_value_labels.csv")
df = df.assign(haplogroup=df['haplogroup'].fillna(''))

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """
    Wrap a haplogroup-column value in an HTML anchor chosen by its prefix.

    Prefix rules:
      - "Y-"     -> the Y-designation overview page
      - "dar-A-" -> DAR search, with the full value as the keyword
      - "sar-P-" -> SAR patriot search, with the full value as the search text
      - anything else non-empty -> "<hap_base><value>.htm"

    Args:
        x: the cell value. Missing values (None/NaN) are treated as blank and
           other non-string scalars are converted with str(), so the function
           is safe to apply to a column that has not been cleaned first.

    Returns:
        An '<a ... target="_blank">' string, or '' for a blank/missing value.
    """
    if not isinstance(x, str):
        # Previously a non-string cell raised AttributeError on .startswith().
        x = '' if pd.isna(x) else str(x)
    if not x:
        return ''

    if x.startswith("Y-"):
        href = ydna_overview
    elif x.startswith("dar-A-"):
        href = f"{dar_base}{x}"
    elif x.startswith("sar-P-"):
        href = f"{sar_base}{x}"
    else:
        href = f"{hap_base}{x}.htm"
    return f'<a href="{href}" target="_blank">{x}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# A missing or non-numeric count file is not fatal: the count is shown as unknown.
try:
    with open("autosomal_count.txt", "r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

# Previous run's count (if any) is used to show the change since last run.
prev_count = None
additional_str = ""
if os.path.exists("autosomal_count_prev.txt"):
    try:
        with open("autosomal_count_prev.txt", "r") as f:
            prev_count = int(f.read().strip())
    except (OSError, ValueError):
        prev_count = None

if autosomal_count is not None and prev_count is not None:
    diff = autosomal_count - prev_count
    # ":+d" always carries the correct sign (the old "+{diff}" printed "+-3" for a drop).
    additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z yields EST or EDT as appropriate instead of a hard-coded "EDT".
# NOTE: "%-I" (no zero padding) is a platform strftime extension.
updated_str = now.strftime("%d %B %Y at %-I:%M %p %Z")

# ————— Insert Action column —————
df.insert(6, 'Action', '→')

# ————— HTML5 page template: search box, sticky header, sticky first two columns —————
# Plain (non-f) string, so the braces in the CSS/JS are literal. The tokens
# {autosomal_count}, {additional_str}, {updated_str} and <!-- TABLE_PLACEHOLDER -->
# are placeholders that are filled in afterwards with str.replace().
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>Ancestor Register</title>
  <script src="../sorttable.js" type="text/javascript"></script>

  <script>
    // Filter table rows based on search box input
    function filterTable() {
      const query = document.getElementById('searchBox').value.toLowerCase();
      const rows = document.querySelectorAll('#table-container table.sortable tbody tr');
      rows.forEach(row => {
        const cells = row.querySelectorAll('td');
        let match = false;
        cells.forEach(cell => {
          if (cell.textContent.toLowerCase().includes(query)) {
            match = true;
          }
        });
        // Explicitly set display to "table-row" when matching, "none" otherwise
        row.style.display = match ? "table-row" : "none";
      });
    }

    // Scroll to top of table container
    function scrollToTop() {
      const container = document.getElementById('table-container');
      if (!container) return;
      container.scrollTo({ top: 0, behavior: 'smooth' });
    }
  </script>

  <style>
    body { margin: 0; padding: 0; font-family: Arial,Helvetica,sans-serif; background: #faf9d3; font-size: 14px; }
    .intro { padding: 20px; text-align: center; }
    .intro h2 { margin: 0 0 10px; }
    .intro p { margin: 0.5em 0; }
    .meta  { font-size: 0.9em; margin-bottom: 15px; display: inline-block; }
    /* Search box styling */
    #searchBox {
      margin-left: 10px;
      padding: 4px 8px;
      font-size: 0.9em;
      border: 1px solid #333;
      border-radius: 4px;
    }

    .output-table {
      max-height: 75vh;
      overflow: auto;
      -webkit-overflow-scrolling: touch;
      border: 1px solid #333;
      position: relative;
      margin: 0 20px;
    }

    table.sortable {
      width: 100%;
      border-collapse: collapse;
      min-width: 600px;
    }
    th, td {
      border: 1px solid #333;
      padding: 5px 8px;
      background: #faf9d3;
      white-space: nowrap;
    }

    /* 1) Sticky horizontal header */
    th {
      position: sticky;
      top: 0;
      background: #ffffcc;
      z-index: 2;
      text-align: center;
    }
    th:hover { background: #ffeb99; }

    /* 2) Sticky first two columns (including headers) */
    th:nth-child(1), td:nth-child(1) {
      position: sticky;
      left: 0;
      background: #ffffcc;
      z-index: 3;
    }
    th:nth-child(2), td:nth-child(2) {
      position: sticky;
      left: 80px; /* match column 1 width */
      background: #ffffcc;
      z-index: 3;
    }
    th:nth-child(1), th:nth-child(2) {
      z-index: 4; /* ensure header cells sit above others */
    }

    /* adjust min-widths if needed */
    th:nth-child(1), td:nth-child(1) { min-width: 80px; }
    th:nth-child(2), td:nth-child(2) { min-width: 100px; }

    /* other columns normal */
    th:nth-child(7), td:nth-child(7) { width: 40px; }
    th:nth-child(8), td:nth-child(8) { text-align: left; }

    .match { background: #fff; }
    .blank { background: #ccc; color: #ccc; }

    .back-to-top {
      position: fixed;
      bottom: 20px;
      right: 20px;
      background: #333;
      color: #fff;
      padding: 8px 12px;
      border-radius: 4px;
      font-size: 12px;
      opacity: 0.7;
      cursor: pointer;
      z-index: 1000;
    }
    .back-to-top:hover { opacity: 1; }

    @media (max-width: 600px) {
      body { font-size: 12px; }
      table.sortable { min-width: 480px; }
      th, td { padding: 4px 6px; }
    }
  </style>
</head>
<body>
  <div id="top"></div>
  <div class="intro">
    <h2>Ancestor Register</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/ons_yates_dna_register.htm">DNA Register</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
<p>
  <input type="text" id="searchBox" placeholder="Search this page..." oninput="filterTable()">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
  <b><i>Click on the header to sort any column</i></b>

</p>

  </div>

  <div class="output-table" id="table-container">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <div class="back-to-top" onclick="scrollToTop()">
    Back to Top ↑
  </div>
</body>
</html>"""

# ————— Build and inject sortable table —————
final_cols = ["ID#", "Match to", "cM", "haplogroup", "Value Range", "Value Label", "Action", "Yates DNA Ancestral Line"]
# escape=False keeps the <a> tags built for the haplogroup column as real links.
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")
# NOTE(review): this literal pattern has no indentation between <tbody> and <tr>;
# if to_html indents its rows the replace is a no-op — confirm whether the
# "first-row" id is still needed.
html_table = html_table.replace('<tbody>\n<tr>', '<tbody>\n<tr id="first-row">', 1)

# Only a missing count is "Unknown"; a genuine count of 0 must be shown as 0.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)

final_html = (full_html_template
  .replace("{autosomal_count}", count_text)
  .replace("{additional_str}", additional_str)
  .replace("{updated_str}", updated_str)
  .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("yates_ancestor_register.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT', 21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    # Encrypt the data channel too, as the other upload cells in this notebook do.
    ftps.prot_p()
    try:
        ftps.delete("yates_ancestor_register.htm")
    except Exception:
        # Best-effort cleanup: the remote file may simply not exist yet.
        pass
    # Context manager closes the local file once the transfer finishes.
    with open("yates_ancestor_register.htm", "rb") as upload_fh:
        ftps.storbinary("STOR yates_ancestor_register.htm", upload_fh)

# ————— Persist count —————
# Stored so the next run can report the change since this one.
if autosomal_count is not None:
    with open("autosomal_count_prev.txt", "w") as f:
        f.write(str(autosomal_count))

print("✅ Full DNA Report Card with improved search box, sticky header, and sticky columns uploaded.")

✅ Full DNA Report Card with improved search box, sticky header, and sticky columns uploaded.


In [None]:
# Gold Cell 3 for Y-DNA Grid with Auto-Adjusting Column Widths

import os
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS

# ── PATHS ─────────────────────────────────────────────────────────────────
combo_csv  = "/content/y_dna_user_detail_combo.csv"
output_csv = "/content/y_dna_grid.csv"
output_htm = "/content/y_dna_grid.htm"

# ── 1) Load vertical data ─────────────────────────────────────────────────
df = pd.read_csv(combo_csv)

# Rename “Date” → “Era” (returns a new frame rather than mutating in place)
if "Date" in df.columns:
    df = df.rename(columns={"Date": "Era"})

# ── 2) Insert Action *after* Era ──────────────────────────────────────────
# Assumes Era is the second column (index 1), so Action goes at index 2.
df.insert(2, "Action", ["→"] * len(df))

# ── 3) Save vertical CSV ─────────────────────────────────────────────────
df.to_csv(output_csv, index=False)
print(f"✅ Saved vertical grid CSV to {output_csv}")

# ── 4) Build HTML ─────────────────────────────────────────────────────────
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints EST or EDT as appropriate; the literal "EDT" was wrong outside
# daylight-saving time. ("%-m", "%-d", "%-I" are platform strftime extensions.)
ts  = now.strftime("%-m/%-d/%y, %-I:%M %p %Z")
cols = df.columns.tolist()

# Static head of the page: inline CSS, title, the "Updated" timestamp ({ts}),
# the return link, and the opening <table>/<thead>/<tr> tags. This is an
# f-string, so the doubled braces {{ }} are literal CSS braces. The header
# cells, body rows and closing tags are appended to `html` afterwards.
html = f"""<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Yates Y-DNA Grid</title>
<style>
body {{
  background: #faf9d3;
  font-family: Arial, sans-serif;
  font-size: 14px;
  margin: 0;
  padding: 0;
}}
.container {{
  padding: 10px;
}}
.table-container {{
  overflow-x: auto;
  max-height: 80vh;
}}
table {{
  border: 2px solid #333;
  border-collapse: collapse;
  margin: 0 auto;
}}
table.mainsection {{
  /* allows CSS targeting of blank under “Year” */
}}
thead {{
  display: table-header-group;
}}
thead th {{
  position: sticky;
  top: 0;
  background: #333;
  color: #fff;
  padding: 6px;
  border: 1px solid #999;
  z-index: 3;
}}
a {{
  color: #fff;
  text-decoration: underline;
}}
.era {{
  background: #666;
  color: #eee;
  padding: 6px;
  border: 1px solid #999;
  font-size: 0.9em;
}}
.action {{
  background: #fff;
  padding: 6px;
  border: 1px solid #999;
  text-align: center;
}}
td {{
  padding: 6px;
  border: 1px solid #999;
  text-align: center;
}}
th:nth-child(n+4),
td:nth-child(n+4) {{
  border: 1px solid #333;
}}
.match {{
  background: #fff;
}}
.blank {{
  background: #ccc;
  color: #ccc;
}}
/* make the blank under the “Year” header match the era-cell background */
table.mainsection td.blank:nth-child(2) {{
  background-color: #fdfcd0;
}}
</style>
</head>
<body>
  <div class="container">
    <h1 style="text-align:center">Yates Y-DNA Grid</h1>
    <p style="text-align:center;font-size:0.9em">Updated: {ts}</p>
    <p style="text-align:center;margin-bottom:12px">
      <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">
        Return to DNA Cousin Surname Study
      </a>
    </p>
    <div class="table-container">
      <table class="mainsection">
        <thead>
          <tr>"""

# Header row: fixed captions for the first three columns; every later column
# links to the vertical chart of the person whose ID prefixes the column name.
fixed_captions = ("SNP", "Year", "Action")
header_cells = []
for position, column in enumerate(cols):
    if position < len(fixed_captions):
        header_cells.append(f"<th>{fixed_captions[position]}</th>")
        continue
    person_id = column.split("-")[0].upper()
    header_cells.append(
        '<th>'
        f'<a href="https://yates.one-name.net/tng/verticalchart.php?'
        f'personID={person_id}&tree=tree1&parentset=0&display=vertical&generations=15">{column}</a>'
        '</th>'
    )
html += "".join(header_cells)

html += """
          </tr>
        </thead>
        <tbody>"""

# Data rows: column 0 is always printed as-is; any other empty/NaN cell is
# rendered as a greyed-out dash, otherwise it gets the class for its position.
css_by_position = {1: "era", 2: "action"}
row_chunks = []
for _, record in df.iterrows():
    row_chunks.append("<tr>")
    for position, column in enumerate(cols):
        value = record[column]
        if position == 0:
            row_chunks.append(f"<td>{value}</td>")
        elif pd.isna(value) or not str(value).strip():
            row_chunks.append('<td class="blank">–</td>')
        else:
            css_class = css_by_position.get(position, "match")
            row_chunks.append(f'<td class="{css_class}">{value}</td>')
    row_chunks.append("</tr>")
html += "".join(row_chunks)

html += """
        </tbody>
      </table>
    </div>
  </div>
</body>
</html>"""

with open(output_htm, "w", encoding="utf-8") as f:
    f.write(html)
print(f"✅ Saved vertical XHTML to {output_htm}")

# ── 5) FTP upload ────────────────────────────────────────────────────────
# The context manager closes the control connection even when a step fails.
with FTP_TLS() as ftp:
    # Default to port 21 when FTP_PORT is unset, as the other upload cells do.
    ftp.connect(os.environ["FTP_HOST"], int(os.environ.get("FTP_PORT", 21)))
    ftp.login(os.environ["FTP_USER"], os.environ["FTP_PASS"])
    ftp.prot_p()  # encrypt the data channel as well
    for path in (output_csv, output_htm):
        fn = os.path.basename(path)
        try:
            ftp.delete(fn)
        except Exception:
            # Best-effort cleanup: the remote file may not exist yet.
            pass
        with open(path, "rb") as fp:
            ftp.storbinary(f"STOR {fn}", fp)
print("✅ Uploaded CSV & HTML to server")



✅ Saved vertical grid CSV to /content/y_dna_grid.csv
✅ Saved vertical XHTML to /content/y_dna_grid.htm
✅ Uploaded CSV & HTML to server


In [None]:
# EXP

import os
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS

# ── CONFIG: input and output locations ───────────────────────────────────
info_csv   = "/content/haplogroup_info.csv"
user_csv   = "/content/y_dna_user_detail.csv"
output_csv = "/content/y_dna_grid.csv"
output_htm = "/content/y_dna_grid.htm"

# ── 1) Haplogroup info: one row per haplogroup, "Date" exposed as "Era" ──
df_info = pd.read_csv(info_csv)
if "Date" in df_info.columns:
    df_info = df_info.rename(columns={"Date": "Era"})
# Keep the first occurrence of each haplogroup, preserving file order.
df_info = df_info.drop_duplicates(subset="Haplogroup")
hap_order = df_info["Haplogroup"].tolist()
# Era lookup per haplogroup; blank eras when the file has no Era column.
if "Era" in df_info.columns:
    era_values = df_info["Era"]
else:
    era_values = [""] * len(df_info)
era_map   = dict(zip(df_info["Haplogroup"], era_values))

# ── 2) User detail table: first column is treated as the user id ────────
df_users = pd.read_csv(user_csv)
if "User_ID" not in df_users.columns:
    df_users = df_users.rename(columns={df_users.columns[0]: "User_ID"})
# One list per user: that row's non-empty values (as strings), in column order.
user_chains = []
for _, user_row in df_users.iterrows():
    row_values = user_row.drop(labels=["User_ID"]).tolist()
    user_chains.append(
        [str(item) for item in row_values if pd.notna(item) and str(item).strip()]
    )

# ── 3) Insert new SNPs after parent ──────────────────────────────────────
# Walk each user's chain and add any SNP missing from hap_order directly
# after the SNP that precedes it in the chain. A missing SNP with no known
# predecessor (the first entry of a chain) is appended at the end; previously
# it was skipped, and the next unknown SNP then crashed on
# hap_order.index(prev) with ValueError.
for chain in user_chains:
    prev = None
    for h in chain:
        if h not in hap_order:
            if prev in hap_order:
                hap_order.insert(hap_order.index(prev) + 1, h)
            else:
                hap_order.append(h)
        prev = h
# Build final eras list (blank for SNPs that have no entry in era_map)
eras = [era_map.get(h, "") for h in hap_order]

# ── 4) Build horizontal grid DataFrame ───────────────────────────────────
# Make sure every haplogroup has a column (blank when absent), then keep
# only User_ID plus the haplogroup columns, in hap_order.
for h in hap_order:
    if h not in df_users.columns:
        df_users[h] = ""
df_grid_h = df_users[["User_ID"] + hap_order]

# ── 5) Transform to vertical layout ─────────────────────────────────────
# After the transpose each row is a haplogroup and each column a user.
df_vert = df_grid_h.set_index("User_ID").T
# Insert Era as first column
df_vert.insert(0, 'Era', eras)
df_vert.index.name = 'SNP'
df_grid = df_vert.reset_index()

# ── 6) Save vertical CSV ─────────────────────────────────────────────────
df_grid.to_csv(output_csv, index=False)
print(f"✅ Vertical grid CSV saved to {output_csv}")

# ── 7) Generate XHTML (vertical) ────────────────────────────────────────
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints EST or EDT as appropriate; the literal "EDT" was wrong outside
# daylight-saving time. ("%-m", "%-d", "%-I" are platform strftime extensions.)
ts  = now.strftime("%-m/%-d/%y, %-I:%M %p %Z")

# Page head, styles and the opening <table> tag; rows are appended below.
template = '''<!DOCTYPE html>
<html><head><meta charset="UTF-8"><title>Yates Y-DNA Grid</title>
<style>
  body { background:#faf9d3; font-family:Arial,Helvetica,sans-serif; font-size:14px; }
  table { width:100%; border:1px solid #333; border-collapse:collapse; table-layout:auto; }
  th { background:#333; color:#fff; padding:6px; border:1px solid #999; }
  .era { background:#666; color:#eee; padding:6px; border:1px solid #999; font-size:0.9em; }
  td { padding:6px; border:1px solid #999; text-align:center; white-space:nowrap; }
  .match { background:#fff; }
  .blank { background:#ccc; color:#ccc; }
</style>
</head><body>
  <h1 style="text-align:center;">Yates Y-DNA Grid</h1>
  <table>
'''  # end template

# Header row: SNP, Era, then one column per user (every column after the first two)
cols = df_grid.columns.tolist()
user_cols = cols[2:]
user_headers = ''.join(f'<th>{uid}</th>' for uid in user_cols)
header_html = '<tr><th>SNP</th><th>Era</th>' + user_headers + '</tr>'


def _grid_cell(value):
    """Render one user cell: a greyed-out dash when empty/NaN, else the value."""
    if pd.isna(value) or not str(value).strip():
        return '<td class="blank">–</td>'
    return f'<td class="match">{value}</td>'


# Data rows: one <tr> per SNP
rows_html = [
    f'<tr><td>{record["SNP"]}</td><td class="era">{record["Era"]}</td>'
    + ''.join(_grid_cell(record[uid]) for uid in user_cols)
    + '</tr>'
    for _, record in df_grid.iterrows()
]

# Closing markup with the "Updated" stamp, then assemble and save the page
footer_html = f'''
  </table>
  <p style="text-align:right;font-size:0.9em;">Updated: {ts}</p>
</body>
</html>'''
html = ''.join([template, header_html, '\n', '\n'.join(rows_html), footer_html])
with open(output_htm, 'w', encoding='utf-8') as out_file:
    out_file.write(html)
print(f"✅ Vertical XHTML Grid saved to {output_htm}")

# ── 8) FTP Upload ───────────────────────────────────────────────────────
# The context manager closes the control connection even when a step fails.
with FTP_TLS() as ftp:
    ftp.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT', 21)))
    ftp.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    ftp.prot_p()  # encrypt the data channel as well
    for path in [output_csv, output_htm]:
        name = os.path.basename(path)
        try:
            ftp.delete(name)
        except Exception:
            # Best-effort cleanup: the remote file may not exist yet.
            pass
        with open(path, 'rb') as fp:
            ftp.storbinary(f"STOR {name}", fp)
print("✅ Uploaded to server.")



✅ Vertical grid CSV saved to /content/y_dna_grid.csv
✅ Vertical XHTML Grid saved to /content/y_dna_grid.htm
✅ Uploaded to server.


In [None]:
# Y-DNA cell 1

# === Cell 1: New user settings ===
# Column header that will be created for the new user.
USER_ID = 'I56217'

# The user's SNP chain, written as "SNP > SNP > ...".
PATH_STRING = " > ".join([
    "R-M207",
    "R-M173",
    "R-M343",
    "R-M269",
    "R-FT266064",
    "R-FT266579",
    "R-FTF17042",
])

# When True, SNPs from PATH_STRING that are not yet rows get added.
INSERT_MISSING = True

# Input master table and the file the updated copy is written to.
MASTER_CSV = '/content/y_dna_user_detail_combo.csv'
UPDATED_CSV = '/content/y_dna_user_detail_combo_updated.csv'


In [None]:
# Cell 2: Load → Append User → Save

import pandas as pd

# 1) Load the existing master CSV
df = pd.read_csv(MASTER_CSV)

# 2) Normalize the first column name to 'SNP' for easy matching
first_col = df.columns[0]
if first_col != 'SNP':
    df = df.rename(columns={first_col: 'SNP'})

# 3) Parse the new user's SNP chain
# PATH_STRING is written "A > B > C": strip the blanks around each token,
# otherwise " R-M173 " never equals "R-M173" and every SNP looks missing.
chain = [token.strip() for token in PATH_STRING.split('>') if token.strip()]

# 4) Optionally insert any SNPs not yet present (appends at bottom)
if INSERT_MISSING:
    known_snps = set(df['SNP'].values)
    missing = [s for s in chain if s not in known_snps]
    if missing:
        df = pd.concat([df, pd.DataFrame([{'SNP': s} for s in missing])],
                       ignore_index=True)

# 5) Create the new user column in the next free position
df[USER_ID] = ''

# 6) Populate: copy the SNP value into that column where it matches the chain
in_chain = df['SNP'].isin(chain)
df.loc[in_chain, USER_ID] = df.loc[in_chain, 'SNP']

# 7) Save the updated CSV back to /content
df.to_csv(UPDATED_CSV, index=False)
print(f"✅ Updated CSV saved to {UPDATED_CSV}")


✅ Updated CSV saved to /content/y_dna_user_detail_combo_updated.csv
