<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/Gold__1_%26_2_%26_3_20250521.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter
!pip install mlxtend


Collecting python-gedcom
  Downloading python_gedcom-1.0.0-py2.py3-none-any.whl.metadata (15 kB)
Downloading python_gedcom-1.0.0-py2.py3-none-any.whl (35 kB)
Installing collected packages: python-gedcom
Successfully installed python-gedcom-1.0.0
Collecting xlsxwriter
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.3


In [3]:
#credentials
# SECURITY: the Gmail app password and FTP password used to be stored in plain
# text in this cell. Treat those values as compromised and revoke/rotate them.
# Secrets are now taken from the environment, or prompted for without echo.

import os
from getpass import getpass

# Non-secret connection settings: keep whatever is already in the environment,
# otherwise fall back to the values this notebook has always used.
os.environ.setdefault('GMAIL_USER', 'yatesvilleron@gmail.com')
os.environ.setdefault('FTP_HOST', 'ftp.one-name.net')
os.environ.setdefault('FTP_PORT', '21')
os.environ.setdefault('FTP_USER', 'admin@yates.one-name.net')

# Secrets: never written into the notebook. Enter the value exactly as issued
# (no surrounding < or > characters).
for _secret_name in ('GMAIL_APP_PASSWORD', 'FTP_PASS'):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")


In [20]:
# Cell 1 20250513
#!/usr/bin/env python
"""
GEDCOM Composite Score Script using:
 - Chunk-based Parallel Processing for Speed (Stage 1: genealogical line creation)
 - A Trie-based approach, then final "Value" = 5 * (number of couples with node.count >=2) + (total couples)

For ancestral lines where none of the couples are repeated (a one-off line), the Value is still computed.
Now, instead of composite scoring, two new columns are added:
  - Value Range (the numeric bracket)
  - Value Label (a descriptive label)

Exports final CSV/HTML sorted by "Yates DNA Ancestral Line", including a 'haplogroup' column.
"""
import csv
import glob
import logging
import functools
import os
from datetime import datetime
from collections import defaultdict, Counter
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from IPython.display import display, Javascript

# Root logging configuration: INFO and above, timestamped "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

###############################################################################
# Global Variables
###############################################################################
# Overwritten by GedcomDataset.get_gen_person() with "<surname><first name>" of
# the record it was last called on.
anchor_gen1 = None
# (father_id, mother_id) pairs already recorded by find_parents(); reset for
# every record by process_record_wrapper().
visited_pairs = set()
# (generation, (father_id, mother_id)) tuples appended by find_parents(); reset
# for every record by process_record_wrapper().
generation_table = []

###############################################################################
# Trie Data Structure
###############################################################################
class TrieNode:
    """One position in the couple trie.

    Attributes:
        count: starts at 0; Trie.insert_line increments it each time an
            inserted line passes through this node.
        children: maps the next couple string to its child TrieNode.
    """
    def __init__(self):
        self.children = dict()
        self.count = 0

class Trie:
    """Prefix tree over ancestral lines, where each step is one couple string."""

    def __init__(self):
        self.root = TrieNode()

    def insert_line(self, couples_list):
        """Walk ``couples_list`` from the root, creating nodes as needed.

        Every node reached along the way has its ``count`` incremented by one.
        """
        node = self.root
        for couple in couples_list:
            child = node.children.get(couple)
            if child is None:
                child = TrieNode()
                node.children[couple] = child
            child.count += 1
            node = child

    def get_couple_count(self, couples_list):
        """Return the ``count`` of each node along ``couples_list``.

        The walk stops at the first couple that is not in the trie; a single 0
        is appended for that couple and nothing is reported for later ones.
        """
        node = self.root
        tallies = []
        for couple in couples_list:
            node = node.children.get(couple)
            if node is None:
                tallies.append(0)
                return tallies
            tallies.append(node.count)
        return tallies

###############################################################################
# Utility: chunk generator
###############################################################################
def chunks(lst, n):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each."""
    offsets = range(0, len(lst), n)
    yield from (lst[offset:offset + n] for offset in offsets)

###############################################################################
# GedcomDataset
###############################################################################
class GedcomDataset:
    """Tag values collected for one GEDCOM individual (INDI) record.

    ``gen_person`` is the record pointer as it appears in the file (e.g.
    ``@I12@``). ``extractable_detail`` maps a tag (``NAME``, ``FAMC``,
    ``NPFX``) to its raw value. The NPFX accessors read that value as
    ``<cM>&<match-to>**<haplogroup>``, where the ``&`` and ``**`` parts are
    optional.

    A tag stored with the value ``None`` (a GEDCOM line with no payload) is
    treated as an empty string by every accessor instead of raising.
    """
    def __init__(self, gen_person):
        self.gen_person = gen_person
        self.extractable_detail = {}
        self.anchor_gen1 = None

    def add_extractable_detail(self, key, value):
        """Store ``value`` under tag ``key``, replacing any earlier value."""
        self.extractable_detail[key] = value

    def _detail_text(self, key):
        """Return the value stored for ``key``, or '' when absent or None."""
        value = self.extractable_detail.get(key)
        return '' if value is None else value

    def get_gen_person(self):
        """Return the record id without the surrounding ``@`` characters.

        Side effect: recomputes ``self.anchor_gen1`` (surname followed by the
        first word of the given name, spaces removed) and copies it into the
        module-level ``anchor_gen1``.
        """
        name = self._detail_text('NAME')
        parts = name.split('/', 1)
        first_name = parts[0].split(' ')[0]
        last_name = parts[1].rstrip('/') if len(parts) > 1 else ""
        self.anchor_gen1 = last_name.replace(" ", "") + first_name.replace(" ", "")
        global anchor_gen1
        anchor_gen1 = self.anchor_gen1
        return self.gen_person.strip('@')

    def get_extractable_NPFX(self):
        """Return the raw NPFX value ('' when missing)."""
        return self._detail_text('NPFX')

    def get_extractable_cm(self):
        """Return the leading NPFX field as a string if it parses as an int, else ''."""
        npfx_value = self._detail_text('NPFX')
        if '&' in npfx_value:
            cm_value = npfx_value.split('&')[0].strip()
        elif '**' in npfx_value:
            cm_value = npfx_value.split('**')[0].strip()
        else:
            cm_value = npfx_value.strip()
        try:
            int(cm_value)
        except ValueError:
            return ''
        return cm_value

    def get_extractable_sort(self):
        """Return the NPFX field after the first ``&`` (up to any ``**``), or ''."""
        npfx_value = self._detail_text('NPFX')
        if '&' not in npfx_value:
            return ''
        sort_part = npfx_value.split('&')[1]
        if '**' in sort_part:
            sort_part = sort_part.split('**')[0]
        return sort_part.strip()

    def get_extractable_YDNA(self):
        """Return the NPFX field after the first ``**``, or '' when there is none."""
        npfx_value = self._detail_text('NPFX')
        if '**' in npfx_value:
            return npfx_value.split('**')[1].strip()
        return ''

    def get_extractable_FAMC(self):
        """Return the FAMC pointer without ``@`` characters ('' when missing)."""
        return self._detail_text('FAMC').strip('@')

###############################################################################
# Gedcom Class
###############################################################################
class Gedcom:
    """Line-oriented reader that collects the INDI records of one GEDCOM file.

    Attributes:
        file_name: path of the GEDCOM file to read.
        gedcom_datasets: one GedcomDataset per INDI record, in file order.
        filter_pool: the datasets that carry a non-empty NPFX value, optionally
            narrowed further by the IDs listed in ``filtered_ids.xlsx``.
    """
    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []
        self.filter_pool = []

    def parse_gedcom(self):
        """Read the file, fill ``gedcom_datasets`` / ``filter_pool``, print a summary.

        Returns:
            int: the number of NPFX tags seen minus those containing ``**``
            (reported as "Autosomal matches").
        """
        current_dataset = None
        npfx_count = 0
        ydna_count = 0
        total_count = 0

        # Stream the file instead of loading every line into memory first.
        with open(self.file_name, 'r', encoding='utf-8-sig') as f:
            for line in f:
                parts = line.strip().split(' ', 2)
                if len(parts) < 2:
                    continue  # blank line or a bare level number
                try:
                    level = int(parts[0])
                except ValueError:
                    continue  # not "<level> <tag> ..."; e.g. wrapped free text
                tag = parts[1]
                value = parts[2] if len(parts) > 2 else None

                if level == 0:
                    if tag.startswith('@') and tag.endswith('@') and value == 'INDI':
                        total_count += 1
                        current_dataset = GedcomDataset(tag)
                        self.gedcom_datasets.append(current_dataset)
                    else:
                        # Any other top-level record ends the current individual;
                        # otherwise its "1 NAME" line would overwrite the name of
                        # the last INDI record.
                        current_dataset = None
                    continue

                if current_dataset is None:
                    continue

                # A tag without a payload is stored as '' so the accessors never see None.
                text = '' if value is None else value
                if level == 1 and tag in ['NAME', 'FAMC']:
                    current_dataset.add_extractable_detail(tag, text)
                elif level == 2 and tag == 'NPFX':
                    npfx_count += 1
                    current_dataset.add_extractable_detail(tag, text)
                    if '**' in text:
                        ydna_count += 1

        autosomal_count = npfx_count - ydna_count
        print(f"GEDCOM contained {total_count} total records")
        print(f"Records tagged and filtered by NPFX: {npfx_count}")
        print(f"Records with YDNA information: {ydna_count}")
        print(f"Autosomal matches: {autosomal_count}")

        self.filter_pool.extend(ds for ds in self.gedcom_datasets if ds.get_extractable_NPFX())

        manual_filter_activated = True
        if manual_filter_activated:
            try:
                df = pd.read_excel('filtered_ids.xlsx')
            except FileNotFoundError:
                logger.warning("filtered_ids.xlsx not found. Skipping second-level manual filter.")
            else:
                manual_filtered_ids = set(df['ID'])
                self.filter_pool = [d for d in self.filter_pool if d.get_gen_person() in manual_filtered_ids]
                print(f"After manual filter, total records: {len(self.filter_pool)}")
                logger.info(f"After manual filter, total records: {len(self.filter_pool)}")

        return autosomal_count

###############################################################################
# quick_extract_name
###############################################################################
def quick_extract_name(full_text):
    """Build a compact "<surname><given>" label from a record's first ``1 NAME`` line.

    Args:
        full_text: raw text of one GEDCOM record.

    Returns:
        str: surname and given-name part, each truncated to 10 characters and
        then stripped of spaces. If the name has no ``/surname/`` part, the
        first 10 characters of the name with spaces removed. ``"UnknownName"``
        when the record has no ``1 NAME`` line.
    """
    name_marker = "\n1 NAME "
    leading_marker = "1 NAME "
    idx = full_text.find(name_marker)
    if idx != -1:
        start = idx + len(name_marker)
    elif full_text.startswith(leading_marker):
        # The leading form has no newline, so it is one character shorter than
        # name_marker; using name_marker's length would drop the first letter.
        start = len(leading_marker)
    else:
        return "UnknownName"
    end = full_text.find('\n', start)
    if end == -1:
        end = len(full_text)
    name_line = full_text[start:end].strip()
    if '/' not in name_line:
        return name_line[:10].replace(" ", "")
    first_name, last_name = name_line.split('/', 1)
    last_name = last_name.replace("/", "").strip()
    return last_name[:10].replace(" ", "") + first_name[:10].replace(" ", "")

###############################################################################
# Parents & Ancestors
###############################################################################
def find_parents(individual_id, generation, parents_map):
    """Record every ancestor couple reachable from ``individual_id``.

    Each couple is appended once to the module-level ``generation_table`` as
    ``(generation, (father_id, mother_id))``, using the generation at which it
    is first reached. ``visited_pairs`` tracks the couples already recorded.

    Args:
        individual_id: id of the person whose parents are looked up.
        generation: generation number to record for this person's parents.
        parents_map: dict mapping a child id to ``(father_id, mother_id)``.
    """
    global visited_pairs, generation_table
    if individual_id not in parents_map:
        return
    father_id, mother_id = parents_map[individual_id]
    if not father_id and not mother_id:
        return
    pair = (father_id, mother_id)
    if pair in visited_pairs:
        # Everything above this couple was explored when it was first recorded.
        # Stopping here adds nothing different to generation_table, but avoids
        # re-walking shared ancestors and endless recursion on cyclic data.
        return
    visited_pairs.add(pair)
    generation_table.append((generation, pair))
    if father_id:
        find_parents(father_id, generation+1, parents_map)
    if mother_id:
        find_parents(mother_id, generation+1, parents_map)

def find_distant_ancestors(individual_id, parents_map, path=None):
    """List every ancestor path from ``individual_id`` up to a terminal ancestor.

    Args:
        individual_id: id of the person the paths start from.
        parents_map: dict mapping a child id to ``(father_id, mother_id)``.
        path: ids already walked (used by the recursion); ``individual_id`` is
            appended to it in place.

    Returns:
        list[list]: one id list per path, starting with the ids in ``path``
        followed by ``individual_id``. A path ends at a person with no known
        parent, or at a person whose remaining parents are already on the path
        (cyclic data), so the recursion always terminates.
    """
    if path is None:
        path = []
    path.append(individual_id)
    father_id, mother_id = parents_map.get(individual_id, (None, None))
    paths = []
    for parent_id in (father_id, mother_id):
        # Skip unknown parents and anyone already on this path (cycle guard).
        if parent_id and parent_id not in path:
            paths.extend(find_distant_ancestors(parent_id, parents_map, path[:]))
    return paths if paths else [path]

###############################################################################
# filter_ancestral_line
###############################################################################
def filter_ancestral_line(winning_path_ids, generation_table_local, names_map):
    """Render the couples that touch the winning path as one ``~~~``-joined string.

    A ``(generation, (id1, id2))`` entry is kept when either id is in
    ``winning_path_ids``. Kept entries are ordered by ascending generation
    (stable), rendered as ``"<name1>&<name2>"`` using ``names_map`` (falling
    back to "UnknownName"), and the resulting list is reversed before joining.
    """
    def touches_path(entry):
        id1, id2 = entry[1]
        return id1 in winning_path_ids or id2 in winning_path_ids

    kept = sorted(filter(touches_path, generation_table_local), key=lambda entry: entry[0])

    rendered = []
    for _generation, (first_id, second_id) in reversed(kept):
        first_name = names_map.get(first_id, "UnknownName")
        second_name = names_map.get(second_id, "UnknownName")
        rendered.append(f"{first_name}&{second_name}")
    return "~~~".join(rendered)

###############################################################################
# process_record_wrapper (parallel) - STAGE 1
###############################################################################
def process_record_wrapper(individual_id, gedcom_instance, parents_map, names_map):
    """Stage 1 worker: build the output row for one individual.

    Among all ancestor paths of the individual, the first path with the highest
    score wins, where a path's score is the sum of the 1-based positions whose
    mapped name contains 'Yates'. The couples touching that path are rendered
    by filter_ancestral_line().

    Returns:
        list: ``[ID#, Match to, Name, cM, Yates DNA Ancestral Line, haplogroup]``.
    """
    global generation_table, visited_pairs
    # find_parents() writes into these module globals, so start each record clean.
    visited_pairs = set()
    generation_table = []
    find_parents(individual_id, 1, parents_map)

    def yates_weight(id_path):
        return sum(
            position
            for position, pid in enumerate(id_path, start=1)
            if 'Yates' in names_map.get(pid, "UnknownName")
        )

    candidate_paths = find_distant_ancestors(individual_id, parents_map)
    # max() keeps the first of several equally scored paths.
    winning_path = max(candidate_paths, key=yates_weight, default=None) or []
    ancestor_ids = {pid for pid in winning_path if pid != individual_id}
    line_str = filter_ancestral_line(ancestor_ids, generation_table, names_map)

    matching_ds = next(
        (ds for ds in gedcom_instance.filter_pool if ds.get_gen_person() == individual_id),
        None,
    )
    if matching_ds is None:
        cm_value = sort_value = ydna_value = ''
    else:
        cm_value = matching_ds.get_extractable_cm()
        sort_value = matching_ds.get_extractable_sort()
        ydna_value = matching_ds.get_extractable_YDNA()

    # Return columns: ID#, Match to, Name, cM, Yates DNA Ancestral Line, haplogroup
    return [
        individual_id,
        sort_value,
        names_map.get(individual_id, "UnknownName"),
        cm_value,
        line_str,
        ydna_value,
    ]

###############################################################################
# main()
###############################################################################
# Column holding the "~~~"-joined ancestral line of each match.
_LINE_COLUMN = "Yates DNA Ancestral Line"

# Fixed leading run of couples that main() strips from any line starting with it.
_COMMON_LINE_PREFIX = "YatesJohn&SearchingStill~~~YatesWilliam&SearchingStill~~~YatesWilliam&SearchingStill~~~YatesEdmund&CornellMargaret~~~YatesRichard&AshendonJoan~~~YatesJohn&HydeAlice~~~YatesThomas&WhiteFrances~~~"


def _select_gedcom():
    """Return the alphabetically first ``*.ged`` file in the working directory, or None."""
    # Sorted so the choice is deterministic; glob order is otherwise arbitrary.
    files = sorted(glob.glob("*.ged"))
    if not files:
        print("No GEDCOM files found.")
        return None
    print("Automatically selecting the first GEDCOM file.")
    return files[0]


def _load_records(gedcom_file_path):
    """Split the file on level-0 lines and return ``{record id: record text}``."""
    # utf-8-sig matches Gedcom.parse_gedcom and drops a leading byte-order mark.
    with open(gedcom_file_path, 'r', encoding='utf-8-sig') as f:
        raw_data = f.read()

    all_records = {}
    for blk in raw_data.split('\n0 '):
        blk = blk.strip()
        if not blk:
            continue
        first_line = blk.split('\n', 1)[0]
        if '@' in first_line:
            start = first_line.find('@') + 1
            end = first_line.find('@', start)
            all_records[first_line[start:end].strip()] = blk
    return all_records


def _pointer_after(txt, marker):
    """Return the id between ``marker`` and the next '@' in ``txt``, or None."""
    idx = txt.find(marker)
    if idx == -1:
        return None
    begin = idx + len(marker)
    return txt[begin:txt.find('@', begin)]


def _build_parents_map(all_records):
    """Map each child id to the ``(husband_id, wife_id)`` of its FAM record."""
    parents_map = {}
    for txt in all_records.values():
        header = txt.split('\n', 1)[0].split()
        # Only records typed FAM on their header line are families; a substring
        # test would also match FAMC/FAMS lines or names inside INDI records.
        if len(header) < 2 or header[1] != 'FAM':
            continue
        couple = (_pointer_after(txt, '1 HUSB @'), _pointer_after(txt, '1 WIFE @'))
        for ln in txt.split('\n'):
            if ln.strip().startswith('1 CHIL @'):
                parents_map[ln.split('@')[1]] = couple
    return parents_map


def _split_couples(line_str):
    """Split an ancestral line into its non-empty couple strings ([] if blank/NaN)."""
    if pd.isna(line_str) or not line_str.strip():
        return []
    return [part.strip() for part in line_str.split("~~~") if part.strip()]


def _assign_value_range_label(val):
    """Return the ``(Value Range, Value Label)`` pair for one numeric Value."""
    try:
        v = float(val)
    except (TypeError, ValueError):
        return "", ""
    if v >= 60: return ">=60", "1-likely correct"
    if 47 <= v <= 59: return "59~47", "2-lines forming"
    if 34 <= v <= 46: return "46~34", "3-patterns emerging"
    if 21 <= v <= 33: return "33~21", "4-notable patterns"
    if 8 <= v <= 20: return "20~8", "5-patterns stable"
    if 1 <= v <= 7:  return f"{v:.0f}", "6-need research"
    return f"{v:.0f}", "0-uncategorized"


def _export_html(df, html_name):
    """Write the report columns of ``df`` to ``html_name`` with a small stylesheet."""
    css_style = """
    <style>
    table { width: 100%; border-collapse: collapse; margin: 20px 0; }
    table, th, td { border: 1px solid #333; }
    th, td { padding: 8px 12px; text-align: center; }
    th { background-color: #f2f2f2; }
    /* Left-align the last column */
    td:nth-child(7) { text-align: left; }
    </style>
    """
    final_cols = ["ID#", "cM", "haplogroup", "Match to", "Value Range", "Value Label", "Yates DNA Ancestral Line"]
    # NOTE(review): escape=False writes GEDCOM-derived text into the page unescaped.
    html_content = css_style + df.to_html(index=False, columns=final_cols, escape=False)
    with open(html_name, "w", encoding="utf-8") as f:
        f.write(html_content)


def main():
    """Run the pipeline: parse the GEDCOM, build lines, score them, export CSV/HTML.

    Side effects: writes ``autosomal_count.txt``,
    ``final_combined_df_with_value_labels.csv`` and
    ``HTML_combined_df_with_value_labels.html`` in the working directory.
    Returns None; returns early when no ``*.ged`` file is present.
    """
    gedcom_file_path = _select_gedcom()
    if not gedcom_file_path:
        print("No GEDCOM file selected; exiting.")
        return

    ged = Gedcom(gedcom_file_path)
    autosomal_count = ged.parse_gedcom()
    filter_count = len(ged.filter_pool)

    with open("autosomal_count.txt", "w") as f:
        f.write(str(autosomal_count))

    print("Records tagged and filtered by NPFX:", filter_count)

    all_records = _load_records(gedcom_file_path)
    names_map = {rec_id: quick_extract_name("\n" + txt) for rec_id, txt in all_records.items()}
    parents_map = _build_parents_map(all_records)

    individual_ids = [d.get_gen_person() for d in ged.filter_pool]
    print(f"Processing {len(individual_ids)} individuals with chunk-based parallel...")

    combined_rows = []
    chunk_size = 50
    max_workers = os.cpu_count() or 4
    logger.info("Starting chunk-based parallel processing with %d workers.", max_workers)

    # Built once: the partial carries the full maps, which are expensive to pickle.
    func = functools.partial(process_record_wrapper, gedcom_instance=ged, parents_map=parents_map, names_map=names_map)
    with ProcessPoolExecutor(max_workers=max_workers) as executor, tqdm(total=len(individual_ids), desc="Building Yates Lines (Stage 1)") as pbar:
        for chunk in chunks(individual_ids, chunk_size):
            # Batch the ids so the partial is shipped to a worker once per batch
            # rather than once per individual. Result order is unchanged.
            per_task = -(-len(chunk) // max_workers)
            combined_rows.extend(executor.map(func, chunk, chunksize=per_task))
            pbar.update(len(chunk))

    columns = ["ID#", "Match to", "Name", "cM", "Yates DNA Ancestral Line", "haplogroup"]
    df = pd.DataFrame(combined_rows, columns=columns)
    df.index += 1

    prefix = _COMMON_LINE_PREFIX
    df[_LINE_COLUMN] = [
        line[len(prefix):] if line.startswith(prefix) else line
        for line in df[_LINE_COLUMN]
    ]

    logger.info("Building Trie from reversed lines...")
    couples_per_row = [_split_couples(line) for line in df[_LINE_COLUMN]]
    trie = Trie()
    for couples in couples_per_row:
        trie.insert_line(couples)

    logger.info("Computing 'Value' = 5*(#couples with node.count >=2) + (total couples) ...")
    values = []
    for couples in couples_per_row:
        shared = sum(1 for c in trie.get_couple_count(couples) if c >= 2)
        values.append(5 * shared + len(couples))
    df["Value"] = values

    # Built as two lists so an empty result set does not break tuple unpacking.
    range_label_pairs = [_assign_value_range_label(v) for v in df["Value"]]
    df["Value Range"] = [pair[0] for pair in range_label_pairs]
    df["Value Label"] = [pair[1] for pair in range_label_pairs]

    df = df.sort_values(by=[_LINE_COLUMN])

    csv_name = "final_combined_df_with_value_labels.csv"
    df.to_csv(csv_name, index=False)
    logger.info("Exported final DataFrame to '%s'.", csv_name)

    html_name = "HTML_combined_df_with_value_labels.html"
    _export_html(df, html_name)
    logger.info("Exported HTML to '%s'.", html_name)

if __name__ == '__main__':
    main()
    try:
        display(Javascript('alert("✅ GEDCOM processing (and HTML export) is complete!");'))
    except Exception:
        # The browser alert is a convenience only; never fail the run over it.
        # (Exception rather than a bare except, so Ctrl-C still interrupts.)
        pass

import smtplib, ssl
from email.mime.text import MIMEText

def send_email(subject, body, to_addr):
    """Send a plain-text email through Gmail's SMTP-over-SSL endpoint.

    The sender address and password are read from the ``GMAIL_USER`` and
    ``GMAIL_APP_PASSWORD`` environment variables (KeyError if either is unset).
    """
    account = os.environ['GMAIL_USER']
    app_password = os.environ['GMAIL_APP_PASSWORD']

    message = MIMEText(body)
    for header, value in (('Subject', subject), ('From', account), ('To', to_addr)):
        message[header] = value

    tls_context = ssl.create_default_context()
    with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=tls_context) as connection:
        connection.login(account, app_password)
        connection.send_message(message)

# Email summary: total row count plus the five lines with the highest Value,
# mailed to the sending account itself.
df_summary = pd.read_csv("final_combined_df_with_value_labels.csv")
total = len(df_summary)
top5 = (
    df_summary
    .sort_values('Value', ascending=False)
    .head(5)['Yates DNA Ancestral Line']
    .tolist()
)
top_lines = "\n".join(f"- {line}" for line in top5)
summary = f"GEDCOM processing complete!\n\nTotal lines: {total}\nTop 5 lines:\n" + top_lines
send_email(subject="✅ Cell #1 Report Ready", body=summary, to_addr=os.environ['GMAIL_USER'])




Automatically selecting the first GEDCOM file.
GEDCOM contained 60443 total records
Records tagged and filtered by NPFX: 1474
Records with YDNA information: 90
Autosomal matches: 1384
After manual filter, total records: 250
Records tagged and filtered by NPFX: 250
Processing 250 individuals with chunk-based parallel...


Building Yates Lines (Stage 1): 100%|██████████| 250/250 [02:38<00:00,  1.58it/s]


<IPython.core.display.Javascript object>

In [22]:
# TEMP 2.4 (WORKS and is pending as GOLD #2)

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load the Cell 1 export; missing haplogroups become '' —————
df = pd.read_csv("final_combined_df_with_value_labels.csv")
df = df.assign(haplogroup=df['haplogroup'].fillna(''))

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in the anchor tag for its kind of ID.

    ``Y-...`` links to the Y-designation overview page, ``dar-A-...`` and
    ``sar-P-...`` link to the DAR / SAR search URLs with the value as the
    query, and anything else links to ``<hap_base><value>.htm``.

    The value is percent-encoded inside the URL and HTML-escaped as link text,
    so markup characters in the GEDCOM data cannot break the page. Empty or
    non-string input (e.g. NaN) returns ''.
    """
    from html import escape
    from urllib.parse import quote

    if not isinstance(x, str) or not x:
        return ''
    if x.startswith("Y-"):
        href = ydna_overview
    elif x.startswith("dar-A-"):
        href = dar_base + quote(x, safe='')
    elif x.startswith("sar-P-"):
        href = sar_base + quote(x, safe='')
    else:
        href = f"{hap_base}{quote(x, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(x)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# A missing or unparsable count file yields None instead of aborting the report.
try:
    with open("autosomal_count.txt", "r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

prev_count = None
additional_str = ""
try:
    with open("autosomal_count_prev.txt", "r") as f:
        prev_count = int(f.read().strip())
except (OSError, ValueError):
    prev_count = None
if autosomal_count is not None and prev_count is not None:
    diff = autosomal_count - prev_count
    # Signed format: a decrease prints as "-3" instead of "+-3".
    additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z gives EDT or EST as appropriate (the label used to be hard-coded "EDT").
# The 12-hour value is computed by hand because "%-I" only exists on some platforms.
updated_str = (
    now.strftime("%d %B %Y at ")
    + str(now.hour % 12 or 12)
    + now.strftime(":%M %p %Z")
)

# ————— Insert Action column —————
df.insert(6, 'Action', '→')

# ————— HTML5 page template: sticky header, sticky first two columns, intro text —————
# {autosomal_count}, {additional_str}, {updated_str} and <!-- TABLE_PLACEHOLDER -->
# are filled in later with str.replace (not str.format, so the CSS braces are safe).
# The first score band reads "&ge;60" to match the ">=60" Value Range bucket.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>
  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .intro h2 { margin:0 0 10px; }
    .intro p { margin:0.5em 0; }
    .meta  { font-size:0.9em; margin-bottom:15px; }

    .output-table {
      max-height:75vh;
      overflow:auto;
      -webkit-overflow-scrolling:touch;
      border:1px solid #333;
      position:relative;
    }

    table.sortable {
      width:100%; border-collapse:collapse; min-width:600px;
    }
    th, td {
      border:1px solid #333; padding:5px 8px; background:#faf9d3; white-space:nowrap;
    }

    /* 1) Sticky horizontal header */
    th {
      position: sticky;
      top: 0;
      background: #ffffcc;
      z-index: 2;
      text-align: center;
    }
    th:hover { background:#ffeb99; }

    /* 2) Sticky first two columns (including their headers) */
    th:nth-child(1), td:nth-child(1) {
      position: sticky;
      left: 0;
      background: #ffffcc;
      z-index: 3;
    }
    th:nth-child(2), td:nth-child(2) {
      position: sticky;
      left: 80px; /* match column 1 width */
      background: #ffffcc;
      z-index: 3;
    }
    th:nth-child(1), th:nth-child(2) {
      z-index: 4; /* ensure header cells sit above others */
    }

    /* adjust min-widths if needed */
    th:nth-child(1), td:nth-child(1) { min-width:80px; }
    th:nth-child(2), td:nth-child(2) { min-width:100px; }

    /* other columns normal */
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }

    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    .back-to-top {
      position:fixed; bottom:20px; right:20px;
      background:#333; color:#fff; padding:8px 12px;
      border-radius:4px; font-size:12px; opacity:0.7; cursor:pointer; z-index:1000;
    }
    .back-to-top:hover { opacity:1; }

    @media(max-width:600px){
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <div id="top"></div>
  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
    <p>Imagine you have a report card for your family tree that tells you how your family tree compares to other collateral family tree lines.</p>
    <p>Think of value like the total number of points you get from finding all the important family connections in your tree and comparing them to all the other trees included in the Yates study.</p>
    <p>We then group them to signal which ones have potential: <b>&ge;60:</b> likely correct, <b>59–47:</b> forming, <b>46–34:</b> emerging, <b>33–21:</b> notable, <b>20–8:</b> stable, <b>7–1:</b> research.</p>
    <p><b><i>Click on the header to sort any column</i></b> (And, remember <a href="https://yates.one-name.net/gengen/dna_theory_of_the_case.htm" target="_blank">what this is telling us...</a>)</p>
  </div>

  <div class="output-table" id="table-container">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <div class="back-to-top" onclick="document.getElementById('table-container').scrollTo({top:0,behavior:'smooth'})">
    Back to Top ↑
  </div>
</body>
</html>"""

# ————— Build and inject sortable table —————
import re
from ftplib import all_errors as FTP_ERRORS

final_cols = ["ID#","Match to","cM","haplogroup","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")
# pandas indents its markup, so "<tbody>" is followed by a newline plus spaces;
# match any whitespace, otherwise the first row never receives its id.
html_table = re.sub(r'<tbody>\s*<tr>', '<tbody>\n<tr id="first-row">', html_table, count=1)

# "is None" rather than "or": a genuine count of 0 must not be shown as Unknown.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)
final_html = (full_html_template
  .replace("{autosomal_count}", count_text)
  .replace("{additional_str}", additional_str)
  .replace("{updated_str}", updated_str)
  .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    # NOTE(review): prot_p() is not called, so the data channel is presumably
    # unencrypted — confirm server support before enabling it.
    try:
        ftps.delete("dna_cousin_surname_app.htm")
    except FTP_ERRORS:
        pass  # best effort: the file may not exist yet; STOR below replaces it anyway
    # Context manager so the local file handle is closed after the transfer.
    with open("dna_cousin_surname_app.htm", "rb") as upload_fh:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", upload_fh)

# ————— Persist count —————
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ Full DNA Report Card with intro, sticky header, and columns uploaded.")


✅ Full DNA Report Card with intro, sticky header, and columns uploaded.


In [21]:
# TEMP 2.3

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load the Cell 1 export; missing haplogroups become '' —————
df = pd.read_csv("final_combined_df_with_value_labels.csv")
df = df.assign(haplogroup=df['haplogroup'].fillna(''))

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in the anchor tag for its kind of ID.

    ``Y-...`` links to the Y-designation overview page, ``dar-A-...`` and
    ``sar-P-...`` link to the DAR / SAR search URLs with the value as the
    query, and anything else links to ``<hap_base><value>.htm``.

    The value is percent-encoded inside the URL and HTML-escaped as link text,
    so markup characters in the GEDCOM data cannot break the page. Empty or
    non-string input (e.g. NaN) returns ''.
    """
    from html import escape
    from urllib.parse import quote

    if not isinstance(x, str) or not x:
        return ''
    if x.startswith("Y-"):
        href = ydna_overview
    elif x.startswith("dar-A-"):
        href = dar_base + quote(x, safe='')
    elif x.startswith("sar-P-"):
        href = sar_base + quote(x, safe='')
    else:
        href = f"{hap_base}{quote(x, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(x)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# A missing or unparsable count file yields None instead of aborting the report.
try:
    with open("autosomal_count.txt", "r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

prev_count = None
additional_str = ""
try:
    with open("autosomal_count_prev.txt", "r") as f:
        prev_count = int(f.read().strip())
except (OSError, ValueError):
    prev_count = None
if autosomal_count is not None and prev_count is not None:
    diff = autosomal_count - prev_count
    # Signed format: a decrease prints as "-3" instead of "+-3".
    additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z gives EDT or EST as appropriate (the label used to be hard-coded "EDT").
# The 12-hour value is computed by hand because "%-I" only exists on some platforms.
updated_str = (
    now.strftime("%d %B %Y at ")
    + str(now.hour % 12 or 12)
    + now.strftime(":%M %p %Z")
)

# ————— Insert Action column —————
df.insert(6, 'Action', '→')

# ————— Page template: sticky header row + sticky first two columns (no intro text) —————
# {autosomal_count}, {additional_str}, {updated_str} and <!-- TABLE_PLACEHOLDER -->
# are filled in further down with str.replace.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>
  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .meta  { font-size:0.9em; margin-bottom:15px; }

    .output-table {
      max-height:75vh;
      overflow:auto;
      -webkit-overflow-scrolling:touch;
      border:1px solid #333;
      position:relative;
    }

    table.sortable {
      width:100%; border-collapse:collapse; min-width:600px;
    }
    th, td {
      border:1px solid #333; padding:5px 8px; background:#faf9d3; white-space:nowrap;
    }

    /* 1) Sticky horizontal header */
    th {
      position: sticky;
      top: 0;
      background: #ffffcc;
      z-index: 2;
      text-align: center;
    }
    th:hover { background:#ffeb99; }

    /* 2) Sticky first two columns (including their headers) */
    /* first column */
    th:nth-child(1), td:nth-child(1) {
      position: sticky;
      left: 0;
      background: #ffffcc;
      z-index: 3;
    }
    /* second column */
    th:nth-child(2), td:nth-child(2) {
      position: sticky;
      left: 80px; /* match your first-column width */
      background: #ffffcc;
      z-index: 3;
    }
    /* ensure header cells of sticky cols sit above other headers */
    th:nth-child(1), th:nth-child(2) {
      z-index: 4;
    }

    /* adjust min-widths if needed */
    th:nth-child(1), td:nth-child(1) { min-width:80px; }
    th:nth-child(2), td:nth-child(2) { min-width:100px; }

    /* other columns normal */
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }

    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    .back-to-top {
      position:fixed; bottom:20px; right:20px;
      background:#333; color:#fff; padding:8px 12px;
      border-radius:4px; font-size:12px; opacity:0.7; cursor:pointer; z-index:1000;
    }
    .back-to-top:hover { opacity:1; }

    @media(max-width:600px) {
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
  </div>

  <div class="output-table" id="table-container">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <div class="back-to-top" onclick="document.getElementById('table-container').scrollTo({top:0,behavior:'smooth'})">
    Back to Top ↑
  </div>
</body>
</html>"""

# ————— Build and inject sortable table —————
import re
from ftplib import all_errors as FTP_ERRORS

final_cols = ["ID#","Match to","cM","haplogroup","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")

# Tag first data row. pandas indents its markup, so "<tbody>" is followed by a
# newline plus spaces; match any whitespace, otherwise the id is never applied.
html_table = re.sub(r'<tbody>\s*<tr>', '<tbody>\n<tr id="first-row">', html_table, count=1)

# "is None" rather than "or": a genuine count of 0 must not be shown as Unknown.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)
final_html = (full_html_template
  .replace("{autosomal_count}", count_text)
  .replace("{additional_str}", additional_str)
  .replace("{updated_str}", updated_str)
  .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    # NOTE(review): prot_p() is not called, so the data channel is presumably
    # unencrypted — confirm server support before enabling it.
    try:
        ftps.delete("dna_cousin_surname_app.htm")
    except FTP_ERRORS:
        pass  # best effort: the file may not exist yet; STOR below replaces it anyway
    # Context manager so the local file handle is closed after the transfer.
    with open("dna_cousin_surname_app.htm", "rb") as upload_fh:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", upload_fh)

# ————— Persist count —————
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ DNA Report Card with sticky header and first two columns uploaded.")





✅ DNA Report Card with sticky header and first two columns uploaded.


In [11]:
#TEMP 2.2 (KEEP, nails to back to top row)

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load data; missing haplogroups become empty strings —————
df = pd.read_csv("final_combined_df_with_value_labels.csv").assign(
    haplogroup=lambda frame: frame['haplogroup'].fillna('')
)

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in an HTML anchor.

    Values starting with "Y-" link to the Y-designation overview page,
    "dar-A-" / "sar-P-" values link to the DAR / SAR search URL with the value
    as the search term, and any other non-empty value links to
    ``<hap_base><value>.htm``.  Empty, ``None`` and NaN values return ''.

    The value is percent-encoded where it becomes part of a URL and
    HTML-escaped where it is shown as link text, because the table is later
    rendered with ``escape=False``.
    """
    from html import escape
    from urllib.parse import quote

    # NaN is the only float that is not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    text = str(x).strip()
    if not text:
        return ''

    if text.startswith("Y-"):
        href = ydna_overview
    elif text.startswith("dar-A-"):
        href = f"{dar_base}{quote(text, safe='')}"
    elif text.startswith("sar-P-"):
        href = f"{sar_base}{quote(text, safe='')}"
    else:
        href = f"{hap_base}{quote(text, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(text)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# Current count; a missing or malformed file leaves the count unknown (None).
try:
    with open("autosomal_count.txt","r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

# Previous run's count, used only to report the change since the last run.
prev_count = None
additional_str = ""
if os.path.exists("autosomal_count_prev.txt"):
    try:
        with open("autosomal_count_prev.txt","r") as f:
            prev_count = int(f.read().strip())
    except (OSError, ValueError):
        prev_count = None
    if autosomal_count is not None and prev_count is not None:
        diff = autosomal_count - prev_count
        # "+d" writes the sign itself, so a drop reads "-3" instead of "+-3".
        additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
updated_str = now.strftime("%d %B %Y at %-I:%M %p %Z")

# ————— Insert Action column —————
# Adds a constant '→' column named 'Action' at position 6. The order in which
# columns are rendered is chosen later by final_cols, not by this position.
df.insert(6, 'Action', '→')

# ————— XHTML Template with only “Back to Top” control —————
# Plain (non-f) string: the {autosomal_count}, {additional_str} and
# {updated_str} tokens and the <!-- TABLE_PLACEHOLDER --> comment are filled in
# further down with str.replace, so the CSS braces need no escaping.
# The table sits in its own scroll container (#table-container); scrollToTop()
# scrolls that container, not the page.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>
  <script>
    function scrollToTop() {
      const container = document.getElementById('table-container');
      if (!container) return;
      container.scrollTo({ top: 0, behavior: 'smooth' });
    }
  </script>
  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .meta  { font-size:0.9em; margin-bottom:15px; }

    .output-table {
      max-height: 75vh;
      overflow: auto;
      -webkit-overflow-scrolling: touch;
      border: 1px solid #333;
      position: relative;
    }

    table.sortable {
      width:100%;
      border-collapse:collapse;
      min-width:600px;
    }
    th, td {
      border:1px solid #333;
      padding:5px 8px;
      background:#faf9d3;
      white-space:nowrap;
    }
    th {
      position: sticky;
      top: 0;
      background: #ffffcc;
      z-index: 2;
      text-align: center;
    }
    th:hover { background:#ffeb99; }

    th:nth-child(3), td:nth-child(3) { min-width:140px; }
    td:nth-child(6)               { min-width:180px; }
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }

    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    .back-to-top {
      position:fixed;
      bottom:20px;
      right:20px;
      background:#333;
      color:#fff;
      padding:8px 12px;
      border-radius:4px;
      text-decoration:none;
      font-size:12px;
      opacity:0.7;
      cursor:pointer;
      z-index:1000;
    }
    .back-to-top:hover { opacity:1; }

    @media (max-width:600px) {
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
  </div>

  <div class="output-table" id="table-container">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <div class="back-to-top" onclick="scrollToTop()">Back to Top ↑</div>
</body>
</html>"""

# ————— Build and inject sortable table with first-row ID —————
final_cols = ["ID#","cM","haplogroup","Match to","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")

# Tag the first data row. pandas indents its HTML, so the first <tr> after
# <tbody> is located by search; matching the fixed text '<tbody>\n<tr>'
# never succeeds and would leave the row untagged.
tbody_pos = html_table.find('<tbody>')
first_tr_pos = html_table.find('<tr>', tbody_pos) if tbody_pos != -1 else -1
if first_tr_pos != -1:
    html_table = (
        html_table[:first_tr_pos]
        + '<tr id="first-row">'
        + html_table[first_tr_pos + len('<tr>'):]
    )

# Only a missing count is shown as "Unknown"; a count of 0 is a real value.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)

final_html = (
    full_html_template
      .replace("{autosomal_count}", count_text)
      .replace("{additional_str}", additional_str)
      .replace("{updated_str}", updated_str)
      .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    # Encrypt the data channel too; login() only secures the control channel.
    ftps.prot_p()
    # Open the upload source in a with-block so the handle is always closed.
    with open("dna_cousin_surname_app.htm", "rb") as fp:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", fp)

# ————— Persist count —————
# Remember this run's count so the next run can report the change.
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ DNA Report Card updated (old return-to-first-row flag removed) and uploaded.")


✅ DNA Report Card updated (old return-to-first-row flag removed) and uploaded.


In [9]:
#TEMP 2

# Full updated script with container-backed scroll controls for First Row and Top

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load data; missing haplogroups become empty strings —————
df = pd.read_csv("final_combined_df_with_value_labels.csv").assign(
    haplogroup=lambda frame: frame['haplogroup'].fillna('')
)

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in an HTML anchor.

    Values starting with "Y-" link to the Y-designation overview page,
    "dar-A-" / "sar-P-" values link to the DAR / SAR search URL with the value
    as the search term, and any other non-empty value links to
    ``<hap_base><value>.htm``.  Empty, ``None`` and NaN values return ''.

    The value is percent-encoded where it becomes part of a URL and
    HTML-escaped where it is shown as link text, because the table is later
    rendered with ``escape=False``.
    """
    from html import escape
    from urllib.parse import quote

    # NaN is the only float that is not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    text = str(x).strip()
    if not text:
        return ''

    if text.startswith("Y-"):
        href = ydna_overview
    elif text.startswith("dar-A-"):
        href = f"{dar_base}{quote(text, safe='')}"
    elif text.startswith("sar-P-"):
        href = f"{sar_base}{quote(text, safe='')}"
    else:
        href = f"{hap_base}{quote(text, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(text)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# Current count; a missing or malformed file leaves the count unknown (None).
try:
    with open("autosomal_count.txt","r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

# Previous run's count, used only to report the change since the last run.
prev_count = None
additional_str = ""
if os.path.exists("autosomal_count_prev.txt"):
    try:
        with open("autosomal_count_prev.txt","r") as f:
            prev_count = int(f.read().strip())
    except (OSError, ValueError):
        prev_count = None
    if autosomal_count is not None and prev_count is not None:
        diff = autosomal_count - prev_count
        # "+d" writes the sign itself, so a drop reads "-3" instead of "+-3".
        additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
updated_str = now.strftime("%d %B %Y at %-I:%M %p %Z")

# ————— Insert Action column —————
# Adds a constant '→' column named 'Action' at position 6. The order in which
# columns are rendered is chosen later by final_cols, not by this position.
df.insert(6,'Action','→')

# ————— XHTML Template with two scroll controls —————
# Plain (non-f) string: the {autosomal_count}, {additional_str} and
# {updated_str} tokens and the <!-- TABLE_PLACEHOLDER --> comment are filled in
# further down with str.replace, so the CSS/JS braces need no escaping.
# scrollToFirstRow() looks up the element with id "first-row" and returns
# without scrolling when it is absent, so that id must be present in the
# injected table for the "Back to First Row" control to do anything.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>

  <script>
    function scrollToFirstRow() {
      const container = document.getElementById('table-container');
      const row       = document.getElementById('first-row');
      if (!container || !row) return;
      const cRect = container.getBoundingClientRect();
      const rRect = row.getBoundingClientRect();
      const offset = (rRect.top - cRect.top) + container.scrollTop;
      container.scrollTo({ top: offset, behavior: 'smooth' });
    }
    function scrollToTop() {
      const container = document.getElementById('table-container');
      if (!container) return;
      container.scrollTo({ top: 0, behavior: 'smooth' });
    }
  </script>

  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .meta  { font-size:0.9em; margin-bottom:15px; }

    .output-table {
      max-height: 75vh;
      overflow: auto;
      -webkit-overflow-scrolling: touch;
      border: 1px solid #333;
      position: relative;
    }

    table.sortable {
      width:100%; border-collapse:collapse; min-width:600px;
    }
    th, td {
      border:1px solid #333; padding:5px 8px; background:#faf9d3; white-space:nowrap;
    }
    th { position: sticky; top:0; background:#ffffcc; z-index:2; text-align:center; }
    th:hover { background:#ffeb99; }

    th:nth-child(3), td:nth-child(3) { min-width:140px; }
    td:nth-child(6)               { min-width:180px; }
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }

    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    #first-row { scroll-margin-top: 40px; }

    .back-to-top {
      position:fixed; bottom:20px; right:20px;
      background:#333; color:#fff; padding:8px 12px;
      border-radius:4px; text-decoration:none; font-size:12px;
      opacity:0.7; cursor:pointer; z-index:1000;
    }
    .back-to-top:hover { opacity:1; }

    @media(max-width:600px){
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
  </div>

  <div class="output-table" id="table-container">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <div class="back-to-top" onclick="scrollToFirstRow()">Back to First Row ↑</div>
  <div class="back-to-top" style="bottom:60px;" onclick="scrollToTop()">Back to Top ↑</div>
</body>
</html>"""

# ————— Build table and inject first-row ID —————
final_cols = ["ID#","cM","haplogroup","Match to","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")

# Tag the first data row so scrollToFirstRow() can find it. pandas indents its
# HTML, so the first <tr> after <tbody> is located by search; matching the
# fixed text '<tbody>\n<tr>' never succeeds and would leave the row untagged.
tbody_pos = html_table.find('<tbody>')
first_tr_pos = html_table.find('<tr>', tbody_pos) if tbody_pos != -1 else -1
if first_tr_pos != -1:
    html_table = (
        html_table[:first_tr_pos]
        + '<tr id="first-row">'
        + html_table[first_tr_pos + len('<tr>'):]
    )

# Only a missing count is shown as "Unknown"; a count of 0 is a real value.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)

final_html = (full_html_template
  .replace("{autosomal_count}", count_text)
  .replace("{additional_str}", additional_str)
  .replace("{updated_str}", updated_str)
  .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    # Encrypt the data channel too; login() only secures the control channel.
    ftps.prot_p()
    # Open the upload source in a with-block so the handle is always closed.
    with open("dna_cousin_surname_app.htm", "rb") as fp:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", fp)

# Remember this run's count so the next run can report the change.
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ Updated DNA Report Card with dual scroll controls generated and uploaded.")





✅ Updated DNA Report Card with dual scroll controls generated and uploaded.


In [None]:
# Gold 2 create the Report Card (mobile-friendly, sortable)

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load data; missing haplogroups become empty strings —————
df = pd.read_csv("final_combined_df_with_value_labels.csv").assign(
    haplogroup=lambda frame: frame['haplogroup'].fillna('')
)

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in an HTML anchor.

    Values starting with "Y-" link to the Y-designation overview page,
    "dar-A-" / "sar-P-" values link to the DAR / SAR search URL with the value
    as the search term, and any other non-empty value links to
    ``<hap_base><value>.htm``.  Empty, ``None`` and NaN values return ''.

    The value is percent-encoded where it becomes part of a URL and
    HTML-escaped where it is shown as link text, because the table is later
    rendered with ``escape=False``.
    """
    from html import escape
    from urllib.parse import quote

    # NaN is the only float that is not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    text = str(x).strip()
    if not text:
        return ''

    if text.startswith("Y-"):
        href = ydna_overview
    elif text.startswith("dar-A-"):
        href = f"{dar_base}{quote(text, safe='')}"
    elif text.startswith("sar-P-"):
        href = f"{sar_base}{quote(text, safe='')}"
    else:
        href = f"{hap_base}{quote(text, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(text)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# Current count; a missing or malformed file leaves the count unknown (None).
try:
    with open("autosomal_count.txt","r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

# Previous run's count, used only to report the change since the last run.
prev_count = None
additional_str = ""
if os.path.exists("autosomal_count_prev.txt"):
    try:
        with open("autosomal_count_prev.txt","r") as f:
            prev_count = int(f.read().strip())
    except (OSError, ValueError):
        prev_count = None
    if autosomal_count is not None and prev_count is not None:
        diff = autosomal_count - prev_count
        # "+d" writes the sign itself, so a drop reads "-3" instead of "+-3".
        additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
updated_str = now.strftime("%d %B %Y at %-I:%M %p %Z")

# ————— Insert Action column —————
# Adds a constant '→' column named 'Action' at position 6. The order in which
# columns are rendered is chosen later by final_cols, not by this position.
df.insert(6,'Action','→')

# ————— XHTML Template with its own scrollable container and sticky headers —————
# Plain (non-f) string: the {autosomal_count}, {additional_str} and
# {updated_str} tokens and the <!-- TABLE_PLACEHOLDER --> comment are filled in
# further down with str.replace, so the CSS braces need no escaping.
# The "Back to Top" link targets the #top anchor at the start of <body>.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>
  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .intro h2 { margin:0 0 10px; }
    .intro p { margin:0.5em 0; }
    .meta { font-size:0.9em; margin-bottom:15px; }

    /* make the table container scroll both axes, limit height */
    .output-table {
      max-height: 75vh;
      overflow-y: auto;
      overflow-x: auto;
      -webkit-overflow-scrolling: touch;
      border: 1px solid #333;
    }

    table.sortable {
      width:100%;
      border-collapse: collapse;
      min-width:600px;
    }
    th, td {
      border:1px solid #333;
      padding:5px 8px;
      background:#faf9d3;
      white-space:nowrap;
    }
    /* apply sticky to every header cell */
    th {
      position: sticky;
      top: 0;
      background: #ffffcc;
      z-index: 2;
      text-align: center;
    }
    th:hover { background:#ffeb99; }

    th:nth-child(3), td:nth-child(3) { min-width:140px; }
    td:nth-child(6) { min-width:180px; }
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }

    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    /* Back-to-Top button */
    .back-to-top {
      position: fixed;
      bottom: 20px;
      right: 20px;
      background: #333;
      color: #fff;
      padding: 8px 12px;
      text-decoration: none;
      border-radius: 4px;
      font-size: 12px;
      z-index: 1000;
      opacity: 0.7;
    }
    .back-to-top:hover { opacity: 1; }

    @media (max-width:600px) {
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <div id="top"></div>
  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
    <p>Imagine you have a report card for your family tree that tells you how your family tree compares to other collateral family tree lines.</p>
    <p>Think of value like the total number of points you get from finding all the important family connections in your tree and comparing them to all the other trees included in the Yates study.</p>
    <p>We then group them to signal which ones have potential: <b>&gt;60:</b> likely correct, <b>59–47:</b> forming, <b>46–34:</b> emerging, <b>33–21:</b> notable, <b>20–8:</b> stable, <b>7–1:</b> research.</p>
    <p><b><i>Click on the header to sort any column</i></b> (And, remember <a href="https://yates.one-name.net/gengen/dna_theory_of_the_case.htm" target="_blank">what this is telling us...</a>)</p>
  </div>

  <div class="output-table">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <a href="#top" class="back-to-top">Back to Top ↑</a>
</body>
</html>"""

# ————— Build and inject sortable table —————
final_cols = ["ID#","cM","haplogroup","Match to","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
# escape=False keeps the anchors already built in the haplogroup column as markup.
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")

# Only a missing count is shown as "Unknown"; a count of 0 is a real value.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)

# The table is substituted last, so text inside it is never treated as a token.
final_html = (
    full_html_template
      .replace("{autosomal_count}", count_text)
      .replace("{additional_str}", additional_str)
      .replace("{updated_str}", updated_str)
      .replace("<!-- TABLE_PLACEHOLDER -->", html_table)
)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    ftps.prot_p()  # encrypt the data channel as well as the control channel
    # Best-effort removal of the previous copy: a failure here (for example
    # the file not existing yet) must not stop the upload.
    try:
        ftps.delete("dna_cousin_surname_app.htm")
    except Exception:
        pass
    # Open the upload source in a with-block so the handle is always closed.
    with open("dna_cousin_surname_app.htm", "rb") as fp:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", fp)

# ————— Persist count —————
# Remember this run's count so the next run can report the change.
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ Scrollable DNA Report Card with sticky headers generated and uploaded.")



✅ Scrollable DNA Report Card with sticky headers generated and uploaded.


In [None]:
# save as backup 2

import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS
import os

# ————— Load data; missing haplogroups become empty strings —————
df = pd.read_csv("final_combined_df_with_value_labels.csv").assign(
    haplogroup=lambda frame: frame['haplogroup'].fillna('')
)

# ————— Hyperlink various IDs —————
hap_base      = "gengen/haplogroup/"
ydna_overview = "gengen/Y-designation-overview.htm"
dar_base      = "https://services.dar.org/Public/DAR_Research/search/?Keyword="
sar_base      = "https://sarpatriots.sar.org/patriot/search?searchText="

def link_value(x):
    """Wrap a haplogroup-column value in an HTML anchor.

    Values starting with "Y-" link to the Y-designation overview page,
    "dar-A-" / "sar-P-" values link to the DAR / SAR search URL with the value
    as the search term, and any other non-empty value links to
    ``<hap_base><value>.htm``.  Empty, ``None`` and NaN values return ''.

    The value is percent-encoded where it becomes part of a URL and
    HTML-escaped where it is shown as link text, because the table is later
    rendered with ``escape=False``.
    """
    from html import escape
    from urllib.parse import quote

    # NaN is the only float that is not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    text = str(x).strip()
    if not text:
        return ''

    if text.startswith("Y-"):
        href = ydna_overview
    elif text.startswith("dar-A-"):
        href = f"{dar_base}{quote(text, safe='')}"
    elif text.startswith("sar-P-"):
        href = f"{sar_base}{quote(text, safe='')}"
    else:
        href = f"{hap_base}{quote(text, safe='')}.htm"
    return f'<a href="{href}" target="_blank">{escape(text)}</a>'

df['haplogroup'] = df['haplogroup'].apply(link_value)

# ————— Load autosomal counts —————
# Current count; a missing or malformed file leaves the count unknown (None).
try:
    with open("autosomal_count.txt","r") as f:
        autosomal_count = int(f.read().strip())
except (OSError, ValueError):
    autosomal_count = None

# Previous run's count, used only to report the change since the last run.
prev_count = None
additional_str = ""
if os.path.exists("autosomal_count_prev.txt"):
    try:
        with open("autosomal_count_prev.txt","r") as f:
            prev_count = int(f.read().strip())
    except (OSError, ValueError):
        prev_count = None
    if autosomal_count is not None and prev_count is not None:
        diff = autosomal_count - prev_count
        # "+d" writes the sign itself, so a drop reads "-3" instead of "+-3".
        additional_str = f" ({diff:+d} since last run)"

# ————— Timestamp —————
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
updated_str = now.strftime("%d %B %Y at %-I:%M %p %Z")

# ————— Insert Action column —————
# Adds a constant '→' column named 'Action' at position 6. The order in which
# columns are rendered is chosen later by final_cols, not by this position.
df.insert(6,'Action','→')

# ————— XHTML Template with responsive/mobile-friendly CSS, sorting, and Back-to-Top link —————
# Plain (non-f) string: the {autosomal_count}, {additional_str} and
# {updated_str} tokens and the <!-- TABLE_PLACEHOLDER --> comment are filled in
# further down with str.replace, so the CSS braces need no escaping.
# The "Back to Top" link targets the #top anchor at the start of <body>.
full_html_template = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
  <title>DNA Report Card</title>
  <script src="../sorttable.js" type="text/javascript"></script>
  <style>
    body { margin:0; padding:0; font-family:Arial,Helvetica,sans-serif; background:#faf9d3; font-size:14px; }
    .intro { padding:20px; text-align:center; }
    .intro h2 { margin:0 0 10px; }
    .intro p { margin:0.5em 0; }
    .meta { font-size:0.9em; margin-bottom:15px; }
    .output-table { overflow-x:auto; -webkit-overflow-scrolling:touch; }
    table.sortable { width:100%; border-collapse:collapse; min-width:600px; }
    th, td { border:1px solid #333; padding:5px 8px; background:#faf9d3; white-space:nowrap; }
    th { background:#ffffcc; position:sticky; top:0; z-index:2; text-align:center; }
    th:hover { background:#ffeb99; }
    th:nth-child(3), td:nth-child(3) { min-width:140px; }
    td:nth-child(6) { min-width:180px; }
    th:nth-child(7), td:nth-child(7) { width:40px; }
    th:nth-child(8), td:nth-child(8) { text-align:left; }
    .match { background:#fff; }
    .blank { background:#ccc; color:#ccc; }

    /* Back-to-Top button */
    .back-to-top {
      position: fixed;
      bottom: 20px;
      right: 20px;
      background: #333;
      color: #fff;
      padding: 8px 12px;
      text-decoration: none;
      border-radius: 4px;
      font-size: 12px;
      z-index: 1000;
      opacity: 0.7;
    }
    .back-to-top:hover {
      opacity: 1;
    }

    @media (max-width:600px) {
      body { font-size:12px; }
      table.sortable { min-width:480px; }
      th, td { padding:4px 6px; }
    }
  </style>
</head>
<body>
  <!-- anchor for Back-to-Top -->
  <div id="top"></div>

  <div class="intro">
    <h2>DNA Report Card</h2>
    <div class="meta">
      Return to <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">Study Home</a> |
      Autosomal matches: {autosomal_count}{additional_str} |
      Updated: {updated_str}
    </div>
    <p>Imagine you have a report card for your family tree that tells you how your family tree compares to other collateral family tree lines.</p>
    <p>Think of value like the total number of points you get from finding all the important family connections in your tree and comparing them to all the other trees included in the Yates study.</p>
    <p>We then group them to signal which ones have potential: <b>&gt;60:</b> likely correct, <b>59–47:</b> forming, <b>46–34:</b> emerging, <b>33–21:</b> notable, <b>20–8:</b> stable, <b>7–1:</b> research.</p>
    <p><b><i>Click on the header to sort any column</i></b> (And, remember <a href="https://yates.one-name.net/gengen/dna_theory_of_the_case.htm" target="_blank">what this is telling us...</a>)</p>
  </div>

  <div class="output-table">
    <!-- TABLE_PLACEHOLDER -->
  </div>

  <!-- Back-to-Top link -->
  <a href="#top" class="back-to-top">Back to Top ↑</a>
</body>
</html>"""


# ————— Build and inject sortable table —————
final_cols = ["ID#","cM","haplogroup","Match to","Value Range","Value Label","Action","Yates DNA Ancestral Line"]
# escape=False keeps the anchors already built in the haplogroup column as markup.
html_table = df.to_html(index=False, columns=final_cols, escape=False, classes="sortable")

# Only a missing count is shown as "Unknown"; a count of 0 is a real value.
count_text = "Unknown" if autosomal_count is None else str(autosomal_count)

# The table is substituted last, so text inside it is never treated as a token.
final_html = (full_html_template
               .replace("{autosomal_count}", count_text)
               .replace("{additional_str}", additional_str)
               .replace("{updated_str}", updated_str)
             ).replace("<!-- TABLE_PLACEHOLDER -->", html_table)

# ————— Save & upload —————
with open("dna_cousin_surname_app.htm", "w", encoding="utf-8") as f:
    f.write(final_html)
with FTP_TLS() as ftps:
    ftps.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT',21)))
    ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    ftps.prot_p()  # encrypt the data channel as well as the control channel
    # Best-effort removal of the previous copy: a failure here (for example
    # the file not existing yet) must not stop the upload.
    try:
        ftps.delete("dna_cousin_surname_app.htm")
    except Exception:
        pass
    # Open the upload source in a with-block so the handle is always closed.
    with open("dna_cousin_surname_app.htm", "rb") as fp:
        ftps.storbinary("STOR dna_cousin_surname_app.htm", fp)

# ————— Persist count —————
# Remember this run's count so the next run can report the change.
if autosomal_count is not None:
    with open("autosomal_count_prev.txt","w") as f:
        f.write(str(autosomal_count))

print("✅ Mobile-friendly sortable DNA Report Card generated and uploaded.")



✅ Mobile-friendly sortable DNA Report Card generated and uploaded.


In [None]:
# Gold Cell 3 for Y-DNA Grid with Auto-Adjusting Column Widths

import os
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS

# ── PATHS ─────────────────────────────────────────────────────────────────
combo_csv  = "/content/y_dna_user_detail_combo.csv"
output_csv = "/content/y_dna_grid.csv"
output_htm = "/content/y_dna_grid.htm"

# ── 1) Load vertical data, presenting any “Date” column as “Era” ──────────
# rename() ignores labels that are absent, so no membership check is needed.
df = pd.read_csv(combo_csv).rename(columns={"Date": "Era"})

# ── 2) Insert Action *after* Era ──────────────────────────────────────────
# Era is at index 1, so Action goes at index 2; the scalar fills every row.
df.insert(2, "Action", "→")

# ── 3) Save vertical CSV ─────────────────────────────────────────────────
df.to_csv(output_csv, index=False)
print(f"✅ Saved vertical grid CSV to {output_csv}")

# ── 4) Build HTML ─────────────────────────────────────────────────────────
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
ts  = now.strftime("%-m/%-d/%y, %-I:%M %p %Z")
# Column labels in frame order; the loops below treat positions 0, 1 and 2
# as the SNP, Year (Era) and Action columns and every later one as a person.
cols = df.columns.tolist()

# Page head, styles and the opening of the table, up to the start of the
# header row. This is an f-string: doubled braces {{ }} are literal CSS braces
# and {ts} (the "Updated" stamp) is the only interpolated value.
# NOTE(review): the global `a` rule colours every link white, including the
# "Return to DNA Cousin Surname Study" link, which sits on the pale #faf9d3
# page background rather than on a dark header cell — confirm that is intended.
html = f"""<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Yates Y-DNA Grid</title>
<style>
body {{
  background: #faf9d3;
  font-family: Arial, sans-serif;
  font-size: 14px;
  margin: 0;
  padding: 0;
}}
.container {{
  padding: 10px;
}}
.table-container {{
  overflow-x: auto;
  max-height: 80vh;
}}
table {{
  border: 2px solid #333;
  border-collapse: collapse;
  margin: 0 auto;
}}
table.mainsection {{
  /* allows CSS targeting of blank under “Year” */
}}
thead {{
  display: table-header-group;
}}
thead th {{
  position: sticky;
  top: 0;
  background: #333;
  color: #fff;
  padding: 6px;
  border: 1px solid #999;
  z-index: 3;
}}
a {{
  color: #fff;
  text-decoration: underline;
}}
.era {{
  background: #666;
  color: #eee;
  padding: 6px;
  border: 1px solid #999;
  font-size: 0.9em;
}}
.action {{
  background: #fff;
  padding: 6px;
  border: 1px solid #999;
  text-align: center;
}}
td {{
  padding: 6px;
  border: 1px solid #999;
  text-align: center;
}}
th:nth-child(n+4),
td:nth-child(n+4) {{
  border: 1px solid #333;
}}
.match {{
  background: #fff;
}}
.blank {{
  background: #ccc;
  color: #ccc;
}}
/* make the blank under the “Year” header match the era-cell background */
table.mainsection td.blank:nth-child(2) {{
  background-color: #fdfcd0;
}}
</style>
</head>
<body>
  <div class="container">
    <h1 style="text-align:center">Yates Y-DNA Grid</h1>
    <p style="text-align:center;font-size:0.9em">Updated: {ts}</p>
    <p style="text-align:center;margin-bottom:12px">
      <a href="https://yates.one-name.net/gengen/dna_cousin_surname_study.htm">
        Return to DNA Cousin Surname Study
      </a>
    </p>
    <div class="table-container">
      <table class="mainsection">
        <thead>
          <tr>"""

# Header row: fixed labels for the first three columns, then one chart link
# per remaining column (the person ID is the upper-cased text before the
# first "-" in the column label).
fixed_labels = ["SNP", "Year", "Action"]
header_cells = [f"<th>{label}</th>" for label in fixed_labels[:len(cols)]]
for c in cols[3:]:
    pid = c.split("-")[0].upper()
    header_cells.append(
        '<th>'
        f'<a href="https://yates.one-name.net/tng/verticalchart.php?'
        f'personID={pid}&tree=tree1&parentset=0&display=vertical&generations=15">{c}</a>'
        '</th>'
    )
html += "".join(header_cells)

html += """
          </tr>
        </thead>
        <tbody>"""

# Data rows: the first column is always printed as-is; every other column is
# a grey "blank" cell when empty, otherwise a cell classed by its position.
cell_classes = {1: "era", 2: "action"}
for _, row in df.iterrows():
    row_cells = []
    for i, c in enumerate(cols):
        v = row[c]
        if i == 0:
            row_cells.append(f"<td>{v}</td>")
        elif pd.isna(v) or not str(v).strip():
            row_cells.append('<td class="blank">–</td>')
        else:
            row_cells.append(f'<td class="{cell_classes.get(i, "match")}">{v}</td>')
    html += "<tr>" + "".join(row_cells) + "</tr>"

html += """
        </tbody>
      </table>
    </div>
  </div>
</body>
</html>"""

with open(output_htm, "w", encoding="utf-8") as f:
    f.write(html)
print(f"✅ Saved vertical XHTML to {output_htm}")

# ── 5) FTP upload ────────────────────────────────────────────────────────
# The with-block ends the session and closes the connection even when an
# upload raises, instead of relying on reaching a trailing quit().
with FTP_TLS() as ftp:
    ftp.connect(os.environ["FTP_HOST"], int(os.environ["FTP_PORT"]))
    ftp.login(os.environ["FTP_USER"], os.environ["FTP_PASS"])
    ftp.prot_p()  # encrypt the data channel as well as the control channel
    for path in (output_csv, output_htm):
        fn = os.path.basename(path)
        # Best-effort removal of the previous copy: a failure here (for
        # example the file not existing yet) must not stop the upload.
        try:
            ftp.delete(fn)
        except Exception:
            pass
        with open(path, "rb") as fp:
            ftp.storbinary(f"STOR {fn}", fp)
print("✅ Uploaded CSV & HTML to server")



✅ Saved vertical grid CSV to /content/y_dna_grid.csv
✅ Saved vertical XHTML to /content/y_dna_grid.htm
✅ Uploaded CSV & HTML to server


In [None]:
# EXP

import os
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo
from ftplib import FTP_TLS

# ── CONFIG ───────────────────────────────────────────────────────────────
info_csv   = "/content/haplogroup_info.csv"
user_csv   = "/content/y_dna_user_detail.csv"
output_csv = "/content/y_dna_grid.csv"
output_htm = "/content/y_dna_grid.htm"

# ── 1) Load & prepare haplogroup info ───────────────────────────────────
# "Date" (when present) is presented as "Era"; only the first row for each
# haplogroup is kept.
df_info = pd.read_csv(info_csv).rename(columns={"Date": "Era"})
df_info = df_info.drop_duplicates(subset="Haplogroup")
hap_order = df_info["Haplogroup"].tolist()
era_values = df_info["Era"] if "Era" in df_info.columns else [""] * len(df_info)
era_map   = {hap: era for hap, era in zip(df_info["Haplogroup"], era_values)}

# ── 2) Load user detail table ───────────────────────────────────────────
# Without a "User_ID" column, the first column is taken to be the user ID.
df_users = pd.read_csv(user_csv)
if "User_ID" not in df_users.columns:
    df_users = df_users.rename(columns={df_users.columns[0]: "User_ID"})

# One list per user: the non-blank cell values of that row, as strings.
user_chains = []
for _, user_row in df_users.iterrows():
    snp_values = user_row.drop(labels=["User_ID"]).tolist()
    user_chains.append([str(v) for v in snp_values if pd.notna(v) and str(v).strip()])

# ── 3) Insert new SNPs after parent ──────────────────────────────────────
# An SNP missing from hap_order is placed directly after the SNP that precedes
# it in the user's chain (presumably its parent — confirm chain ordering).
# When that predecessor is itself absent from hap_order, or the unknown SNP
# is the first in its chain, it is appended at the end; previously the first
# case raised ValueError from hap_order.index() and the second silently left
# the SNP out of the grid.
for chain in user_chains:
    prev = None
    for h in chain:
        if h not in hap_order:
            if prev is not None and prev in hap_order:
                hap_order.insert(hap_order.index(prev) + 1, h)
            else:
                hap_order.append(h)
        prev = h
# Build final eras list (blank for SNPs that have no entry in era_map)
eras = [era_map.get(h, "") for h in hap_order]

# ── 4) Build horizontal grid DataFrame ───────────────────────────────────
# Every haplogroup gets a column; ones the user table lacks are added blank.
for hap in hap_order:
    if hap in df_users.columns:
        continue
    df_users[hap] = ""
grid_columns = ["User_ID"] + hap_order
df_grid_h = df_users[grid_columns]

# ── 5) Transform to vertical layout ─────────────────────────────────────
# After transposing, rows are SNPs and columns are users; Era goes first.
df_vert = df_grid_h.set_index("User_ID").transpose()
df_vert.insert(0, 'Era', eras)
df_vert.index.name = 'SNP'
df_grid = df_vert.reset_index()

# ── 6) Save vertical CSV ─────────────────────────────────────────────────
df_grid.to_csv(output_csv, index=False)
print(f"✅ Vertical grid CSV saved to {output_csv}")

# ── 7) Generate XHTML (vertical) ────────────────────────────────────────
now = datetime.now(ZoneInfo("America/New_York"))
# %Z prints the zone abbreviation in effect ("EDT" or "EST"); a literal
# "EDT" would be wrong while standard time is in force.
ts  = now.strftime("%-m/%-d/%y, %-I:%M %p %Z")

template = '''<!DOCTYPE html>
<html><head><meta charset="UTF-8"><title>Yates Y-DNA Grid</title>
<style>
  body { background:#faf9d3; font-family:Arial,Helvetica,sans-serif; font-size:14px; }
  table { width:100%; border:1px solid #333; border-collapse:collapse; table-layout:auto; }
  th { background:#333; color:#fff; padding:6px; border:1px solid #999; }
  .era { background:#666; color:#eee; padding:6px; border:1px solid #999; font-size:0.9em; }
  td { padding:6px; border:1px solid #999; text-align:center; white-space:nowrap; }
  .match { background:#fff; }
  .blank { background:#ccc; color:#ccc; }
</style>
</head><body>
  <h1 style="text-align:center;">Yates Y-DNA Grid</h1>
  <table>
'''  # end template

# Build header row
cols = df_grid.columns.tolist()
header_html = '<tr><th>SNP</th><th>Era</th>' + ''.join(f'<th>{u}</th>' for u in cols[2:]) + '</tr>'

# Build data rows
rows_html = []
for _, row in df_grid.iterrows():
    cells = []
    for u in cols[2:]:
        v = row[u]
        if pd.isna(v) or not str(v).strip():
            cells.append('<td class="blank">–</td>')
        else:
            cells.append(f'<td class="match">{v}</td>')
    rows_html.append(f'<tr><td>{row["SNP"]}</td><td class="era">{row["Era"]}</td>' + ''.join(cells) + '</tr>')

# Combine and save HTML
html = template + header_html + '\n' + '\n'.join(rows_html) + f'''
  </table>
  <p style="text-align:right;font-size:0.9em;">Updated: {ts}</p>
</body>
</html>'''
with open(output_htm, 'w', encoding='utf-8') as f:
    f.write(html)
print(f"✅ Vertical XHTML Grid saved to {output_htm}")

# ── 8) FTP Upload ───────────────────────────────────────────────────────
# Upload the CSV and HTML grid over FTPS, using connection details from the
# FTP_HOST / FTP_PORT / FTP_USER / FTP_PASS environment variables.
from ftplib import error_perm

# The context manager closes the connection even if a step below raises.
with FTP_TLS() as ftp:
    ftp.connect(os.environ['FTP_HOST'], int(os.environ.get('FTP_PORT', 21)))
    ftp.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
    ftp.prot_p()  # switch the data channel to TLS as well
    for path in [output_csv, output_htm]:
        name = os.path.basename(path)
        # Best-effort removal of a previous copy. Only a permanent server
        # refusal (e.g. the file does not exist) is ignored; connection or
        # TLS failures still propagate instead of being silently swallowed.
        try:
            ftp.delete(name)
        except error_perm:
            pass
        with open(path, 'rb') as fp:
            ftp.storbinary(f"STOR {name}", fp)
print("✅ Uploaded to server.")



✅ Vertical grid CSV saved to /content/y_dna_grid.csv
✅ Vertical XHTML Grid saved to /content/y_dna_grid.htm
✅ Uploaded to server.


In [None]:
# Y-DNA cell 1

# === Cell 1: New user settings ===

# Header of the new user column.
USER_ID = 'I56217'

# SNP chain for this user; consecutive SNPs are separated by " > ".
PATH_STRING = (
    "R-M207 > R-M173 > R-M343 > R-M269"
    " > R-FT266064 > R-FT266579 > R-FTF17042"
)

# If True, SNPs from PATH_STRING that are not yet rows are added.
INSERT_MISSING = True

# Existing master CSV and the path for the updated copy.
MASTER_CSV = '/content/y_dna_user_detail_combo.csv'
UPDATED_CSV = '/content/y_dna_user_detail_combo_updated.csv'


In [None]:
# Cell 2: Load → Append User → Save
#
# Reads MASTER_CSV, adds a column named USER_ID that holds the SNP name on
# every row belonging to the user's chain (PATH_STRING), and writes the result
# to UPDATED_CSV.

import pandas as pd

# 1) Load the existing master CSV
df = pd.read_csv(MASTER_CSV)

# 2) Normalize the first column name to 'SNP' for easy matching
first_col = df.columns[0]
if first_col != 'SNP':
    df = df.rename(columns={first_col: 'SNP'})

# 3) Parse the new user's SNP chain.
#    PATH_STRING separates SNPs with " > ", so each piece must be stripped;
#    otherwise "R-M207 " / " R-M173 " never match the SNP column and, with
#    INSERT_MISSING on, get appended as whitespace-padded duplicate rows.
chain = [snp.strip() for snp in PATH_STRING.split('>') if snp.strip()]

# 4) Optionally insert any SNPs not yet present (appends at bottom)
if INSERT_MISSING:
    existing_snps = set(df['SNP'])
    # dict.fromkeys de-duplicates while preserving chain order
    missing = [s for s in dict.fromkeys(chain) if s not in existing_snps]
    if missing:
        df = pd.concat([df, pd.DataFrame([{'SNP': s} for s in missing])],
                       ignore_index=True)

# 5) Create the new user column in the next free position
df[USER_ID] = ''

# 6) Populate: copy the SNP value into that column where it matches the chain
df.loc[df['SNP'].isin(chain), USER_ID] = df['SNP']

# 7) Save the updated CSV back to /content
df.to_csv(UPDATED_CSV, index=False)
print(f"✅ Updated CSV saved to {UPDATED_CSV}")


✅ Updated CSV saved to /content/y_dna_user_detail_combo_updated.csv
