<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/refactoring_with_03_1421.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install pandas
!pip install python-gedcom
!pip install openpyxl
!pip install xlsxwriter


Collecting python-gedcom
  Downloading python_gedcom-1.0.0-py2.py3-none-any.whl.metadata (15 kB)
Downloading python_gedcom-1.0.0-py2.py3-none-any.whl (35 kB)
Installing collected packages: python-gedcom
Successfully installed python-gedcom-1.0.0
Collecting xlsxwriter
  Downloading XlsxWriter-3.2.2-py3-none-any.whl.metadata (2.8 kB)
Downloading XlsxWriter-3.2.2-py3-none-any.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.1/165.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.2


In [None]:
# 01_28_2033(works!)_2025_stable

#!/usr/bin/env python
"""
GEDCOM Composite Score Script with Enhanced Optimizations
-----------------------------------------------------------
This script parses a GEDCOM file, calculates a "Value" from shared ancestral segments,
computes standardized metrics (Z-Score, Robust Z-Score, Percentile Rank), and creates a
Composite Score and descriptor (High, Moderate, Medium, Low, or Very Low) for each record.
It uses enhanced parallel processing with chunking, increased caching, and attempts to
minimize memory overhead. The final DataFrame (with "Name" suppressed) is exported to CSV
and HTML (with the Yates DNA Ancestral Line left aligned).
"""

# Standard Libraries
import csv, glob, logging, functools, os
from datetime import datetime
from collections import defaultdict, Counter
from itertools import combinations
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor

# Set up logging: INFO level, each line formatted as "time - LEVEL - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Global variables used in ancestral processing.
# NOTE: these are shared, mutable module state rather than per-call values.
# anchor_gen1 is overwritten by GedcomDataset.get_gen_person() and by process_individual().
anchor_gen1 = None
# Parent pairs already recorded by find_parents(); process_individual() rebinds this to a fresh set.
visited_pairs = set()
# (generation, (father_id, mother_id)) tuples appended by find_parents();
# process_individual() rebinds this to a fresh list before each traversal.
generation_table = []

################################################################################
#                                GedcomDataset Class                           #
################################################################################
class GedcomDataset:
    """Stores and extracts details for an individual from a GEDCOM file.

    Details are kept in ``extractable_detail`` keyed by GEDCOM tag
    (e.g. NAME, NPFX, FAMC). The NPFX value is read as
    ``<cM>&<sort>**<YDNA>``, where the ``&<sort>`` and ``**<YDNA>`` parts
    are optional.

    A tag that was stored with a value of ``None`` (a GEDCOM line with a tag
    but no value) is treated exactly like a missing tag, so every getter
    returns a string instead of raising.
    """
    def __init__(self, gen_person):
        self.gen_person = gen_person
        self.extractable_detail = {}
        self.anchor_gen1 = None

    def add_extractable_detail(self, key, value):
        """Store a detail value (e.g., NAME, NPFX) under the given key."""
        self.extractable_detail[key] = value

    def _detail(self, key):
        """Return the stored detail for ``key``; missing or ``None`` becomes ''."""
        value = self.extractable_detail.get(key)
        return '' if value is None else value

    def get_gen_person(self):
        """Return the individual's ID (without '@') and set the anchor name.

        Side effects: recomputes ``self.anchor_gen1`` from the current NAME
        detail (surname followed by the first given name, spaces removed) and
        also writes it to the module-level ``anchor_gen1`` global.
        """
        global anchor_gen1
        name = self._detail('NAME')
        parts = name.split('/', 1)
        first_name = parts[0].split(' ')[0]
        last_name = parts[1].rstrip('/') if len(parts) > 1 else ""
        self.anchor_gen1 = last_name.replace(" ", "") + first_name.replace(" ", "")
        anchor_gen1 = self.anchor_gen1
        return self.gen_person.strip('@')

    def get_anchor_gen1(self):
        """Return the anchor name (None until get_gen_person() has been called)."""
        return self.anchor_gen1

    def get_extractable_NPFX(self):
        """Return the NPFX field value ('' when absent)."""
        return self._detail('NPFX')

    def get_extractable_cm(self):
        """Extract and return the cM value from the NPFX field.

        Returns the text before the first '&' (or, failing that, before the
        first '**') as a string, or '' when that text is not an integer.
        """
        npfx_value = self._detail('NPFX')
        if '&' in npfx_value:
            cm_value = npfx_value.split('&')[0].strip()
        elif '**' in npfx_value:
            cm_value = npfx_value.split('**')[0].strip()
        else:
            cm_value = npfx_value.strip()
        try:
            int(cm_value)
        except ValueError:
            return ''
        return cm_value

    def get_extractable_sort(self):
        """Extract and return the sort value (used as 'Match to') from the NPFX field."""
        npfx_value = self._detail('NPFX')
        if '&' not in npfx_value:
            return ''
        sort_part = npfx_value.split('&')[1]
        # Anything after '**' is the YDNA part, not the sort value.
        return sort_part.split('**')[0].strip()

    def get_extractable_YDNA(self):
        """Extract and return the YDNA value from the NPFX field."""
        npfx_value = self._detail('NPFX')
        if '**' not in npfx_value:
            return ''
        return npfx_value.split('**')[1].strip()

    def get_extractable_FAMC(self):
        """Return the FAMC value without '@' symbols."""
        return self._detail('FAMC').strip('@')

################################################################################
#                           Utility Functions                                  #
################################################################################
def extract_name(record):
    """
    Extracts the first and last name from a GEDCOM record.
    Returns a concatenation of last and first names, or "UnknownName" if not found.
    """
    name_start = record.find('1 NAME ') + 6
    name_end = record.find('\n', name_start)
    if name_start == 5 or name_end == -1:
        return "UnknownName"
    name = record[name_start:name_end].strip()
    if '/' not in name:
        return name[:10].replace(" ", "")
    first_name, last_name = name.split('/', 1)
    return last_name[:10].rstrip('/').replace(" ", "") + first_name[:10].replace(" ", "")

# Module-level lookup filled in by Gedcom.parse_gedcom(): keys are the value
# returned by GedcomDataset.get_anchor_gen1(), values are individual IDs.
name_to_id = {}

################################################################################
#                               Gedcom Class                                   #
################################################################################
class Gedcom:
    """
    Parses a GEDCOM file and builds a filter pool of individuals with NPFX data.

    Attributes:
        file_name: path of the GEDCOM file to read.
        gedcom_datasets: one GedcomDataset per INDI record, in file order.
        filter_pool: the datasets that carry a non-empty NPFX value and,
            when 'filtered_ids.xlsx' is present, whose ID is listed in it.
    """
    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []
        self.filter_pool = []

    @staticmethod
    def get_standard_name(file_path):
        """Return the file's base name without extension, lower-cased, spaces as '_'."""
        file_name = file_path.split('/')[-1]
        if '.' in file_name:
            file_name = file_name.rsplit('.', 1)[0]
        return file_name.replace(' ', '_').lower()

    def parse_gedcom(self):
        """Parses the GEDCOM file and filters individuals with NPFX data.

        Side effects: appends to ``self.gedcom_datasets`` and
        ``self.filter_pool``, fills the module-level ``name_to_id`` mapping
        (anchor name -> individual ID), prints summary counts, and applies the
        optional manual filter read from 'filtered_ids.xlsx' (column 'ID').
        """
        global name_to_id
        with open(self.file_name, 'r', encoding='utf-8-sig') as f:
            gedcom_lines = f.readlines()
        current_dataset = None
        npfx_count = 0
        ydna_count = 0
        total_count = 0
        for line in gedcom_lines:
            parts = line.strip().split(' ', 2)
            if len(parts) < 2:
                # Blank line or a line without a tag: nothing to parse.
                continue
            try:
                level = int(parts[0])
            except ValueError:
                # Not a "<level> <tag> ..." line (e.g. unwrapped note text).
                continue
            tag = parts[1]
            value = parts[2] if len(parts) > 2 else None
            if level == 0:
                if tag.startswith('@') and tag.endswith('@') and value == 'INDI':
                    total_count += 1
                    current_dataset = GedcomDataset(tag)
                    self.gedcom_datasets.append(current_dataset)
                else:
                    # Any other top-level record ends the current individual, so
                    # its sub-lines (e.g. a submitter's "1 NAME") are not
                    # attributed to the previous INDI.
                    current_dataset = None
                continue
            if current_dataset is None:
                continue
            if level == 1 and tag in ['NAME', 'FAMC']:
                # A tag without a value is stored as '' so the getters can
                # always treat the detail as a string.
                current_dataset.add_extractable_detail(tag, value or '')
            elif level == 2 and tag == 'NPFX':
                if not value:
                    # An empty NPFX carries no match data and must not count.
                    continue
                npfx_count += 1
                current_dataset.add_extractable_detail(tag, value)
                if '**' in value:
                    ydna_count += 1
        autosomal_count = npfx_count - ydna_count
        print(f"GEDCOM contained {total_count} total records")
        print(f"Records tagged and filtered by NPFX: {npfx_count}")
        print(f"Records with YDNA information: {ydna_count}")
        print(f"Autosomal matches: {autosomal_count}")
        for dataset in self.gedcom_datasets:
            # The anchor name is derived from NAME, which is only known once
            # the whole record has been read, so the lookup is built here
            # rather than at the "0 @..@ INDI" line.
            individual_id = dataset.get_gen_person()
            name_to_id[dataset.get_anchor_gen1()] = individual_id
            if dataset.get_extractable_NPFX():
                self.filter_pool.append(dataset)
        manual_filter_activated = True
        if manual_filter_activated:
            try:
                df = pd.read_excel('filtered_ids.xlsx')
            except FileNotFoundError:
                print("filtered_ids.xlsx not found. Skipping second-level manual filter.")
                logger.warning("filtered_ids.xlsx not found. Skipping second-level manual filter.")
            else:
                # Blank spreadsheet cells come back as NaN and are not IDs.
                manual_filtered_ids = set(df['ID'].dropna())
                print(f"Manual filter IDs loaded: {len(manual_filtered_ids)}")
                logger.info(f"Manual filter IDs loaded: {len(manual_filtered_ids)}")
                self.filter_pool = [dataset for dataset in self.filter_pool if dataset.get_gen_person() in manual_filtered_ids]
                print(f"After manual filter, total records: {len(self.filter_pool)}")
                logger.info(f"After manual filter, total records: {len(self.filter_pool)}")

################################################################################
# Caching: Increase lru_cache maxsize for compute_value_for_line
################################################################################
@functools.lru_cache(maxsize=8)
def _ordered_segments(sorted_segments_hashable):
    """Return the (segment, freq) pairs in a deterministic, longest-first order.

    A frozenset has no defined iteration order, so the order is rebuilt here:
    longer segments first, then higher frequency, then the segment tuple
    itself as a final tie-breaker. Cached because the same frozenset is
    passed for every ancestral line of a run.
    """
    return tuple(sorted(
        dict(sorted_segments_hashable).items(),
        key=lambda item: (-len(item[0]), -item[1], item[0]),
    ))


@functools.lru_cache(maxsize=5000)
def compute_value_for_line(ancestral_line, sorted_segments_hashable):
    """
    Compute the Value for an ancestral line using precomputed shared segments.

    Parameters:
        ancestral_line: '~~~'-separated entries; NaN/None or blank gives 0.
        sorted_segments_hashable: frozenset of (segment_tuple, frequency)
            pairs, i.e. a hashable version of the shared segments dictionary.

    Segments are tried longest first. A segment matches when every one of its
    entries is still unclaimed in the line; a match adds the segment's
    frequency and claims those entries. Every entry left unclaimed at the end
    adds 1.

    Returns an integer score.
    """
    if pd.isna(ancestral_line) or ancestral_line.strip() == "":
        return 0
    # Multiset of the line's entries that no segment has claimed yet.
    remaining = Counter(
        entry.strip() for entry in ancestral_line.split("~~~") if entry.strip()
    )
    value = 0
    for segment, freq in _ordered_segments(sorted_segments_hashable):
        needed = Counter(segment)
        if all(remaining[entry] >= count for entry, count in needed.items()):
            value += freq
            remaining -= needed
    value += sum(remaining.values())
    return value

################################################################################
# Other Utility Functions: extract_id, find_parents, etc.
################################################################################
def extract_id(record):
    """Return the text between the first two '@' characters of a GEDCOM record.

    The result is stripped of surrounding whitespace. "UnknownID" is returned
    when the record does not contain an opening and a closing '@'.
    """
    _before, opening_at, after_opening = record.partition('@')
    if not opening_at:
        return "UnknownID"
    xref, closing_at, _rest = after_opening.partition('@')
    if not closing_at:
        return "UnknownID"
    return xref.strip()

def _pointer_after(record, tag):
    """Return the ID inside the first '1 <tag> @...@' pointer of record, or ''."""
    marker = '1 ' + tag + ' @'
    start = record.find(marker)
    if start == -1:
        return ''
    start += len(marker)
    end = record.find('@', start)
    return record[start:end] if end != -1 else ''


def _parent_ids(individual_id, records):
    """Return (father_id, mother_id) from the individual's FAMC family.

    Returns None when the individual or the family record is not in
    ``records``. Either ID is '' when the family has no HUSB / WIFE pointer.
    """
    if individual_id not in records:
        return None
    famc_id = _pointer_after(records[individual_id], 'FAMC')
    if famc_id not in records:
        return None
    fam_record = records[famc_id]
    return _pointer_after(fam_record, 'HUSB'), _pointer_after(fam_record, 'WIFE')


def find_parents(individual_id, generation, records):
    """Record every ancestor couple of ``individual_id`` in ``generation_table``.

    Walks up the tree (mother's side first). Each (father_id, mother_id) pair
    whose two members are both in ``records`` is appended once to the
    module-level ``generation_table`` as (generation, pair) and remembered in
    the module-level ``visited_pairs``.

    Parameters:
        individual_id: ID of the person whose parents are looked up.
        generation: generation number assigned to that person's parents.
        records: dict mapping record ID to the record's text.
    """
    parents = _parent_ids(individual_id, records)
    if parents is None:
        return
    father_id, mother_id = parents
    if mother_id and mother_id in records and father_id and father_id in records:
        parent_pair = (father_id, mother_id)
        if parent_pair in visited_pairs:
            # This couple's ancestors were already walked when the pair was
            # first recorded; walking them again cannot add new pairs, and
            # stopping here also ends loops in malformed data.
            return
        visited_pairs.add(parent_pair)
        generation_table.append((generation, parent_pair))
    if mother_id:
        find_parents(mother_id, generation + 1, records)
    if father_id:
        find_parents(father_id, generation + 1, records)


def find_distant_ancestors(individual_id, records, path=None):
    """Return every ancestral path that starts at ``individual_id``.

    Each path is a list of IDs beginning with the individual and following
    one parent per generation (father's branch listed before mother's) until
    a person with no known family or no listed parents is reached.

    Parameters:
        individual_id: ID of the person to start from.
        records: dict mapping record ID to the record's text.
        path: IDs already walked; the given list is extended in place.

    Returns:
        A list of paths (lists of IDs).
    """
    path = path if path is not None else []
    if individual_id in path:
        # Malformed data: a person listed as their own ancestor. Stop here
        # instead of recursing forever.
        return [path]
    path.append(individual_id)
    parents = _parent_ids(individual_id, records)
    if parents is None:
        return [path]
    father_id, mother_id = parents
    if not father_id and not mother_id:
        return [path]
    paths = []
    if father_id:
        paths.extend(find_distant_ancestors(father_id, records, list(path)))
    if mother_id:
        paths.extend(find_distant_ancestors(mother_id, records, list(path)))
    return paths

def calculate_score(distant_ancestors_paths, records):
    """Pick the ancestral path with the highest 'Yates' score.

    Every ID of every path is turned into a name with extract_name(). A path
    scores the sum of the 1-based positions at which the name contains
    'Yates'. On a tie the earliest path wins.

    Returns:
        (score, names, ids) of the winning path, or (0, [], []) when no
        paths were given.
    """
    if not distant_ancestors_paths:
        return 0, [], []

    named_paths = [
        [extract_name(records.get(person_id, '')) for person_id in id_path]
        for id_path in distant_ancestors_paths
    ]
    scores = [
        sum(position for position, person_name in enumerate(names, start=1)
            if 'Yates' in person_name)
        for names in named_paths
    ]
    # list.index() returns the first occurrence, so ties go to the earliest path.
    best = scores.index(max(scores))
    return scores[best], named_paths[best], distant_ancestors_paths[best]

def filter_ancestral_line(winning_path_ids, generation_table):
    """Keep the (generation, pair) entries that touch the winning path.

    An entry is kept when at least one of the two IDs of its pair appears in
    ``winning_path_ids``. The order of ``generation_table`` is preserved.
    """
    def on_winning_path(pair):
        first_id, second_id = pair
        return first_id in winning_path_ids or second_id in winning_path_ids

    return [
        (generation, pair)
        for generation, pair in generation_table
        if on_winning_path(pair)
    ]

def process_individual(individual_id, gedcom_instance, records):
    """Build the ancestral line and match details for one individual.

    Resets the module-level ``generation_table`` and ``visited_pairs``, walks
    the individual's ancestors, keeps the couples that lie on the
    best-scoring path and renders them oldest-first as
    'Father&Mother~~~Father&Mother~~~...'.

    Returns:
        (individual_data, ancestral_line_string), where individual_data holds
        the keys 'cM', 'Sort', 'YDNA' and 'Filtered Ancestral Line'.

    Raises:
        ValueError: if the individual's own anchor name shows up as an entry
            of the ancestral line.
    """
    global generation_table, visited_pairs, anchor_gen1

    # Start every traversal from clean shared state.
    generation_table = []
    visited_pairs = set()
    find_parents(individual_id, 1, records)

    candidate_paths = find_distant_ancestors(individual_id, records)
    _score, _names, best_path_ids = calculate_score(candidate_paths, records)
    ancestral_pairs = sorted(
        filter_ancestral_line(best_path_ids, generation_table),
        key=lambda entry: entry[0],
    )

    # First dataset of the filter pool that belongs to this individual, if any.
    matched = next(
        (candidate for candidate in gedcom_instance.filter_pool
         if candidate.get_gen_person() == individual_id),
        None,
    )
    if matched is None:
        cm_value = sort_value = ydna_value = ''
        anchor_gen1 = None
    else:
        cm_value = matched.get_extractable_cm()
        sort_value = matched.get_extractable_sort()
        ydna_value = matched.get_extractable_YDNA()
        anchor_gen1 = matched.get_anchor_gen1()

    pair_labels = []
    for _generation, (father_id, mother_id) in ancestral_pairs:
        father_name = extract_name(records.get(father_id, ''))
        mother_name = extract_name(records.get(mother_id, ''))
        pair_labels.append(f"{father_name}&{mother_name}")
    # Generations were collected youngest-first; the line reads oldest-first.
    pair_labels.reverse()
    ancestral_line_text = "~~~".join(pair_labels)

    if anchor_gen1 in pair_labels:
        raise ValueError(f"anchor_gen1 ({anchor_gen1}) was mistakenly included in the ancestral line.")

    individual_data = {
        'cM': cm_value,
        'Sort': sort_value,
        'YDNA': ydna_value,
        'Filtered Ancestral Line': ancestral_line_text
    }
    return individual_data, ancestral_line_text

################################################################################
# Parallel Processing: Wrapper for processing a single record
################################################################################
def process_record_wrapper(individual_id, gedcom_instance, records):
    """
    Process one individual and flatten the result into a table row.

    Returns a list: [individual_id, sort, name, cM, filtered ancestral line string].
    """
    details, ancestral_line = process_individual(individual_id, gedcom_instance, records)
    display_name = extract_name(records.get(individual_id, ""))
    return [
        individual_id,
        details["Sort"],
        display_name,
        details["cM"],
        ancestral_line,
    ]

################################################################################
# Utility: Function to split list into chunks
################################################################################
def chunks(lst, n):
    """Lazily yield consecutive slices of lst, each holding at most n items."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

################################################################################
# Main Execution
################################################################################
def _select_gedcom_file():
    """Return the alphabetically first '*.ged' file of the working directory, or None."""
    # glob() returns files in arbitrary order; sort so "first" is reproducible.
    gedcom_files = sorted(glob.glob('*.ged'))
    if not gedcom_files:
        print("No GEDCOM files found.")
        return None
    print("Automatically selecting the first GEDCOM file.")
    return gedcom_files[0]


def _load_records(gedcom_file_path):
    """Read the GEDCOM file and return {record_id: record_text} for its level-0 records."""
    # 'utf-8-sig' matches Gedcom.parse_gedcom() and drops a leading BOM if present.
    with open(gedcom_file_path, 'r', encoding='utf-8-sig') as file:
        data = file.read()
    return {extract_id(record): record for record in data.split('\n0 ')}


def _identify_all_shared_segments(df, ancestral_col, min_shared=2, min_size=2):
    """
    Identify all shared segments in the ancestral line.

    A segment is any combination of at least ``min_size`` entries of one
    (sorted) ancestral line; it is "shared" when it occurs in at least
    ``min_shared`` rows.

    Returns a dictionary with segment tuples as keys and frequencies as
    values, longest segments first.
    """
    segment_counts = defaultdict(int)
    for ancestral_line in df[ancestral_col]:
        if pd.isna(ancestral_line):
            continue
        lines = sorted(line.strip() for line in ancestral_line.split("~~~") if line.strip())
        for size in range(min_size, len(lines) + 1):
            for subset in combinations(lines, size):
                segment_counts[subset] += 1
    shared_segments = {segment: count for segment, count in segment_counts.items() if count >= min_shared}
    return dict(sorted(shared_segments.items(), key=lambda x: len(x[0]), reverse=True))


def _assign_descriptor(score):
    """Assigns a descriptor based on the composite score (NaN falls through to "Very Low")."""
    if score >= 2.0:
        return "High"
    elif score >= 1.5:
        return "Moderate"
    elif score >= 1.0:
        return "Medium"
    elif score >= 0.5:
        return "Low"
    else:
        return "Very Low"


def _add_composite_scores(scored_df):
    """Return a copy of scored_df with the standardized metrics added.

    Adds "Z-Score", "Robust Z-Score", "Percentile Rank", "Composite Score"
    (mean of |Z|, |robust Z| and percentile/100, rounded to 2 decimals) and
    "Composite Significance", all derived from the "Value" column.
    """
    result = scored_df.copy()
    result["Value"] = pd.to_numeric(result["Value"], errors="coerce")
    mean_value = result["Value"].mean()
    std_value = result["Value"].std()
    if pd.isna(std_value) or std_value == 0:
        # One row, or all values identical: no spread, so nothing deviates.
        result["Z-Score"] = 0.0
    else:
        result["Z-Score"] = (result["Value"] - mean_value) / std_value
    median_value = result["Value"].median()
    mad_value = np.median(np.abs(result["Value"] - median_value))
    if mad_value == 0:
        result["Robust Z-Score"] = 0.0
    else:
        # 1.4826 scales the MAD to be comparable with a standard deviation.
        result["Robust Z-Score"] = (result["Value"] - median_value) / (mad_value * 1.4826)
    result["Percentile Rank"] = result["Value"].rank(pct=True) * 100
    result["Composite Score"] = (
        (
            result["Z-Score"].abs()
            + result["Robust Z-Score"].abs()
            + (result["Percentile Rank"] / 100)
        ) / 3
    ).round(2)
    result["Composite Significance"] = result["Composite Score"].apply(_assign_descriptor)
    return result


def main():
    """Run the pipeline: parse the GEDCOM, score each filtered individual, export CSV and HTML.

    Reads the first '*.ged' file of the working directory and writes
    'final_combined_df_with_composite_scores.csv' and
    'HTML_combined_df_with_composite_scores.html' next to it.
    """
    gedcom_file_path = _select_gedcom_file()
    if not gedcom_file_path:
        print("No GEDCOM file selected; exiting.")
        return

    gedcom_instance = Gedcom(gedcom_file_path)
    gedcom_instance.parse_gedcom()
    individual_ids = [dataset.get_gen_person() for dataset in gedcom_instance.filter_pool]
    print(f"Records tagged and filtered by NPFX: {len(individual_ids)}")
    if not individual_ids:
        logger.warning("No individuals left after filtering; nothing to score.")
        return
    records = _load_records(gedcom_file_path)

    # Parallel processing. chunksize batches the IDs so the (large) partial
    # holding gedcom_instance and records is pickled once per batch of 50
    # instead of once per individual.
    chunk_size = 50  # Adjust chunk size as needed
    max_workers = os.cpu_count() or 4
    logger.info("Using %s workers for parallel processing.", max_workers)
    process_func = functools.partial(process_record_wrapper, gedcom_instance=gedcom_instance, records=records)
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        combined_df_rows = list(executor.map(process_func, individual_ids, chunksize=chunk_size))

    ################################################################################
    # Create and Populate Main DataFrame (combined_df)
    ################################################################################
    ancestral_col = "Yates DNA Ancestral Line"
    columns = ["ID#", "Match to", "Name", "cM", ancestral_col]
    combined_df = pd.DataFrame(combined_df_rows, columns=columns)

    # Drop the two oldest generations when a line starts with them.
    prefix_to_remove = "YatesJohn&HydeAlice~~~YatesThomas&WhiteFrances~~~"
    combined_df[ancestral_col] = combined_df[ancestral_col].map(
        lambda line: line[len(prefix_to_remove):] if line.startswith(prefix_to_remove) else line
    )
    combined_df = combined_df.sort_values(
        by=["Match to", ancestral_col], ascending=[False, True]
    ).reset_index(drop=True)

    ################################################################################
    # Segmentation: compute "Value" per ancestral line (cached per distinct line)
    ################################################################################
    shared_segments_found = _identify_all_shared_segments(combined_df, ancestral_col, min_shared=2, min_size=2)
    # lru_cache needs hashable arguments, so pass the segments as a frozenset.
    shared_segments_hashable = frozenset(shared_segments_found.items())
    combined_df["Value"] = combined_df[ancestral_col].apply(
        lambda line: compute_value_for_line(str(line), shared_segments_hashable)
    )
    combined_df = combined_df.sort_values(by=ancestral_col, ascending=False)

    ################################################################################
    # Additional scores: Z-Score, Robust Z-Score, Percentile Rank, Composite Score
    ################################################################################
    combined_df = _add_composite_scores(combined_df)

    ################################################################################
    # Final column order ("Name" is suppressed from the output)
    ################################################################################
    final_columns = ["ID#", "Match to", "Value", "Composite Score", "Composite Significance", "Yates DNA Ancestral Line"]
    combined_df = combined_df[final_columns]
    logger.info("Final DataFrame columns: %s", combined_df.columns.tolist())
    print(combined_df.head(10))

    ################################################################################
    # Export the final DataFrame with the additional scores to CSV
    ################################################################################
    output_filename = "final_combined_df_with_composite_scores.csv"
    combined_df.to_csv(output_filename, index=False)
    logger.info("Final DataFrame with composite scores exported to '%s'.", output_filename)

    ################################################################################
    # Export the Final DataFrame to HTML with left-aligned Yates DNA Ancestral Line
    ################################################################################
    html_filename = "HTML_combined_df_with_composite_scores.html"
    css_style = """
    <style>
    table {
      width: 100%;
      border-collapse: collapse;
      margin: 20px 0;
    }
    table, th, td {
      border: 1px solid #333;
    }
    th, td {
      padding: 8px 12px;
      text-align: center;
    }
    th {
      background-color: #f2f2f2;
    }
    /* Left-align the 6th column ("Yates DNA Ancestral Line") */
    td:nth-child(6) {
      text-align: left;
    }
    </style>
    """
    # escape=True: cell text comes from the GEDCOM file, so '&', '<' and '>'
    # must be written as HTML entities (they still display as typed).
    html_content = css_style + combined_df.to_html(
        index=False,
        columns=final_columns,
        escape=True
    )
    with open(html_filename, "w", encoding="utf-8") as f:
        f.write(html_content)
    logger.info("Final DataFrame exported to HTML file '%s'.", html_filename)

if __name__ == '__main__':
    main()


Automatically selecting the first GEDCOM file.
Automatically selecting the first GEDCOM file.
GEDCOM contained 58271 total records
Records tagged and filtered by NPFX: 1301
Records with YDNA information: 76
Autosomal matches: 1225
Manual filter IDs loaded: 499
After manual filter, total records: 500
Records tagged and filtered by NPFX: 500


In [5]:
%prun main()


Automatically selecting the first GEDCOM file.
Automatically selecting the first GEDCOM file.
GEDCOM contained 58271 total records
Records tagged and filtered by NPFX: 1301
Records with YDNA information: 76
Autosomal matches: 1225
Manual filter IDs loaded: 749
After manual filter, total records: 750
Records tagged and filtered by NPFX: 750


Process ForkProcess-3:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.11/concurrent/futures/process.py", line 249, in _process_worker
    call_item = call_queue.get(block=True)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 103, in get
    res = self._recv_bytes()
          ^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 437, in _recv_bytes
    return self._recv(size)
           ^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 402, in _recv
    buf.write(chunk)
KeyboardInterrupt
E

KeyboardInterrupt: 

Process ForkProcess-4:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()


In [None]:
!pip install line_profiler
%lprun -f compute_value_for_line compute_value_for_line("YatesWilliam~~~YatesLevi~~~YatesAmbrose", frozenset({(('YatesWilliam', 'YatesLevi', 'YatesAmbrose'), 5)}))


In [None]:
%%timeit
compute_value_for_line("YatesWilliam~~~YatesLevi~~~YatesAmbrose", frozenset({(('YatesWilliam', 'YatesLevi', 'YatesAmbrose'), 5)}))
