<a href="https://colab.research.google.com/github/ronyates47/Gedcom-Utils/blob/main/a_Surname_GEDCOM_Utility_stable_v_230818_1129hrsEXP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install pandas
!pip install python-gedcom



In [9]:
#!pip install pandas
#!pip install python-gedcom
# Base script-stable-selects GEDCOM-correct output-Build with Bard 23-18-0728

import glob
from gedcom.element.individual import IndividualElement
from gedcom.parser import Parser
import pandas as pd

# Initialize the last_prime_surname variable
last_prime_surname = None

class Gedcom:
    def __init__(self, file_name):
        self.file_name = file_name
        self.gedcom_datasets = []
        self.smallpile_records = []

    @staticmethod

    def get_standard_name(file_path):
        file_name = file_path.split('/')[-1]
        if '.' in file_name:
            file_name = file_name.rsplit('.', 1)[0]
        standard_name = file_name.replace(' ', '_').lower()
        return standard_name

def parse_gedcom(self, file_name):
        with open(file_name, 'r', encoding='utf-8-sig') as f:
            gedcom_lines = f.readlines()

        current_dataset = None
        npfx_count = 0
        total_count = 0
        for line in gedcom_lines:
            parts = line.strip().split(' ', 2)
            level = int(parts[0])
            tag = parts[1]
            value = parts[2] if len(parts) > 2 else None

            if level == 0 and tag.startswith('@') and tag.endswith('@') and value == 'INDI':
                total_count += 1
                current_dataset = GedcomDataset(tag)
                self.gedcom_datasets.append(current_dataset)
            elif current_dataset is not None:
                if level == 1 and tag in ['NAME', 'FAMC']:
                    current_key = tag
                    current_dataset.add_extractable_detail(current_key, value)
                elif level == 2 and tag == 'PREFIX':
                    npfx_count += 1
                    current_dataset.npfx = value


        for dataset in self.gedcom_datasets:
            if dataset.get_extractable_PREFIX():
                self.smallpile_records.append(dataset)

            print(f'Found {total_count} bigpile_records Parsed (INDI)')
            print(f'Found {len(self.smallpile_records)} Parsed into smallpile_records')

class GedcomDataset:
    def __init__(self, gen_person):
        self.gen_person = gen_person
        self.extractable_detail = {}

    def add_extractable_detail(self, key, value):
        self.extractable_detail[key] = value

    def get_gen_person(self):
        name = self.extractable_detail.get('NAME', '')
        first_name, last_name = name.split('/', 1)
        first_name = first_name.split(' ')[0]
        last_name = last_name.rstrip('/')
        self.anchor_gen1 = last_name.replace(" ", "") + first_name.replace(" ", "")
        return self.gen_person.strip('@')

    def get_anchor_gen1(self):
        return self.anchor_gen1

    def get_extractable_NPFX(self):
        return self.extractable_detail.get('NPFX', '')

    def get_extractable_cm(self):
        npfx_value = self.extractable_detail.get('NPFX', '')
        if '&' in npfx_value:
            cm_value = npfx_value.split('&')[0].strip()
        else:
            cm_value = npfx_value.strip()
        try:
            int(cm_value)
            return cm_value
        except ValueError:
            return 'error'

    def get_extractable_sort(self):
        npfx_value = self.extractable_detail.get('NPFX', '')
        if '&' in npfx_value:
            sort_value = npfx_value.split('&')[1].strip()
            return sort_value
        else:
            return ''

    def get_extractable_FAMC(self):
        return self.extractable_detail.get('FAMC', '').strip('@')

def select_gedcom_file():
    gedcom_files = glob.glob('*.ged')
    if not gedcom_files:
        print("No GEDCOM files found in the Colab contents.")
        return None

    print("List of GEDCOM files:")
    for i, file_name in enumerate(gedcom_files, 1):
        print(f"{i}. {file_name}")

    while True:
        try:
            selected_num = int(input("Enter the number of the GEDCOM file you want to use: "))
            if 1 <= selected_num <= len(gedcom_files):
                return gedcom_files[selected_num - 1]
            else:
                print("Invalid number. Please enter a valid number from the list.")
        except ValueError:
            print("Invalid input. Please enter a valid number.")

def extract_id(record):
    if record in smallpile_records:
        id_start = record.find('@') + 1
        id_end = record.find('@', id_start)
        return record[id_start:id_end]
    else:
        return None

def extract_smallpile_record_name(record):
    """
    Extracts the name from a smallpile_record.

    Args:
        record: The record to extract the name from.

    Returns:
        The name of the record.
    """

    if record in smallpile_records:
        name_start = record.find('1 NAME ') + 6
        name_end = record.find('\n', name_start)
        name = record[name_start:name_end]
        first_name, last_name = name.split('/', 1)
        first_name = first_name[:10]
        last_name = last_name[:10].rstrip('/')
        return last_name.replace(" ", "") + first_name.replace(" ", "")
    else:
        return None

def extract_ancestral_line(record):
    ancestral_line = []
    current_person = record
    while current_person.get_extractable_FAMC():
        ancestral_line.append(current_person)
        current_person = GedcomDataset(current_person.get_extractable_FAMC())
    return ancestral_line

def find_line_of_interest(lines, surname):
    for line in lines:
        for individual in line:
            if surname in individual[1]:
                return line
    return None

def input_prime_surname(last_prime_surname=None):
    if last_prime_surname:
        last_name = input(f"Enter prime_surname (default: {last_prime_surname}): ")
        if not last_name:
            last_name = last_prime_surname
    else:
        last_name = input("Enter prime_surname: ")
    return last_name

# Call the function to let the user input prime_surname
prime_surname = input_prime_surname(last_prime_surname)

# Store the value of prime_surname for later use
last_prime_surname = prime_surname

# Call the function to let the user select the GEDCOM file
gedcom_file_path = select_gedcom_file()
if gedcom_file_path:
    # Use the selected GEDCOM file path to create an instance of the Gedcom class
    gedcom_instance = Gedcom(gedcom_file_path)
    gedcom_instance.parse_gedcom()

#	 Run the process_individual function for each individual in the individuals list and create a combined DataFrame
combined_df_rows = []
for name, individual_id in individuals:
    individual_data = process_individual(individual_id, gedcom_instance)
    cm = individual_data['cM']
    sort = individual_data['Sort']
    line_of_interest = find_line_of_interest(combined_df, prime_surname)['Ancestral line_of_interest']
    most_distant_ancestor = generation_table.iloc[0]['Parent Pair']  # Get the most distant ancestor from the last row of generation_table
    combined_df_rows.append([individual_id, name, sort, cm, most_distant_ancestor, parent_pairs_a10])

combined_df = pd.DataFrame(combined_df_rows, columns=['ID#', 'Name', 'Match to', 'cM', 'Most Distant Ancestor', 'Ancestral line_of_interest'])

# Function to create hotlinks
def create_hotlink(row):
    url_base = "https://yates.one-name.net/tng/verticalchart.php?personID="
    person_id = row['ID#']
    hotlink = f'<a href="{url_base}{person_id}&tree=tree1&parentset=0&display=vertical&generations=8" target="_blank">{person_id}</a>'
    return hotlink

# Apply the hotlink function to create the 'LUN#' column
combined_df['LUN#'] = combined_df.apply(lambda row: create_hotlink(row), axis=1)

# Change the order of the columns
combined_df = combined_df[['ID#', 'Name', 'Match to', 'cM', 'Most Distant Ancestor', 'LUN#', 'Ancestral line_of_interest']]

# Adjust index to start from 1 instead of 0
combined_df.index = combined_df.index + 1

# Print all records from the DataFrame
print(combined_df)

# Export the combined_df DataFrame to an Excel file
combined_df.to_excel('/content/output.xlsx', index=False)


Enter prime_surname: yates
List of GEDCOM files:
1. yates-one-name-study.ged
Enter the number of the GEDCOM file you want to use: 1


AttributeError: ignored