In [85]:
# Uncomment these lines to install the third-party libraries used below.
# (`re` ships with Python's standard library, so it needs no installation.)

#!pip install matplotlib
#!pip install pygraphviz

In [87]:
import re
import matplotlib.pyplot as plt
import pygraphviz as pgv

class Person:
    """A single individual in the family tree.

    All data lives in plain public attributes; the ``get_*`` methods simply
    return those attributes unchanged.
    """

    def __init__(self, generation, name, birthdate="", spouse=None):
        """Record the person's details and start with no children.

        Args:
            generation: Generation marker for this person. ``process_dataset``
                passes either an int or the string ``"sp:"`` here.
            name: Display name.
            birthdate: Birth date text; defaults to an empty string.
            spouse: Another ``Person`` or ``None`` when no spouse is known.
        """
        self.generation, self.name = generation, name
        self.birthdate, self.spouse = birthdate, spouse
        # Every instance gets its own fresh list, filled through add_child.
        self.children = list()

    def add_child(self, child):
        """Append ``child`` to this person's list of children."""
        self.children += [child]

    def get_generation(self):
        """Return the stored generation marker."""
        return self.generation

    def get_name(self):
        """Return the stored name."""
        return self.name

    def get_birthdate(self):
        """Return the stored birthdate text."""
        return self.birthdate

    def get_spouse(self):
        """Return the stored spouse, or ``None`` if there is none."""
        return self.spouse

    def get_children(self):
        """Return the list of children (the live list, not a copy)."""
        return self.children

def parse_line(line):
    """Parse one family-tree entry into ``(first_value, name, birth_year)``.

    An entry is either a descendant line that starts with a generation number
    (``"2 Homer Simpson (b.1956)"``) or a spouse line that starts with ``"sp:"``
    (``"sp: Marge Bouvier (b.1956)"``).

    Args:
        line: The entry text, starting at the generation number or ``"sp:"``.
            Surrounding whitespace and the trailing newline are ignored.

    Returns:
        A tuple ``(first_value, name, birth_year)`` where ``first_value`` is the
        generation as an ``int`` or the string ``"sp:"``, ``name`` is the text
        after the first token with parenthesised parts removed, and
        ``birth_year`` is the four digits following ``"b."`` or ``"-"`` when no
        birth year is present.

    Raises:
        ValueError: If the line starts with neither a number nor ``"sp:"``.
    """
    entry = line.strip()

    if entry.startswith("sp:"):
        first_value = "sp:"
        remainder = entry[len("sp:"):]
    else:
        # Read every leading digit so generations of 10 and above are not
        # truncated to their first digit.
        generation_match = re.match(r"\d+", entry)
        if generation_match is None:
            raise ValueError(
                f"Line does not start with a generation number or 'sp:': {line!r}"
            )
        first_value = int(generation_match.group())
        # The name is everything after the first whitespace-separated token.
        tokens = entry.split(None, 1)
        remainder = tokens[1] if len(tokens) > 1 else ""

    # Drop parenthesised annotations such as "(b.1956)", then tidy the
    # whitespace that removing them leaves behind.
    name = " ".join(re.sub(r"\([^)]*\)", "", remainder).split())

    # The birth year is the four digits that follow a standalone "b." marker.
    birth_match = re.search(r"\bb\.\s*(\d{4})", entry)
    birth_year = birth_match.group(1) if birth_match else "-"

    return (first_value, name, birth_year)


def process_dataset(directory: str, encoding=None):
    """Read a family-tree text file and link its people into parent/child groups.

    Each line is scanned for the first digit or ``"sp:"`` marker; the text from
    that point on is handed to ``parse_line``. Lines containing neither marker
    (for example blank lines) are skipped.

    A numbered person with generation ``g > 1`` becomes a child of the most
    recent earlier person with generation ``g - 1``. A ``"sp:"`` line is stored
    as the spouse of the entry on the line directly before it.

    Args:
        directory: Path of the text file to read.
        encoding: Optional text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        A list of every ``Person`` that has at least one child, in the order in
        which each first received a child. People without children are
        reachable only through their parent's ``children`` list.

    Raises:
        ValueError: If a spouse line has no preceding entry, or a person's
            parent generation has not appeared earlier in the file.
    """
    entries = []
    with open(directory, 'r', encoding=encoding) as f:
        for line in f:
            # Skip any prefix before the generation number or "sp:" marker.
            marker = re.search(r"\d|sp:", line)
            if marker is None:
                # Nothing parseable on this line (e.g. a blank line).
                continue
            entries.append(line[marker.start():])

    people = [Person(*parse_line(entry)) for entry in entries]

    family = []
    listed_parent_ids = set()
    # Most recent person seen so far for each generation number.
    latest_by_generation = {}

    for idx, person in enumerate(people):
        generation = person.generation

        if not isinstance(generation, int):
            # Spouse entry: it belongs to the entry on the previous line.
            if idx == 0:
                raise ValueError(
                    f"Spouse entry {person.name!r} has no preceding person"
                )
            people[idx - 1].spouse = person
            continue

        if generation > 1:
            parent = latest_by_generation.get(generation - 1)
            if parent is None:
                raise ValueError(
                    f"No generation {generation - 1} parent found before {person.name!r}"
                )
            parent.add_child(person)
            if id(parent) not in listed_parent_ids:
                listed_parent_ids.add(id(parent))
                family.append(parent)

        latest_by_generation[generation] = person

    return family

def _person_label(person):
    """Return the node text for ``person``: name, birthdate and, if set, the spouse."""
    label = f"{person.name}\n{person.birthdate}"
    if person.spouse is not None:
        label += f" m. {person.spouse.name}\n{person.spouse.birthdate}"
    return label


def create_tree(family: list, directory: str):
    """Render the family as a left-to-right tree and save it as a PNG image.

    Every person in ``family`` and each of their children becomes one node,
    with a "Child" edge from parent to child. A node shows the person's name
    and birthdate, followed by " m. " and the spouse's name and birthdate when
    a spouse is set.

    Args:
        family: ``Person`` objects that have children, as returned by
            ``process_dataset``.
        directory: Path of the PNG file to write (despite the name, this is a
            file path, not a folder).
    """
    # Create a new directed graph using PyGraphviz
    graph = pgv.AGraph(directed=True)

    # Nodes are keyed per Person object rather than by name, so two people who
    # share a name stay separate nodes instead of being merged into one.
    node_keys = {}

    def node_key(person):
        return node_keys.setdefault(id(person), f"person_{len(node_keys)}")

    for person in family:
        graph.add_node(node_key(person), label=_person_label(person))

        for child in person.children:
            # Children use the same label helper so that a married child
            # without children of their own still shows the spouse.
            graph.add_node(node_key(child), label=_person_label(child))
            graph.add_edge(node_key(person), node_key(child), label="Child")

    # Set some graph attributes to make the output look nice
    graph.graph_attr["rankdir"] = "LR"
    graph.graph_attr["ratio"] = "auto"
    graph.graph_attr["overlap"] = "scale"
    graph.node_attr["shape"] = "rectangle"
    graph.edge_attr["arrowsize"] = "0.5"
    graph.graph_attr["dpi"] = "1000"

    graph.layout(prog='dot')
    graph.draw(directory, format='png')
    print("Family Tree saved, please check this file directory")

# Replace "simpsons.txt" with the path of a family tree text file in the
# format that process_dataset reads.
people_list = process_dataset("simpsons.txt")

# Draw the tree and write it to the PNG file named below.
create_tree(
    people_list,
    directory='simspons_family_tree.png',
)

Family Tree saved, please check this file directory


In [4]:
# Optional cell: run it only when the family tree file is stored as UTF-16
# instead of UTF-8. It saves a UTF-8 copy of the text under a new file name.

# Decode the entire UTF-16 file into memory first.
with open('simpsons.txt', 'r', encoding='utf-16') as utf16_source:
    contents = utf16_source.read()

# Then write the same text out as UTF-8. The copy goes to 'simpsons_utf8.txt',
# so pass that name to process_dataset to use the converted file.
with open('simpsons_utf8.txt', 'w', encoding='utf-8') as utf8_target:
    utf8_target.write(contents)