In [1]:
import uuid
from uuid import UUID
import random
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import itertools

In [2]:
class GenomeSimulator:
    """Toy multi-generation population model that tracks gene inheritance.

    Every person is a dict stored in ``self.people`` under the person's UUID,
    with the keys ``name``, ``genome``, ``generation``, ``sex``,
    ``birth_year``, ``partner``, ``parents`` and ``ancestors``.  Founders get
    a genome made entirely of their own UUID, so each gene in a descendant
    identifies the founder it was inherited from.
    """

    def __init__(self):
        self.generation = 1      # incremented once per make_partners() call
        self.people = dict()     # UUID -> person record
        self.start_year = 1800   # earliest possible founder birth year

    def setup(self, population=32, genome_length=20_000):
        """Create the founding generation.

        Args:
            population: Number of founders to add.
            genome_length: Number of genes in each founder's genome.
        """
        for _ in range(population):
            value = uuid.uuid4()
            self.people[value] = {'name': value,
                                  'genome': [value] * genome_length,
                                  'generation': self.generation,
                                  'sex': random.randint(0, 1),
                                  'birth_year': random.randint(0, 10) + self.start_year,
                                  'partner': None,
                                  'parents': None,
                                  'ancestors': []}

    def genome_similarity(self, person_a, person_b):
        """Return the percentage (0-100) of positions where both genomes agree.

        Genes are compared position by position.  The denominator is the
        length of the longer genome, so unmatched trailing genes count as
        differences.  Two empty genomes yield 0.0 rather than dividing by zero.
        """
        genome_a = person_a['genome']
        genome_b = person_b['genome']
        total = max(len(genome_a), len(genome_b))
        if total == 0:
            return 0.0
        matching = sum(1 for gene_a, gene_b in zip(genome_a, genome_b)
                       if gene_a == gene_b)
        return matching / total * 100

    def make_partners(self, max_similarity=2, children_per_couple=3):
        """Pair up unpartnered people and add their children to the population.

        Each unpartnered person with ``sex == 1`` (treated as male) is matched
        with the first still-available unpartnered person with ``sex == 0``
        whose genome similarity to him is below ``max_similarity`` percent.
        When no such woman exists, the number of rejected candidates is
        printed and he stays unpartnered.

        Args:
            max_similarity: Exclusive upper bound, in percent, on the genome
                similarity allowed between partners.
            children_per_couple: Number of children created for each couple.
        """
        self.generation += 1
        # Snapshot the singles first: children added below must not take part
        # in this round.
        candidates = [person for person in self.people.values()
                      if person['partner'] is None]
        males = [person for person in candidates if person['sex'] == 1]
        females = {person['name']: person for person in candidates
                   if person['sex'] == 0}
        for father in males:
            mother = None
            close_relatives = 0
            for woman in females.values():
                if self.genome_similarity(father, woman) < max_similarity:
                    mother = woman
                    break  # first acceptable candidate wins
                close_relatives += 1
            if mother is None:
                print(f"{close_relatives}, no mother")
                continue
            # Remove her from the pool so no one else can pick her this round.
            females.pop(mother['name'])
            father['partner'] = mother['name']
            mother['partner'] = father['name']
            for _ in range(children_per_couple):
                self._add_child(father, mother)

    def _add_child(self, father, mother):
        """Create one child of ``father`` and ``mother`` and store it in ``self.people``."""
        # Each gene is inherited from one parent, chosen by a fair coin flip.
        genome = [paternal if random.randint(0, 1) == 0 else maternal
                  for paternal, maternal in zip(father['genome'], mother['genome'])]
        new_name = uuid.uuid4()
        birth_year = max(father['birth_year'], mother['birth_year']) + random.randint(20, 35)
        # Ancestors may appear more than once when the parents are related.
        ancestors = father['ancestors'] + [father['name']] + mother['ancestors'] + [mother['name']]
        self.people[new_name] = {'name': new_name,
                                 'genome': genome,
                                 'generation': father['generation'] + 1,
                                 'sex': random.randint(0, 1),
                                 'birth_year': birth_year,
                                 'partner': None,
                                 'parents': [father['name'], mother['name']],
                                 'ancestors': ancestors}

In [3]:
def genome_similarity(person_a, person_b):
    """Return the percentage (0-100) of positions where both genomes agree.

    ``person_a`` and ``person_b`` are mappings with a ``'genome'`` entry
    holding a sequence of genes.  Genes are compared position by position and
    the denominator is the length of the longer genome, so genomes of any
    length are supported.  Two empty genomes yield 0.0 instead of dividing by
    zero.
    """
    genome_a = person_a['genome']
    genome_b = person_b['genome']
    total = max(len(genome_a), len(genome_b))
    if total == 0:
        return 0.0
    matching = sum(1 for gene_a, gene_b in zip(genome_a, genome_b)
                   if gene_a == gene_b)
    return matching / total * 100

In [4]:
# Build a fresh simulator and populate it with 128 founders.
simulation = GenomeSimulator()
simulation.setup(population=128)
# Founder count, shown as the cell result.
len(simulation.people)

128

In [None]:
# Run 19 pairing rounds, printing a wall-clock timestamp after each so the
# per-round cost is visible.
for generation in range(1, 20):
    simulation.make_partners()
    finished_at = datetime.now()
    print("generation %s: %s" % (generation, finished_at))

generation 1: 2023-05-25 18:27:07.543448
generation 2: 2023-05-25 18:27:12.368787
generation 3: 2023-05-25 18:27:19.549611
generation 4: 2023-05-25 18:27:31.135609
generation 5: 2023-05-25 18:27:50.662230
8, no mother
8, no mother
8, no mother
7, no mother
7, no mother
7, no mother
generation 6: 2023-05-25 18:28:26.995035
generation 7: 2023-05-25 18:29:21.217226
generation 8: 2023-05-25 18:30:44.073609
184, no mother
generation 9: 2023-05-25 18:35:00.578257


In [None]:
# Total number of person records accumulated so far, across all generations.
len(simulation.people)

In [None]:
# Tally of the whole population by sex (1 is paired as father, 0 as mother).
Counter(person['sex'] for person in simulation.people.values())

In [None]:
# Same tally, restricted to people who never got a partner.
Counter(
    person['sex']
    for person in simulation.people.values()
    if person['partner'] is None
)

In [None]:
# Generation counter: starts at 1 and grows by one per make_partners() call.
simulation.generation

In [None]:
# Inspect the most recently added person record (dicts keep insertion order).
list(simulation.people.values())[-1]

In [None]:
# Collect every distinct genome once, in first-seen order, and count them.
# Genomes are lists and therefore unhashable, so a tuple copy serves as the
# set key; this avoids re-comparing each genome against every genome kept so
# far, which made the original list-membership test quadratic.
seen = set()
l1 = []
for item in simulation.people.values():
    key = tuple(item['genome'])
    if key not in seen:
        seen.add(key)
        l1.append(item['genome'])
count = len(l1)

In [None]:
# Number of distinct genomes collected into `l1`.
count

In [None]:
# Total population size, for comparison with the distinct-genome count.
len(simulation.people)

In [None]:
# Count unpartnered people per birth year.  Column 0 holds the year, column 1
# the count; rows are ordered by ascending count.
single_birth_years = Counter(
    person['birth_year']
    for person in simulation.people.values()
    if person['partner'] is None
)
unpartnered = pd.DataFrame(single_birth_years.items()).sort_values(1)

In [None]:
# Same table ordered by column 0 (birth year) instead of column 1 (count).
unpartnered.sort_values(0)

In [None]:
# x: birth year, y: number of unpartnered people born in that year.
plt.scatter(x=unpartnered[0], y=unpartnered[1])

In [None]:
# Unpartnered people per generation.
Counter(
    person['generation']
    for person in simulation.people.values()
    if person['partner'] is None
)

In [None]:
# Wall-clock time at which this cell ran.
datetime.now()

In [None]:
# Population size per generation.
Counter(person['generation'] for person in simulation.people.values())

In [None]:
# Names (UUIDs) of everyone whose generation is 4.
# NOTE(review): the variable name suggests "latest generation", but the
# filter is a fixed 4 -- confirm which was intended.
last_generation = [
    person['name']
    for person in simulation.people.values()
    if person['generation'] == 4
]

In [None]:
# One row per person (indexed by UUID), one column per record field.
people_by_field = pd.DataFrame(simulation.people)
people = people_by_field.T
people

In [None]:
# Genome similarity (percent) for every unordered pair of generation-8 people.
# Note that `fifth_gen` holds generation-8 pairs here despite its name.
generation_rows = people[people['generation'] == 8]
fifth_gen = list(itertools.combinations(generation_rows.index, 2))
relatedness = [
    genome_similarity(people.loc[first_id], people.loc[second_id])
    for first_id, second_id in fifth_gen
]
relatedness

In [None]:
# Distribution of the pairwise genome-similarity percentages in `relatedness`.
plt.hist(relatedness)

In [None]:
# Rows of `people` whose generation is 6.
people[people['generation']==6]

In [None]:
# For every opposite-sex pair in generation 6, print the fraction of the first
# person's distinct ancestors that the second person shares.
fifth_gen = list(itertools.combinations(people[people['generation'] == 6].index, 2))
for first_id, second_id in fifth_gen:
    first = people.loc[first_id]
    second = people.loc[second_id]
    if first['sex'] == second['sex']:
        continue
    anc = set(first['ancestors'])
    shared = anc & set(second['ancestors'])
    print(len(shared) / len(anc))