In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import random
import copy
import statistics
import time
import math

In [None]:
# Helper Functions

# Function for generating the number of children a couple have
# Uses a normal distribution centered on the mean number of children
def num_children(children_avg, children_std=3.0):
    """Draw the number of children for one couple.

    The value is sampled from a normal distribution, rounded to the nearest
    whole number and clamped so that every couple has at least one child.

    Parameters
    ----------
    children_avg : float
        Mean of the normal distribution the family size is drawn from.
    children_std : float, optional
        Standard deviation of that distribution (default 3.0, the value that
        used to be hard-coded).

    Returns
    -------
    int
        Number of children, always >= 1.

    Notes
    -----
    Because draws below 1 are clamped up to 1, the realised mean family size
    can be higher than ``children_avg``.
    """
    draw = np.random.normal(children_avg, children_std)
    # int() guarantees a plain Python int even on NumPy versions where
    # round() of a numpy float returns a numpy float.
    return max(1, int(round(draw)))

In [None]:
# Function for running one simulation

def simulation(starting_population, total_generations, childless_avg, children_avg):
    """Run one population simulation and return per-generation statistics.

    A first generation of ``starting_population`` people is created with a
    random sex (0 = woman, 1 = man). For every further generation each member
    of the previous generation is marked childless with probability
    ``childless_avg``; the remaining women are paired at random with the
    remaining men (each man at most once) and every couple gets
    ``num_children(children_avg)`` children. Each child records, per ancestral
    level, the set of ids of its ancestors.

    Parameters
    ----------
    starting_population : int
        Number of people in generation 1.
    total_generations : int
        Number of generations to simulate, including generation 1.
    childless_avg : float
        Probability (0..1) that a person has no children.
    children_avg : float
        Mean number of children per couple, forwarded to ``num_children``.

    Returns
    -------
    pandas.DataFrame
        One row per generation with the columns ``generation``,
        ``population``, ``expected_growth``, ``growth_luck``,
        ``theoretical_max_ancestors``, ``avg_ancestors``, ``max_ancestors``
        and ``min_ancestors``. For a generation with no people the ancestor
        statistics are NaN; ``growth_luck`` is NaN when the following
        generation is empty (the ratio is undefined).

    Side effects
    ------------
    Prints timing information for every stage and displays the statistics
    table with the notebook's ``display``.
    """
    columns = ['id', 'generation', 'sex', 'ancestor_set', 'ancestor_total_count']

    # Starting Variables (Fixed)
    human_id_count = 0
    starting_generation = 1

    # Each person is one row: [id, generation, sex, ancestor_set, ancestor_total_count].
    # Rows are collected in plain lists and converted to a DataFrame once at the
    # end; DataFrame.append no longer exists in pandas 2.x and growing a frame
    # generation by generation is slow.
    records = []
    population_table = []  # population_table[g - 1] == size of generation g

    # 1: First Generation
    tic = time.perf_counter() #start timer

    old_generation = []
    for _ in range(starting_population):
        sex = random.randint(0, 1)
        # Founders have no recorded ancestors: empty level list, count 0.
        old_generation.append([human_id_count, starting_generation, sex, [], 0])
        human_id_count += 1

    records.extend(old_generation)
    population_table.append(len(old_generation))

    toc = time.perf_counter() #end timer
    print(f" - Generation 1: {len(old_generation)} --- Time: {toc - tic:0.4f} seconds")

    # 2: Further Generations
    for gen_count in range(starting_generation + 1, total_generations + 1):

        tic = time.perf_counter() #start timer

        # Assign childless and build the parent lists in one pass.
        # One random draw per person, in id order.
        old_women = []
        old_men = []
        for person in old_generation:
            if np.random.random() < childless_avg:
                continue  # childless: never becomes a parent
            if person[2] == 0:
                old_women.append(person)
            else:
                old_men.append(person)

        # A single shuffle followed by pop() yields a uniformly random pairing;
        # reshuffling for every mother would be quadratic for no benefit.
        random.shuffle(old_men)

        next_gen = []
        for mother in old_women:
            if not old_men:
                break  # no unpaired men left, remaining women stay unpaired
            father = old_men.pop()

            children_count = num_children(children_avg)

            # The ancestry is identical for all siblings, so compute it once per
            # couple. Level 0 holds the parents; deeper levels are the union of
            # both parents' corresponding levels.
            ancestors = [{mother[0], father[0]}]
            ancestors.extend(
                mother_level.union(father_level)
                for mother_level, father_level in zip(mother[3], father[3])
            )
            ancestor_count = sum(len(level) for level in ancestors)

            # Create Children
            for _ in range(int(children_count)):
                sex = random.randint(0, 1)
                # Each child gets its own outer list; the level sets are shared
                # between siblings and are treated as read-only.
                next_gen.append([human_id_count, gen_count, sex, list(ancestors), ancestor_count])
                human_id_count += 1

        records.extend(next_gen)
        population_table.append(len(next_gen))
        old_generation = next_gen

        toc = time.perf_counter() #end timer
        print(f" - Generation {gen_count}: {len(next_gen)} --- Time: {toc - tic:0.4f} seconds")

    # Create People Table (row position == person id)
    people = pd.DataFrame(records, columns=columns)

    # 3: Calculate Descendant Columns
    tic = time.perf_counter() #start timer

    # NOTE: Descendant tracking has a high performance cost and is disabled.
    #       Uncomment this block (and nothing else) to add the columns.
    #
    # all_descendants = [[set() for _ in range(total_generations)] for _ in range(len(people.index))]
    # descendants_total_count = [0] * len(people.index)
    # generation_table = people['generation']
    # for person_id, persons_ancestors in enumerate(people['ancestor_set']):
    #     for ancestor_level in persons_ancestors:
    #         for ancestor in ancestor_level:
    #             all_descendants[ancestor][generation_table[person_id] - 1].add(person_id)
    #             descendants_total_count[ancestor] += 1
    # people['descendants_set'] = all_descendants
    # people['descendants_total_count'] = descendants_total_count
    # people['descendants_gen_count'] = [[len(level) for level in person] for person in all_descendants]

    # 4: Calculate Generation Count Columns

    # Ancestors Per Generation: size of every ancestral level for each person
    people['ancestors_gen_count'] = [
        [len(level) for level in person_levels]
        for person_levels in people['ancestor_set']
    ]

    toc = time.perf_counter() #end timer
    print(f" - Descendants Calculated --- Time: {toc - tic:0.4f} seconds")

    # 5: Calculate Statistics
    tic = time.perf_counter() #start timer

    stats_table = []
    for i in range(total_generations):

        # Population Size Stats
        generation = i + 1
        population = population_table[i]

        if i == (total_generations - 1):
            # Last generation: nothing to compare against.
            expected_growth = 0
            growth_luck = 0
        else:
            expected_growth = math.floor(population / 2) * (1 - childless_avg) * children_avg
            next_population = population_table[i + 1]
            if next_population > 0:
                growth_luck = ((next_population - expected_growth) / next_population) * 100
            else:
                # Population died out: the relative deviation is undefined.
                growth_luck = math.nan

        # Ancestor Stats
        # Theoretical maximum is 2 + 4 + ... + 2**i, i.e. 2**(i + 1) - 2
        # (0 for the first generation).
        theoretical_max_ancestors = 2 ** (i + 1) - 2

        gen_ancestors = people.loc[people['generation'] == generation, 'ancestor_total_count']
        avg_ancestors = gen_ancestors.mean()
        max_ancestors = gen_ancestors.max()
        min_ancestors = gen_ancestors.min()

        stats_table.append([
            generation, population, expected_growth, growth_luck,
            theoretical_max_ancestors, avg_ancestors, max_ancestors, min_ancestors,
        ])

    stats = pd.DataFrame(stats_table, columns=['generation','population','expected_growth','growth_luck','theoretical_max_ancestors','avg_ancestors','max_ancestors','min_ancestors'])

    toc = time.perf_counter() #end timer
    print(f" - Statistics Calculated --- Time: {toc - tic:0.4f} seconds")

    # Uncomment below to display / return the per-person table instead
    # display(people)
    display(stats)
    return stats
    # return people

In [None]:
# Function for running one experiment (includes multiple repeated simulations)

def experiment(repeats, starting_population, total_generations, childless_avg, children_avg):
    """Run ``simulation`` several times with the same parameters and average the results.

    Parameters
    ----------
    repeats : int
        Number of independent simulations to run.
    starting_population, total_generations, childless_avg, children_avg
        Forwarded unchanged to ``simulation`` for every run.

    Returns
    -------
    tuple
        ``(results, summary)`` where ``results`` is the list of statistics
        tables returned by the individual simulations and ``summary`` is their
        row-wise mean (rows are aligned by index, i.e. one row per
        generation). With ``repeats == 0`` the summary is an empty DataFrame.

    Side effects
    ------------
    Prints progress and timing information and displays the summary table.
    """
    tix = time.perf_counter() #start timer
    print("============================================================")
    print(f"Experiment - Pop: {starting_population} Gen: {total_generations} Repeats: {repeats}")
    print("------------------------------------------------------------")

    # Run Simulations
    results = []
    for run in range(1, repeats + 1):
        print(f"Simulation {run}:")

        tic = time.perf_counter() #start timer
        result = simulation(starting_population, total_generations, childless_avg, children_avg)
        toc = time.perf_counter() #end timer

        print(f"Total Simulation {run} Time: {toc - tic:0.4f} seconds")
        print("------------------------------------------------------------")
        results.append(result)

    # Summarize Results
    # TODO: extend the summary beyond the plain mean across repeats.
    if results:
        # Every result shares the same 0..n-1 index, so grouping on the index
        # averages each generation's row across all runs.
        summary = pd.concat(results).groupby(level=0).mean()
    else:
        # pd.concat raises on an empty list; return an empty table instead.
        summary = pd.DataFrame()

    display(summary)

    tox = time.perf_counter() #end timer
    print(f"Total Experiment Time: {tox - tix:0.4f} seconds")
    print("============================================================")

    return (results, summary)

In [None]:
# Set up Experiments Here

# Simulation Parameters
REPEATS = 30
STARTING_POPULATIONS = [500, 1000, 5000]
TOTAL_GENERATIONS = 10
CHILDLESS_AVG = 0.55
CHILDREN_AVG = 5

# Seed both random sources used by the simulation so a
# Restart & Run All reproduces the same tables.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Keep every experiment's (results, summary) pair, keyed by starting
# population, so they can be combined in the following cells.
outcomes = {}
for starting_population in STARTING_POPULATIONS:
    outcomes[starting_population] = experiment(
        REPEATS, starting_population, TOTAL_GENERATIONS, CHILDLESS_AVG, CHILDREN_AVG
    )

# `outcome` still refers to the last experiment that was run.
outcome = outcomes[STARTING_POPULATIONS[-1]]

In [None]:
# Combine Experiment Results

In [None]:
# visualizations