In [1]:
import anytree
from collections import Counter, defaultdict
from copy import copy
from hstrat import hstrat
from interval_search import doubling_search
from iterpop import iterpop as ip
import itertools as it
from keyname import keyname as kn
from nbmetalog import nbmetalog as nbm
import opytional as opyt
import pandas as pd
from phylotrackpy import systematics
import random
from slugify import slugify
import sys
from tqdm import tqdm
import typing


In [2]:
random.seed(1) # ensure reproducibility
sys.setrecursionlimit(1000000)


In [3]:
nbm.print_metadata()


context: ci
hostname: 7ab68eaf880c
interpreter: 3.8.12 (default, Jan 15 2022, 18:39:47)  [GCC 7.5.0]
nbcellexec: 3
nbname: ecology_simulation_synchronous
nbpath: /opt/hereditary-stratigraph-concept/binder/ecological-inference/ecology_simulation_synchronous.ipynb
revision: null
session: bf41ab37-5993-497b-af23-1c2c7bbbf772
timestamp: 2022-12-01T10:35:02Z00:00


IPython==7.16.1
keyname==0.4.1
yaml==5.3.1
anytree==2.8.0
hstrat==0.3.2
iterpop==0.4.0
nbmetalog==0.2.6
opytional==0.1.0
pandas==1.1.2
phylotrackpy==0.1.1
re==2.2.1
ipython_genutils==0.2.0
logging==0.5.1.2
zmq==22.3.0
json==2.0.9
ipykernel==5.5.3


# Pick Hereditary Stratigraph Configurations


In [4]:
def make_conditions(num_generations: int) -> pd.DataFrame:
    res = []
    for condemner_factory, target_column_bits, differentia_bit_width in it.product(
        [
            hstrat.StratumRetentionCondemnerTaperedDepthProportionalResolution,
            hstrat.StratumRetentionCondemnerRecencyProportionalResolution,
        ],
        [
            1024,
            1024 * 8,
            1024 * 16,
        ],
        [
            1,
            8,
            64,
        ],
    ):
        policy_param = doubling_search(
            lambda x: \
                condemner_factory(x + 1).CalcNumStrataRetainedExact(num_generations)
                * differentia_bit_width > target_column_bits or x >= num_generations,
            1 if condemner_factory == hstrat.StratumRetentionCondemnerTaperedDepthProportionalResolution else 0,
        )

        actual_column_strata = condemner_factory(policy_param).CalcNumStrataRetainedExact(num_generations)
        actual_column_bits = actual_column_strata * differentia_bit_width

        res.append({
            'Retention Policy' : condemner_factory.__name__[25:],
            'Differentia Bit Width' : differentia_bit_width,
            'Retention Policy Resolution Parameter' : policy_param,
            'Target Retained Bits' : target_column_bits,
            'Actual Retained Bits' : actual_column_bits,
            'Retained Bits Error' : actual_column_bits - target_column_bits,
            'Actual Retained Strata' : actual_column_strata,
            'condemner' : condemner_factory(policy_param),
        })
    return pd.DataFrame.from_records(res)


In [5]:
# picked in another data frame to minimize retained bits error
# for fair comparisons between configurations
conditions_df = make_conditions(36864)
conditions_df.drop('condemner', axis=1)


Unnamed: 0,Retention Policy,Differentia Bit Width,Retention Policy Resolution Parameter,Target Retained Bits,Actual Retained Bits,Retained Bits Error,Actual Retained Strata
0,TaperedDepthProportionalResolution,1,511,1024,1023,-1,1023
1,TaperedDepthProportionalResolution,8,63,1024,1016,-8,127
2,TaperedDepthProportionalResolution,64,7,1024,960,-64,15
3,TaperedDepthProportionalResolution,1,4095,8192,8191,-1,8191
4,TaperedDepthProportionalResolution,8,511,8192,8184,-8,1023
5,TaperedDepthProportionalResolution,64,63,8192,8128,-64,127
6,TaperedDepthProportionalResolution,1,8191,16384,16383,-1,16383
7,TaperedDepthProportionalResolution,8,1023,16384,16376,-8,2047
8,TaperedDepthProportionalResolution,64,127,16384,16320,-64,255
9,RecencyProportionalResolution,1,109,1024,1024,0,1024


In [6]:
make_bundle = lambda: hstrat.HereditaryStratigraphicColumnBundle({
    kn.pack({
        'differentia' : row['Differentia Bit Width'],
        'policy' : row['Retention Policy'],
        'resolution' : row['Retention Policy Resolution Parameter'],
        'target_bits' : row['Target Retained Bits'],
        'actual_bits' : row['Actual Retained Bits'],
        'bits_error' : row['Retained Bits Error'],
        'actual_strata' : row['Actual Retained Strata'],
    }) \
        : hstrat.HereditaryStratigraphicColumn(
            stratum_differentia_bit_width=row['Differentia Bit Width'],
            stratum_retention_condemner=row['condemner'],
            stratum_ordered_store_factory=hstrat.HereditaryStratumOrderedStoreDict,
    )
    for __, row in conditions_df.iterrows()
})


# Set Up Population


In [7]:
uid_generator = it.count()


In [8]:
tracker = systematics.Systematics(lambda org: org.uid, True, True, False, False)


In [9]:
class Organism:
    def __init__(self, phenotype, ):
        self.uid = next(uid_generator)
        self.phenotype = phenotype
        self.hstrat_column = make_bundle()
#         self.taxon = systematics.Taxon(self)

    def CloneDescendant(self):
        offspring = copy(self)
        offspring.hstrat_column = self.hstrat_column.CloneDescendant()
        offspring.uid = next(uid_generator)
#         offspring.taxon = tracker.add_org(offspring, self.taxon)
        return offspring


In [10]:
population = [
    Organism(phenotype)
    for phenotype in range(100)
]


# Do Evolution


In [11]:
def do_generation(population: typing.List[Organism]) -> typing.List[Organism]:

    phenotype_frequencies = Counter([individual.phenotype for individual in population])
    def get_fitness(individual: Organism) -> float:
           return 1.0 / phenotype_frequencies[individual.phenotype]

    def do_tournament(tournament_size: int) -> Organism:
        contestants = random.sample(population, tournament_size)
        winner = max(contestants, key=get_fitness)
        return winner.CloneDescendant()

    new_population = [
        do_tournament(7) for __ in population
    ]
    return new_population


In [12]:
# for generation in tqdm(range(36864 - 1)):
for generation in tqdm(range(100)):
    population = do_generation(population)


100%|██████████| 100/100 [00:02<00:00, 35.70it/s]


# Extract Pairwise MRCA Estimates for Extant Organisms


In [13]:
res = []
for extant1, extant2 in tqdm([*it.product(population, population)]):
    if extant1 != extant2:
        bounds = extant1.hstrat_column.CalcRankOfMrcaBoundsWith(extant2.hstrat_column)
        for impl in extant1.hstrat_column:
            res.append({
                'Phenotype' \
                    : extant1.phenotype,
                'Column Configuration' \
                    : impl,
                'Differentia Bit Width' \
                    : kn.unpack(impl)['differentia'],
                'Stratum Retention Policy' \
                    : kn.unpack(impl)['policy'],
                'Stratum Retention Policy Resolution Parameter' \
                    : kn.unpack(impl)['resolution'],
                'Stratigraphic Column Actual Retained Bits' \
                    : extant1.hstrat_column[impl].GetNumStrataRetained() * int(kn.unpack(impl)['differentia']),
                'Stratigraphic Column Target Retained Bits' \
                    : kn.unpack(impl)['target_bits'],
                'Stratigraphic Column Actual Num Retained Strata' \
                    : extant1.hstrat_column[impl].GetNumStrataRetained(),
                'Taxon Compared From' \
                    : extant1.uid,
                'Taxon Compared To' \
                    : extant2.uid,
                'Generation of Taxon Compared From' \
                    : extant1.hstrat_column.GetNumStrataDeposited(),
                'Generation of Taxon Compared To' \
                    : extant2.hstrat_column.GetNumStrataDeposited(),
                'Generation Of MRCA Lower Bound (inclusive)' \
                    : opyt.apply_if(
                        bounds[impl],
                        lambda x: x[0],
                    ),
                'Generation Of MRCA Upper Bound (exclusive)' \
                    : opyt.apply_if(
                        bounds[impl],
                        lambda x: x[0],
                    ),
                'MRCA Bound Confidence' \
                    : extant1.hstrat_column[impl].CalcRankOfMrcaBoundsWithProvidedConfidenceLevel(),
                'Rank of Earliest Detectable Mrca With' \
                    : extant1.hstrat_column[impl].CalcRankOfEarliestDetectableMrcaWith(extant2.hstrat_column[impl]),
            })

res_df = pd.DataFrame.from_records(res)


100%|██████████| 10000/10000 [00:23<00:00, 423.42it/s]


In [14]:
res_df


Unnamed: 0,Phenotype,Column Configuration,Differentia Bit Width,Stratum Retention Policy,Stratum Retention Policy Resolution Parameter,Stratigraphic Column Actual Retained Bits,Stratigraphic Column Target Retained Bits,Stratigraphic Column Actual Num Retained Strata,Taxon Compared From,Taxon Compared To,Generation of Taxon Compared From,Generation of Taxon Compared To,Generation Of MRCA Lower Bound (inclusive),Generation Of MRCA Upper Bound (exclusive),MRCA Bound Confidence,Rank of Earliest Detectable Mrca With
0,42,actual_bits=1023+actual_strata=1023+bits_error...,1,TaperedDepthProportionalResolution,511,101,1024,101,10000,10001,101,101,77,77,0.968750,4
1,42,actual_bits=1016+actual_strata=127+bits_error=...,8,TaperedDepthProportionalResolution,63,808,1024,101,10000,10001,101,101,81,81,0.996094,0
2,42,actual_bits=960+actual_strata=15+bits_error=-6...,64,TaperedDepthProportionalResolution,7,896,1024,14,10000,10001,101,101,80,80,1.000000,0
3,42,actual_bits=8191+actual_strata=8191+bits_error...,1,TaperedDepthProportionalResolution,4095,101,8192,101,10000,10001,101,101,77,77,0.968750,4
4,42,actual_bits=8184+actual_strata=1023+bits_error...,8,TaperedDepthProportionalResolution,511,808,8192,101,10000,10001,101,101,81,81,0.996094,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178195,42,actual_bits=8192+actual_strata=1024+bits_error...,8,RecencyProportionalResolution,109,808,8192,101,10099,10098,101,101,24,24,0.996094,0
178196,42,actual_bits=8192+actual_strata=128+bits_error=...,64,RecencyProportionalResolution,9,2624,8192,41,10099,10098,101,101,24,24,1.000000,0
178197,42,actual_bits=16383+actual_strata=16383+bits_err...,1,RecencyProportionalResolution,3924,101,16384,101,10099,10098,101,101,20,20,0.968750,4
178198,42,actual_bits=16360+actual_strata=2045+bits_erro...,8,RecencyProportionalResolution,250,808,16384,101,10099,10098,101,101,24,24,0.996094,0


# Save Pairwise MRCA Estimates to File


In [15]:
res_df.to_csv(
    kn.pack({
        'a' : 'pairwise_mrca_estimates',
        'simulation' : 'ecology_synchronous',
        'tournament_size' : 7,
        'ext' : '.csv.gz',
    }),
    compression='gzip',
)
