## Set Up Dependencies


In [None]:
import pandas as pd
import seaborn as sns
from slugify import slugify
from teeplot import teeplot as tp


## Fetch and Preprocess Data


In [None]:
# Download the dataset (a parquet file served over HTTPS) into a DataFrame.
data_url = "https://osf.io/k8zga/download"
df = pd.read_parquet(data_url)


In [None]:
# Raw column identifier -> human-readable label. Later cells refer to the
# columns by these labels (e.g. "Num Islands", "Downsample").
column_labels = {
    "annotation_size_bits": "Annotation Size (bits)",
    "algorithm": "Algorithm",
    "population_size": "Population Size",
    "num_generations": "Num Generations",
    "num_islands": "Num Islands",
    "num_niches": "Num Niches",
    "tournament_size": "Tournament Size",
    "replicate": "Replicate",
    "downsample": "Downsample",
    "differentia_width_bits": "Differentia Width (bits)",
}
df = df.rename(columns=column_labels)


In [None]:
# Build a compact key of the form "islands<I>-niches<N>-tsize<T>" from the
# three evolutionary-dynamics columns; each piece is stringified first.
islands_part = "islands" + df["Num Islands"].astype(str)
niches_part = "-niches" + df["Num Niches"].astype(str)
tsize_part = "-tsize" + df["Tournament Size"].astype(str)
df["Dynamics"] = islands_part + niches_part + tsize_part


In [None]:
# Short scenario names for the recognized "Dynamics" keys. Series.map yields
# NaN for any key that is absent from this table.
scenario_names = {
    "islands1-niches1-tsize2": "plain",
    "islands1-niches1-tsize1": "drift",
    "islands4-niches2-tsize2": "mild",
    "islands64-niches8-tsize2": "rich",
}
df["Scenario"] = df["Dynamics"].map(scenario_names)


In [None]:
# Two-line display variants of the algorithm names: the hyphen is replaced by
# a newline. The "algo" column is used as the row facet when plotting.
algo_labels = {
    "surf-steady": "surf\nsteady",
    "col-steady": "col\nsteady",
    "surf-hybrid": "surf\nhybrid",
    "surf-tilted": "surf\ntilted",
    "col-tilted": "col\ntilted",
}
df["algo"] = df["Algorithm"].map(algo_labels)


In [None]:
# Build a key of the form "npop<P>-ngen<G>" from population size and number
# of generations; it is one of the grouping columns for the figures.
npop_part = "npop" + df["Population Size"].astype(str)
ngen_part = "-ngen" + df["Num Generations"].astype(str)
df["Scale"] = npop_part + ngen_part


In [None]:
# Fold the two triplet-match columns into one integer code: the strict column
# is added as-is and the strict/lax column is weighted by 2.
# NOTE(review): assumes both columns hold 0/1 (or boolean) flags, so the code
# falls in 0..3 — confirm against the dataset schema.
strict_match = df["triplet match, strict"]
strict_lax_match = df["triplet match, strict/lax"]
df["Outcome Code"] = strict_match + 2 * strict_lax_match


In [None]:
# Human-readable label for each outcome code. Code 1 has no entry here, so
# Series.map would leave such rows as NaN.
# NOTE(review): presumably code 1 cannot occur in the data — verify.
outcome_labels = {
    3: "correct",
    2: "unresolved",
    0: "incorrect",
}
df["Outcome"] = df["Outcome Code"].map(outcome_labels)


## Plot Sampled Triplet Comparisons


In [None]:
# Facet rows: the "algo" labels to show, top to bottom. Only these three are
# plotted; "surf\nsteady" and "col\ntilted" are intentionally left out.
row_order = ["col\nsteady", "surf\nhybrid", "surf\ntilted"]

# Facet columns: the "Scenario" names to show, left to right.
col_order = ["plain", "mild", "rich", "drift"]

# White seaborn theme; the axes face color is given with alpha 0.0, i.e. it
# is fully transparent.
axes_facecolor_rgba = (0.8, 0.8, 0.8, 0.0)
sns.set_theme(style="white", rc={"axes.facecolor": axes_facecolor_rgba})

groupby = ["Scale", "Annotation Size (bits)", "Differentia Width (bits)"]

# Draw one faceted, stacked-proportion histogram per combination of the
# `groupby` columns, using only the rows where Downsample equals 500.
for what, group in df[df["Downsample"] == 500].groupby(groupby):
    # Derive the x variable on a *new* frame with `assign` instead of
    # assigning a column into the frame yielded by groupby: that frame is
    # derived from a filtered slice of `df`, and writing into it can trigger
    # pandas' SettingWithCopyWarning.
    # The "+ 1" maps the latest origin_time in the group to 1, keeping every
    # value positive so it can be drawn on the log-scaled x axis.
    newest_origin = group["origin_time"].max()
    group = group.assign(
        **{"Time Ago": newest_origin + 1 - group["origin_time"]},
    )
    print(what)  # progress log: key tuple of the group being plotted
    # With teeplot_callback=True, tp.tee hands back a callback together with
    # the plot object, so the grid can be restyled before `dosave()` is
    # called (presumably that is when the figure is written — see teeplot).
    dosave, g = tp.tee(
        sns.displot,
        data=group,
        x="Time Ago",
        row="algo",
        col="Scenario",
        hue="Outcome",
        bins=10,
        multiple="fill",
        common_norm=False,
        aspect=2,
        height=1.3,
        log_scale=(True, False),  # log x axis, linear y axis
        kind="hist",
        hue_order=["incorrect", "unresolved", "correct"],
        row_order=row_order,
        col_order=col_order,
        facet_kws=dict(margin_titles=True, sharex="col"),
        teeplot_callback=True,
        # Tag the output with the slugified grouping column names and the
        # stringified values of this group's key.
        teeplot_outattrs={
            slugify(column): str(value)
            for column, value in zip(groupby, what)
        },
    )
    g.set_titles(col_template="{col_name}", row_template="{row_name}")
    g.set(yticks=[], ylabel="")
    g.despine(bottom=True, left=True)
    dosave()


## Reproducibility


In [None]:
import datetime

# Show when this notebook was executed, as an ISO 8601 string (local time,
# no timezone attached).
run_timestamp = datetime.datetime.now()
run_timestamp.isoformat()


In [None]:
# Load the `watermark` IPython extension, then invoke its line magic with no
# arguments (its default report) as part of the reproducibility record.
# Comments stay on their own lines: text after a line magic is passed to the
# magic as arguments.
%load_ext watermark
%watermark


In [None]:
!pip freeze
