## Set Up Dependencies

In [None]:
import matplotlib as mpl
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
from slugify import slugify
from teeplot import teeplot as tp


## Fetch and Preprocess Data

In [None]:
# Load the dataset from the parquet file hosted at osf.io.
df = pd.read_parquet("https://osf.io/wa32b/download")


In [None]:
# Relabel the raw snake_case columns with title-case display names; the
# cells below refer to columns by these new labels.
df = df.rename(
    columns={
        "annotation_size_bits": "Annotation Size (bits)",
        "algorithm": "Algorithm",
        "population_size": "Population Size",
        "num_generations": "Num Generations",
        "num_islands": "Num Islands",
        "num_niches": "Num Niches",
        "tournament_size": "Tournament Size",
        "replicate": "Replicate",
        "downsample": "Downsample",
        "differentia_width_bits": "Differentia Width (bits)",
    },
)


In [None]:
# Compose a compact key for each row's evolutionary dynamics, such as
# "islands1-niches1-tsize2", from three labeled settings.
df["Dynamics"] = (
    ("islands" + df["Num Islands"].astype(str))
    + ("-niches" + df["Num Niches"].astype(str))
    + ("-tsize" + df["Tournament Size"].astype(str))
)


In [None]:
# Translate each dynamics key into a short scenario label. Per the pandas
# Series.map contract, keys missing from this dict come out as NaN.
df["Scenario"] = df["Dynamics"].map(
    {
        "islands1-niches1-tsize2": "plain",
        "islands1-niches1-tsize1": "drift",
        "islands4-niches2-tsize2": "mild structure",
        "islands64-niches8-tsize2": "rich structure",
    }
)


In [None]:
# Multi-line display label for each algorithm; the plotting cell facets rows
# on this column (row="algo").
# NOTE(review): the leading "\n" presumably nudges the label's vertical
# placement beside each facet -- confirm before changing.
df["algo"] = df["Algorithm"].map(
    {
        "surf-steady": "\nsurf\nsteady",
        "col-steady": "\ncol\nsteady",
        "surf-hybrid": "\nsurf\nhybrid",
        "surf-tilted": "\nsurf\ntilted",
        "col-tilted": "\ncol\ntilted",
    },
)


In [None]:
# Key each row by experiment scale, in the form "npop<size>-ngen<count>".
df["Scale"] = ("npop" + df["Population Size"].astype(str)) + (
    "-ngen" + df["Num Generations"].astype(str)
)


In [None]:
# Facet rows to draw, top to bottom, given as values of the "algo" column.
# Commented-out entries are not plotted.
# NOTE(review): the commented-out labels below lack the leading "\n" used by
# the "algo" mapping (or are raw "Algorithm" names), so they would need
# updating before being re-enabled.
row_order = [
    # "surf-steady",
    # "col-steady",
    # "surf-hybrid",
    # "surf-tilted",
    # "col-tilted",
    # "surf\nsteady",
    "\ncol\nsteady",
    "\nsurf\nhybrid",
    "\nsurf\ntilted",
    # "col\ntilted",
]


def joyhist(*args, **kwargs):
    """Draw a ridgeline-style grid of overlapping, filled histograms.

    All positional and keyword arguments are forwarded to ``sns.displot``
    alongside the fixed styling options set here, so the caller supplies the
    data and the ``x``/``row``/``col``/``hue`` mappings. Facet rows follow
    the module-level ``row_order``.

    Calling this function also runs ``sns.set_theme``, which changes the
    global seaborn theme as a side effect.

    Returns the grid object produced by ``sns.displot`` after restyling.
    """
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    g = sns.displot(
        *args,
        **kwargs,
        kind="hist",
        row_order=row_order,
        hue_order=["true", "reconstructed"],
        multiple="fill",
        common_norm=False,
        aspect=3,
        height=1.1,
        log_scale=(True, False),
        bins=20,
        element="step",
        palette=["none", "#1f77b4"],
        facet_kws=dict(margin_titles=True, sharex="col"),
        legend=False,
    )
    # Dashed guide at the halfway mark and a solid baseline in every facet.
    g.refline(y=0.5, linestyle="--", linewidth=2, color="black")
    g.refline(y=0.0, linestyle="-", linewidth=3, color="black")

    # Negative vertical spacing makes adjacent rows overlap.
    g.figure.subplots_adjust(hspace=-0.4)

    g.set_titles("")
    g.set(yticks=[], ylabel="")
    g.despine(bottom=True, left=True)
    g.set_titles(col_template="{col_name}", row_template="{row_name}")

    cmap = plt.get_cmap("tab10_r")
    for r, row_axes in enumerate(g.axes):
        # One color per facet row; it does not vary across columns.
        color = cmap(r)
        for ax in row_axes:
            # Recolor only the first PolyCollection found on each axes
            # (presumably the histogram fill); later ones keep their styling.
            for artist in ax.get_children():
                if isinstance(artist, mpl.collections.PolyCollection):
                    artist.set_facecolor(color)
                    artist.set_alpha(0.5)
                    break

            # adapted from https://cduvallet.github.io/posts/2018/11/facetgrid-ylabel-access
            # Rotate facet labels: swap the existing label for a fresh text
            # artist at the same position with default (horizontal) rotation.
            if ax.texts:
                # This contains the right ylabel text
                label = ax.texts[0]
                x, y = label.get_unitless_position()
                ax.text(
                    x,
                    y,
                    label.get_text(),
                    transform=ax.transAxes,
                    va="center",
                    # fontsize="large",
                )
                # Remove the original text
                label.remove()
    return g


In [None]:
# Render one figure per (scale, annotation size, differentia width)
# combination, using only the rows where "Downsample" equals 500.
groupby = ["Scale", "Annotation Size (bits)", "Differentia Width (bits)"]
for what, group in df[df["Downsample"] == 500].groupby(groupby):
    # Derive "Time Ago" with assign() so the column lands on a fresh frame;
    # writing into the frame yielded by groupby in place can raise
    # SettingWithCopyWarning. The newest origin_time maps to 1, which keeps
    # the log-scaled x axis strictly positive.
    group = group.assign(
        **{"Time Ago": group["origin_time"].max() + 1 - group["origin_time"]},
    )
    tp.tee(
        joyhist,
        data=group,
        x="Time Ago",
        row="algo",
        col="Scenario",
        col_order=[
            "plain",
            "mild structure",
            "rich structure",
            "drift",
        ],
        hue="kind",
        # Tag the saved plot with the slugified grouping keys and values.
        teeplot_outattrs=dict(
            zip(map(slugify, groupby), map(str, what)),
        ),
    )


## Reproducibility

In [None]:
import datetime
# Show when this cell was run, as a naive local-time ISO 8601 string.
datetime.datetime.now().isoformat()

In [None]:
# Load the watermark IPython extension and print its default report.
%load_ext watermark
%watermark

In [None]:
# List the installed packages and their pinned versions.
!pip freeze