## Set Up Dependencies


In [None]:
import seaborn as sns
import pandas as pd
import pecking
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pecking
from teeplot import teeplot as tp

## Data Retrieval and Preprocessing

In [None]:
# Load the dataset as a parquet file directly from its OSF download URL
# (requires network access).
df = pd.read_parquet(
    path="https://osf.io/ah7v5/download",
)


In [None]:
# Expose each raw snake_case column under a human-readable title; the titled
# copies are what later cells reference. The raw columns are left in place.
pretty_to_raw = {
    "Num Reconstructed Inner Nodes": "num_reconstructed_inner_nodes",
    "Num True Inner Nodes": "num_true_inner_nodes",
    "Raw Triplet Distance": "triplet_distance_raw",
    "Sampled Triplet Distance Strict Reconst": (
        "sampled_triplet_distance_strict_reconst"
    ),
    "Sampled Triplet Distance Strict": "sampled_triplet_distance_strict",
    "Sampled Triplet Distance Lax": "sampled_triplet_distance_lax",
    "Differentia Width (bits)": "differentia_width_bits",
    "Annotation Size (bits)": "annotation_size_bits",
    "Algorithm": "algorithm",
    "Population Size": "population_size",
    "Num Generations": "num_generations",
    "Num Islands": "num_islands",
    "Num Niches": "num_niches",
    "Tournament Size": "tournament_size",
    "Replicate": "replicate",
    "Downsample": "downsample",
}
# Insertion order of the mapping fixes the order in which columns are added.
for pretty_name, raw_name in pretty_to_raw.items():
    df[pretty_name] = df[raw_name]


In [None]:
# Ratio of reconstructed inner node count to true inner node count.
df["Inner Node Resolution"] = df["Num Reconstructed Inner Nodes"].div(
    df["Num True Inner Nodes"]
)
# Complement of the resolution, i.e. 1 - resolution.
df["Inner Node Loss"] = df["Inner Node Resolution"].rsub(1)


In [None]:
# Compact label for the evolutionary dynamics of a run, formatted as
# "islands<I>-niches<N>-tsize<T>"; it is the lookup key for the "Scenario"
# column derived later.
df["Dynamics"] = (
    ("islands" + df["Num Islands"].astype(str))
    .str.cat("niches" + df["Num Niches"].astype(str), sep="-")
    .str.cat("tsize" + df["Tournament Size"].astype(str), sep="-")
)


In [None]:
# Compact label for the run scale, formatted as "npop<P>-ngen<G>"; used as
# the facet row / hue value in the plots below.
df["Scale"] = ("npop" + df["Population Size"].astype(str)).str.cat(
    "ngen" + df["Num Generations"].astype(str),
    sep="-",
)


In [None]:
# Duplicate the metric columns under names containing explicit line breaks;
# these names become the "variable" values once the frame is melted.
wrapped_to_plain = {
    "Triplet\nDistance": "Raw Triplet Distance",
    "Sampled\nTriplet\nDistance\nStrict": "Sampled Triplet Distance Strict",
    "Sampled\nTriplet\nDistance\nStrict Reconst": (
        "Sampled Triplet Distance Strict Reconst"
    ),
    "Sampled\nTriplet\nDistance\nLax": "Sampled Triplet Distance Lax",
    "Inner\nNode\nResolution": "Inner Node Resolution",
    "Inner\nNode\nLoss": "Inner Node Loss",
    "Num\nReconstructed\nInner\nNodes": "Num Reconstructed Inner Nodes",
    "Num\nTrue\nInner\nNodes": "Num True Inner Nodes",
}
for wrapped_name, plain_name in wrapped_to_plain.items():
    df[wrapped_name] = df[plain_name]


In [None]:
# Human-readable scenario name for each known "Dynamics" label. Labels that
# are not listed here map to NaN in the "Scenario" column.
scenario_by_dynamics = {
    "islands1-niches1-tsize2": "plain",
    "islands1-niches1-tsize1": "drift",
    "islands4-niches2-tsize2": "spatial/ecological mild",
    "islands64-niches8-tsize2": "spatial/ecological strong",
}
df["Scenario"] = df["Dynamics"].map(scenario_by_dynamics)

In [None]:
# Columns that identify a run and its configuration; carried through the
# melt unchanged.
melt_id_columns = [
    "Algorithm",
    "Annotation Size (bits)",
    "Differentia Width (bits)",
    "Downsample",
    "Dynamics",
    "Scenario",
    "Scale",
    "Population Size",
    "Num Generations",
    "Num Islands",
    "Num Niches",
    "Tournament Size",
    "Replicate",
]
# Metric columns stacked into long form ("variable" / "value").
# "Triplet\nDistance" and "Sampled\nTriplet\nDistance\nLax" are deliberately
# left out of the melted frame.
melt_metric_columns = [
    "Sampled\nTriplet\nDistance\nStrict",
    "Sampled\nTriplet\nDistance\nStrict Reconst",
    "Inner\nNode\nLoss",
]
dfx = pd.melt(
    df,
    id_vars=melt_id_columns,
    value_vars=melt_metric_columns,
)
dfx.head()


## Visualize Main Metrics

In [None]:
# Order in which algorithms appear as hue levels.
hue_order = [
    "surf-steady",
    "col-steady",
    "surf-hybrid",
    "surf-tilted",
    "col-tilted",
]

# Metrics shown in the focal figure, in x-axis order. Every entry must be a
# value of dfx["variable"]: the raw "Triplet\nDistance" column is not melted
# into dfx, so naming it here would leave the figure without any triplet
# distance data. The strict sampled triplet distance is used instead.
# NOTE(review): confirm that the strict variant (rather than strict reconst)
# is the intended focal triplet metric.
focal_metrics = ["Sampled\nTriplet\nDistance\nStrict", "Inner\nNode\nLoss"]
missing_metrics = set(focal_metrics) - set(dfx["variable"])
assert not missing_metrics, f"metrics absent from dfx: {sorted(missing_metrics)}"


# One figure per annotation size, faceted by scenario (columns) and scale
# (rows); teeplot saves each figure tagged with the annotation size.
for sizebits, group in dfx.groupby("Annotation Size (bits)"):
    tp.tee(
        pecking.peckplot,
        data=group.reset_index(drop=True),
        score="value",
        x="variable",
        y="value",
        col="Scenario",
        row="Scale",
        hue="Algorithm",
        x_group="outer",
        order=focal_metrics,
        hue_order=hue_order,
        skim_hatches=("..OO", "*"),
        skim_labels=("Worst", "Best"),
        margin_titles=True,
        legend_width_inches=3,
        teeplot_outattrs={"a": "focal", "sizebits": sizebits},
        teeplot_outexclude=["post", "teeplot_postprocess"],
        # Shrinks the axes area to 84% of the figure width; presumably this
        # leaves room for the legend on the right.
        teeplot_postprocess="teed.figure.subplots_adjust(right=0.84)",
    )


## Visualize All Metrics

In [None]:
# Order in which algorithms appear as hue levels.
hue_order = [
    "surf-steady",
    "col-steady",
    "surf-hybrid",
    "surf-tilted",
    "col-tilted",
]


# One figure per annotation size showing every melted metric on the x axis,
# faceted by scenario (columns) and scale (rows).
for annotation_bits, subset in dfx.groupby("Annotation Size (bits)"):
    # Keyword order matches the direct call form this replaces.
    peckplot_kwargs = dict(
        data=subset.reset_index(drop=True),
        score="value",
        x="variable",
        y="value",
        col="Scenario",
        row="Scale",
        hue="Algorithm",
        x_group="outer",
        hue_order=hue_order,
        skim_hatches=("..OO", "*"),
        skim_labels=("Worst", "Best"),
        margin_titles=True,
        legend_width_inches=3,
        teeplot_outattrs={"a": "all", "sizebits": annotation_bits},
        teeplot_outexclude=["post", "teeplot_postprocess"],
        teeplot_postprocess="teed.figure.subplots_adjust(right=0.84)",
    )
    tp.tee(pecking.peckplot, **peckplot_kwargs)


## Comparison Between Population Sizes


In [None]:
# Algorithms placed on the x axis; the "col-steady" and "col-tilted"
# algorithms are intentionally left out of this comparison.
x_order = ["surf-steady", "surf-hybrid", "surf-tilted"]
# The two scales being compared, shown as hue levels.
hue_order = ["npop4096-ngen100000", "npop65536-ngen100000"]

# One figure is produced per combination of these configuration columns.
figure_keys = [
    "Annotation Size (bits)",
    "Differentia Width (bits)",
    "Downsample",
]


for figure_key, subset in dfx.groupby(figure_keys):
    annotation_bits, differentia_bits, downsample = figure_key
    # Keyword order matches the direct call form this replaces.
    peckplot_kwargs = dict(
        data=subset,
        score="value",
        x="Algorithm",
        y="value",
        col="Scenario",
        row="variable",
        hue="Scale",
        x_group="outer",
        y_group="ignore",
        hue_group="inner",
        order=x_order,
        hue_order=hue_order,
        skim_hatches=("*",),
        skim_labels=("Best",),
        # Single skimmer, labelled "Best" above; presumably it flags the
        # lowest-scoring entries -- verify against pecking.
        skimmers=[pecking.skim_lowest],
        margin_titles=True,
        legend_width_inches=3,
        teeplot_outattrs={
            "a": "all",
            "sizebits": annotation_bits,
            "diffwidth": differentia_bits,
            "dsamp": downsample,
        },
        teeplot_outexclude=["post", "teeplot_postprocess"],
        teeplot_postprocess="teed.figure.subplots_adjust(right=0.84)",
    )
    tp.tee(pecking.peckplot, **peckplot_kwargs)


## Reproducibility Information

In [None]:
import datetime

# Timestamp of this notebook run. `.astimezone()` attaches the local UTC
# offset, so the recorded time stays unambiguous when the notebook is read
# on a machine in another time zone (a naive timestamp carries no offset).
datetime.datetime.now().astimezone().isoformat()

In [None]:
# Load the `watermark` IPython extension, then invoke its line magic to
# record environment details alongside the results.
# NOTE(review): with no flags this presumably prints only the default
# summary (timestamp, interpreter and machine info) -- confirm whether
# package versions should be requested explicitly.
%load_ext watermark
%watermark

In [None]:
!pip freeze