## Set Up Dependencies


In [None]:
import datetime
import os

from cliffs_delta import cliffs_delta
import pandas as pd
import pecking
import matplotlib as mpl
from matplotlib import pyplot as plt
import pecking
import seaborn as sns
from slugify import slugify
from teeplot import teeplot as tp

from pylib._describe_effect import describe_effect
from pylib._styler_to_pdf import styler_to_pdf


## Data Retrieval and Preprocessing


In [None]:
# Load the dataset into a DataFrame straight from its OSF download URL
# (this cell needs network access to run).
df = pd.read_parquet("https://osf.io/ah7v5/download")


In [None]:
# Duplicate each raw snake_case column under a human-readable display name.
# The raw columns are kept; entries are processed in the order listed, so the
# new columns are appended to the frame in this same order.
display_name_to_raw_column = {
    "Num Reconstructed Inner Nodes": "num_reconstructed_inner_nodes",
    "Num True Inner Nodes": "num_true_inner_nodes",
    "Raw Triplet Distance": "triplet_distance_raw",
    "Sampled Triplet Distance Strict Reconst": (
        "sampled_triplet_distance_strict_reconst"
    ),
    "Strict Triplet Distance": "sampled_triplet_distance_strict",
    "Lax Triplet Distance": "sampled_triplet_distance_lax",
    "Differentia Width (bits)": "differentia_width_bits",
    "Annotation Size (bits)": "annotation_size_bits",
    "Algorithm": "algorithm",
    "Population Size": "population_size",
    "Num Generations": "num_generations",
    "Num Islands": "num_islands",
    "Num Niches": "num_niches",
    "Tournament Size": "tournament_size",
    "Replicate": "replicate",
    "Downsample": "downsample",
}
for pretty_column, raw_column in display_name_to_raw_column.items():
    df[pretty_column] = df[raw_column]


In [None]:
# Keep only the rows whose annotation size is 256 bits. The `.copy()` makes
# the filtered result an independent frame, so the column assignments in the
# following cells do not operate on a slice of the unfiltered data.
df = df[df["Annotation Size (bits)"] == 256].copy()


In [None]:
# Resolution: reconstructed inner-node count as a fraction of the true count.
inner_node_resolution = (
    df["Num Reconstructed Inner Nodes"] / df["Num True Inner Nodes"]
)
df["Inner Node Resolution"] = inner_node_resolution
# Loss is the complement of resolution.
df["Inner Node Loss"] = 1 - inner_node_resolution


In [None]:
# Build a compact per-row key describing the evolutionary dynamics, of the
# form "islands<I>-niches<N>-tsize<T>".
islands_part = "islands" + df["Num Islands"].astype(str)
niches_part = "-niches" + df["Num Niches"].astype(str)
tournament_part = "-tsize" + df["Tournament Size"].astype(str)
df["Dynamics"] = islands_part + niches_part + tournament_part


In [None]:
# Build a compact per-row key describing the run scale, of the form
# "npop<P>-ngen<G>".
population_part = "npop" + df["Population Size"].astype(str)
generations_part = "-ngen" + df["Num Generations"].astype(str)
df["Scale"] = population_part + generations_part


In [None]:
# Short scenario name for each known "Dynamics" key. The embedded newlines
# let the two-word labels wrap when used as plot text. Any "Dynamics" value
# not listed here maps to NaN.
scenario_by_dynamics = {
    "islands1-niches1-tsize2": "plain",
    "islands1-niches1-tsize1": "drift",
    "islands4-niches2-tsize2": "mild\nstructure",
    "islands64-niches8-tsize2": "rich\nstructure",
}
df["Scenario"] = df["Dynamics"].map(scenario_by_dynamics)


In [None]:
# Retention policy named by each algorithm identifier, independent of whether
# the identifier carries the "surf-" or "col-" prefix. Identifiers not listed
# here map to NaN.
policy_by_algorithm = {
    "surf-tilted": "Tilted",
    "col-tilted": "Tilted",
    "surf-hybrid": "Hybrid",
    "surf-steady": "Steady",
    "col-steady": "Steady",
}
df["Policy"] = df["Algorithm"].map(policy_by_algorithm)


In [None]:
# Implementation family named by each algorithm identifier: "surf-" prefixed
# identifiers are labeled "Surface" and "col-" prefixed ones "Column".
# Identifiers not listed here map to NaN.
implementation_by_algorithm = {
    "surf-tilted": "Surface",
    "col-tilted": "Column",
    "surf-hybrid": "Surface",
    "surf-steady": "Surface",
    "col-steady": "Column",
}
df["Implementation"] = df["Algorithm"].map(implementation_by_algorithm)


In [None]:
# For every existing column, add an alias whose spaces are replaced by
# newlines (e.g. "Population Size" -> "Population\nSize") so long names can
# wrap in plot labels. Names without spaces are simply reassigned to
# themselves. The column list is snapshotted first because the loop adds
# columns as it goes.
for existing_name in list(df.columns):
    wrapped_name = existing_name.replace(" ", "\n")
    df[wrapped_name] = df[existing_name]


In [None]:
# Columns used as the groupby keys when plotting: one figure is produced per
# unique combination of their values. The names use the newline-wrapped
# column aliases ("Downsample" contains no space, so it is unchanged).
sensitivity_analysis_variables = [
    "Population\nSize",
    "Downsample",
    "Num\nGenerations",
]


In [None]:
# Columns carried through the melt unchanged, so every long-format row keeps
# its full experimental context (raw, display, and newline-wrapped spellings
# are all retained where listed).
melt_id_columns = [
    "Algorithm",
    "Annotation Size (bits)",
    "Annotation\nSize\n(bits)",
    "annotation_size_bits",
    "Differentia Width (bits)",
    "Differentia\nWidth\n(bits)",
    "differentia_width_bits",
    "Downsample",
    "downsample",
    "Dynamics",
    "Implementation",
    "Policy",
    "Scenario",
    "Scale",
    "Population Size",
    "Population\nSize",
    "population_size",
    "Num Generations",
    "Num\nGenerations",
    "num_generations",
    "Num Islands",
    "Num Niches",
    "Tournament Size",
    "Replicate",
]

# Metric columns stacked into the long-format "variable"/"value" pair.
# "Strict\nTriplet\nDistance" is intentionally left out.
melt_metric_columns = [
    "Lax\nTriplet\nDistance",
    "Inner\nNode\nLoss",
]

dfx = df.melt(id_vars=melt_id_columns, value_vars=melt_metric_columns)
dfx.head()


## Visualize Main Metrics


In [None]:
# Facet-row order, expressed with the relabeled metric names assigned inside
# the loop. It does not depend on the group, so it is defined once up front.
row_order = [
    "Accuracy Error\n(Lax Triplet)",
    "Precision Error\n(Node Loss)",
]

# Produce one figure per unique combination of the sensitivity-analysis
# variables (population size, downsample, number of generations).
for name, group in dfx.groupby(sensitivity_analysis_variables):
    # Pair each grouping column with this group's key value; used both to tag
    # the saved figure and as the summary displayed under it.
    group_variables = dict(zip(sensitivity_analysis_variables, name))

    # Derive the plotting columns on a fresh frame (`assign` returns a new
    # DataFrame) rather than writing into the object yielded by `groupby`.
    group = group.assign(
        **{
            # Keep only the first line of the scenario label
            # ("mild\nstructure" -> "mild") to match `order` below.
            "Evolutionary Scenario": (
                group["Scenario"].str.split("\n").str[0]
            ),
            # Swap metric names for reader-facing row titles. The patterns
            # are literal text, so regex matching is switched off explicitly
            # rather than left to the pandas-version-dependent default.
            "variable": (
                group["variable"]
                .str.replace(
                    "Lax\nTriplet\nDistance",
                    "Accuracy Error\n(Lax Triplet)",
                    regex=False,
                )
                .str.replace(
                    "Inner\nNode\nLoss",
                    "Precision Error\n(Node Loss)",
                    regex=False,
                )
            ),
            # Shorter legend title for the differentia width.
            "Chkpt\nNum\nBits": group["Differentia\nWidth\n(bits)"],
        },
    )

    # `tp.teed` wraps the plotter call as a context manager; the body below
    # adjusts the yielded plot object (used here like a seaborn FacetGrid:
    # `set_titles`, `.figure`, `.axes`).
    with tp.teed(
        pecking.peckplot,
        data=group.reset_index(drop=True).rename(
            columns={"Implementation": "Impl"},
        ),
        score="value",
        x="Evolutionary Scenario",
        y="value",
        col="Policy",
        # Only the "Tilted" policy is drawn; "Hybrid" and "Steady" rows are
        # present in the data but deliberately excluded from this figure.
        col_order=[
            "Tilted",
        ],
        row="variable",
        row_order=row_order,
        hue="Chkpt\nNum\nBits",
        x_group="outer",
        order=[
            "plain",
            "mild",
            "rich",
            "drift",
        ],
        # Lower is better for both error metrics, so the lowest-scoring
        # entries are the ones hatched and labeled "Best".
        skim_hatches=("*",),
        skim_labels=("Best",),
        skimmers=[pecking.skim_lowest],
        margin_titles=True,
        height=1.6,
        aspect=1.6,
        facet_kws={
            "ylim": (-0.2, 1),
        },
        # Tag the output with this group's key values, using slugified
        # column names as the attribute keys.
        teeplot_outattrs={
            slugify(k): str(v) for k, v in group_variables.items()
        },
    ) as teed:
        # Show only the metric name as the row title; suppress column titles.
        teed.set_titles(
            col_template="",
            row_template="{row_name}",
        )
        plt.tight_layout()
        # Applied after tight_layout so these manual margins take effect:
        # leave room on the right and tighten the gap between rows.
        teed.figure.subplots_adjust(right=0.41, hspace=0.1)
        for ax in teed.axes.flat:
            plt.setp(ax.get_xticklabels(), rotation=60)

    display(group_variables)


## Reproducibility


In [None]:
# Record when this notebook was executed. `now()` is called without a
# timezone, so the ISO-8601 string is naive local time.
datetime.datetime.now().isoformat()


In [None]:
# Load the `watermark` IPython extension, then invoke it with no options to
# print its default environment report.
%load_ext watermark
%watermark


In [None]:
# List installed packages and their versions via a shell call to pip.
# NOTE(review): `!pip` runs whichever pip the shell resolves, which may not
# belong to the kernel's environment -- confirm, or consider `%pip freeze`.
!pip freeze
