In [None]:
# Load the `watermark` IPython extension; it provides the %watermark magic
# that a later cell uses to print environment information.
%load_ext watermark


In [None]:
import os

from IPython.display import display
import numpy as np
import pandas as pd
import seaborn as sns
from teeplot import teeplot as tp

import pylib  # noqa: F401


In [None]:
# Print an environment stamp for reproducibility. `-iv` reports versions of
# the imported packages; NOTE(review): the combined short flags are presumed
# to add date/time, machine and interpreter details — confirm against the
# watermark documentation.
%watermark -diwmuv -iv


In [None]:
# Subdirectory handed to teeplot when saving figures: the NOTEBOOK_NAME
# environment variable when it is set, otherwise this notebook's own name.
teeplot_subdir = os.getenv(
    "NOTEBOOK_NAME",
    default="2025-10-27-trafficsim_msprime",
)
teeplot_subdir


## Prep Data


In [None]:
# Download the traffic dataset (parquet file hosted on OSF) and preview it:
# summary statistics first, then the first and last rows.
df_traffic = pd.read_parquet("https://osf.io/download/7jv4d/")
display(
    df_traffic.describe(),
    df_traffic.head(),
    df_traffic.tail(),
)


In [None]:
# Download the duration dataset (parquet file hosted on OSF) and preview it:
# summary statistics first, then the first and last rows.
df_duration = pd.read_parquet("https://osf.io/download/hrq6a/")
display(
    df_duration.describe(),
    df_duration.head(),
    df_duration.tail(),
)


In [None]:
# Largest migration_count observed within each (num_demes, time_thresh) group.
(
    df_traffic
    .groupby(["num_demes", "time_thresh"])["migration_count"]
    .max()
)


In [None]:
# Average migration_count within each (num_demes, time_thresh) group.
(
    df_traffic
    .groupby(["num_demes", "time_thresh"])["migration_count"]
    .mean()
)


In [None]:
# Largest origin_time observed for each num_demes value.
(
    df_duration
    .groupby("num_demes")["origin_time"]
    .max()
)


## Block sample data


In [None]:
# Single-target unpacking doubles as a sanity check: it raises ValueError
# unless the deme_size column holds exactly one distinct value.
[pop_size] = df_traffic["deme_size"].unique()
pop_size


In [None]:
# Renumber the distinct SLURM array task ids as consecutive zero-based
# integers: a dense rank gives 1..k in sorted order, minus one gives 0..k-1.
df_traffic["task_id"] = (
    df_traffic["slurm_array_task_id"].rank(method="dense").astype(int).sub(1)
)
# Group every 900 consecutive task ids into one block.
df_traffic["block"] = df_traffic["task_id"].floordiv(900)

# Keep blocks 0-9 only, then summarise migration_count (mean and max) for
# each (block, num_demes, time_thresh) combination.
df_traffic_by_block = (
    df_traffic.loc[df_traffic["block"].lt(10)]
    .groupby(["block", "num_demes", "time_thresh"])["migration_count"]
    .agg(["mean", "max"])
    .reset_index()
)
df_traffic_by_block


In [None]:
# Reshape to long form: the "mean" and "max" columns are stacked into a single
# migration_count column, with a "statistic" column recording which of the two
# each row came from.
dfm = pd.melt(
    df_traffic_by_block,
    id_vars=["block", "num_demes", "time_thresh"],
    value_vars=["mean", "max"],
    var_name="statistic",
    value_name="migration_count",
)


In [None]:
# Estimated footprint in kB: (migration_count + pop_size) entries at 8 bytes
# each, divided by 1024 bytes per kB.
dfm["Memory Use (kB)"] = (
    dfm["migration_count"].add(pop_size).mul(8).div(1024)
)


In [None]:
# Label each row with the strategy it represents. Every label starts with
# "tracking (+prune)"; rows whose time_thresh is not infinity gain "(+batch)",
# and rows holding the "mean" statistic gain "(+balance)".
#
# The suffixes are wrapped in pandas Series so that pandas performs the string
# concatenation. Adding a Python str directly to a NumPy string array relies
# on NumPy's `add` ufunc accepting string dtypes, which NumPy 1.x rejects with
# a UFuncTypeError.
dfm["strategy"] = (
    "tracking\n(+prune)"
    + pd.Series(
        np.where(dfm["time_thresh"] != np.inf, "\n(+batch)", ""),
        index=dfm.index,
    )
    + pd.Series(
        np.where(dfm["statistic"] == "mean", "\n(+balance)", ""),
        index=dfm.index,
    )
)


In [None]:
# Every row derived from the traffic data belongs to the "tracking" flavor.
dfm.loc[:, "flavor"] = "tracking"


## Plotting


In [None]:
# Assemble the plotting table: fixed baseline rows followed by the measured
# tracking rows.
#
# Each baseline is pop_size entries at a fixed number of bytes per entry
# (4 / 8 / 32 bytes for the 32 / 64 / 256 bit reconstruction variants, and
# 2 * 8 bytes for serial tracking), expressed in kB.
dfp = pd.concat(
    [
        pd.DataFrame.from_records(
            [
                {
                    "flavor": flavor,
                    "strategy": strategy,
                    "Memory Use (kB)": pop_size * bytes_per_entry / 1024,
                }
                for flavor, strategy, bytes_per_entry in (
                    ("reconstruction", "reconst\n(32 bit)", 4),
                    ("reconstruction", "reconst\n(64 bit)", 8),
                    ("reconstruction", "reconst\n(256 bit)", 32),
                    ("tracking", "tracking\n(serial)", 2 * 8),
                )
            ]
        ),
        # Only the 100_000 and unbounded (inf) time thresholds are plotted.
        dfm.loc[dfm["time_thresh"].isin([100_000, np.inf])],
    ],
    ignore_index=True,
)


In [None]:
# Bar chart of estimated memory use per strategy, coloured by flavor.
# tp.teed wraps the seaborn call as a context manager yielding the axes;
# the block body customises the figure.
with tp.teed(
    sns.barplot,
    data=dfp,
    x="strategy",
    y="Memory Use (kB)",
    errorbar=("pi", 100),  # 100% percentile interval over the rows per bar
    hue="flavor",
    legend=False,
    palette="Pastel2",
    teeplot_subdir=teeplot_subdir,
    teeplot_outexclude=["palette"],
) as ax:
    # Wide, short figure with the y axis anchored at zero.
    ax.figure.set_size_inches(8, 1.5)
    ax.set(
        ylim=(0, None),
        ylabel="Memory kB\n (estimated)",
        xlabel="",
    )
    sns.despine(ax=ax)

    # Annotate bar heights: the first container is labelled at the bar edge
    # and the second at the bar centre (presumably one container per flavor —
    # more than two containers would raise IndexError here).
    for i, container in enumerate(ax.containers):
        ax.bar_label(
            container,
            fmt="%.1f",
            fontsize=11,
            label_type=("edge", "center")[i],
        )
