In [None]:
%load_ext watermark


In [None]:
import itertools as it
import os

from IPython.display import display
import matplotlib as mpl
import polars as pl
import seaborn as sns
from slugify import slugify
from teeplot import teeplot as tp


In [None]:
# Stamp the notebook output with run/environment details and the versions of
# the imported packages via the watermark extension (see the watermark docs
# for the meaning of each flag).
%watermark -diwmuv -iv


In [None]:
# Switch on the ".pgf" entry in teeplot's save settings
# (presumably enables PGF output for figures saved through teeplot).
tp.save[".pgf"] = True

# Subdirectory handed to teeplot for this notebook's figures: the
# NOTEBOOK_NAME environment variable when set, else a fixed fallback.
teeplot_subdir = os.getenv(
    "NOTEBOOK_NAME",
    default="2025-06-01-cpp-bench-memory-native-tilted",
)
teeplot_subdir  # last expression: echo the resolved value as cell output


## Prep Data


In [None]:
# Display labels for the benchmarked algorithms, keyed by the raw `algo_name`
# values found in the benchmark CSV. Only rows whose `algo_name` is a key of
# this mapping are kept by the data-prep pipeline.
# NOTE(review): "pyrimidal" looks like a misspelling of "pyramidal". The same
# spelling is used wherever this label is matched (hue order, palette, plot
# loop), so any correction must be applied to all of those places together.
# NOTE(review): two keys use a "dstream." prefix and one uses "dstream_" --
# confirm these match the identifiers actually present in the data.
algo_names = {
    "dstream.circular_algo": "simple ringbuf",
    "control_throwaway_algo": "no-operation",
    "dstream.tilted_algo": "extended ringbuf",
    "dstream_tilted_algo": "extended ringbuf LUT",
    "doubling_tilted_algo": "naive doubling",
    "zhao_tilted_algo": "pyrimidal bucket",
    "zhao_tilted_full_algo": "saturating bucket",
}


# Load the benchmark measurements. pl.concat over a one-element list is kept
# as-is (presumably so further result files can be appended to the list).
df = pl.concat(
    [
        pl.read_csv("https://osf.io/cbqpx/download"),
    ],
)
# Show which algorithm identifiers occur in the raw data; sorted so the
# printout is the same on every run (`unique()` alone has no stable order).
print("\n".join(df["algo_name"].unique().sort().to_list()))
df = (
    # Pin numeric column dtypes up front.
    df.cast(
        {
            "memory_bytes": pl.Int32,
            "num_items": pl.Int32,
            "num_sites": pl.Int32,
            "duration_s": pl.Float64,
            "replicate": pl.Int32,
        },
    )
    # Keep only the algorithms we have labels for, at the 1M-item workload.
    .filter(
        # A plain list rather than a dict view, for a conventional is_in input.
        pl.col("algo_name").is_in(list(algo_names)),
        pl.col("num_items") == 1_000_000,
    )
    .with_columns(
        # Human-readable label; every remaining algo_name is a key of
        # algo_names because of the filter above.
        algorithm=pl.col("algo_name").map_elements(
            algo_names.__getitem__,
            return_dtype=str,
        ),
        # Number of items each algorithm is taken to retain, used as the
        # denominator for the per-item memory figures below. Branches that
        # equal the default are spelled out to list every tilted algorithm.
        num_retained=(
            pl.when(pl.col("algo_name") == "dstream_tilted_algo")
            .then(pl.col("num_sites"))
            .when(pl.col("algo_name") == "doubling_tilted_algo")
            # NOTE(review): 0.75 is presumably the expected buffer occupancy
            # of the doubling scheme -- confirm.
            .then(pl.col("num_sites") * 0.75)
            .when(pl.col("algo_name") == "zhao_tilted_algo")
            # NOTE(review): fixed count, independent of num_sites; presumably
            # derived from the 1_000_000-item workload (floor(log2(1e6)) is
            # 19) -- confirm before changing the num_items filter.
            .then(19)
            .when(pl.col("algo_name") == "zhao_tilted_full_algo")
            .then(pl.col("num_sites"))
            .otherwise(pl.col("num_sites"))
        )
    )
    .with_columns(
        (pl.col("memory_bytes") / pl.col("num_retained")).alias("bytes per item"),
    )
    .with_columns(
        # Derived from the float "bytes per item" column instead of computing
        # `8 * memory_bytes` in Int32, which could wrap for large byte counts.
        # Scaling by 8 is exact in floating point, so values are unchanged.
        (8 * pl.col("bytes per item")).alias("bits per item"),
    )
    # Space-separated alias used as the facet variable when plotting.
    .with_columns(pl.col("data_type").alias("data type"))
)

# Summary statistics plus the first and last rows of the prepared frame.
display(df.describe(), df.head(), df.tail())


In [None]:
# Sanity check: within every (num_items, num_sites, algorithm, data type)
# group, all rows must report one and the same memory_bytes value.
distinct_memory_counts = df.group_by(
    ["num_items", "num_sites", "algorithm", "data type"],
).agg(
    pl.col("memory_bytes").n_unique(),
)
assert (distinct_memory_counts["memory_bytes"] == 1).all()


In [None]:
# Algorithm labels to plot, in the order they appear as hue levels. Entries
# must match the label strings assigned to the "algorithm" column; the
# commented-out labels are left out of the figures.
hue_order = [
    "extended ringbuf",
    "saturating bucket",
    "pyrimidal bucket",
    # "extended ringbuf LUT",
    # "simple ringbuf",
    "naive doubling",
]


In [None]:
# Fixed colour for every algorithm label, taken in order from seaborn's
# "muted" palette, so a label maps to the same colour no matter which subset
# of algorithms a figure shows.
palette = sns.color_palette("muted")
palette_assignments = dict(
    zip(
        [
            "extended ringbuf",
            "extended ringbuf LUT",
            "saturating bucket",
            "pyrimidal bucket",
            "simple ringbuf",
            "naive doubling",
            "no-operation",
        ],
        palette,  # zip stops after the seven labels; extra colours are unused
    ),
)


## Plot


## Memory Use


In [None]:
# Width in bits of one item of each data type. The insertion order is also
# used as the facet (column) order, so each reference line below is drawn on
# the panel of the data type it belongs to.
dtype_bits = {"bit": 1, "byte": 8, "word": 16, "double word": 32}

# The plotted rows are identical for every figure variant; select them once
# instead of re-filtering on each loop iteration.
plot_data = df.filter(
    pl.col("num_items") == 1_000_000,
)

# One figure per combination of: y-axis unit, whether to draw the raw
# data-type-size reference line, which algorithms to leave out, and
# matplotlib rc overrides (default vs. serif font).
# NOTE(review): `rc` is not reflected in teeplot_outattrs. If teeplot builds
# file names only from the plot arguments and outattrs, the two font variants
# are written to the same path and the later one wins -- confirm intended.
for unit, mark, excl, rc in it.product(
    ["bits", "bytes"],
    [True, False],
    [[], ["pyrimidal bucket"]],
    [{}, {"font.family": "serif"}],
):
    excl_hue_order = [hue for hue in hue_order if hue not in excl]
    # Reference widths are given in bits; rescale when the y axis is bytes.
    unit_scale = {"bits": 1, "bytes": 1 / 8}[unit]
    with mpl.rc_context(rc=rc):
        with tp.teed(
            sns.catplot,
            data=plot_data,
            x="num_sites",
            y=f"{unit} per item",
            hue="algorithm",
            hue_order=excl_hue_order,
            col="data type",
            col_wrap=2,
            col_order=list(dtype_bits),
            kind="bar",
            # Percentile interval of width 100, i.e. min-to-max whiskers.
            errorbar=("pi", 100),
            margin_titles=True,
            aspect=2,
            height=1.6,
            palette=[palette_assignments[hue] for hue in excl_hue_order],
            sharex=True,
            sharey=False,
            teeplot_outattrs={
                "excl": slugify("-".join(excl)),
                "mark": mark,
            },
            teeplot_subdir=teeplot_subdir,
        ) as g:
            g.set(ylim=(0, None))
            g.set_titles(col_template="{col_name} dtype")
            g.set_xlabels("Buffer Capacity (item count)")
            g.set_ylabels(f"Memory per Item\n({unit})")
            # Single-row legend placed above the panels.
            sns.move_legend(
                g,
                "lower center",
                bbox_to_anchor=(0.4, 0.95),
                columnspacing=0.7,
                labelspacing=0.2,
                ncol=4,
                frameon=False,
                title=None,
            )
            # Axes are paired with data types in facet order (col_order above).
            for bits, ax in zip(dtype_bits.values(), g.axes.flat):
                # Extra headroom so the rotated bar labels fit in the panel.
                ax.set_ylim(0, ax.get_ylim()[1] * 1.6)
                for container in ax.containers:
                    ax.bar_label(
                        container,
                        fmt=" %.1f",
                        label_type="edge",
                        rotation=90,
                        padding=2,
                    )
                if mark:
                    # Dashed line at the size of one item of this data type.
                    ax.axhline(
                        bits * unit_scale,
                        color="k",
                        linestyle="--",
                    )
