In [None]:
%load_ext watermark


In [None]:
import os

from IPython.display import display
import matplotlib as mpl
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Print an environment watermark for provenance. NOTE(review): the flag
# cluster presumably selects date/machine/version details and the trailing
# -iv the versions of imported packages -- confirm against watermark's docs.
%watermark -diwmuv -iv


In [None]:
# Switch on the ".pgf" entry in teeplot's save settings (presumably so each
# figure is also written as PGF -- confirm against teeplot's documentation).
tp.save[".pgf"] = True

# Subdirectory name handed to teeplot by the plotting cells below; the
# NOTEBOOK_NAME environment variable overrides the built-in default.
teeplot_subdir = os.getenv(
    "NOTEBOOK_NAME",
    "2025-06-01-cpp-bench-memory-native-tilted",
)
teeplot_subdir  # echo the resolved value as the cell output


## Prep Data


In [None]:
# Raw benchmark algorithm identifiers -> labels used for the plot legend.
# The keys double as the allow-list of algorithms kept by the filter below.
algo_names = {
    "dstream_tilted_algo": "dstream tilted",
    "doubling_tilted_algo": "doubling tilted",
    "zhao_tilted_algo": "zhao tilted",
    "zhao_tilted_full_algo": "zhao tilted full",
}


# Single-source concat; additional result files can be appended to the list.
df = pl.concat(
    [
        pl.read_csv("https://osf.io/r9ajf/download"),
    ],
)
df = (
    # Pin dtypes up front so the arithmetic below is well defined.
    df.cast(
        {
            "memory_bytes": pl.Int32,
            "num_items": pl.Int32,
            "num_sites": pl.Int32,
            "duration_s": pl.Float64,
            "replicate": pl.Int32,
        },
    )
    # Keep only the algorithms named above, at the 1,000,000-item workload.
    .filter(
        # Concrete list rather than a dict_keys view.
        pl.col("algo_name").is_in(list(algo_names)),
        pl.col("num_items") == 1_000_000,
    )
    .with_columns(
        # Vectorized dictionary lookup (no per-row Python callback). Every
        # surviving algo_name is a key of algo_names thanks to the filter
        # above, so each value is mapped to its display label.
        algorithm=pl.col("algo_name").replace(algo_names),
        # Number of items each algorithm is taken to retain, used as the
        # denominator for per-item memory. Rows matching no branch get null.
        # NOTE(review): the 0.75 factor and the fixed count of 19 are
        # hard-coded here -- confirm against the benchmarked implementations.
        num_retained=(
            pl.when(pl.col("algo_name") == "dstream_tilted_algo")
            .then(pl.col("num_sites"))
            .when(pl.col("algo_name") == "doubling_tilted_algo")
            .then(pl.col("num_sites") * 0.75)
            .when(pl.col("algo_name") == "zhao_tilted_algo")
            .then(19)
            .when(pl.col("algo_name") == "zhao_tilted_full_algo")
            .then(pl.col("num_sites"))
        ),
    )
    .with_columns(
        (pl.col("memory_bytes") / pl.col("num_retained")).alias("bytes per item"),
    )
    .with_columns(
        # Derived from the float "bytes per item" column instead of
        # recomputing 8 * memory_bytes in Int32, which could silently wrap
        # for large byte counts; scaling by 8 is exact in floating point.
        (pl.col("bytes per item") * 8).alias("bits per item"),
    )
    # Space-separated alias used for facet titles in the plots.
    .with_columns(pl.col("data_type").alias("data type"))
)

# One display call renders all three previews in order.
display(df.describe(), df.head(), df.tail())


In [None]:
# Sanity check: within each (num_items, num_sites, algorithm, data type)
# group, memory_bytes takes exactly one distinct value.
assert (
    df.group_by("num_items", "num_sites", "algorithm", "data type")
    .agg(pl.col("memory_bytes").n_unique())
    .get_column("memory_bytes")
    .eq(1)
    .all()
)


## Plot


## Memory Use


In [None]:
# Per-container bar-label styling, applied in ax.containers order: the first
# two containers get labels just past the bar end, the last two get white
# labels centered inside the bar.
bar_label_styles = [
    {"label_type": "edge"},
    {"label_type": "edge"},
    {"label_type": "center", "color": "white"},
    {"label_type": "center", "color": "white"},
]

# Render the figure twice: with default rc settings and with a serif font.
# NOTE(review): both passes hand teeplot identical arguments, so the second
# save presumably overwrites the first -- confirm this is intended.
for rc in [{}, {"font.family": "serif"}]:
    with mpl.rc_context(rc=rc):
        with tp.teed(
            sns.catplot,
            data=df.filter(
                pl.col("num_items") == 1_000_000,
            ),
            x="num_sites",
            y="bits per item",
            hue="algorithm",
            col="data type",
            col_wrap=2,
            col_order=["bit", "byte", "word", "double word"],
            kind="bar",
            errorbar=("pi", 100),  # 100% percentile interval
            margin_titles=True,
            aspect=2,
            height=1.6,
            palette="Set2",
            sharey=False,
            teeplot_subdir=teeplot_subdir,
        ) as g:
            g.set(ylim=(0, None))
            g.set_xlabels("Buffer Capacity (item count)")
            g.set_ylabels("Memory per Item\n(bits)")
            # Lift the legend above the facets as a single frameless row.
            sns.move_legend(
                g,
                "lower center",
                bbox_to_anchor=(0.4, 0.95),
                ncol=4,
                frameon=False,
                title=None,
            )
            # Annotate every bar with its value. zip() stops at the shorter
            # sequence, so a facet with fewer than four bar containers is
            # labelled as far as it goes instead of raising IndexError.
            for ax in g.axes.flat:
                for container, style in zip(ax.containers, bar_label_styles):
                    ax.bar_label(
                        container,
                        fmt=" %.1f",
                        rotation=90,
                        padding=2,
                        **style,
                    )


In [None]:
# Per-container bar-label styling, applied in ax.containers order: the first
# two containers get labels just past the bar end, the last two get white
# labels centered inside the bar.
bar_label_styles = [
    {"label_type": "edge"},
    {"label_type": "edge"},
    {"label_type": "center", "color": "white"},
    {"label_type": "center", "color": "white"},
]

# Render the figure twice: with default rc settings and with a serif font.
# NOTE(review): both passes hand teeplot identical arguments, so the second
# save presumably overwrites the first -- confirm this is intended.
for rc in [{}, {"font.family": "serif"}]:
    with mpl.rc_context(rc=rc):
        with tp.teed(
            sns.catplot,
            data=df.filter(
                pl.col("num_items") == 1_000_000,
            ),
            x="num_sites",
            y="bytes per item",
            hue="algorithm",
            col="data type",
            col_wrap=2,
            col_order=["bit", "byte", "word", "double word"],
            kind="bar",
            errorbar=("pi", 100),  # 100% percentile interval
            margin_titles=True,
            aspect=2,
            height=1.6,
            palette="Set2",
            sharey=False,
            teeplot_subdir=teeplot_subdir,
        ) as g:
            g.set(ylim=(0, None))
            g.set_xlabels("Buffer Capacity (item count)")
            g.set_ylabels("Memory per Item\n(bytes)")
            # Lift the legend above the facets as a single frameless row.
            sns.move_legend(
                g,
                "lower center",
                bbox_to_anchor=(0.4, 0.95),
                ncol=4,
                frameon=False,
                title=None,
            )
            # Annotate every bar with its value. zip() stops at the shorter
            # sequence, so a facet with fewer than four bar containers is
            # labelled as far as it goes instead of raising IndexError.
            for ax in g.axes.flat:
                for container, style in zip(ax.containers, bar_label_styles):
                    ax.bar_label(
                        container,
                        fmt=" %.1f",
                        rotation=90,
                        padding=2,
                        **style,
                    )
