In [None]:
# Load the `watermark` IPython extension so the `%watermark` magic can be used.
%load_ext watermark


In [None]:
import os

from IPython.display import display
import pandas as pd
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Print a watermark describing the environment this notebook was executed in.
# NOTE(review): the flags presumably select date/time, machine, and package
# version details -- confirm against `%watermark?`.
%watermark -diwmuv -iv


In [None]:
# Subdirectory name handed to teeplot for every figure saved below; taken from
# the NOTEBOOK_NAME environment variable when set, else this notebook's name.
teeplot_subdir = os.getenv("NOTEBOOK_NAME", "2025-01-18-cpp-bench-memory")
teeplot_subdir


## Prep Data


In [None]:
# Build the working frame in a single pipeline: fetch both result files, stack
# them, pin column dtypes, then derive the per-item duration and a
# human-readable algorithm label.
df = (
    pl.concat(
        [
            pl.read_csv("https://osf.io/m6wne/download"),
            pl.read_csv("https://osf.io/sb4zw/download"),
        ],
    )
    .cast(
        {
            "memory_bytes": pl.Int32,
            "num_items": pl.Int32,
            "num_sites": pl.Int32,
            "duration_s": pl.Float64,
            "replicate": pl.Int32,
        },
    )
    .with_columns(
        # seconds -> nanoseconds, normalized by the number of items processed
        duration_per_item_ns=(
            pl.col("duration_s") * 1_000_000_000 / pl.col("num_items")
        ),
        # dict lookup raises KeyError on any algo_name not listed here
        algorithm=pl.col("algo_name").map_elements(
            {
                "control_ring_algo": "control: ringbuf",
                "control_throwaway_algo": "control: discard",
                "dstream.steady_algo": "dstream steady",
                "dstream.stretched_algo": "dstream stretched",
                "dstream.tilted_algo": "dstream tilted",
                "naive_steady_algo": "naive steady",
            }.__getitem__,
            return_dtype=str,
        ),
    )
)

# Preview: summary statistics, then the first and last rows.
display(df.describe(), df.head(), df.tail())


## Plot


## Memory Use


In [None]:
# Line plot of memory footprint against buffer size, one panel per item count
# and one line per algorithm; teeplot saves the figure when the block exits.
with tp.teed(
    sns.relplot,
    data=df,
    x="num_sites",
    y="memory_bytes",
    hue="algo_name",
    col="num_items",
    palette="muted",
    kind="line",
    errorbar=("pi", 100),  # band spans the full range across replicates
    teeplot_subdir=teeplot_subdir,
) as g:
    # Put both axes on a log scale.
    g.set(xscale="log")
    g.set(yscale="log")


In [None]:
# Plot absolute memory use of the dstream vs. naive steady algorithms,
# as grouped bars per buffer size.
with tp.teed(
    sns.catplot,
    data=df.filter(
        pl.col("algo_name").is_in(
            [
                "dstream.steady_algo",
                "naive_steady_algo",
            ],
        ),
    ).with_columns(
        # short legend labels; dict lookup raises KeyError on anything else
        strategy=pl.col("algo_name").map_elements(
            {
                "dstream.steady_algo": "dstream",
                "naive_steady_algo": "naive",
            }.__getitem__,
            return_dtype=str,
        ),
    ),
    x="num_sites",
    y="memory_bytes",
    hue="strategy",
    kind="bar",
    aspect=1.5,
    height=2,
    palette="Set2",
    teeplot_subdir=teeplot_subdir,
) as g:
    # Log-scale the y axis. No explicit lower limit is set: a bottom ylim of 0
    # is non-positive, which a log-scaled axis cannot honor (matplotlib ignores
    # it and emits a warning), so the autoscaled limits are kept as-is.
    g.set(yscale="log")
    g.set_xlabels("Buffer Size")
    g.set_ylabels("Memory (bytes)")
    for ax in g.axes.flat:
        # first hue level: label above each bar
        ax.bar_label(
            ax.containers[0],
            fmt="%d B",
            label_type="edge",
            rotation=90,
            padding=3,
        )
        # second hue level: white label centered inside each bar
        ax.bar_label(
            ax.containers[1],
            fmt="%d B",
            label_type="center",
            rotation=90,
            color="white",
        )
    # Move the legend above the plot, laid out horizontally without a frame.
    sns.move_legend(
        g,
        "lower center",
        bbox_to_anchor=(0.5, 1),
        ncol=3,
        frameon=False,
        title=None,
    )


## Relative Memory Savings


In [None]:
# Relative savings: for each (num_items, num_sites, replicate) group, the ratio
# of mean naive-steady memory to mean dstream-steady memory.
df_memory_savings = (
    df.filter(
        pl.col("algo_name").is_in(["dstream.steady_algo", "naive_steady_algo"])
    )
    .group_by(["num_items", "num_sites", "replicate"])
    .agg(
        memory_savings=(
            pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "naive_steady_algo")
            .mean()
            / pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "dstream.steady_algo")
            .mean()
        )
    )
    # group_by does not guarantee output row order; sort numerically so the
    # table and the categorical plot axis below are ordered the same way on
    # every run.
    .sort(["num_sites", "num_items", "replicate"])
)
# Show the full per-buffer-size summary without pandas truncating it.
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    display(
        df_memory_savings.group_by(["num_sites"])
        .mean()
        .sort("num_sites")
        .to_pandas()
    )


# Plot savings
with tp.teed(
    sns.catplot,
    # num_sites is cast to str so it is treated as a categorical axis; bar
    # order then follows row order, which the sort above makes numeric.
    data=df_memory_savings.cast({"num_sites": str}),
    x="num_sites",
    y="memory_savings",
    kind="bar",
    aspect=1.5,
    height=2,
    teeplot_subdir=teeplot_subdir,
) as g:
    g.set(ylim=(0, None))
    for ax in g.axes.flat:
        # NOTE(review): "%d" truncates the ratio to an integer in the label --
        # confirm that is the intended precision.
        ax.bar_label(ax.containers[0], fmt="$%d\\times$")


## Absolute Memory Savings


In [None]:
# Absolute savings: for each (num_items, num_sites, replicate) group, mean
# naive-steady memory minus mean dstream-steady memory, in bytes.
df_memory_savings = (
    df.filter(
        pl.col("algo_name").is_in(["dstream.steady_algo", "naive_steady_algo"])
    )
    .group_by(["num_items", "num_sites", "replicate"])
    .agg(
        memory_savings=(
            pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "naive_steady_algo")
            .mean()
            - pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "dstream.steady_algo")
            .mean()
        )
    )
    # group_by does not guarantee output row order; sort numerically so the
    # categorical plot axis below is ordered the same way on every run.
    .sort(["num_sites", "num_items", "replicate"])
)

# Plot savings
with tp.teed(
    sns.catplot,
    # num_sites is cast to str so it is treated as a categorical axis; bar
    # order then follows row order, which the sort above makes numeric.
    data=df_memory_savings.cast({"num_sites": str}),
    x="num_sites",
    y="memory_savings",
    kind="bar",
    aspect=1.5,
    height=2,
    teeplot_subdir=teeplot_subdir,
) as g:
    g.set(ylim=(0, None))
    for ax in g.axes.flat:
        ax.bar_label(ax.containers[0], fmt="%d B")
