In [None]:
%load_ext watermark


In [None]:
import os

from IPython.display import display
import outset as otst
import pandas as pd
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp

import pylib  # noqa: F401


In [None]:
%watermark -diwmuv -iv


In [None]:
# Sub-directory that saved figures are filed under (passed to every tp.teed
# call below). The NOTEBOOK_NAME environment variable overrides the default.
teeplot_subdir = os.getenv("NOTEBOOK_NAME", default="2025-01-18-cpp-bench")
teeplot_subdir


## Prep Data


In [None]:
# The benchmark results are split across two CSV downloads; stack them into
# one frame.
benchmark_csv_urls = (
    "https://osf.io/m6wne/download",
    "https://osf.io/sb4zw/download",
)
df = pl.concat([pl.read_csv(url) for url in benchmark_csv_urls])

# Explicit dtypes for the numeric columns used throughout the notebook.
column_dtypes = {
    "memory_bytes": pl.Int32,
    "num_items": pl.Int32,
    "num_sites": pl.Int32,
    "duration_s": pl.Float64,
    "replicate": pl.Int32,
}

# Display label for each raw `algo_name`. The lookup goes through
# dict.__getitem__, so an algo_name missing from this table raises KeyError
# rather than passing through unlabeled.
algorithm_labels = {
    "control_ring_algo": "control: ringbuf",
    "control_throwaway_algo": "control: discard",
    "dstream.steady_algo": "dstream steady",
    "dstream.stretched_algo": "dstream stretched",
    "dstream.tilted_algo": "dstream tilted",
    "naive_steady_algo": "naive steady",
}

df = df.cast(column_dtypes).with_columns(
    # total duration in seconds -> nanoseconds per item
    duration_per_item_ns=(
        pl.col("duration_s") * 1_000_000_000 / pl.col("num_items")
    ),
    algorithm=pl.col("algo_name").map_elements(
        algorithm_labels.__getitem__,
        return_dtype=str,
    ),
)
# Optional restriction to a single workload size (disabled):
# df = df.filter(pl.col("num_items") == 1_000_000)

# Summary statistics plus the first and last rows, as a quick sanity check.
display(df.describe(), df.head(), df.tail())


## Example Plot


In [None]:
# Memory footprint against num_sites: one line per algorithm and one panel
# per num_items value, both axes logarithmic.
memory_relplot_kws = dict(
    data=df,
    x="num_sites",
    y="memory_bytes",
    hue="algo_name",
    col="num_items",
    palette="muted",
    kind="line",
    # 100% percentile interval: the band spans the full observed range
    errorbar=("pi", 100),
)
with tp.teed(
    sns.relplot, **memory_relplot_kws, teeplot_subdir=teeplot_subdir
) as g:
    g.set(xscale="log", yscale="log")


In [None]:
# Memory savings *factor* of dstream steady over naive steady: within each
# (num_items, num_sites, replicate) group, divide the naive algorithm's mean
# memory_bytes by the dstream algorithm's mean memory_bytes.
df_memory_savings = (
    df.filter(
        pl.col("algo_name").is_in(["dstream.steady_algo", "naive_steady_algo"])
    )
    .group_by(["num_items", "num_sites", "replicate"])
    .agg(
        memory_savings=(
            pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "naive_steady_algo")
            .mean()
            / pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "dstream.steady_algo")
            .mean()
        )
    )
)

# Show the per-num_sites means without pandas truncating rows or columns.
# polars group_by() does not guarantee output row order, so sort explicitly
# to keep the table readable and identical across runs.
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    display(
        df_memory_savings.group_by(["num_sites"])
        .mean()
        .sort("num_sites")
        .to_pandas()
    )


# Plot memory savings factor
with tp.teed(
    sns.catplot,
    data=df_memory_savings.cast({"num_sites": str}),
    x="num_sites",
    y="memory_savings",
    # num_sites is cast to str for a categorical axis; pin the bar order
    # numerically, since the frame's row order comes from an unordered group_by
    order=list(map(str, sorted(df_memory_savings["num_sites"].unique()))),
    kind="bar",
    aspect=1.5,
    height=2,
    # Tag the output filename: the byte-difference plot in the next cell uses
    # the same x/y/kind arguments and would otherwise overwrite this figure.
    teeplot_outattrs=dict(metric="ratio"),
    teeplot_subdir=teeplot_subdir,
) as g:
    g.set(ylim=(0, None))
    for ax in g.axes.flat:
        # "%d" truncates the factor to an integer in the bar annotation
        ax.bar_label(ax.containers[0], fmt="$%d\\times$")


In [None]:
# Absolute memory savings in bytes of dstream steady over naive steady:
# within each (num_items, num_sites, replicate) group, subtract the dstream
# algorithm's mean memory_bytes from the naive algorithm's.
# Note this rebinds `df_memory_savings` with a difference, not a ratio.
df_memory_savings = (
    df.filter(
        pl.col("algo_name").is_in(["dstream.steady_algo", "naive_steady_algo"])
    )
    .group_by(["num_items", "num_sites", "replicate"])
    .agg(
        memory_savings=(
            pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "naive_steady_algo")
            .mean()
            - pl.col("memory_bytes")
            .filter(pl.col("algo_name") == "dstream.steady_algo")
            .mean()
        )
    )
)

# Plot memory savings in bytes
with tp.teed(
    sns.catplot,
    data=df_memory_savings.cast({"num_sites": str}),
    x="num_sites",
    y="memory_savings",
    # num_sites is cast to str for a categorical axis; pin the bar order
    # numerically, since the frame's row order comes from an unordered group_by
    order=list(map(str, sorted(df_memory_savings["num_sites"].unique()))),
    kind="bar",
    aspect=1.5,
    height=2,
    teeplot_subdir=teeplot_subdir,
) as g:
    g.set(ylim=(0, None))
    for ax in g.axes.flat:
        ax.bar_label(ax.containers[0], fmt="%d B")


In [None]:
# Total duration against num_sites: one panel per compiler, one line per
# algorithm (colour and marker/dash style both encode algo_name), log-log axes.
with tp.teed(
    sns.relplot,
    data=df,
    x="num_sites",
    y="duration_s",
    col="compiler",
    hue="algo_name",
    style="algo_name",
    palette="muted",
    kind="line",
    aspect=1.5,
    # Let seaborn assign one distinct marker per algorithm. A fixed
    # two-marker list is cycled when there are more style levels than
    # markers, leaving several algorithms with the same marker.
    markers=True,
    ms=8,
    height=2,
    teeplot_subdir=teeplot_subdir,
) as g:
    g.set(xscale="log", yscale="log")
    # Move the legend above the panels as a frameless, untitled two-column row.
    sns.move_legend(
        g,
        "lower center",
        bbox_to_anchor=(0.35, 1),
        ncol=2,
        title=None,
        frameon=False,
    )
    g.tight_layout()


In [None]:
# Total duration against num_sites: one panel per algorithm, comparing
# compilers within each panel. Panels keep independent y axes (sharey=False).
duration_by_algo_kws = dict(
    data=df,
    x="num_sites",
    y="duration_s",
    col="algo_name",
    hue="compiler",
    style="compiler",
    facet_kws={"sharey": False},
    palette="muted",
    kind="line",
    aspect=1.5,
    markers=["^", "X"],
    ms=8,
    height=2,
)
# Legend placement: frameless, untitled, two columns, anchored above the grid.
duration_by_algo_legend_kws = dict(
    bbox_to_anchor=(0.35, 1),
    ncol=2,
    title=None,
    frameon=False,
)
with tp.teed(
    sns.relplot, **duration_by_algo_kws, teeplot_subdir=teeplot_subdir
) as g:
    g.set(xscale="log", yscale="log")
    sns.move_legend(g, "lower center", **duration_by_algo_legend_kws)
    g.tight_layout()


In [None]:
# Total duration against num_items on a grid of algorithm (columns) by
# num_sites (rows), comparing compilers within each panel. Panels keep
# independent y axes and facet titles are drawn in the margins.
duration_grid_kws = dict(
    data=df,
    x="num_items",
    y="duration_s",
    col="algo_name",
    row="num_sites",
    hue="compiler",
    style="compiler",
    facet_kws={"margin_titles": True, "sharey": False},
    palette="muted",
    kind="line",
    aspect=1.5,
    markers=["^", "X"],
    ms=8,
    height=2,
)
# Legend placement: frameless, untitled, two columns, anchored above the grid.
duration_grid_legend_kws = dict(
    bbox_to_anchor=(0.35, 1),
    ncol=2,
    title=None,
    frameon=False,
)
with tp.teed(
    sns.relplot, **duration_grid_kws, teeplot_subdir=teeplot_subdir
) as g:
    g.set(xscale="log", yscale="log")
    sns.move_legend(g, "lower center", **duration_grid_legend_kws)
    g.tight_layout()


In [None]:
import matplotlib.ticker as mpl_ticker


In [None]:
# Draw the per-item ingest-time figure twice: once with every algorithm (the
# empty string excludes nothing) and once without the stretched and tilted
# dstream variants. `exclude` is also recorded in the output filename via
# teeplot_outattrs so the two renders are saved separately.
for exclude in "", "dstream.stretched_algo,dstream.tilted_algo":
    # Plot input: rows with num_items == 1_000_000, minus the excluded
    # algorithms, sorted by display label. `is_naive` flags the naive
    # baseline and is used as the grid's row variable below.
    data = (
        df.with_columns(
            is_naive=pl.col("algo_name") == "naive_steady_algo",
        )
        .filter(pl.col("num_items") == 1_000_000)
        # "".split(",") is [""], which is not one of the algorithm names
        # mapped in the data-prep cell, so the first pass drops nothing
        .filter(~pl.col("algo_name").is_in(exclude.split(",")))
        .sort(
            "algorithm",
        )
        .to_pandas()
        # categorical dtype on the label column used for hue/style
        .astype(
            {"algorithm": "category"},
        )
    )
    with tp.teed(
        otst.OutsetGrid,
        data=data,
        x="num_sites",
        y="duration_per_item_ns",
        # NOTE(review): presumably this makes the single outset panel frame
        # the non-naive rows only — confirm against the outset documentation
        row="is_naive",
        row_order=[False],
        marqueeplot_kws=dict(
            frame_outer_pad=(4.0, 0.2),
            frame_inner_pad=(4.0, 0.2),
            leader_stretch=0.8,
            frame_edge_kws={"alpha": 0.1},
            leader_edge_kws={"alpha": 0.1},
        ),
        height=2.2,
        aspect=1.2,
        palette="tab10_r",
        teeplot_subdir=teeplot_subdir,
        teeplot_outattrs=dict(exclude=exclude),
    ) as g:
        # Draw one line per algorithm on the grid's axes.
        g.map_dataframe(
            sns.lineplot,
            x="num_sites",
            y="duration_per_item_ns",
            alpha=0.7,
            hue="algorithm",
            style="algorithm",
            style_order=sorted(data["algorithm"].unique()),
            # six markers, one per algorithm label defined in the prep cell;
            # the second pass has fewer algorithms than markers
            markers=["X", "^", ".", "o", "d", "s"],
            ms=8,
            palette="tab10_r",
            zorder=otst.util.SplitKwarg(None, -1),
        )
        # The same limits/scales are applied both before and after
        # marqueeplot().
        # NOTE(review): presumably marqueeplot() needs them set to place its
        # frames and may then alter them — confirm before deduplicating
        g.set(xlim=(32 * 1.5, 8192 / 1.5), xscale="log", yscale="log")
        g.marqueeplot(equalize_aspect=False)
        g.set(xlim=(32 * 1.5, 8192 / 1.5), xscale="log", yscale="log")
        g.set_xlabels("Buffer Size")
        g.set_ylabels("Ingest Time (ns)")
        # Hand-picked y ticks for the second axes (titled "(zoomed inset)"
        # below); the blank/space labels hide the outermost tick text. The
        # reduced-algorithm pass uses a narrower tick set.
        yticks = [1.3, 2, 3, 5, 8, 12]
        ytick_labels = [" ", "2", "3", "5", "8", "12"]
        if exclude:
            yticks = [1.3, 2, 3, 4, 5, 6]
            ytick_labels = ["", "2", "3", "4", "5", " "]
        # mpl_ticker is matplotlib.ticker, imported in the preceding cell
        g.axes.flat[1].yaxis.set_major_locator(mpl_ticker.FixedLocator(yticks))
        g.axes.flat[1].set_yticks(yticks)
        g.axes.flat[1].set_yticklabels(ytick_labels)
        g.tight_layout()
        ax = g.axes.flat[1]
        ax.set_title("(zoomed inset)", size=10)
        # Blank the inset's x tick labels; `labels` is only read for its length.
        labels = [item.get_text() for item in ax.get_xticklabels()]
        empty_string_labels = [""] * len(labels)
        ax.set_xticklabels(empty_string_labels)
        # NOTE(review): if g.set() applies to every axes in the grid, this
        # re-labels the inset's x ticks and undoes the blanking above — confirm
        g.set(
            xticks=[64, 256, 1024, 4096],
            xticklabels=["64", "256", "1024", "4096"],
        )
        # Lower y limit of 1 on the first axes; upper limit left automatic.
        g.axes.flat[0].set_ylim(1, None)
        g.add_legend()


In [None]:
# Speedup of every algorithm relative to the naive steady baseline, computed
# per (compiler, num_items, num_sites, replicate) group.

# 1) Compute mean duration per group x algo
df_agg = df.group_by(
    ["compiler", "num_items", "num_sites", "replicate", "algo_name"]
).agg(mean_duration_s=pl.col("duration_s").mean())

# 2) Compute mean duration *just for* "naive_steady_algo"
df_naive = (
    df.filter(pl.col("algo_name") == "naive_steady_algo")
    .group_by(["compiler", "num_items", "num_sites", "replicate"])
    .agg(naive_mean_duration_s=pl.col("duration_s").mean())
)

# 3) Join the two on compiler,num_items,num_sites,replicate
df_speedup = df_agg.join(
    df_naive,
    on=["compiler", "num_items", "num_sites", "replicate"],
    how="left",
).with_columns(
    # 4) Speedup = ratio vs. naive_steady_algo
    (pl.col("naive_mean_duration_s") / pl.col("mean_duration_s")).alias(
        "speedup"
    )
)

# polars group_by() does not guarantee output row order, so every ordering
# that affects the table or the figure is pinned explicitly below.
speedup_num_sites_order = list(
    map(str, sorted(df_speedup["num_sites"].unique()))
)
speedup_algo_order = sorted(
    name
    for name in df_speedup["algo_name"].unique()
    if name != "naive_steady_algo"
)

# Mean and standard deviation of speedup at num_items == 1_000_000, shown in
# full and sorted so the table is identical across runs.
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    display(
        df_speedup.filter(pl.col("num_items") == 1_000_000)
        .group_by(["num_sites", "algo_name"])
        .agg(
            speedup_mean=pl.col("speedup").mean(),
            speedup_std=pl.col("speedup").std(),
        )
        .sort(["num_sites", "algo_name"])
        .to_pandas()
    )

# Now plot speedup on a log scale (the baseline itself is omitted)
with tp.teed(
    sns.catplot,
    data=df_speedup.cast({"num_sites": str}).filter(
        pl.col("algo_name") != "naive_steady_algo"
    ),
    x="num_sites",
    y="speedup",
    order=speedup_num_sites_order,
    hue="algo_name",
    # Fixed hue order keeps the colour assignment, and the container index
    # used for bar labels below, stable from run to run.
    hue_order=speedup_algo_order,
    col="compiler",
    kind="bar",
    aspect=1.5,
    height=2,
    teeplot_subdir=teeplot_subdir,
) as g:
    # The lower y limit of 1 clips any bar whose speedup is below 1x.
    g.set(yscale="log", ylim=(1, None))
    for ax in g.axes.flat:
        # Only the second hue level's bars (speedup_algo_order[1]) are
        # annotated; "%d" truncates the value to an integer.
        # NOTE(review): confirm this is the algorithm meant to be highlighted.
        ax.bar_label(
            ax.containers[1],
            fmt="$%d\\times$",
            label_type="center",
            rotation=90,
        )
