In [None]:
%load_ext watermark


In [None]:
from IPython.display import display
from backstrip import backplot
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Print a watermark of the execution environment for reproducibility.
# NOTE(review): the flags presumably select date/machine/version details and
# the versions of imported packages — confirm against the watermark docs.
%watermark -diwmuv -iv


## Prep Data


In [None]:
# Load both CPU benchmark files, tag every row with its hardware, experiment
# design, and net population size, keep only the tile-0 records, and derive
# throughput as net population size divided by seconds per cycle.
cpu_perf = (
    pl.concat(
        [
            pl.read_parquet(url, use_pyarrow=True).with_columns(
                pl.lit("CPU").alias("hardware"),
                pl.lit(design).alias("experiment design"),
                pl.lit(256 * 81 * 81).alias("net population size"),
            )
            for url, design in (
                ("https://osf.io/z65g3/download", "50/50 counter-based"),
                ("https://osf.io/tcv9q/download", "de novo counter-based"),
            )
        ],
    )
    .filter(pl.col("tile") == 0)
    .with_columns(
        (
            pl.col("net population size") / pl.col("tsc seconds per cycle")
        ).alias("throughput (agent-generations/sec)"),
    )
)
# Show the tagged records followed by their summary statistics.
display(cpu_perf, cpu_perf.describe())


In [None]:
# Load both GPU benchmark files, tag every row with its hardware, experiment
# design, and net population size, keep only the tile-0 records, and derive
# throughput as net population size divided by seconds per cycle.
cupy_perf = (
    pl.concat(
        [
            pl.read_parquet(url, use_pyarrow=True).with_columns(
                pl.lit("GPU").alias("hardware"),
                pl.lit(design).alias("experiment design"),
                pl.lit(256 * 243 * 243).alias("net population size"),
            )
            for url, design in (
                ("https://osf.io/jz3eq/download", "50/50 counter-based"),
                ("https://osf.io/ya29p/download", "de novo counter-based"),
            )
        ],
    )
    .filter(pl.col("tile") == 0)
    .with_columns(
        (
            pl.col("net population size") / pl.col("tsc seconds per cycle")
        ).alias("throughput (agent-generations/sec)"),
    )
)
# Show the tagged records followed by their summary statistics.
display(cupy_perf, cupy_perf.describe())


In [None]:
# Load the four WSE benchmark files (site-explicit and counter-based variants
# of both experiment designs). Each file is reduced to a single reference tile
# before being tagged with hardware, experiment design, and net population
# size; throughput is then derived for the stacked result.
# NOTE(review): the filter keeps tile index 749 * 993 - 1 while the population
# size is computed over a 750 x 994 grid — confirm the intended reference tile.
wse_perf = pl.concat(
    [
        pl.read_parquet(url, use_pyarrow=True)
        .filter(pl.col("tile") == 749 * 993 - 1)
        .with_columns(
            pl.lit("WSE").alias("hardware"),
            pl.lit(design).alias("experiment design"),
            pl.lit(256 * 750 * 994).alias("net population size"),
        )
        for url, design in (
            ("https://osf.io/h3tu4/download", "50/50 site-explicit"),
            ("https://osf.io/w97nx/download", "de novo site-explicit"),
            ("https://osf.io/vh9pb/download", "50/50 counter-based"),
            ("https://osf.io/zp9gt/download", "de novo counter-based"),
        )
    ],
).with_columns(
    (pl.col("net population size") / pl.col("tsc seconds per cycle")).alias(
        "throughput (agent-generations/sec)"
    ),
)


In [None]:
# Stack the CPU, GPU, and WSE frames row-wise into one long table that the
# plots and summaries below draw from, then show it with summary statistics.
df = pl.concat([cpu_perf, cupy_perf, wse_perf], how="vertical")
display(df, df.describe())


## Absolute Performance


In [None]:
# Plot absolute throughput with `backplot`, colored by hardware, with one
# facet column per experiment design (wrapped two per row).
# NOTE(review): `tp.tee` presumably renders the plot and also saves it to
# disk — confirm against the teeplot documentation.
tp.tee(
    backplot,
    data=df.to_pandas(),
    col="experiment design",
    x="throughput (agent-generations/sec)",
    hue="hardware",
    aspect=2,
    col_wrap=2,
    height=1.5,
    orient="h",
    # presumably (x, y): log-scale the throughput axis only — TODO confirm
    log_scale=(True, False),
    palette=sns.color_palette("Accent"),
    sharey=False,
    teeplot_outexclude="post",
    # The postprocess snippet sets the y-limits to (0.1, 0.5) on every axes
    # from the third facet onward; presumably teeplot executes it with the
    # plot result bound to `teed` — confirm.
    teeplot_postprocess="""
for ax in teed.axes.flat[2:]:
    ax.set_ylim(0.1, 0.5)
""",
)


In [None]:
# Summarize the mean and standard deviation of throughput and of the
# "tsc cycle hertz" column for each (hardware, experiment design) pair.
# `group_by` does not guarantee the order of the resulting rows, so the result
# is sorted to keep the displayed table stable across runs.
(
    df.group_by("hardware", "experiment design")
    .agg(
        pl.col("throughput (agent-generations/sec)")
        .mean()
        .alias("mean_throughput"),
        pl.col("throughput (agent-generations/sec)")
        .std()
        .alias("std_throughput"),
        pl.col("tsc cycle hertz").mean().alias("mean_speed"),
        pl.col("tsc cycle hertz").std().alias("std_speed"),
    )
    .sort("hardware", "experiment design")
)


## Relative Performance


In [None]:
# Step 1: Compute the mean CPU throughput per experiment design. This is the
# baseline against which every hardware platform is compared. The hardware
# labels are exact literals, so match "CPU" by equality rather than by regex.
cpu_means = (
    df.filter(pl.col("hardware") == "CPU")
    .group_by("experiment design")
    .agg(pl.mean("throughput (agent-generations/sec)").alias("cpu_mean"))
)
display(cpu_means)

# Step 2: Attach the CPU baseline to every row. The left join keeps rows whose
# experiment design has no CPU measurements; their "cpu_mean" (and therefore
# their "speedup") is null.
df_with_mean = df.join(cpu_means, on="experiment design", how="left")

# Step 3: Express each throughput measurement as a multiple of the CPU mean
# for the same experiment design.
df_with_speedup = df_with_mean.with_columns(
    (pl.col("throughput (agent-generations/sec)") / pl.col("cpu_mean")).alias(
        "speedup"
    )
)

# Show the resulting DataFrame with speedup
df_with_speedup


In [None]:
# Plot speedup relative to the CPU mean with `backplot`, colored by hardware,
# with one row per experiment design.
# NOTE(review): `tp.tee` presumably renders the plot and also saves it to
# disk — confirm against the teeplot documentation.
tp.tee(
    backplot,
    data=df_with_speedup.to_pandas(),
    y="experiment design",
    x="speedup",
    hue="hardware",
    aspect=2,
    height=2,
    # Only the counter-based designs are listed; these are the designs that
    # have CPU measurements in this notebook, so presumably the site-explicit
    # rows (with no CPU baseline) are left off the axis — confirm.
    order=["50/50 counter-based", "de novo counter-based"],
    orient="h",
    # presumably (x, y): log-scale the speedup axis only — TODO confirm
    log_scale=(True, False),
    palette=sns.color_palette("Accent"),
)


In [None]:
# Summarize the mean and standard deviation of speedup for each
# (hardware, experiment design) pair. `group_by` does not guarantee the order
# of the resulting rows, so the result is sorted to keep the displayed table
# stable across runs.
(
    df_with_speedup.group_by("hardware", "experiment design")
    .agg(
        pl.col("speedup").mean().alias("mean_speedup"),
        pl.col("speedup").std().alias("std_speedup"),
    )
    .sort("hardware", "experiment design")
)
