In [None]:
%load_ext watermark


In [None]:
from IPython.display import display
from backstrip import backplot
from matplotlib import ticker as mpl_ticker
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Stamp the notebook with environment metadata via the watermark extension.
# NOTE(review): flags presumably request date/time, machine details, Python
# version and imported-package versions — confirm against the watermark docs.
%watermark -diwmuv -iv


## Prep Data


In [None]:
# Replicate datasets to load, in concatenation order, as
# (parquet URL, population-size factor) pairs. The factor is multiplied by
# 750 * 994 to obtain "net population size".
# NOTE(review): the factor is presumably the number of agents per tile on a
# 750 x 994 tile grid — confirm against the experiment configuration.
#
# A further set with factor 32 was commented out in the original cell and is
# still excluded here: osf.io/3nxh4 (with alternates 6tj37, q96rk, e6fmt,
# avypr listed alongside it).
_REPLICATE_SOURCES = [
    ("https://osf.io/vh9pb/download", 256),
    ("https://osf.io/sx92b/download", 2048),
    ("https://osf.io/ebtgm/download", 2048),
    ("https://osf.io/jndbw/download", 2048),
    ("https://osf.io/wq4bt/download", 2048),
    ("https://osf.io/qsv4j/download", 2048),
    ("https://osf.io/s8cq3/download", 2048),
]

# Only rows recorded for this tile index are kept from each dataset.
# NOTE(review): 749 * 993 - 1 is not the last index of a 750 x 994 grid
# (that would be 750 * 994 - 1) — confirm this is the intended tile.
_TILE_OF_INTEREST = 749 * 993 - 1


def _load_replicate(url: str, size_factor: int) -> pl.DataFrame:
    """Read one replicate's parquet file and attach experiment metadata.

    Parameters
    ----------
    url : str
        Download URL of the parquet file.
    size_factor : int
        Multiplier applied to 750 * 994 to give "net population size".

    Returns
    -------
    pl.DataFrame
        Rows where "tile" equals ``_TILE_OF_INTEREST``, with constant
        "hardware", "experiment design" and "net population size" columns
        added.
    """
    return (
        pl.read_parquet(url, use_pyarrow=True)
        .filter(pl.col("tile") == _TILE_OF_INTEREST)
        .with_columns(
            pl.lit("WSE CS-2").alias("hardware"),
            pl.lit("50/50 poisson").alias("experiment design"),
            pl.lit(size_factor * 750 * 994).alias("net population size"),
        )
    )


# "diagonal_relaxed" takes the union of columns across replicates and
# coerces mismatched dtypes to a common supertype.
df = pl.concat(
    [_load_replicate(url, factor) for url, factor in _REPLICATE_SOURCES],
    how="diagonal_relaxed",
)


In [None]:
def _population_label(size) -> str:
    """Render a population size as a mathtext scientific-notation label."""
    raw = f"${size:.2e} }}$"
    # Swap Python's "e+" exponent marker for a tight "x10^{" so that the
    # trailing " }" emitted above closes the superscript group.
    return raw.replace("e+", r"\!\!\times\!\!10^{")


# Agent-generations processed per second: population size divided by the
# "tsc seconds per cycle" column.
_throughput_expr = (
    pl.col("net population size") / pl.col("tsc seconds per cycle")
).alias("throughput (agent-generations/sec)")

# Human-readable (string) population label, used as a categorical level.
_population_label_expr = (
    pl.col("net population size")
    .map_elements(_population_label, return_dtype=pl.String)
    .alias("net population")
)

df = df.with_columns(_throughput_expr).with_columns(_population_label_expr)

# Show the augmented frame followed by its summary statistics.
display(df)
display(df.describe())


## Throughput


In [None]:
# Order hue levels by the numeric population size. The "net population"
# labels are mathtext strings (e.g. "$1.91\!\!\times\!\!10^{08 }$"), so they
# cannot be passed through float() for sorting — that raises ValueError.
hue_order = (
    df.select("net population size", "net population")
    .sort("net population size")["net population"]
    .unique(maintain_order=True)
    .to_list()
)


def _sci_tick_label(value, _pos):
    """Format a tick value as mathtext scientific notation.

    Positive exponents give the same text as before
    (e.g. "$1.5\\!\\!\\times\\!\\!10^{12 }$"). Negative exponents are handled
    too, so the closing brace always has a matching "10^{".
    """
    text = f"${value:.1e} }}$"
    return text.replace("e+", r"\!\!\times\!\!10^{").replace(
        "e-", r"\!\!\times\!\!10^{-"
    )


# teeplot_callback=True makes tp.tee return a (save-callback, plot-result)
# pair, so the axes can be tweaked before the figure is written out.
saveit, g = tp.tee(
    backplot,
    data=df.to_pandas(),
    x="throughput (agent-generations/sec)",
    hue="net population",
    hue_order=hue_order,
    col="experiment design",
    aspect=2.0,
    height=1.7,
    orient="h",
    palette=sns.color_palette("Pastel1"),
    sharex=False,
    sharey=False,
    teeplot_callback=True,
)

for ax in g.axes.flat:
    # One formatter instance per axis, as in the original cell.
    ax.xaxis.set_major_formatter(mpl_ticker.FuncFormatter(_sci_tick_label))

saveit()


## Speed and Throughput


In [None]:
# Summarise throughput and clock speed (mean and standard deviation) for each
# hardware / experiment design / population size combination.
_summary_keys = ["hardware", "experiment design", "net population size"]
_throughput_col = pl.col("throughput (agent-generations/sec)")
_speed_col = pl.col("tsc cycle hertz")

# Left as the final expression so the notebook renders the resulting table.
df.group_by(_summary_keys).agg(
    _throughput_col.mean().alias("mean_throughput"),
    _throughput_col.std().alias("std_throughput"),
    _speed_col.mean().alias("mean_speed"),
    _speed_col.std().alias("std_speed"),
)
