In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the per-request metrics table from CSV and preview its first rows.
metrics_csv = "perf/metrics.csv"
df = pd.read_csv(metrics_csv)
df.head()


In [None]:
# Work on an independent copy of the rows whose status is "ok", so the
# clean-up below never writes back into `df`.
ok = df.loc[df["status"].eq("ok")].copy()

# Blank out impossible measurements instead of dropping the rows:
# negative ttft_s / latency_s and non-positive tpot become NaN.
for column, is_invalid in (
    ("ttft_s", lambda s: s < 0),
    ("latency_s", lambda s: s < 0),
    ("tpot", lambda s: s <= 0),
):
    ok.loc[is_invalid(ok[column]), column] = np.nan

ok.shape


In [None]:
def q(x, p):
    """Return the p-quantile of ``x`` as a float, ignoring missing values.

    Parameters
    ----------
    x : array-like
        Values to summarise; anything ``pd.Series`` accepts.
    p : float
        Quantile to compute, passed straight to ``np.quantile``.

    Returns
    -------
    float
        The quantile of the non-missing values, or NaN when none remain.
    """
    present = pd.Series(x).dropna().values
    return float(np.quantile(present, p)) if len(present) else np.nan

# Columns that together identify one benchmark configuration.
group_keys = [
    "scenario",
    "prompt_type",
    "concurrency",
    "num_batch",
    "stop_setting",
    "cache_mode",
]

# Per-configuration statistics: count of non-null request_id values, the
# p50/p95/p99 of latency_s and ttft_s, and the median of tpot.
# dropna=False keeps configurations whose key columns contain missing values;
# groupby's default (dropna=True) would silently drop those requests from
# every statistic.
summary = (
    ok.groupby(group_keys, dropna=False)
    .agg(
        n=("request_id", "count"),
        lat_p50=("latency_s", lambda x: q(x, 0.50)),
        lat_p95=("latency_s", lambda x: q(x, 0.95)),
        lat_p99=("latency_s", lambda x: q(x, 0.99)),
        ttft_p50=("ttft_s", lambda x: q(x, 0.50)),
        ttft_p95=("ttft_s", lambda x: q(x, 0.95)),
        ttft_p99=("ttft_s", lambda x: q(x, 0.99)),
        tpot_med=("tpot", lambda x: q(x, 0.50)),
    )
    .reset_index()
)

summary.head()


In [None]:
# Latency percentiles against concurrency, one figure per
# (scenario, prompt type, stop setting, cache mode, batch size).
# "scenario" is one of the keys `summary` was aggregated by, so it must be part
# of the split here too; otherwise rows from different scenarios that share the
# other keys would be joined into one line with repeated x values.
for (scenario, ptype, stop_setting, cache_mode, nb), sub in summary.groupby(
    ["scenario", "prompt_type", "stop_setting", "cache_mode", "num_batch"]
):
    sub = sub.sort_values("concurrency")
    fig, ax = plt.subplots()
    for column, label in (("lat_p50", "p50"), ("lat_p95", "p95"), ("lat_p99", "p99")):
        ax.plot(sub["concurrency"], sub[column], marker="o", label=label)
    ax.set_title(
        f"Latency vs Concurrency | {scenario} | {ptype} | stop={stop_setting} "
        f"| cache={cache_mode} | batch={nb}"
    )
    ax.set_xlabel("Concurrency")
    ax.set_ylabel("Latency (s)")
    ax.legend()
    plt.show()
    # Release the figure so a long run of configurations does not pile up
    # open figures.
    plt.close(fig)


In [None]:
# TTFT percentiles against concurrency, one figure per
# (scenario, prompt type, stop setting, cache mode, batch size).
# "scenario" is one of the keys `summary` was aggregated by, so it must be part
# of the split here too; otherwise rows from different scenarios that share the
# other keys would be joined into one line with repeated x values.
for (scenario, ptype, stop_setting, cache_mode, nb), sub in summary.groupby(
    ["scenario", "prompt_type", "stop_setting", "cache_mode", "num_batch"]
):
    sub = sub.sort_values("concurrency")
    fig, ax = plt.subplots()
    for column, label in (("ttft_p50", "p50"), ("ttft_p95", "p95"), ("ttft_p99", "p99")):
        ax.plot(sub["concurrency"], sub[column], marker="o", label=label)
    ax.set_title(
        f"TTFT vs Concurrency | {scenario} | {ptype} | stop={stop_setting} "
        f"| cache={cache_mode} | batch={nb}"
    )
    ax.set_xlabel("Concurrency")
    ax.set_ylabel("TTFT (s)")
    ax.legend()
    plt.show()
    # Release the figure so a long run of configurations does not pile up
    # open figures.
    plt.close(fig)


In [None]:
# Histogram of the non-missing "tpot" values; skipped entirely when the
# column has no usable data.
# NOTE(review): the x-axis label says tokens/sec, while the name TPOT suggests
# a time-per-token quantity. Confirm the column's unit against the producer.
tpot_values = ok["tpot"].dropna()
if not tpot_values.empty:
    plt.figure()
    tpot_values.hist(bins=40)
    plt.title("TPOT distribution")
    plt.xlabel("tokens/sec")
    plt.ylabel("count")
    plt.show()
