In [None]:
import ibis
import gcsfs
import plotly.express as px

from ibis_bench.utils.monitor import get_timings_dir

# Plotly Express: render every figure with the dark template.
px.defaults.template = "plotly_dark"

# ibis: evaluate expressions eagerly when they are displayed, show up to
# 20 rows, and put no cap on the number of columns shown.
ibis.options.interactive = True
interactive_repr = ibis.options.repr.interactive
interactive_repr.max_rows = 20
interactive_repr.max_columns = None

In [None]:
# YOLO
import warnings

warnings.filterwarnings("ignore")

In [None]:
# Google Cloud project and bucket used for the benchmark timing files.
PROJECT = "voltrondata-demo"
BUCKET = "ibis-benchy"

# Filesystem handle for the bucket; reused below when registering with DuckDB.
fs = gcsfs.GCSFileSystem(project=PROJECT)

# Sanity check: list the timings directory and preview the last five entries.
timing_listing = fs.ls(f"{BUCKET}/{get_timings_dir()}")
timing_listing[-5:]

In [None]:
# Open a DuckDB connection through ibis and register the GCS filesystem on it,
# so that gs:// paths can be read through `fs`.
con = ibis.connect("duckdb://")
con.register_filesystem(fs)

In [None]:
# Read every *.json file in the bucket's timings directory into one table,
# cast the `timestamp` column to a timestamp type, and cache the result so
# later cells reuse it.
# NOTE(review): ignore_errors=True presumably skips records that fail to
# parse -- confirm against the DuckDB JSON reader docs.
timings_glob = f"gs://{BUCKET}/{get_timings_dir()}/*.json"
t = (
    con.read_json(timings_glob, ignore_errors=True)
    .mutate(timestamp=ibis._["timestamp"].cast("timestamp"))
    .cache()
)
t

In [None]:
# Total number of timing rows loaded into `t`.
t.count()

In [None]:
# Summarise the timing rows with sf >= 1: one output row per
# (system, sf, n_partitions, query_number) holding the mean execution time
# and the maximum observed peak CPU / peak memory.
agg = (
    t.filter(ibis._["sf"] >= 1)
    # Optional narrowing while exploring (uncomment as needed):
    # .filter(ibis._["system"].contains("duckdb") | ibis._["system"].contains("datafusion"))
    # .filter(ibis._["query_number"] == 1)
    .group_by("system", "sf", "n_partitions", "query_number")
    .agg(
        mean_execution_seconds=ibis._["execution_seconds"].mean(),
        max_peak_cpu=ibis._["peak_cpu"].max(),
        max_peak_memory=ibis._["peak_memory"].max(),
    )
    # Largest scale factor first; then by partition count and query number,
    # systems in reverse alphabetical order, ties broken by mean time.
    .order_by(
        ibis._["sf"].desc(),
        ibis._["n_partitions"].asc(),
        ibis._["query_number"].asc(),
        ibis._["system"].desc(),
        ibis._["mean_execution_seconds"].asc(),
    )
)
agg

In [None]:
# Number of timing rows recorded per session_id.
t["session_id"].value_counts()

In [None]:
# Number of timing rows recorded per system.
t["system"].value_counts()

In [None]:
# Distinct scale factors present in the aggregate, as a plain Python list.
# The order is whatever the backend returns; the plotting loop sorts it.
distinct_sf_frame = agg.select("sf").distinct().to_pandas()
sfs = distinct_sf_frame["sf"].tolist()
sfs

In [None]:
# One grouped bar chart per scale factor, in ascending order: query number on
# the x-axis, mean execution time on the y-axis, one colour per system.
#
# `agg` holds one row per (system, sf, n_partitions, query_number).  Without a
# visual encoding for n_partitions, rows that differ only in partition count
# share the same bar position and hide one another, so each partition count
# gets its own facet column.
for sf in sorted(sfs):
    c = px.bar(
        agg.filter(agg["sf"] == sf),
        x="query_number",
        y="mean_execution_seconds",
        color="system",
        barmode="group",
        facet_col="n_partitions",
        title=f"Scale Factor {sf}",
    )
    c.show()