Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
210 commits
Select commit Hold shift + click to select a range
09a57ad
Remove more scalar_at from sparse arrays (#7439)
gatesn May 1, 2026
5e5572b
chore: codspeed display names (#7752)
0ax1 May 1, 2026
ace3d8e
Fix Claude workflow cancellation (#7775)
gatesn May 4, 2026
44a6367
Allow bot approvals (#7774)
gatesn May 4, 2026
12e72c1
Update dependency lucide-react to v1.14.0 (#7769)
renovate[bot] May 4, 2026
f843857
fix: make ExtDType metadata deserialization total over byte input (#7…
gatesn May 4, 2026
2887cfb
Fix logo on benchmarks website (#7779)
connortsui20 May 4, 2026
e0a0527
Update dependency typescript-eslint to v8.59.2 (#7764)
renovate[bot] May 4, 2026
d3ff1f1
[claude] feat(bench): emit v3 JSONL records and dual-write to bench s…
connortsui20 May 4, 2026
e0a2bdf
Benchmarks Website Version 3 (#7643)
connortsui20 May 4, 2026
4c09d79
Lock file maintenance (#7771)
renovate[bot] May 4, 2026
f94ed0f
Lock file maintenance (#7772)
renovate[bot] May 4, 2026
eb3da9b
Update Gradle to v9.5.0 (#7770)
renovate[bot] May 4, 2026
0dd4ff6
Update dependency globals to v17.6.0 (#7768)
renovate[bot] May 4, 2026
fba5bd4
Update dependency eslint to v10.3.0 (#7767)
renovate[bot] May 4, 2026
d37761b
Update crate-ci/typos action to v1.46.0 (#7766)
renovate[bot] May 4, 2026
4e5f433
Update storybook monorepo to v10.3.6 (#7765)
renovate[bot] May 4, 2026
fce5284
Update release-drafter/release-drafter action to v7.2.1 (#7763)
renovate[bot] May 4, 2026
4eb9a4b
Update taiki-e/install-action digest to 711e1c3 (#7762)
renovate[bot] May 4, 2026
1a14926
Update anthropics/claude-code-action digest to 2cc1ac1 (#7761)
renovate[bot] May 4, 2026
1c7b179
Remove bad action (#7786)
connortsui20 May 4, 2026
64d9161
Fix weird signature of with_slots functions (#7758)
robert3005 May 5, 2026
903ee6c
skip[fuzz]: less jobs to run for fuzzer (#7788)
joseph-isaacs May 5, 2026
922ea0a
Try to fix benchmakrs-website tests on windows (#7794)
robert3005 May 5, 2026
a7c4c76
Make struct cast implementation pluggable (#7684)
robert3005 May 5, 2026
96b02a2
Fix benchmark group parsing for v3 (#7797)
connortsui20 May 5, 2026
a31a0db
Allow Claude to open PRs (#7798)
gatesn May 5, 2026
1718eb3
Add Patches lookup benchmarks (#7795)
palaska May 5, 2026
aabade1
bench: CUDA sync load-to-device benchmark (#7799)
0ax1 May 5, 2026
7a0b70f
Add more benchmarks for intersect by rank (#7800)
robert3005 May 5, 2026
d7c22ba
feat[vortex-cuda]: GPU FSST decompression kernel (#7776)
asubiotto May 5, 2026
abafe92
Preload data on benchmarks website (#7802)
connortsui20 May 5, 2026
121452a
Improve UI of v3 benchmarks website (#7801)
connortsui20 May 5, 2026
a9dc3a2
Fix zoom on v3 benchmarks website (#7803)
connortsui20 May 5, 2026
924409f
Run vortex-mask benchmarks with codspeed (#7804)
robert3005 May 5, 2026
fb0f605
Update dependency pip to v26.1 [SECURITY] (#7805)
renovate[bot] May 6, 2026
afea5e8
perf[array]: outline the array parts data from the DynArray (#7750)
joseph-isaacs May 6, 2026
0b790d2
use rstest for some test cleanups (#7813)
a10y May 6, 2026
f307edc
bench: CUDA host-to-device copy modes (#7815)
0ax1 May 6, 2026
ab8a199
feat: check VortexReadAt::read_at results in the I/O driver (#7783)
danking May 6, 2026
a2edb2f
fix: partition in filter should err (not panic). (#7816)
joseph-isaacs May 6, 2026
aab4c34
[claude] Move benchmarks-website planning docs into code comments (#7…
connortsui20 May 6, 2026
eda8c2c
fix(ffi): double-free in partition scan on error (#7817)
m7kss1 May 6, 2026
115b3ba
fix[sparse]: ensure cached patches are updated and use `PatchesData` …
joseph-isaacs May 7, 2026
2ff9ba4
fix[py]: remove mimalloc in library (#7826)
joseph-isaacs May 7, 2026
9f508b3
bench[gpu]: overlap of compute and copying (#7828)
0ax1 May 7, 2026
6331959
Use smallvec for internal stats storage (#7823)
robert3005 May 7, 2026
750457a
feat: iterative execution for SparseArray (#7711)
joseph-isaacs May 7, 2026
eda04e1
bench[gpu]: CUDA device memory pool benchmarks (#7831)
0ax1 May 7, 2026
017a93c
Add a new AggregateFn for UncompressedSize (#7715)
AdamGS May 7, 2026
8e0b58c
Fix features of vortex-python (#7837)
robert3005 May 7, 2026
d86cece
skip[array]: add a benchmark of the optimize loop (#7840)
joseph-isaacs May 8, 2026
f99cc16
perf[array]: small vec for slots (#7825)
joseph-isaacs May 8, 2026
823991f
feat: all_non_distinct aggregation make fuzzer faster (#7827)
joseph-isaacs May 8, 2026
31840e2
skip[fuzz]: run the fuzzer for longer less times (#7842)
joseph-isaacs May 8, 2026
b212667
Improve intersect_by_rank performance (#7744)
robert3005 May 8, 2026
fb2feb2
Stop ignoring tests (#7844)
robert3005 May 8, 2026
d0d9a8b
fix[bitpacked]: slice patches in execute method (not reduce). (#7839)
joseph-isaacs May 8, 2026
f3d5f09
Collect slots directly into smallvec instead of using vec and into (#…
robert3005 May 8, 2026
ff12040
TurboQuant again! (#7829)
connortsui20 May 8, 2026
96dbb87
Faster Mask::from_slices (#7857)
gatesn May 9, 2026
af8d9d0
Update react monorepo to v19.2.6 (#7866)
renovate[bot] May 11, 2026
76d2173
Update release-drafter/release-drafter action to v7.3.0 (#7868)
renovate[bot] May 11, 2026
f3dfa0c
Update tailwindcss monorepo to v4.3.0 (#7872)
renovate[bot] May 11, 2026
b3afa6f
Update opentelemetry-rust monorepo to 0.32.0 (#7867)
renovate[bot] May 11, 2026
24a5885
Update Rust crate noodles-bgzf to 0.47.0 (#7869)
renovate[bot] May 11, 2026
74dc39f
Update dependency io.netty:netty-bom to v4.2.13.Final (#7865)
renovate[bot] May 11, 2026
867670f
Lock file maintenance (#7864)
renovate[bot] May 11, 2026
ab7b253
Update taiki-e/install-action digest to fa0dd4c (#7861)
renovate[bot] May 11, 2026
b2a49b8
Update aws-actions/configure-aws-credentials digest to d979d5b (#7860)
renovate[bot] May 11, 2026
a2fafac
Update Rust crate noodles-vcf to 0.88.0 (#7870)
renovate[bot] May 11, 2026
12045d7
Update anthropics/claude-code-action digest to 476e359 (#7859)
renovate[bot] May 11, 2026
f01d1d7
Pin actions/checkout action to de0fac2 (#7858)
renovate[bot] May 11, 2026
1fb891d
Lock file maintenance (#7863)
renovate[bot] May 11, 2026
1e7ad82
Update all patch updates (#7862)
renovate[bot] May 11, 2026
04fc7f6
skip[fuzz]: fix random_list gen (#7875)
joseph-isaacs May 11, 2026
93d0b4e
bench: dyn dispatch / standalone perf comparsion (#7883)
0ax1 May 11, 2026
7b317ef
Remove unused protobuf dependency in java bindings (#7886)
robert3005 May 11, 2026
af7839a
fix[array]: get_item reduce + struct wrong nullability (#7887)
joseph-isaacs May 11, 2026
e160125
Update dependency urllib3 to v2.7.0 [SECURITY] (#7888)
renovate[bot] May 11, 2026
92962a4
simplify projection expression for SELECT * in duckdb (#7885)
myrrc May 12, 2026
2ee2033
Widen `VarBinBuilder` offets during FSST compress (#7853)
connortsui20 May 12, 2026
9e2991c
Add Executor::spawn_io (#7894)
AdamGS May 12, 2026
3e93048
tracing logger for duckdb (#7892)
myrrc May 12, 2026
6f54d3d
Reorder agg kernel dispatch, and have Combined use inner accumulators…
gatesn May 12, 2026
c663fa9
Update flatbuffers generated (#7899)
connortsui20 May 12, 2026
e56c80d
Add `DType::Union` variant carrying just `Nullability` (#7901)
connortsui20 May 12, 2026
68e7f7b
fix[gpu]: CUDA sliced patch views with device patches (#7911)
0ax1 May 13, 2026
7929482
Pin nightly toolchain version in public-api (#7909)
robert3005 May 13, 2026
c59e16f
use nonnull in lifetime_wrapper (#7908)
myrrc May 13, 2026
59fcd14
Fix `IsSorted` return dtype (#7914)
connortsui20 May 13, 2026
d4f40a9
Add statistic expression (#7854)
gatesn May 13, 2026
cab6036
Consistent `DType` match (#7916)
connortsui20 May 13, 2026
919e31e
Use many connections on the benchmarks server (#7852)
connortsui20 May 13, 2026
da19bca
Add Arrow to Vortex conversion to C FFI (#7906)
robert3005 May 13, 2026
7349cd6
Add more benchmarks for take on filter array (#7876)
robert3005 May 14, 2026
7668bef
register_splits to get both offset and relative row_range (#7913)
onursatici May 14, 2026
95f429d
Remove needless fmt_sql (#7409)
gatesn May 14, 2026
8c5a0f1
1: Remove chunked special case from stat execution (#7928)
gatesn May 14, 2026
a514cef
Centralize aggregate stat bridge (#7931)
gatesn May 14, 2026
b3e1673
2: Add NullCount aggregate function (#7933)
gatesn May 14, 2026
1f6fb0a
Use async_fs file for java and python writes to avoid object_store bu…
robert3005 May 14, 2026
c0b6f31
Add a nicer progress bar and file-based filter (#7942)
AdamGS May 15, 2026
e3cc14f
Add stats rewrite session API (#7930)
gatesn May 15, 2026
b71de62
Switch python runtime to CurrentThreadRuntime (#7896)
robert3005 May 15, 2026
3006be6
Updated Variant array and the new VariantGet expression (#7877)
AdamGS May 15, 2026
3000bc0
feat[fastlanes]: allow delta to support signed bases (#7923)
joseph-isaacs May 15, 2026
b54dde0
Update Claude Code action to v1.0.123 (#7946)
joseph-isaacs May 15, 2026
da03e17
Fix segfault when calling vx CLI from Python bindings (#7947)
joseph-isaacs May 15, 2026
13c06b8
Mimic duckdb's post-filter cardinality estimates (#7895)
myrrc May 15, 2026
254f91b
perf[array]: add the SimplifyCache to `optimize` (#7948)
joseph-isaacs May 15, 2026
96dda71
Clippy deny absolute_paths longer than 3 elements (#7950)
robert3005 May 15, 2026
d71d3d3
pluggable registry for input/export arrow kernels (#7824)
a10y May 15, 2026
d91d24a
Update dependency typescript-eslint to v8.59.3 (#7963)
renovate[bot] May 18, 2026
f21d8cd
Update slf4j monorepo to v2.0.18 (#7964)
renovate[bot] May 18, 2026
343ed9e
Update dependency eslint to v10.4.0 (#7966)
renovate[bot] May 18, 2026
fdd1ed0
Update plugin com.diffplug.spotless to v8.5.1 (#7968)
renovate[bot] May 18, 2026
e2e79f7
Update Rust crate dashmap to v6.2.1 (#7969)
renovate[bot] May 18, 2026
280f429
Update storybook monorepo to v10.4.0 (#7970)
renovate[bot] May 18, 2026
f94cbd0
Update arrow-rs (#7965)
renovate[bot] May 18, 2026
3c9f0e2
Update dependency lucide-react to v1.16.0 (#7967)
renovate[bot] May 18, 2026
5d2ae8b
duckdb: flatten runend arrays on export if requested (#7951)
myrrc May 18, 2026
d45538e
fix[gpu]: handle sliced BP arrays in CUDA (#7912)
0ax1 May 18, 2026
da4dfbd
Update all patch updates (#7962)
renovate[bot] May 18, 2026
35794cc
Update taiki-e/install-action digest to 7be9fd8 (#7961)
renovate[bot] May 18, 2026
829d40a
Update EmbarkStudios/cargo-deny-action digest to 6c8f9fa (#7960)
renovate[bot] May 18, 2026
afb9f10
Lock file maintenance (#7975)
renovate[bot] May 18, 2026
2a5c895
Update cloudflare/wrangler-action action to v4 (#7972)
renovate[bot] May 18, 2026
c8d915a
Fix semantic merge clippy error (#7981)
robert3005 May 18, 2026
d25c33f
Update anthropics/claude-code-action digest to 51ea8ea (#7959)
renovate[bot] May 18, 2026
c573cef
Lock file maintenance (#7976)
renovate[bot] May 18, 2026
97f21d0
Update Rust crate lance-encoding to v6 (#7974)
renovate[bot] May 18, 2026
bf1527e
fix[gpu]: retain device buffers for dyn dispatch kernel (#7980)
0ax1 May 18, 2026
399cd61
move getrandom backend override to config.toml (#7983)
onursatici May 18, 2026
52e26d1
Close stale prs action needs actions write to update its own cache (#…
robert3005 May 18, 2026
faf7e42
BufferMut::zeroed_aligned stores actually allocated length instead of…
robert3005 May 18, 2026
7b47788
bench: bit-packed compare-constant baseline (#8012)
joseph-isaacs May 18, 2026
5330f74
Approving a bot pr will retrigger approval check (#7982)
robert3005 May 19, 2026
2fa51e2
chore[gpu]: error out in case of mixed cpu/gpu execution (#8016)
0ax1 May 19, 2026
8aaaab8
Add Samply and benchmark skills (#8021)
gatesn May 19, 2026
ba5064a
Slice list/list_view elements in duckdb exporter (#8020)
myrrc May 19, 2026
f97805d
Thread scope dtype through stats rewrites (#8024)
gatesn May 19, 2026
a88e6b1
Fix nullability of Constant BetweenReduce rule (#8029)
robert3005 May 20, 2026
2033ef5
Remove type coercion (#8032)
gatesn May 20, 2026
1d5d234
Bump DuckDB to 1.5.3 (#8031)
AdamGS May 20, 2026
008c1d9
Avoid re-building vortex-duckdb twice (#8035)
AdamGS May 20, 2026
971aa1c
Revert "Remove type coercion" (#8041)
gatesn May 20, 2026
19a1fb3
Propagate ExecutionCtx through CASE WHEN binary merges (#8040)
dimitarvdimitrov May 20, 2026
aeb5436
Allow writing Variant to files and test parquet-variant IO (#7945)
AdamGS May 21, 2026
3a6db6c
chore: polish `ArrowDeviceArray` (#8023)
0ax1 May 21, 2026
06cf4a3
Fallback from fsst specialised like expression if there are escape ch…
robert3005 May 21, 2026
a8fb30e
Add pruning aggregate functions (#8025)
gatesn May 21, 2026
f852d72
fix: build CUDA kernels as multi-arch fatbin with PTX fallback (#8047)
0ax1 May 21, 2026
c54ce7e
Benchmarks Website V3: Admin and Auto-Deploy (#7849)
connortsui20 May 21, 2026
012d0ec
Revert "fix: build CUDA kernels as multi-arch fatbin with PTX fallbac…
robert3005 May 22, 2026
dba7935
Add bit-packed cast benchmark (#8058)
joseph-isaacs May 22, 2026
21a7cc8
Add TakeExecute implementation for FilterArray (#7393)
robert3005 May 22, 2026
0e6915b
Skip computation of expression return dtype unless there are no chunk…
robert3005 May 22, 2026
1a5079b
Fix random-access benchmark console display (#8054)
robert3005 May 22, 2026
1241e14
Add bit-packed widening cast pushdown (#8059)
joseph-isaacs May 22, 2026
251e603
perf[array]: cast to prim faster (#8062)
joseph-isaacs May 22, 2026
c275f2c
perf[array]: bool filter kernel optimisation (#7125)
joseph-isaacs May 22, 2026
90dea93
When delegating to arrow comparison make sure that both left and righ…
vortex-claude[bot] May 22, 2026
96ffd3f
Handle overflow in decimal between implementation if the passed scala…
vortex-claude[bot] May 22, 2026
7dace0d
Expression pushdown for duckdb (#7727)
myrrc May 22, 2026
ae19fe7
Explicit Precision::Absent variant instead of Option<Precision> (#8042)
AdamGS May 22, 2026
495f30e
Document existing TurboQuant types (#8053)
connortsui20 May 22, 2026
2699169
perf: remove implicit `ListViewArray` rebuild during `take` and `filt…
mhk197 May 24, 2026
8570410
Update dependency @types/react to v19.2.15 (#8087)
renovate[bot] May 25, 2026
7770582
Update dependency @tanstack/react-virtual to v3.13.25 (#8086)
renovate[bot] May 25, 2026
4d1a181
Update dependency io.netty:netty-bom to v4.2.14.Final (#8088)
renovate[bot] May 25, 2026
fd2e6f2
Update dependency typescript-eslint to v8.59.4 (#8089)
renovate[bot] May 25, 2026
7f7716e
Update plugin com.palantir.java-format to v2.91.0 (#8092)
renovate[bot] May 25, 2026
8e91870
Update storybook monorepo to v10.4.1 (#8090)
renovate[bot] May 25, 2026
1f491a7
Update junit-framework monorepo to v6.1.0 (#8091)
renovate[bot] May 25, 2026
0a097b0
Fix dtype mismatch in Filter::take logic (#8073)
robert3005 May 26, 2026
fa4245e
Update EmbarkStudios/cargo-deny-action digest to a531616 (#8083)
renovate[bot] May 26, 2026
dbfe521
Simplify some of the buffer impl (#8098)
AdamGS May 26, 2026
94d1f5a
test: improve arrow device array test coverage (#8101)
0ax1 May 26, 2026
237b348
Use plain functions instead of vtab in duckdb (#8102)
myrrc May 26, 2026
4823563
like stat falsification respects escape characters (#8103)
onursatici May 26, 2026
f900433
wipe duckdb source in build.rs if not fully extracted before (#8106)
onursatici May 26, 2026
0ec59d3
Add FilterPushdown support to spark data source (#7785)
robert3005 May 26, 2026
581e9dd
Update all patch updates (#8085)
renovate[bot] May 26, 2026
ab493e5
`PrimitiveArrayExt::narrow` expose `ExecutionCtx` (#8096)
joseph-isaacs May 26, 2026
ae30d83
Update taiki-e/install-action digest to 920ab18 (#8084)
renovate[bot] May 26, 2026
6ddc4d5
fastlanes: streaming compare + between kernels for BitPacked (#8015)
joseph-isaacs May 26, 2026
79190a8
Add CITATION.cff (#8112)
mprammer May 26, 2026
1819685
Add built-in stats rewrite rules (#7935)
gatesn May 27, 2026
3a805d0
Lock file maintenance (#8095)
renovate[bot] May 27, 2026
3acac72
Lock file maintenance (#8094)
renovate[bot] May 27, 2026
0e4ffc3
Update docker/setup-buildx-action digest to d7f5e7f (#8082)
renovate[bot] May 27, 2026
2fbb6cb
Update docker/login-action digest to 650006c (#8081)
renovate[bot] May 27, 2026
e7d297f
Update docker/build-push-action digest to f9f3042 (#8080)
renovate[bot] May 27, 2026
d28125f
Update codecov/codecov-action digest to e79a696 (#8079)
renovate[bot] May 27, 2026
12965d7
Update anthropics/claude-code-action digest to 787c5a0 (#8078)
renovate[bot] May 27, 2026
c84205b
Update actions/stale digest to eb5cf3a (#8077)
renovate[bot] May 27, 2026
25c0ff3
feat: support nullable Arrow Device array export (#8104)
0ax1 May 27, 2026
d7134a9
don't redownload duckdb for every branch (#7747)
myrrc May 27, 2026
4b089af
perf: aggregate min/max (#8061)
joseph-isaacs May 27, 2026
b8236af
Fix native library publish for non amd64 platforms (#8108)
robert3005 May 27, 2026
fab01a1
Use plain functions for copy function in duckdb (#8109)
myrrc May 27, 2026
76b0ad8
Remove pre df 53.0 polarsignals benchmark workaround (#8122)
robert3005 May 27, 2026
8265aa1
Remove public-api lockfiles (#8099)
AdamGS May 27, 2026
9e5ed2e
Add Sparse pushdown kernels for is_constant, sum, and compare (#8028)
joseph-isaacs May 27, 2026
e22c9dc
Use CachedId for ArrayExpr scalar function ID (#8124)
joseph-isaacs May 27, 2026
39fd9e6
fsst like to respect sql escape codes in the pattern (#8107)
onursatici May 27, 2026
7bfe690
Correctly handle Nan/Inf comparison in ALP between reduce (#8126)
robert3005 May 27, 2026
649fe0a
TurboQuant: better centroid initialization (#8116)
connortsui20 May 27, 2026
d76c266
Merge upstream Vortex 0.73.0 into spiceai-52
lukekim May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
437 changes: 437 additions & 0 deletions .agents/skills/bench-performance/SKILL.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions .agents/skills/bench-performance/agents/openai.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Agent-facing metadata for the bench-performance skill.
# NOTE(review): the nesting below is restored by convention (the keys under
# `interface` and `policy` are assumed to be their children) — confirm against
# the file in the repository.
interface:
  display_name: "Bench Performance"
  short_description: "Iterate on Vortex benchmark performance"
  default_prompt: "Use $bench-performance to investigate a vx-bench query performance issue with comparisons, logs, metrics, and profiling."
policy:
  allow_implicit_invocation: true
101 changes: 101 additions & 0 deletions .agents/skills/bench-performance/scripts/compare_gh_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""Summarize Vortex benchmark gh-json / JSONL output with target ratios."""

from __future__ import annotations

import argparse
import json
import statistics
from collections import defaultdict
from pathlib import Path
from typing import Any


def load_records(paths: list[Path]) -> list[dict[str, Any]]:
    """Collect benchmark records from one or more JSONL files.

    Each line is stripped before inspection. Lines that do not start with
    ``{`` (including blank lines) and lines that fail to parse as JSON are
    ignored. A parsed object is kept only when it has both a ``target`` and a
    ``value`` key; kept records are tagged with the originating path under
    ``_source``.

    Args:
        paths: Files to read, in order.

    Returns:
        The kept records in file order, then line order.
    """
    collected: list[dict[str, Any]] = []
    for source in paths:
        with source.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                text = raw_line.strip()
                # An empty string never starts with "{", so this also skips blanks.
                if not text.startswith("{"):
                    continue
                try:
                    parsed = json.loads(text)
                except json.JSONDecodeError:
                    continue
                if "target" not in parsed or "value" not in parsed:
                    continue
                parsed["_source"] = str(source)
                collected.append(parsed)
    return collected


def target_name(record: dict[str, Any]) -> str:
    """Return the ``engine:format`` label for a record's ``target``.

    A missing or falsy ``target`` is treated as an empty mapping, and a missing
    ``engine`` or ``format`` is rendered as ``?``.
    """
    info = record.get("target") or {}
    parts = (info.get("engine", "?"), info.get("format", "?"))
    return f"{parts[0]}:{parts[1]}"


def query_name(record: dict[str, Any]) -> str:
    """Return the grouping key for a record.

    The key is the part of ``name`` before the first ``/``. When ``name`` has
    no ``/`` the whole name is used, and when the name is empty the record's
    ``_source`` (or ``"unknown"``) is used instead.
    """
    label = str(record.get("name", ""))
    head, slash, _ = label.partition("/")
    if slash:
        return head
    if label:
        return label
    return str(record.get("_source", "unknown"))


def ns_to_ms(value: float) -> float:
    """Convert a duration from nanoseconds to milliseconds."""
    nanos_per_milli = 1e6
    return value / nanos_per_milli


def runtime_summary(record: dict[str, Any]) -> str:
    """Format a record's runtimes as ``min/median/max`` in milliseconds.

    The int/float entries of ``all_runtimes`` are used when that field is a
    list holding at least one such entry. Otherwise the record's ``value`` is
    reported for all three positions.
    """
    samples = record.get("all_runtimes")
    numeric: list[float] = []
    if isinstance(samples, list):
        numeric = sorted(float(s) for s in samples if isinstance(s, (int, float)))

    if numeric:
        low, middle, high = numeric[0], statistics.median(numeric), numeric[-1]
    else:
        low = middle = high = float(record["value"])
    return f"{ns_to_ms(low):.3f}/{ns_to_ms(middle):.3f}/{ns_to_ms(high):.3f}"


def main() -> int:
    """Print a tab-separated comparison of benchmark targets for each query.

    Records are loaded from the JSONL files named on the command line and
    grouped by ``query_name``. Within a group, rows are sorted by target name.
    The baseline is the first row whose target equals ``--baseline`` when that
    option is given and matches; otherwise it is the first row of the group.

    Returns:
        ``1`` when no records were loaded, ``0`` otherwise.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("jsonl", nargs="+", type=Path, help="gh-json JSONL files")
    cli.add_argument(
        "--baseline",
        help="Baseline target such as datafusion:parquet. Defaults to first target per query.",
    )
    options = cli.parse_args()

    loaded = load_records(options.jsonl)
    if not loaded:
        print("No benchmark records found.")
        return 1

    by_query: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for entry in loaded:
        by_query[query_name(entry)].append(entry)

    print("query\ttarget\tvalue_ms\tratio\tmin/median/max_ms\tsource")
    for query in sorted(by_query):
        ordered = sorted(by_query[query], key=target_name)

        reference = None
        if options.baseline:
            for candidate in ordered:
                if target_name(candidate) == options.baseline:
                    reference = candidate
                    break
        if reference is None:
            reference = ordered[0]
        reference_value = float(reference["value"])

        for entry in ordered:
            measured = float(entry["value"])
            # A zero baseline cannot be divided by; report NaN instead.
            if reference_value:
                ratio = measured / reference_value
            else:
                ratio = float("nan")
            columns = [
                str(query),
                target_name(entry),
                f"{ns_to_ms(measured):.3f}",
                f"{ratio:.2f}x",
                runtime_summary(entry),
                str(entry["_source"]),
            ]
            print("\t".join(columns))
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
114 changes: 114 additions & 0 deletions .agents/skills/bench-performance/scripts/compare_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""Compare Vortex benchmark --show-metrics text logs."""

from __future__ import annotations

import argparse
import re
from pathlib import Path

METRIC_RE = re.compile(r"^\s*([A-Za-z0-9_.\[\]-]+)=(.+?)\s*$")
NUMBER_UNIT_RE = re.compile(r"^\s*([-+]?[0-9]*\.?[0-9]+)\s*([A-Za-z/]+)?\s*$")

UNIT_SCALE = {
"": 1.0,
"K": 1_000.0,
"M": 1_000_000.0,
"B": 1.0,
"KB": 1_000.0,
"MB": 1_000_000.0,
"GB": 1_000_000_000.0,
"ns": 1e-9,
"us": 1e-6,
"µs": 1e-6,
"ms": 1e-3,
"s": 1.0,
}


def parse_value(raw: str) -> tuple[float | None, str]:
normalized = raw.strip().replace("\u00b5", "µ")
match = NUMBER_UNIT_RE.match(normalized)
if not match:
return None, raw.strip()
value = float(match.group(1))
unit = match.group(2) or ""
scale = UNIT_SCALE.get(unit)
if scale is None:
return value, unit
return value * scale, unit


def parse_metrics(path: Path) -> dict[str, tuple[float | None, str, str]]:
    """Read every ``name=value`` metric line from a log file.

    Lines that do not match ``METRIC_RE`` are ignored. When a metric name
    appears more than once, the last occurrence wins.

    Returns:
        A mapping from metric name to ``(parsed_number, raw_text, unit)``, with
        the number and unit as returned by ``parse_value``.
    """
    found: dict[str, tuple[float | None, str, str]] = {}
    with path.open("r", encoding="utf-8") as handle:
        for text in handle:
            hit = METRIC_RE.match(text)
            if hit is None:
                continue
            key = hit.group(1)
            payload = hit.group(2)
            number, unit = parse_value(payload)
            found[key] = (number, payload.strip(), unit)
    return found


def default_metrics(all_metrics: list[dict[str, tuple[float | None, str, str]]]) -> list[str]:
preferred = [
"vortex.io.read.duration_count",
"vortex.io.read.total_size",
"vortex.io.read.duration_max",
"vortex.io.read.size_max",
"vortex.file.segments.cache.misses",
"vortex.file.segments.cache.hits",
"io.requests.individual",
"io.requests.coalesced",
"time_elapsed_opening",
"time_elapsed_processing",
"time_elapsed_scanning_total",
"time_elapsed_scanning_until_data",
"output_rows",
"output_bytes",
]
present = set().union(*(m.keys() for m in all_metrics))
return [metric for metric in preferred if metric in present]


def main() -> int:
    """Print a tab-separated table comparing metrics across log files.

    The first log on the command line is the baseline. Each cell shows the raw
    metric text, followed by the ratio to the baseline when the baseline value
    is non-zero and the cell's value parsed as a number. A metric missing from
    a log is shown as ``-``.

    Returns:
        ``1`` when no metric names were selected, ``0`` otherwise.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("logs", nargs="+", type=Path)
    cli.add_argument(
        "--metrics",
        help="Comma-separated metric names. Defaults to common Vortex scan metrics.",
    )
    options = cli.parse_args()

    per_log = [(log, parse_metrics(log)) for log in options.logs]
    if options.metrics:
        selected = [part.strip() for part in options.metrics.split(",") if part.strip()]
    else:
        selected = default_metrics([table for _, table in per_log])

    if not selected:
        print("No metrics found.")
        return 1

    absent = (None, "", "")
    reference = per_log[0][1]
    print("metric\t" + "\t".join(str(log) for log, _ in per_log))
    for name in selected:
        reference_value = reference.get(name, absent)[0]
        row = [name]
        for _, table in per_log:
            number, shown, _unit = table.get(name, absent)
            if not shown:
                row.append("-")
                continue
            if reference_value and number is not None:
                shown = f"{shown} ({number / reference_value:.2f}x)"
            row.append(shown)
        print("\t".join(row))
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
141 changes: 141 additions & 0 deletions .agents/skills/bench-performance/scripts/summarize_conjunct_debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""Summarize Vortex conjunct evaluation tracing lines."""

from __future__ import annotations

import argparse
import collections
import re
import statistics
from pathlib import Path

# Matches one `key=value` field. The value is a double-quoted string (with
# backslash escapes), `Some(...)`, `None`, or a run of non-whitespace characters.
FIELD_RE = re.compile(
    r"(?P<key>[A-Za-z_][A-Za-z0-9_]*)="
    r"(?P<value>\"(?:[^\"\\]|\\.)*\"|Some\([^)]+\)|None|[^\s]+)"
)
# Matches a space followed by `identifier=`, i.e. the point where the
# key=value fields begin after a line's free-text message.
FIRST_FIELD_RE = re.compile(r" [A-Za-z_][A-Za-z0-9_]*=")


def parse_value(raw: str) -> str:
    """Unwrap a field value.

    Surrounding double quotes are removed, and ``Some(x)`` becomes ``x``.
    Anything else is returned unchanged.
    """
    wrappers = (('"', '"'), ("Some(", ")"))
    for opening, closing in wrappers:
        if raw.startswith(opening) and raw.endswith(closing):
            return raw[len(opening) : -len(closing)]
    return raw


def as_int(fields: dict[str, str], key: str) -> int:
    """Return ``fields[key]`` as an int, or 0 when it is absent or ``"None"``.

    Raises:
        ValueError: If the value is present but is not an integer literal.
    """
    text = fields.get(key)
    if text is None or text == "None":
        return 0
    return int(text)


def as_float(fields: dict[str, str], key: str) -> float:
    """Return ``fields[key]`` as a float, or 0.0 when it is absent or ``"None"``.

    Raises:
        ValueError: If the value is present but is not a float literal.
    """
    text = fields.get(key)
    if text is None or text == "None":
        return 0.0
    return float(text)


def message_for(line: str) -> str | None:
    """Extract the free-text message of a conjunct-evaluation log line.

    The message is the text after the first ``:<digits>: `` marker (or the
    whole line when there is no marker), cut off where the first ``key=``
    field begins. It is returned only when it contains both ``conjunct`` and
    ``evaluated``; otherwise the result is ``None``.
    """
    stripped = line.rstrip()
    located = re.search(r":\d+: (?P<rest>.*)$", stripped)
    remainder = stripped if located is None else located.group("rest")

    boundary = FIRST_FIELD_RE.search(remainder)
    end = len(remainder) if boundary is None else boundary.start()
    text = remainder[:end].strip()

    if "conjunct" not in text or "evaluated" not in text:
        return None
    return text


def quantile(values: list[int], fraction: float) -> float:
    """Return the element at the given fractional rank of the sorted values.

    The rank is ``round((n - 1) * fraction)`` clamped to ``[0, n - 1]``; no
    interpolation is performed. An empty input yields ``0.0``.
    """
    if not values:
        return 0.0
    ranked = sorted(values)
    last = len(ranked) - 1
    position = round(last * fraction)
    position = max(0, position)
    position = min(last, position)
    return float(ranked[position])


def _int_or_fallback(fields: dict[str, str], key: str, fallback_key: str) -> int:
    """Read ``key`` as an int, using ``fallback_key`` only when ``key`` is absent or ``None``."""
    if fields.get(key) in (None, "None"):
        return as_int(fields, fallback_key)
    return as_int(fields, key)


def main() -> int:
    """Summarize conjunct evaluation tracing lines from one or more logs.

    Prints three sections:

    * one row per ``(message, index, conjunct)`` group with summed row counts,
      elapsed time and the ratio of computed rows to input rows;
    * the most common conjunct evaluation orders (``--top-orders``);
    * the median, p90 and maximum number of events per window.

    A window is keyed by ``(scan_label, coord_start, coord_end,
    output_coord_hash)``. Each matching line appends its conjunct index to the
    window's order. A line whose ``output_rows`` is 0 (or absent) closes the
    window and counts its order; windows still open at end of input are
    counted as well.

    Returns:
        ``1`` when no matching lines were found, ``0`` otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("logs", nargs="+", type=Path)
    parser.add_argument("--top-orders", type=int, default=12)
    parser.add_argument("--message-regex", help="Only include messages matching this regex")
    args = parser.parse_args()

    message_re = re.compile(args.message_regex) if args.message_regex else None
    groups: dict[tuple[str, str, str], list[dict[str, str]]] = collections.defaultdict(list)
    order_counts: collections.Counter[tuple[str, ...]] = collections.Counter()
    order_input_rows: collections.defaultdict[tuple[str, ...], int] = collections.defaultdict(int)
    current_windows: dict[tuple[str, str, str, str], list[str]] = {}
    rows = 0

    for path in args.logs:
        # errors="replace" keeps the scan going over logs with invalid UTF-8.
        with path.open("r", encoding="utf-8", errors="replace") as f:
            for line in f:
                message = message_for(line)
                if message is None or (message_re is not None and not message_re.search(message)):
                    continue
                fields = {match.group("key"): parse_value(match.group("value")) for match in FIELD_RE.finditer(line)}
                original_idx = (
                    fields.get("original_idx") or fields.get("conjunct_idx") or fields.get("child_idx") or "?"
                )
                conjunct = fields.get("conjunct", "")
                groups[(message, original_idx, conjunct)].append(fields)
                rows += 1

                window_key = (
                    fields.get("scan_label", ""),
                    fields.get("coord_start", ""),
                    fields.get("coord_end", ""),
                    fields.get("output_coord_hash", ""),
                )
                order = current_windows.setdefault(window_key, [])
                order.append(original_idx)
                # No surviving rows: the window is finished, so record its order.
                if as_int(fields, "output_rows") == 0:
                    order_tuple = tuple(order)
                    order_counts[order_tuple] += 1
                    order_input_rows[order_tuple] += as_int(fields, "input_rows")
                    current_windows.pop(window_key, None)

    # Windows that never reached zero output rows still count as one order each.
    for order in current_windows.values():
        if order:
            order_tuple = tuple(order)
            order_counts[order_tuple] += 1

    if rows == 0:
        print("No conjunct debug rows found.")
        return 1

    print(f"rows={rows:,}")
    print(
        "message\tconjunct\tindex\tevents\tinput_rows\toutput_rows\tcompute_input_rows\tcompute_output_rows\telapsed_ms\tcompute_per_input"
    )
    for (message, idx, conjunct), entries in sorted(groups.items(), key=lambda item: item[0]):
        input_rows = sum(as_int(e, "input_rows") for e in entries)
        output_rows = sum(as_int(e, "output_rows") for e in entries)
        # Fall back to the plain row counts only when the compute_* field is
        # missing or None; an explicit 0 is a real measurement and is kept.
        compute_input = sum(_int_or_fallback(e, "compute_input_rows", "input_rows") for e in entries)
        compute_output = sum(_int_or_fallback(e, "compute_output_rows", "output_rows") for e in entries)
        elapsed = sum(as_float(e, "elapsed_ms") for e in entries)
        ratio = compute_input / input_rows if input_rows else 0.0
        print(
            f"{message}\t{conjunct}\t{idx}\t{len(entries):,}\t{input_rows:,}\t{output_rows:,}\t"
            f"{compute_input:,}\t{compute_output:,}\t{elapsed:,.3f}\t{ratio:.3f}"
        )

    print("\norders:")
    for order, count in order_counts.most_common(args.top_orders):
        print(f"  {order}: count={count:,} input_rows={order_input_rows[order]:,}")

    print("\nevents_per_window:")
    # Expand each order by its count so the statistics are per window.
    lengths = [len(order) for order, count in order_counts.items() for _ in range(count)]
    if lengths:
        print(f"  median={statistics.median(lengths):.0f} p90={quantile(lengths, 0.90):.0f} max={max(lengths)}")

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
Loading
Loading