Skip to content

Commit 0fd1967

Browse files
feat: add query + incremental regression benchmarks and README footprint section
Add 4 new regression benchmarks (query depth scaling, diff-impact latency, incremental build tiers, import resolution throughput) with dual-engine support, report updaters, and CI workflow jobs. Add lightweight footprint section to README with live shields.io badges for unpacked size, dependency stars, and weekly downloads.
1 parent fcc19d6 commit 0fd1967

7 files changed

Lines changed: 836 additions & 0 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,129 @@ jobs:
146146
--head "$BRANCH" \
147147
--title "docs: update embedding benchmarks" \
148148
--body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
149+
150+
# Runs the query benchmark against main, regenerates the query report and, if
# the report changed, opens a pull request with the refreshed file.
query-benchmark:
  runs-on: ubuntu-latest
  if: >-
    github.event_name == 'workflow_dispatch' ||
    github.event.workflow_run.conclusion == 'success'
  permissions:
    contents: write
    pull-requests: write

  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: main
        token: ${{ secrets.GITHUB_TOKEN }}

    - uses: actions/setup-node@v4
      with:
        node-version: "22"

    - run: npm install

    - name: Run query benchmark
      run: node scripts/query-benchmark.js 2>/dev/null > query-benchmark-result.json

    - name: Update query report
      run: node scripts/update-query-report.js query-benchmark-result.json

    - name: Upload query result
      uses: actions/upload-artifact@v4
      with:
        name: query-benchmark-result
        path: query-benchmark-result.json

    - name: Check for changes
      id: changes
      run: |
        # `git status --porcelain` reports modified AND untracked files, so the
        # very first generated report (not yet tracked on main) is detected too;
        # `git diff HEAD` would silently ignore an untracked file.
        if [ -n "$(git status --porcelain -- generated/QUERY-BENCHMARKS.md)" ]; then
          echo "changed=true" >> "$GITHUB_OUTPUT"
        else
          echo "changed=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Commit and push via PR
      if: steps.changes.outputs.changed == 'true'
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        git config user.name "github-actions[bot]"
        git config user.email "github-actions[bot]@users.noreply.github.com"

        # Timestamped branch name keeps repeated runs from colliding.
        BRANCH="benchmark/query-$(date +%Y%m%d-%H%M%S)"
        git checkout -b "$BRANCH"
        git add generated/QUERY-BENCHMARKS.md
        git commit -m "docs: update query benchmarks"
        git push origin "$BRANCH"

        gh pr create \
          --base main \
          --head "$BRANCH" \
          --title "docs: update query benchmarks" \
          --body "Automated query benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
212+
213+
# Runs the incremental-build benchmark against main, regenerates the
# incremental report and, if the report changed, opens a pull request with the
# refreshed file.
incremental-benchmark:
  runs-on: ubuntu-latest
  if: >-
    github.event_name == 'workflow_dispatch' ||
    github.event.workflow_run.conclusion == 'success'
  permissions:
    contents: write
    pull-requests: write

  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: main
        token: ${{ secrets.GITHUB_TOKEN }}

    - uses: actions/setup-node@v4
      with:
        node-version: "22"

    - run: npm install

    - name: Run incremental benchmark
      run: node scripts/incremental-benchmark.js 2>/dev/null > incremental-benchmark-result.json

    - name: Update incremental report
      run: node scripts/update-incremental-report.js incremental-benchmark-result.json

    - name: Upload incremental result
      uses: actions/upload-artifact@v4
      with:
        name: incremental-benchmark-result
        path: incremental-benchmark-result.json

    - name: Check for changes
      id: changes
      run: |
        # `git status --porcelain` reports modified AND untracked files, so the
        # very first generated report (not yet tracked on main) is detected too;
        # `git diff HEAD` would silently ignore an untracked file.
        if [ -n "$(git status --porcelain -- generated/INCREMENTAL-BENCHMARKS.md)" ]; then
          echo "changed=true" >> "$GITHUB_OUTPUT"
        else
          echo "changed=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Commit and push via PR
      if: steps.changes.outputs.changed == 'true'
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        git config user.name "github-actions[bot]"
        git config user.email "github-actions[bot]@users.noreply.github.com"

        # Timestamped branch name keeps repeated runs from colliding.
        BRANCH="benchmark/incremental-$(date +%Y%m%d-%H%M%S)"
        git checkout -b "$BRANCH"
        git add generated/INCREMENTAL-BENCHMARKS.md
        git commit -m "docs: update incremental benchmarks"
        git push origin "$BRANCH"

        gh pr create \
          --base main \
          --head "$BRANCH" \
          --title "docs: update incremental benchmarks" \
          --body "Automated incremental benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."

CONTRIBUTING.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ description.
153153
|-----------|-----------------|-------------|
154154
| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
155155
| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |
156+
| `node scripts/query-benchmark.js` | Query depth scaling, diff-impact latency | Changes to `queries.js`, `resolve.js`, or `db.js` |
157+
| `node scripts/incremental-benchmark.js` | Incremental build, import resolution throughput | Changes to `builder.js`, `resolve.js`, `parser.js`, or `journal.js` |
156158

157159
### How to report results
158160

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,20 @@ Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHM
384384

385385
Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
386386

387+
### Lightweight Footprint
388+
389+
<a href="https://www.npmjs.com/package/@optave/codegraph"><img src="https://img.shields.io/npm/unpacked-size/@optave/codegraph?style=flat-square&label=unpacked%20size" alt="npm unpacked size" /></a>
390+
391+
Only **3 runtime dependencies** — everything else is optional or a devDependency:
392+
393+
| Dependency | What it does | Stars | Weekly downloads |
394+
|---|---|---|---|
395+
| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | Fast, synchronous SQLite driver | ![GitHub stars](https://img.shields.io/github/stars/WiseLibs/better-sqlite3?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/better-sqlite3?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
396+
| [commander](https://github.com/tj/commander.js) | CLI argument parsing | ![GitHub stars](https://img.shields.io/github/stars/tj/commander.js?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/commander?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
397+
| [web-tree-sitter](https://github.com/tree-sitter/tree-sitter) | WASM tree-sitter bindings | ![GitHub stars](https://img.shields.io/github/stars/tree-sitter/tree-sitter?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/web-tree-sitter?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
398+
399+
Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/sdk` (MCP server) — lazy-loaded only when needed.
400+
387401
## 🤖 AI Agent Integration
388402

389403
### MCP Server

scripts/incremental-benchmark.js

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Incremental build benchmark — measures build tiers and import resolution.
5+
*
6+
* Measures full build, no-op rebuild, and single-file rebuild for both
7+
* native and WASM engines. Also benchmarks import resolution throughput:
8+
* native batch vs JS fallback.
9+
*
10+
* Usage: node scripts/incremental-benchmark.js > result.json
11+
*/
12+
13+
import fs from 'node:fs';
14+
import path from 'node:path';
15+
import { performance } from 'node:perf_hooks';
16+
import { fileURLToPath, pathToFileURL } from 'node:url';
17+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
18+
const root = path.resolve(__dirname, '..');
19+
20+
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
21+
const dbPath = path.join(root, '.codegraph', 'graph.db');
22+
23+
const { buildGraph } = await import(pathToFileURL(path.join(root, 'src', 'builder.js')).href);
24+
const { statsData } = await import(pathToFileURL(path.join(root, 'src', 'queries.js')).href);
25+
const { resolveImportPath, resolveImportsBatch, resolveImportPathJS } = await import(
26+
pathToFileURL(path.join(root, 'src', 'resolve.js')).href
27+
);
28+
const { isNativeAvailable } = await import(
29+
pathToFileURL(path.join(root, 'src', 'native.js')).href
30+
);
31+
32+
// Redirect console.log to stderr so only JSON goes to stdout
33+
const origLog = console.log;
34+
console.log = (...args) => console.error(...args);
35+
36+
const RUNS = 3;
37+
const PROBE_FILE = path.join(root, 'src', 'queries.js');
38+
39+
/**
 * Returns the median of a list of numbers without modifying the input.
 *
 * @param {number[]} arr Samples in any order.
 * @returns {number} The middle value, or the mean of the two middle values
 *   when the list has an even length.
 */
function median(arr) {
  const ordered = Array.from(arr);
  ordered.sort((x, y) => x - y);

  const half = Math.floor(ordered.length / 2);
  if (ordered.length % 2 !== 0) {
    return ordered[half];
  }
  return (ordered[half - 1] + ordered[half]) / 2;
}
44+
45+
/**
 * Rounds a number to one decimal place.
 *
 * @param {number} n Value to round.
 * @returns {number} `n` rounded to the nearest tenth.
 */
function round1(n) {
  const tenths = Math.round(n * 10);
  return tenths / 10;
}
48+
49+
/**
 * Benchmark build tiers for a given engine.
 *
 * Times three scenarios, RUNS times each, and reports the rounded median:
 * a full build from a deleted DB, an incremental rebuild with nothing changed,
 * and an incremental rebuild after appending a comment to PROBE_FILE.
 *
 * @param {string} engine Engine name forwarded to `buildGraph`.
 * @returns {Promise<{ fullBuildMs: number, noopRebuildMs: number, oneFileRebuildMs: number }>}
 */
async function benchmarkBuildTiers(engine) {
  // Runs a single build and yields its wall-clock duration in milliseconds.
  const timeBuild = async (incremental) => {
    const startedAt = performance.now();
    await buildGraph(root, { engine, incremental });
    return performance.now() - startedAt;
  };
  const medianMs = (samples) => Math.round(median(samples));

  // Tier 1: full build, starting each run without an existing DB.
  const fullSamples = [];
  for (let run = 0; run < RUNS; run += 1) {
    if (fs.existsSync(dbPath)) {
      fs.unlinkSync(dbPath);
    }
    fullSamples.push(await timeBuild(false));
  }

  // Tier 2: incremental rebuild with no file changes.
  const noopSamples = [];
  for (let run = 0; run < RUNS; run += 1) {
    noopSamples.push(await timeBuild(true));
  }

  // Tier 3: incremental rebuild after touching exactly one file.
  const pristine = fs.readFileSync(PROBE_FILE, 'utf8');
  const probeSamples = [];
  try {
    for (let run = 0; run < RUNS; run += 1) {
      // A different trailing comment per run makes the content differ each time.
      fs.writeFileSync(PROBE_FILE, `${pristine}\n// probe-${run}\n`);
      probeSamples.push(await timeBuild(true));
    }
  } finally {
    // Always put the probe file back, then rebuild once so the DB reflects it.
    fs.writeFileSync(PROBE_FILE, pristine);
    await buildGraph(root, { engine, incremental: true });
  }

  return {
    fullBuildMs: medianMs(fullSamples),
    noopRebuildMs: medianMs(noopSamples),
    oneFileRebuildMs: medianMs(probeSamples),
  };
}
92+
93+
/**
 * Collect all import pairs by scanning source files for ES import statements.
 *
 * Reads every `.js` file directly inside `srcDir` (non-recursive) and extracts
 * the module specifier of each static `import … from '…'` statement, including
 * statements whose import clause spans several lines.
 *
 * @param {string} [srcDir] Directory to scan; defaults to `<root>/src`.
 * @returns {{ fromFile: string, importSource: string }[]} One entry per
 *   matched import, in directory-listing order and then source order.
 */
function collectImportPairs(srcDir = path.join(root, 'src')) {
  // The import clause is matched as "anything that cannot end the statement"
  // (no quotes or semicolons) instead of `.`, which stops at the first newline
  // and would silently drop `import {\n  a,\n  b,\n} from 'x'`.
  const importRe = /(?:^|\n)\s*import\s+[^'";]*?\s+from\s+['"]([^'"]+)['"]/g;

  return fs
    .readdirSync(srcDir)
    .filter((name) => name.endsWith('.js'))
    .flatMap((name) => {
      const fromFile = path.join(srcDir, name);
      const content = fs.readFileSync(fromFile, 'utf8');
      // matchAll works on a private copy of the regex, so no lastIndex state
      // leaks from one file to the next.
      return Array.from(content.matchAll(importRe), (m) => ({
        fromFile,
        importSource: m[1],
      }));
    });
}
112+
113+
/**
 * Benchmark import resolution: native batch vs JS fallback.
 *
 * Each variant is timed RUNS times over the whole input list and the median
 * is reported. The native variant is skipped (fields left `null`) when
 * `isNativeAvailable()` is false.
 *
 * @param {{ fromFile: string, importSource: string }[]} inputs Import pairs to resolve.
 * @returns {{
 *   imports: number,
 *   nativeBatchMs: number | null,
 *   jsFallbackMs: number,
 *   perImportNativeMs: number | null,
 *   perImportJsMs: number,
 * }} Batch totals rounded to 0.1 ms and per-import averages rounded to 0.001 ms.
 */
function benchmarkResolve(inputs) {
  const aliases = null; // codegraph itself has no path aliases

  // Per-import averages are derived from the UNROUNDED median and kept at
  // 0.001 ms resolution: dividing an already-rounded total and rounding the
  // quotient to 0.1 ms reports 0 whenever the per-import cost is below 0.05 ms.
  const perImport = (totalMs) =>
    inputs.length > 0 ? Math.round((totalMs / inputs.length) * 1000) / 1000 : 0;

  // Native batch
  let nativeBatchMs = null;
  let perImportNativeMs = null;
  if (isNativeAvailable()) {
    const timings = [];
    for (let i = 0; i < RUNS; i++) {
      const start = performance.now();
      resolveImportsBatch(inputs, root, aliases);
      timings.push(performance.now() - start);
    }
    const nativeMedian = median(timings);
    nativeBatchMs = round1(nativeMedian);
    perImportNativeMs = perImport(nativeMedian);
  }

  // JS fallback (call the exported JS implementation)
  const jsTimings = [];
  for (let i = 0; i < RUNS; i++) {
    const start = performance.now();
    for (const { fromFile, importSource } of inputs) {
      resolveImportPathJS(fromFile, importSource, root, aliases);
    }
    jsTimings.push(performance.now() - start);
  }
  const jsMedian = median(jsTimings);
  const jsFallbackMs = round1(jsMedian);
  const perImportJsMs = perImport(jsMedian);

  return {
    imports: inputs.length,
    nativeBatchMs,
    jsFallbackMs,
    perImportNativeMs,
    perImportJsMs,
  };
}
153+
154+
// ── Run benchmarks ───────────────────────────────────────────────────────
155+
156+
// Renders one engine's tier timings for the stderr progress log.
const formatTiers = ({ fullBuildMs, noopRebuildMs, oneFileRebuildMs }) =>
  ` full=${fullBuildMs}ms noop=${noopRebuildMs}ms 1-file=${oneFileRebuildMs}ms`;

// Copies exactly the three tier fields that belong in the JSON report.
const pickTiers = ({ fullBuildMs, noopRebuildMs, oneFileRebuildMs }) => ({
  fullBuildMs,
  noopRebuildMs,
  oneFileRebuildMs,
});

console.error('Benchmarking WASM engine...');
const wasm = await benchmarkBuildTiers('wasm');
console.error(formatTiers(wasm));

// File count is read from the graph right after the WASM benchmark.
const { total: files } = statsData(dbPath).files;

let native = null;
if (!isNativeAvailable()) {
  console.error('Native engine not available — skipping native build benchmark');
} else {
  console.error('Benchmarking native engine...');
  native = await benchmarkBuildTiers('native');
  console.error(formatTiers(native));
}

// Import resolution benchmark (uses existing graph)
console.error('Benchmarking import resolution...');
const inputs = collectImportPairs();
console.error(` ${inputs.length} import pairs collected`);
const resolve = benchmarkResolve(inputs);
console.error(` native=${resolve.nativeBatchMs}ms js=${resolve.jsFallbackMs}ms`);

// From here on stdout carries the JSON report, so undo the stderr redirect.
console.log = origLog;

const result = {
  version: pkg.version,
  date: new Date().toISOString().slice(0, 10),
  files,
  wasm: pickTiers(wasm),
  native: native ? pickTiers(native) : null,
  resolve,
};

console.log(JSON.stringify(result, null, 2));

0 commit comments

Comments
 (0)