Skip to content

Commit 56a0517

Browse files
test: add embedding strategy benchmark script
Standalone script that compares structured vs source strategies against 14 real search queries on the project's own graph. Reports Hit@1/3/5 and per-query rank comparison. Usage: `node tests/search/embedding-benchmark.js [--model minilm]`. Impact: 1 function changed, 0 affected.
1 parent c5dcd59 commit 56a0517

1 file changed

Lines changed: 124 additions & 0 deletions

File tree

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Embedding strategy benchmark — compares structured vs source strategies
5+
* against real search queries on the current project's graph.
6+
*
7+
* Prerequisites:
8+
* - @huggingface/transformers installed
9+
* - codegraph build already run (graph.db exists)
10+
*
11+
* Usage:
12+
* node tests/search/embedding-benchmark.js
13+
* node tests/search/embedding-benchmark.js --model minilm
14+
*/
15+
16+
import path from 'node:path';
17+
import { buildEmbeddings, DEFAULT_MODEL, MODELS, searchData } from '../../src/embedder.js';
18+
19+
// Model name taken from `--model <name>` on the command line; DEFAULT_MODEL is
// used when the flag is absent.
const modelFlagIndex = process.argv.indexOf('--model');
const modelFlagValue = modelFlagIndex === -1 ? undefined : process.argv[modelFlagIndex + 1];
if (modelFlagIndex !== -1 && (modelFlagValue === undefined || modelFlagValue.startsWith('--'))) {
  // A trailing `--model` (or one followed by another flag) would otherwise
  // leave `model` undefined/garbage and fail much later with an unrelated error.
  console.error('Error: --model requires a value, e.g. --model minilm');
  process.exit(1);
}
const model = modelFlagValue ?? DEFAULT_MODEL;

// Both paths are relative to the current working directory, so the script is
// expected to be run from the project root.
const rootDir = '.';
const dbPath = path.resolve('.codegraph/graph.db');

// Queries with expected best-match symbol name
const QUERIES = [
  { q: 'parse source code with tree-sitter', expect: 'parseFilesAuto' },
  { q: 'find circular dependencies', expect: 'findCycles' },
  { q: 'build dependency graph from source files', expect: 'buildGraph' },
  { q: 'resolve import path to actual file', expect: 'resolveImportPath' },
  { q: 'cosine similarity between vectors', expect: 'cosineSim' },
  { q: 'export graph as DOT format', expect: 'exportDOT' },
  { q: 'semantic search with embeddings', expect: 'search' },
  { q: 'incremental file hashing', expect: 'hashFile' },
  { q: 'load configuration from file', expect: 'loadConfig' },
  { q: 'extract functions and classes from code', expect: 'extractJavaScript' },
  { q: 'impact analysis of code changes', expect: 'diffImpactData' },
  { q: 'start MCP server for AI agents', expect: 'startMCPServer' },
  { q: 'watch files for changes', expect: 'watchProject' },
  { q: 'reciprocal rank fusion for multi-query search', expect: 'multiSearchData' },
];
43+
44+
/**
 * Rebuilds the embeddings with the given strategy, then runs every entry of
 * QUERIES through `searchData` and tallies where the expected symbol ranks.
 *
 * `details` always holds exactly one entry per query, in QUERIES order, so
 * callers can line up the results of two runs by index.
 *
 * @param {string} strategy - Strategy name forwarded to `buildEmbeddings`.
 * @returns {Promise<{
 *   strategy: string,
 *   hits1: number,
 *   hits3: number,
 *   hits5: number,
 *   total: number,
 *   details: Array<{
 *     q: string,
 *     expected: string,
 *     rank: number | string,
 *     actual: string | undefined,
 *     matchScore: string,
 *   }>,
 * }>} Hit counts within the top 1/3/5 results plus the per-query breakdown.
 */
async function benchmark(strategy) {
  await buildEmbeddings(rootDir, model, dbPath, { strategy });

  let hits1 = 0;
  let hits3 = 0;
  let hits5 = 0;
  const details = [];

  for (const { q, expect: expected } of QUERIES) {
    const data = await searchData(q, dbPath, { minScore: 0.01, limit: 10 });
    const query = q.slice(0, 50);

    if (!data) {
      // Record a placeholder instead of skipping: dropping the entry would
      // shift every later row when two runs are compared by index.
      details.push({ q: query, expected, rank: 'n/a', actual: undefined, matchScore: 'miss' });
      continue;
    }

    const names = data.results.map((r) => r.name);
    const rank = names.indexOf(expected) + 1; // 0 = not found
    if (rank === 1) hits1++;
    if (rank >= 1 && rank <= 3) hits3++;
    if (rank >= 1 && rank <= 5) hits5++;

    const matchScore = rank > 0 ? data.results[rank - 1].similarity.toFixed(3) : 'miss';
    details.push({
      q: query,
      expected,
      // Only `limit: 10` results are requested, so "not found" means rank > 10.
      rank: rank || '>10',
      actual: names[0],
      matchScore,
    });
  }

  return { strategy, hits1, hits3, hits5, total: QUERIES.length, details };
}
74+
75+
// Validate the model before doing any work: an unknown name would otherwise
// crash below with a bare TypeError when reading `modelConfig.dim`.
const modelConfig = MODELS[model];
if (!modelConfig) {
  console.error(
    `Error: unknown model "${model}". Available models: ${Object.keys(MODELS).join(', ')}`,
  );
  process.exit(1);
}

console.log('=== Embedding Strategy Benchmark ===');
console.log(`Model: ${model} (${modelConfig.dim}d, ${modelConfig.contextWindow} token context)`);
console.log(`Queries: ${QUERIES.length}`);
console.log('');

// The runs are sequential on purpose: each one rebuilds the embeddings in the
// same database before querying it.
const structured = await benchmark('structured');
const source = await benchmark('source');

// Summary table
console.log('');
console.log('=== RESULTS ===');
console.log('');
console.log(`${'Metric'.padEnd(12)}${'structured'.padEnd(16)}${'source'.padEnd(16)}delta`);
for (const [label, key] of [
  ['Hit@1', 'hits1'],
  ['Hit@3', 'hits3'],
  ['Hit@5', 'hits5'],
]) {
  const s = structured[key];
  const o = source[key];
  const sp = `${s}/${structured.total} (${((s / structured.total) * 100).toFixed(0)}%)`;
  const op = `${o}/${source.total} (${((o / source.total) * 100).toFixed(0)}%)`;
  // Positive delta means the structured strategy scored more hits.
  const delta = s - o;
  const sign = delta > 0 ? '+' : '';
  console.log(`${label.padEnd(12)}${sp.padEnd(16)}${op.padEnd(16)}${sign}${delta}`);
}

// Per-query comparison
console.log('');
console.log(`${'Query'.padEnd(52)}${'Expected'.padEnd(22)}Struct Source`);
for (let i = 0; i < QUERIES.length; i++) {
  const s = structured.details[i];
  const o = source.details[i];
  // A numeric rank beats a non-numeric one (a miss); between two numeric
  // ranks the lower one wins. Ties get no marker on either side.
  const sw =
    typeof s.rank === 'number' && (typeof o.rank !== 'number' || s.rank < o.rank) ? '*' : ' ';
  const ow =
    typeof o.rank === 'number' && (typeof s.rank !== 'number' || o.rank < s.rank) ? '*' : ' ';
  console.log(
    s.q.padEnd(52) +
      s.expected.padEnd(22) +
      String(s.rank).padEnd(4) +
      sw +
      ' ' +
      String(o.rank).padEnd(4) +
      ow,
  );
}
console.log('');
console.log('* = better rank for that query');

0 commit comments

Comments
 (0)