Skip to content

Commit 5730a65

Browse files
feat: add embedding regression test with real ML model validation
Add integration test that runs the embed+search pipeline with the real minilm model to catch regressions from model updates, embedding text format changes, or pipeline bugs. Includes a dedicated CI workflow (weekly + on relevant PR changes) with HuggingFace model caching.
1 parent e16dfeb commit 5730a65

File tree

3 files changed

+177
-2
lines changed

3 files changed

+177
-2
lines changed
Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
# Runs the embedding regression test against the real MiniLM model:
# weekly, on manual dispatch, and on pull requests that touch the files listed
# under `paths` below.
name: Embedding Regression

on:
  schedule:
    - cron: '0 6 * * 1' # Monday 6am UTC
  workflow_dispatch:
  pull_request:
    paths:
      - 'src/embedder.js'
      # The regression test also imports buildGraph from src/builder.js,
      # so changes there can break the embed+search pipeline too.
      - 'src/builder.js'
      - 'tests/search/**'
      - 'package.json'

# A newer run on the same ref supersedes any run still in progress.
concurrency:
  group: embedding-regression-${{ github.ref }}
  cancel-in-progress: true

jobs:
  embedding-regression:
    runs-on: ubuntu-latest
    name: Embedding regression tests
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install dependencies
        run: npm install

      # Restored after `npm install` so the in-package cache directory is not
      # affected by the install step.
      - name: Cache HuggingFace models
        uses: actions/cache@v4
        with:
          # Transformers.js running under Node defaults its model cache to a
          # `.cache` directory inside the installed package unless
          # `env.cacheDir` is overridden, so both candidate locations are cached.
          # NOTE(review): confirm which location src/embedder.js actually uses.
          path: |
            ~/.cache/huggingface
            node_modules/@huggingface/transformers/.cache
          key: hf-models-minilm-v1

      # NOTE(review): the test file skips itself when @huggingface/transformers
      # is not installed, so this step can pass without running anything.
      - name: Run embedding regression tests
        run: npx vitest run tests/search/embedding-regression.test.js

src/embedder.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -173,10 +173,10 @@ function initEmbeddingsSchema(db) {
173173
/**
174174
* Build embeddings for all functions/methods/classes in the graph.
175175
*/
176-
export async function buildEmbeddings(rootDir, modelKey) {
176+
export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
177177
// path already imported at top
178178
// fs already imported at top
179-
const dbPath = findDbPath(null);
179+
const dbPath = customDbPath || findDbPath(null);
180180

181181
const db = new Database(dbPath);
182182
initEmbeddingsSchema(db);
tests/search/embedding-regression.test.js

Lines changed: 134 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,134 @@
1+
/**
2+
* Embedding regression test — validates the embed+search pipeline
3+
* produces semantically meaningful results with a real ML model.
4+
*
5+
* Skips automatically when @huggingface/transformers is not installed.
6+
* Run explicitly: npx vitest run tests/search/embedding-regression.test.js
7+
*/
8+
9+
import fs from 'node:fs';
10+
import os from 'node:os';
11+
import path from 'node:path';
12+
import Database from 'better-sqlite3';
13+
import { afterAll, beforeAll, describe, expect, test } from 'vitest';
14+
15+
// Detect whether transformers is available (optional dep)

/**
 * Decide whether a failed dynamic import just means "module not installed".
 *
 * @param {unknown} err - Value thrown by `import()`.
 * @returns {boolean} True for a module-not-found failure, false for anything else.
 */
function isModuleNotFound(err) {
  const code = err?.code;
  if (code === 'ERR_MODULE_NOT_FOUND' || code === 'MODULE_NOT_FOUND') {
    return true;
  }
  // Some loaders report a missing module only through the message text.
  return /Cannot find (module|package)/.test(String(err?.message ?? ''));
}

let hasTransformers = false;
try {
  await import('@huggingface/transformers');
  hasTransformers = true;
} catch (err) {
  // Not installed — tests will skip. Any other failure still skips the suite
  // (best-effort detection), but is reported so a broken install is visible
  // instead of looking like a clean skip.
  if (!isModuleNotFound(err)) {
    console.warn(
      `[embedding-regression] @huggingface/transformers failed to load; skipping suite: ${err?.message ?? err}`,
    );
  }
}
23+
24+
// Lazy-import to avoid top-level errors when transformers is missing
25+
const { buildGraph } = await import('../../src/builder.js');
26+
const { buildEmbeddings, searchData } = await import('../../src/embedder.js');
27+
28+
// Same ES-module fixture files used by build.test.js
29+
const FIXTURE_FILES = {
30+
'math.js': `
31+
export function add(a, b) { return a + b; }
32+
export function multiply(a, b) { return a * b; }
33+
export function square(x) { return multiply(x, x); }
34+
`.trimStart(),
35+
'utils.js': `
36+
import { add, square } from './math.js';
37+
export function sumOfSquares(a, b) { return add(square(a), square(b)); }
38+
export class Calculator {
39+
compute(x, y) { return sumOfSquares(x, y); }
40+
}
41+
`.trimStart(),
42+
'index.js': `
43+
import { sumOfSquares, Calculator } from './utils.js';
44+
import { add } from './math.js';
45+
export function main() {
46+
console.log(add(1, 2));
47+
console.log(sumOfSquares(3, 4));
48+
const calc = new Calculator();
49+
console.log(calc.compute(5, 6));
50+
}
51+
`.trimStart(),
52+
};
53+
54+
let tmpDir, dbPath;
55+
56+
describe.skipIf(!hasTransformers)('embedding regression (real model)', () => {
57+
beforeAll(async () => {
  // Write the fixture sources into a fresh temporary project directory.
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed-regression-'));
  Object.entries(FIXTURE_FILES).forEach(([fileName, source]) => {
    fs.writeFileSync(path.join(tmpDir, fileName), source);
  });

  // Build the dependency graph, then point dbPath at the graph database
  // under the project's .codegraph directory.
  await buildGraph(tmpDir, { skipRegistry: true });
  dbPath = path.join(tmpDir, '.codegraph', 'graph.db');

  // Embed with the 'minilm' model key, passing the DB path explicitly.
  await buildEmbeddings(tmpDir, 'minilm', dbPath);
}, 120_000);

afterAll(() => {
  // Nothing to clean up if setup failed before the temp directory existed.
  if (!tmpDir) return;
  fs.rmSync(tmpDir, { recursive: true, force: true });
});
74+
75+
describe('smoke tests', () => {
  /**
   * Open the graph database read-only, run `query` against it, and close the
   * handle even when the query throws, so a failing test does not leave the
   * file open when afterAll removes the temp directory.
   *
   * @param {(db: object) => any} query - Callback that receives the open handle.
   * @returns {any} Whatever `query` returns.
   */
  function readFromDb(query) {
    const db = new Database(dbPath, { readonly: true });
    try {
      return query(db);
    } finally {
      db.close();
    }
  }

  test('stored at least 6 embeddings', () => {
    const count = readFromDb((db) => db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c);
    expect(count).toBeGreaterThanOrEqual(6);
  });

  test('metadata records correct model and dimension', () => {
    // `?.value` turns a missing metadata row into a failed expectation below
    // rather than a TypeError inside the query.
    const { model, dim } = readFromDb((db) => ({
      model: db.prepare("SELECT value FROM embedding_meta WHERE key = 'model'").get()?.value,
      dim: db.prepare("SELECT value FROM embedding_meta WHERE key = 'dim'").get()?.value,
    }));
    expect(model).toBe('Xenova/all-MiniLM-L6-v2');
    expect(Number(dim)).toBe(384);
  });

  test('search returns results with positive similarity', async () => {
    const data = await searchData('add numbers', dbPath, { minScore: 0.01 });
    expect(data).not.toBeNull();
    expect(data.results.length).toBeGreaterThan(0);
    for (const result of data.results) {
      expect(result.similarity).toBeGreaterThan(0);
    }
  });
});
101+
102+
describe('regression queries', () => {
  // One entry per regression check: test title, natural-language query,
  // the symbol name that must be returned, and the result limit to search with.
  const CASES = [
    {
      title: '"add two numbers together" finds add in top 3',
      query: 'add two numbers together',
      expectedName: 'add',
      topN: 3,
    },
    {
      title: '"multiply values" finds multiply in top 3',
      query: 'multiply values',
      expectedName: 'multiply',
      topN: 3,
    },
    {
      title: '"compute the square of a number" finds square in top 3',
      query: 'compute the square of a number',
      expectedName: 'square',
      topN: 3,
    },
    {
      title: '"sum of squares calculation" finds sumOfSquares in top 3',
      query: 'sum of squares calculation',
      expectedName: 'sumOfSquares',
      topN: 3,
    },
    {
      title: '"main entry point function" finds main in top 5',
      query: 'main entry point function',
      expectedName: 'main',
      topN: 5,
    },
  ];

  /**
   * Run a search limited to `topN` results and assert that `expectedName`
   * is among the returned result names.
   */
  const expectInTopN = async (query, expectedName, topN) => {
    const searchOptions = { minScore: 0.01, limit: topN };
    const data = await searchData(query, dbPath, searchOptions);
    expect(data).not.toBeNull();
    const names = data.results.map((hit) => hit.name);
    expect(names).toContain(expectedName);
  };

  for (const { title, query, expectedName, topN } of CASES) {
    test(title, async () => {
      await expectInTopN(query, expectedName, topN);
    });
  }
});
134+
});

0 commit comments

Comments
 (0)