Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions __tests__/edges-unique.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* Edge Uniqueness Tests
*
* Regression tests for the bug where `INSERT OR IGNORE INTO edges` was
* silently a no-op: the only candidate key was the AUTOINCREMENT id (which
* never conflicts), so duplicate edges accumulated on every re-emission /
* re-resolution.
*
* Fix: a UNIQUE index on (source, target, kind, COALESCE(line, -1),
* COALESCE(col, -1)) backs a fresh-install schema and is also applied via
* migration v4 (with a dedup pass over existing rows).
*/

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { DatabaseConnection } from '../src/db';
import { QueryBuilder } from '../src/db/queries';
import { Edge, Node } from '../src/types';
import { runMigrations, getCurrentVersion, CURRENT_SCHEMA_VERSION } from '../src/db/migrations';

/**
 * Create a throwaway on-disk database in a fresh temp directory and a
 * QueryBuilder bound to it. Caller is responsible for `cleanup()`.
 */
function tempDb(): { dir: string; db: DatabaseConnection; q: QueryBuilder } {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-edges-unique-'));
  const db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  return { dir, db, q: new QueryBuilder(db.getDb()) };
}

/** Close the connection, then remove the temp directory (best effort). */
function cleanup(dir: string, db: DatabaseConnection) {
  db.close();
  if (fs.existsSync(dir)) {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * Build a minimal one-line `function` Node fixture with the given id/name.
 * Everything else is fixed so tests only vary identity.
 */
function makeNode(id: string, name: string): Node {
  const stamp = Date.now();
  return {
    id,
    kind: 'function',
    name,
    qualifiedName: `f::${name}`,
    filePath: 'a.ts',
    language: 'typescript',
    startLine: 1,
    endLine: 1,
    startColumn: 0,
    endColumn: 0,
    updatedAt: stamp,
  };
}

/** Total number of rows currently in the edges table. */
function edgesCount(db: DatabaseConnection): number {
  const { c } = db
    .getDb()
    .prepare('SELECT COUNT(*) as c FROM edges')
    .get() as { c: number };
  return c;
}

describe('Edge UNIQUE constraint (bug #2)', () => {
  let dir: string;
  let db: DatabaseConnection;
  let q: QueryBuilder;

  // Fresh database per test, pre-seeded with the two endpoint nodes.
  beforeEach(() => {
    ({ dir, db, q } = tempDb());
    q.insertNodes([makeNode('n1', 'foo'), makeNode('n2', 'bar')]);
  });

  afterEach(() => cleanup(dir, db));

  it('rejects duplicate (source, target, kind, line, col)', () => {
    const dup: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 10, column: 5 };
    q.insertEdge(dup);
    // The second INSERT OR IGNORE must now hit the unique index and do nothing.
    q.insertEdge(dup);
    expect(edgesCount(db)).toBe(1);
  });

  it('treats two NULL line edges as duplicates (COALESCE in unique index)', () => {
    const noLoc: Edge = { source: 'n1', target: 'n2', kind: 'calls' };
    q.insertEdge(noLoc);
    q.insertEdge(noLoc);
    expect(edgesCount(db)).toBe(1);
  });

  it('allows same source/target/kind on different lines', () => {
    q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 1 });
    q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 2 });
    expect(edgesCount(db)).toBe(2);
  });

  it('allows same source/target/line on different kinds', () => {
    q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 1 });
    q.insertEdge({ source: 'n1', target: 'n2', kind: 'references', line: 1 });
    expect(edgesCount(db)).toBe(2);
  });

  it('insertEdges (batch) dedupes within the same call', () => {
    const repeated: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 1, column: 1 };
    q.insertEdges([repeated, repeated, repeated]);
    expect(edgesCount(db)).toBe(1);
  });

  it('survives the same edge being re-emitted across many cycles', () => {
    const edge: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 1 };
    let rounds = 100;
    while (rounds-- > 0) {
      q.insertEdge(edge);
    }
    expect(edgesCount(db)).toBe(1);
  });
});

describe('Migration v4: dedup existing edges', () => {
  let dir: string;
  let dbPath: string;

  beforeEach(() => {
    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-migr-v4-'));
    dbPath = path.join(dir, 'test.db');
  });

  afterEach(() => {
    if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  });

  it('collapses pre-existing duplicates and adds the UNIQUE index', () => {
    // Back-date a freshly initialized database to look like v3: drop the
    // new unique index and replace the version row, so the duplicates a
    // v3 database could accumulate can be injected below.
    const conn = DatabaseConnection.initialize(dbPath);
    const raw = conn.getDb();
    raw.exec(`DROP INDEX IF EXISTS idx_edges_unique;`);
    raw.exec(`DELETE FROM schema_versions;`);
    raw
      .prepare('INSERT INTO schema_versions (version, applied_at, description) VALUES (3, ?, ?)')
      .run(Date.now(), 'simulated v3');

    const builder = new QueryBuilder(raw);
    builder.insertNodes([makeNode('n1', 'foo'), makeNode('n2', 'bar')]);

    // With the index gone, raw INSERTs can create duplicates again.
    const insertRaw = raw.prepare(
      'INSERT INTO edges (source, target, kind, line, col) VALUES (?, ?, ?, ?, ?)'
    );
    // Three identical located edges → should collapse to one.
    insertRaw.run('n1', 'n2', 'calls', 10, 5);
    insertRaw.run('n1', 'n2', 'calls', 10, 5);
    insertRaw.run('n1', 'n2', 'calls', 10, 5);
    // Two identical edges with NULL line/col → should also collapse to one.
    insertRaw.run('n1', 'n2', 'references', null, null);
    insertRaw.run('n1', 'n2', 'references', null, null);

    expect(edgesCount(conn)).toBe(5);
    expect(getCurrentVersion(raw)).toBe(3);

    // Apply the pending migrations.
    runMigrations(raw, 3);

    expect(getCurrentVersion(raw)).toBe(CURRENT_SCHEMA_VERSION);
    expect(CURRENT_SCHEMA_VERSION).toBeGreaterThanOrEqual(4);
    // 3 calls dups → 1, 2 references dups → 1
    expect(edgesCount(conn)).toBe(2);

    // The constraint is live again: a duplicate insert is now a no-op.
    const afterMigration = new QueryBuilder(raw);
    afterMigration.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 10, column: 5 });
    expect(edgesCount(conn)).toBe(2);

    conn.close();
  });
});
2 changes: 1 addition & 1 deletion __tests__/foundation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ describe('Database Connection', () => {

const version = db.getSchemaVersion();
expect(version).not.toBeNull();
expect(version?.version).toBe(3);
expect(version?.version).toBe(4);

db.close();
});
Expand Down
2 changes: 1 addition & 1 deletion __tests__/pr19-improvements.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
describe('Schema v2 Migration', () => {
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
expect(CURRENT_SCHEMA_VERSION).toBe(3);
expect(CURRENT_SCHEMA_VERSION).toBe(4);
});

it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
Expand Down
23 changes: 22 additions & 1 deletion src/db/migrations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { SqliteDatabase } from './sqlite-adapter';
/**
* Current schema version
*/
export const CURRENT_SCHEMA_VERSION = 3;
export const CURRENT_SCHEMA_VERSION = 4;

/**
* Migration definition
Expand Down Expand Up @@ -54,6 +54,27 @@ const migrations: Migration[] = [
`);
},
},
{
  version: 4,
  description: 'Dedup edges and enforce UNIQUE(source, target, kind, line, col) so INSERT OR IGNORE actually dedupes',
  up: (db) => {
    // Without a UNIQUE constraint the existing `INSERT OR IGNORE INTO
    // edges` was a no-op for dedup purposes (the only candidate key was
    // the AUTOINCREMENT id, which never conflicts). Existing databases
    // can therefore contain accumulated duplicates from re-emission /
    // re-resolution. Collapse those before adding the constraint, then
    // create the UNIQUE index that future inserts will conflict against.
    //
    // The dedup keeps MIN(id) per group, i.e. the oldest surviving row.
    // COALESCE maps NULL line/col to -1 so two location-less duplicates
    // compare equal (a raw NULL in a UNIQUE index never conflicts).
    //
    // NOTE(review): this is an index on expressions (COALESCE), which
    // requires SQLite >= 3.9.0 — confirm the minimum supported runtime.
    db.exec(`
      DELETE FROM edges
      WHERE id NOT IN (
        SELECT MIN(id) FROM edges
        GROUP BY source, target, kind, COALESCE(line, -1), COALESCE(col, -1)
      );
      CREATE UNIQUE INDEX IF NOT EXISTS idx_edges_unique
      ON edges(source, target, kind, COALESCE(line, -1), COALESCE(col, -1));
    `);
  },
},
];

/**
Expand Down
9 changes: 9 additions & 0 deletions src/db/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,15 @@ CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source, kind);
CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target, kind);

-- Uniqueness for (source, target, kind, line, col). The id column is an
-- AUTOINCREMENT primary key, so without this index `INSERT OR IGNORE`
-- would never see a conflict — duplicate edges would silently accumulate
-- on every re-resolution / re-emission. COALESCE keeps two NULL line/col
-- values comparable as equal (SQLite treats raw NULLs in a UNIQUE index
-- as distinct).
CREATE UNIQUE INDEX IF NOT EXISTS idx_edges_unique
ON edges(source, target, kind, COALESCE(line, -1), COALESCE(col, -1));

-- File indexes
CREATE INDEX IF NOT EXISTS idx_files_language ON files(language);
CREATE INDEX IF NOT EXISTS idx_files_modified_at ON files(modified_at);
Expand Down