From 0b3c551c66002ce10212aae570eb2c51c324969a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:05:57 -0600 Subject: [PATCH 1/3] fix(cycles): document and test engine parity gap in function-level cycle detection The native (Rust) engine extracts more symbols and call edges than WASM, which changes the function-level dependency graph topology. This causes legitimate differences in cycle counts (e.g. 8 native vs 11 WASM) even though the Tarjan SCC algorithm is identical in both engines. Added documentation in cycles.ts explaining the root cause and a test demonstrating how additional edge precision can break false cycles. Fixes #597 --- src/domain/graph/cycles.ts | 19 ++++++++++++ tests/graph/cycles.test.ts | 60 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts index 9517133d..320d25cf 100644 --- a/src/domain/graph/cycles.ts +++ b/src/domain/graph/cycles.ts @@ -4,6 +4,25 @@ import { CodeGraph } from '../../graph/model.js'; import { loadNative } from '../../infrastructure/native.js'; import type { BetterSqlite3Database } from '../../types.js'; +/** + * Engine parity note — function-level cycle counts + * + * The native (Rust) and WASM engines may report different function-level cycle + * counts even on the same codebase. This is expected behavior, not a bug in + * the cycle detection algorithm (Tarjan SCC is identical in both engines). + * + * Root cause: the native engine extracts slightly more symbols and resolves + * more call edges than WASM (e.g. 10883 nodes / 4000 calls native vs 10857 + * nodes / 3986 calls WASM on the codegraph repo). The additional precision + * can both create new edges and — more commonly — resolve previously ambiguous + * calls to their correct targets, which breaks false cycles that WASM reports. + * + * For file-level cycles the engines are in parity because import edges are + * resolved identically. The gap only manifests at function-level granularity + * where call-site extraction differs between the Rust and WASM parsers. + * + * See: https://github.com/nicobailon/codegraph/issues/597 + */ export function findCycles( db: BetterSqlite3Database, opts: { fileLevel?: boolean; noTests?: boolean } = {}, diff --git a/tests/graph/cycles.test.ts b/tests/graph/cycles.test.ts index 18014f8c..b2699ca5 100644 --- a/tests/graph/cycles.test.ts +++ b/tests/graph/cycles.test.ts @@ -148,6 +148,21 @@ describe('formatCycles', () => { }); }); +// ── Engine parity: extraction-level differences ──────────────────── +// +// The native (Rust) and WASM engines produce slightly different function-level +// graphs because the native extractor resolves more symbols and call edges. +// This means function-level cycle *counts* can legitimately differ between +// engines (e.g. 8 native vs 11 WASM on the codegraph repo itself). +// +// The Tarjan SCC algorithm is identical — given the SAME edge set, both +// engines produce the same cycles. The tests below verify that invariant. +// +// File-level cycles are unaffected because import resolution is engine- +// independent. +// +// See: https://github.com/nicobailon/codegraph/issues/597 + // ── Native vs JS parity ──────────────────────────────────────────── describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => { @@ -207,3 +222,48 @@ describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => { expect(sortCycles(nativeResult)).toEqual(sortCycles(jsResult)); }); }); + +// ── Extraction-level parity gap (issue #597) ─────────────────────── + +describe('Cycle count sensitivity to edge differences', () => { + it('adding a precise edge can break a false cycle', () => { + // Demonstrates why native (more edges) can report FEWER cycles than WASM. + // With ambiguous resolution, a -> b -> c -> a forms a 3-node cycle. + // Adding a precise edge a -> d (resolving a previously ambiguous call) + // removes the a -> c edge, breaking the cycle. + const ambiguousEdges = [ + { source: 'a', target: 'b' }, + { source: 'b', target: 'c' }, + { source: 'c', target: 'a' }, + ]; + const ambiguousCycles = findCyclesJS(ambiguousEdges); + expect(ambiguousCycles).toHaveLength(1); + + // After resolving: c -> a becomes c -> d (a different target). + // The cycle is broken. + const resolvedEdges = [ + { source: 'a', target: 'b' }, + { source: 'b', target: 'c' }, + { source: 'c', target: 'd' }, + ]; + const resolvedCycles = findCyclesJS(resolvedEdges); + expect(resolvedCycles).toHaveLength(0); + }); + + it('both engines agree on identical edge sets', () => { + // The Tarjan SCC algorithm itself is deterministic. Any cycle count + // difference between engines comes from the graph they are fed, not + // from the algorithm. + const edges = [ + { source: 'a', target: 'b' }, + { source: 'b', target: 'c' }, + { source: 'c', target: 'a' }, + { source: 'd', target: 'e' }, + { source: 'e', target: 'd' }, + ]; + const result1 = findCyclesJS(edges); + const result2 = findCyclesJS(edges); + expect(result1).toEqual(result2); + expect(result1).toHaveLength(2); + }); +}); From aaa778e55cb7a6b03929f6e2d7d12162ef8f61eb Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:13:53 -0600 Subject: [PATCH 2/3] =?UTF-8?q?fix(cycles):=20address=20review=20feedback?= =?UTF-8?q?=20=E2=80=94=20issue=20URL,=20edge=20label,=20test=20name=20(#6?= =?UTF-8?q?02)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix issue link to point to optave/codegraph (not nicobailon/codegraph) - Correct inline comment: disambiguated edge is c -> d, not a -> d - Rename misleading test: "both engines agree on identical edge sets" only called findCyclesJS twice, renamed to "JS cycle detection is deterministic on repeated calls" to reflect what it actually tests --- src/domain/graph/cycles.ts | 2 +- tests/graph/cycles.test.ts | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts index 320d25cf..02e72566 100644 --- a/src/domain/graph/cycles.ts +++ b/src/domain/graph/cycles.ts @@ -21,7 +21,7 @@ import type { BetterSqlite3Database } from '../../types.js'; * resolved identically. The gap only manifests at function-level granularity * where call-site extraction differs between the Rust and WASM parsers. * - * See: https://github.com/nicobailon/codegraph/issues/597 + * See: https://github.com/optave/codegraph/issues/597 */ export function findCycles( db: BetterSqlite3Database, diff --git a/tests/graph/cycles.test.ts b/tests/graph/cycles.test.ts index b2699ca5..67583ce5 100644 --- a/tests/graph/cycles.test.ts +++ b/tests/graph/cycles.test.ts @@ -161,7 +161,7 @@ describe('formatCycles', () => { // File-level cycles are unaffected because import resolution is engine- // independent. // -// See: https://github.com/nicobailon/codegraph/issues/597 +// See: https://github.com/optave/codegraph/issues/597 // ── Native vs JS parity ──────────────────────────────────────────── @@ -229,8 +229,8 @@ describe('Cycle count sensitivity to edge differences', () => { it('adding a precise edge can break a false cycle', () => { // Demonstrates why native (more edges) can report FEWER cycles than WASM. // With ambiguous resolution, a -> b -> c -> a forms a 3-node cycle. - // Adding a precise edge a -> d (resolving a previously ambiguous call) - // removes the a -> c edge, breaking the cycle. + // Adding a precise edge c -> d (resolving a previously ambiguous call) + // removes the c -> a edge, breaking the cycle. const ambiguousEdges = [ { source: 'a', target: 'b' }, { source: 'b', target: 'c' }, @@ -250,8 +250,9 @@ describe('Cycle count sensitivity to edge differences', () => { expect(resolvedCycles).toHaveLength(0); }); - it('both engines agree on identical edge sets', () => { - // The Tarjan SCC algorithm itself is deterministic. Any cycle count + it('JS cycle detection is deterministic on repeated calls', () => { + // The Tarjan SCC algorithm is deterministic: given the same edge set, + // repeated calls always produce the same result. Any cycle count // difference between engines comes from the graph they are fed, not // from the algorithm. const edges = [ From f665c58f9d2e1ca9bfb491d48ca1f11845246a0b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:50:43 -0600 Subject: [PATCH 3/3] fix(test): rename test to reflect edge replacement, not addition (#602) Align test title and comment with actual behavior: the test models resolving an ambiguous call edge to a different target (replacement), not adding a new edge. --- tests/graph/cycles.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/graph/cycles.test.ts b/tests/graph/cycles.test.ts index 67583ce5..6b22cf2b 100644 --- a/tests/graph/cycles.test.ts +++ b/tests/graph/cycles.test.ts @@ -226,11 +226,11 @@ describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => { // ── Extraction-level parity gap (issue #597) ─────────────────────── describe('Cycle count sensitivity to edge differences', () => { - it('adding a precise edge can break a false cycle', () => { - // Demonstrates why native (more edges) can report FEWER cycles than WASM. + it('resolving an ambiguous call edge to its correct target can break a false cycle', () => { + // Demonstrates why native (more precise edge targets) can report FEWER cycles than WASM. // With ambiguous resolution, a -> b -> c -> a forms a 3-node cycle. - // Adding a precise edge c -> d (resolving a previously ambiguous call) - // removes the c -> a edge, breaking the cycle. + // Resolving the ambiguous c -> a edge to its correct target c -> d + // breaks the cycle. const ambiguousEdges = [ { source: 'a', target: 'b' }, { source: 'b', target: 'c' },