From 50ec4a070575b2eae754c63389096ddf14be3e62 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 02:44:44 -0600 Subject: [PATCH 1/4] =?UTF-8?q?perf:=20reduce=20query=20latency=20regressi?= =?UTF-8?q?on=20from=203.1.4=20=E2=86=92=203.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three targeted fixes for the +28–56% query latency regression: 1. Pin benchmark hub target to stable function names (buildGraph, openDb, loadConfig) instead of auto-selecting the most-connected node. Barrel/type files becoming the hub made version-to-version comparison meaningless. 2. Gate implementors queries in bfsTransitiveCallers — check once whether the graph has any 'implements' edges before doing per-node findNodeById + findImplementors lookups. Skips all implementor overhead for codebases without interface/trait hierarchies. 3. Cache loadConfig() results per cwd. The config file is read from disk on every fnImpactData and diffImpactData call; caching eliminates redundant fs.existsSync + readFileSync + JSON.parse per query invocation. Impact: 5 functions changed, 123 affected --- scripts/query-benchmark.js | 27 ++++++++++++++++++++++++++- src/domain/analysis/impact.js | 20 ++++++++++++++++++-- src/infrastructure/config.js | 24 +++++++++++++++++++++--- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js index b225c577..b2c091c1 100644 --- a/scripts/query-benchmark.js +++ b/scripts/query-benchmark.js @@ -111,8 +111,31 @@ function round1(n) { return Math.round(n * 10) / 10; } +// Pinned hub targets — stable function names that exist across versions. +// Auto-selecting the most-connected node makes version-to-version comparison +// meaningless when barrel/type files get added or removed. +const PINNED_HUB_CANDIDATES = ['buildGraph', 'openDb', 'loadConfig']; + function selectTargets() { const db = new Database(dbPath, { readonly: true }); + + // Try pinned candidates first for a stable hub across versions + let hub = null; + for (const candidate of PINNED_HUB_CANDIDATES) { + const row = db + .prepare( + `SELECT n.name FROM nodes n + JOIN edges e ON e.source_id = n.id OR e.target_id = n.id + WHERE n.name = ? AND n.file NOT LIKE '%test%' AND n.file NOT LIKE '%spec%' + LIMIT 1`, + ) + .get(candidate); + if (row) { + hub = row.name; + break; + } + } + const rows = db .prepare( `SELECT n.name, COUNT(e.id) AS cnt @@ -127,7 +150,9 @@ function selectTargets() { if (rows.length === 0) throw new Error('No nodes with edges found in graph'); - const hub = rows[0].name; + // Fall back to most-connected if no pinned candidate found + if (!hub) hub = rows[0].name; + const mid = rows[Math.floor(rows.length / 2)].name; const leaf = rows[rows.length - 1].name; return { hub, mid, leaf }; diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index c2ea3540..2ce1dbbf 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -24,6 +24,19 @@ import { findMatchingNodes } from './symbol-lookup.js'; const INTERFACE_LIKE_KINDS = new Set(['interface', 'trait']); +/** + * Check whether the graph contains any 'implements' edges. + * Cached per db handle so the query runs at most once per connection. + */ +const _hasImplementsCache = new WeakMap(); +function hasImplementsEdges(db) { + if (_hasImplementsCache.has(db)) return _hasImplementsCache.get(db); + const row = db.prepare("SELECT 1 FROM edges WHERE kind = 'implements' LIMIT 1").get(); + const result = !!row; + _hasImplementsCache.set(db, result); + return result; +} + /** * BFS traversal to find transitive callers of a node. * When an interface/trait node is encountered (either as the start node or @@ -40,6 +53,9 @@ export function bfsTransitiveCallers( startId, { noTests = false, maxDepth = 3, includeImplementors = true, onVisit } = {}, ) { + // Skip all implementor lookups when the graph has no implements edges + const resolveImplementors = includeImplementors && hasImplementsEdges(db); + const visited = new Set([startId]); const levels = {}; let frontier = [startId]; @@ -47,7 +63,7 @@ export function bfsTransitiveCallers( // Seed: if start node is an interface/trait, include its implementors at depth 1. // Implementors go into a separate list so their callers appear at depth 2, not depth 1. const implNextFrontier = []; - if (includeImplementors) { + if (resolveImplementors) { const startNode = findNodeById(db, startId); if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) { const impls = findImplementors(db, startId); @@ -88,7 +104,7 @@ export function bfsTransitiveCallers( // If a caller is an interface/trait, also pull in its implementors // Implementors are one extra hop away, so record at d+1 - if (includeImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { + if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) { const impls = findImplementors(db, c.id); for (const impl of impls) { if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index e8439ab0..c6f12c6e 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -130,12 +130,19 @@ export const DEFAULTS = { }, }; +// Per-cwd config cache — avoids re-reading the config file on every query call. +// The config file rarely changes within a single process lifetime. +const _configCache = new Map(); + /** * Load project configuration from a .codegraphrc.json or similar file. - * Returns merged config with defaults. + * Returns merged config with defaults. Results are cached per cwd. */ export function loadConfig(cwd) { cwd = cwd || process.cwd(); + const cached = _configCache.get(cwd); + if (cached) return cached; + for (const name of CONFIG_FILES) { const filePath = path.join(cwd, name); if (fs.existsSync(filePath)) { @@ -148,13 +155,24 @@ export function loadConfig(cwd) { merged.query.excludeTests = Boolean(config.excludeTests); } delete merged.excludeTests; - return resolveSecrets(applyEnvOverrides(merged)); + const result = resolveSecrets(applyEnvOverrides(merged)); + _configCache.set(cwd, result); + return result; } catch (err) { debug(`Failed to parse config ${filePath}: ${err.message}`); } } } - return resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); + _configCache.set(cwd, defaults); + return defaults; +} + +/** + * Clear the config cache. Useful in tests or after config file changes. + */ +export function clearConfigCache() { + _configCache.clear(); } const ENV_LLM_MAP = { From 1468ef1a464f079bcb4df7ea0622e1d3c2f818ae Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 02:53:37 -0600 Subject: [PATCH 2/4] fix: return structuredClone from config cache and guard benchmark db handle Prevent callers from mutating the cached config object by returning a deep clone on cache hits. Add try/finally to selectTargets() so the database handle is closed even if a query throws. Impact: 2 functions changed, 1 affected --- scripts/query-benchmark.js | 6 +++++- src/infrastructure/config.js | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js index b2c091c1..230caf77 100644 --- a/scripts/query-benchmark.js +++ b/scripts/query-benchmark.js @@ -118,6 +118,7 @@ const PINNED_HUB_CANDIDATES = ['buildGraph', 'openDb', 'loadConfig']; function selectTargets() { const db = new Database(dbPath, { readonly: true }); + try { // Try pinned candidates first for a stable hub across versions let hub = null; @@ -146,7 +147,6 @@ function selectTargets() { ORDER BY cnt DESC`, ) .all(); - db.close(); if (rows.length === 0) throw new Error('No nodes with edges found in graph'); @@ -156,6 +156,10 @@ function selectTargets() { const mid = rows[Math.floor(rows.length / 2)].name; const leaf = rows[rows.length - 1].name; return { hub, mid, leaf }; + + } finally { + db.close(); + } } function benchDepths(fn, name, depths) { diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index c6f12c6e..961e2848 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -141,7 +141,7 @@ const _configCache = new Map(); export function loadConfig(cwd) { cwd = cwd || process.cwd(); const cached = _configCache.get(cwd); - if (cached) return cached; + if (cached) return structuredClone(cached); for (const name of CONFIG_FILES) { const filePath = path.join(cwd, name); @@ -169,7 +169,9 @@ export function loadConfig(cwd) { } /** - * Clear the config cache. Useful in tests or after config file changes. + * Clear the config cache. Intended for long-running processes that need to + * pick up on-disk config changes, and for test isolation when tests share + * the same cwd. */ export function clearConfigCache() { _configCache.clear(); From 749136316d7af04be01454124a1e10d214ae1866 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 03:08:23 -0600 Subject: [PATCH 3/4] fix: install @huggingface/transformers in npm-mode benchmark workers The embedding benchmark's npm mode installs codegraph into a temp dir, but @huggingface/transformers is a devDependency and not included. All 6 model workers crash on import, producing symbols: 0, models: {}. Install it explicitly from the local devDependencies version, matching the existing pattern for native platform packages. Also add a guard in update-embedding-report.js to reject empty results and fail loudly instead of silently overwriting valid benchmark data. --- scripts/lib/bench-config.js | 21 +++++++++++++++++++++ scripts/update-embedding-report.js | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/scripts/lib/bench-config.js b/scripts/lib/bench-config.js index bd354334..55306e70 100644 --- a/scripts/lib/bench-config.js +++ b/scripts/lib/bench-config.js @@ -134,6 +134,27 @@ export async function resolveBenchmarkSource() { console.error(`Warning: failed to install native package: ${err.message}`); } + // @huggingface/transformers is a devDependency (lazy-loaded for embeddings). + // It is not installed as a transitive dep in npm mode, so install it + // explicitly so the embedding benchmark workers can import it. + try { + const localPkg = JSON.parse( + fs.readFileSync(path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..', 'package.json'), 'utf8'), + ); + const hfVersion = localPkg.devDependencies?.['@huggingface/transformers']; + if (hfVersion) { + console.error(`Installing @huggingface/transformers@${hfVersion} for embedding benchmarks...`); + execFileSync('npm', ['install', `@huggingface/transformers@${hfVersion}`, '--no-audit', '--no-fund', '--no-save'], { + cwd: tmpDir, + stdio: 'pipe', + timeout: 120_000, + }); + console.error('Installed @huggingface/transformers'); + } + } catch (err) { + console.error(`Warning: failed to install @huggingface/transformers: ${err.message}`); + } + const srcDir = path.join(pkgDir, 'src'); if (!fs.existsSync(srcDir)) { diff --git a/scripts/update-embedding-report.js b/scripts/update-embedding-report.js index 47e31d15..645c1844 100644 --- a/scripts/update-embedding-report.js +++ b/scripts/update-embedding-report.js @@ -26,6 +26,15 @@ if (arg) { } const entry = JSON.parse(jsonText); +// Guard: reject empty benchmark results (all workers crashed or no symbols indexed) +if (!entry.symbols || !entry.models || Object.keys(entry.models).length === 0) { + console.error( + `Embedding benchmark produced empty results (symbols=${entry.symbols}, models=${Object.keys(entry.models || {}).length}). ` + + 'Skipping report update to avoid overwriting valid data. Check benchmark worker logs.', + ); + process.exit(1); +} + // ── Paths ──────────────────────────────────────────────────────────────── const reportPath = path.join(root, 'generated', 'benchmarks', 'EMBEDDING-BENCHMARKS.md'); From 1f9e2e59b0d3cba4e3df18840507c156da93fb22 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 19 Mar 2026 03:32:35 -0600 Subject: [PATCH 4/4] fix: clone config on cache-miss paths to prevent cache corruption (#528) Impact: 1 functions changed, 118 affected --- src/infrastructure/config.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/infrastructure/config.js b/src/infrastructure/config.js index 961e2848..7f62083b 100644 --- a/src/infrastructure/config.js +++ b/src/infrastructure/config.js @@ -156,7 +156,7 @@ export function loadConfig(cwd) { } delete merged.excludeTests; const result = resolveSecrets(applyEnvOverrides(merged)); - _configCache.set(cwd, result); + _configCache.set(cwd, structuredClone(result)); return result; } catch (err) { debug(`Failed to parse config ${filePath}: ${err.message}`); @@ -164,7 +164,7 @@ export function loadConfig(cwd) { } } const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS })); - _configCache.set(cwd, defaults); + _configCache.set(cwd, structuredClone(defaults)); return defaults; }