diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md new file mode 100644 index 00000000..a6f474d3 --- /dev/null +++ b/.claude/skills/bench-check/SKILL.md @@ -0,0 +1,271 @@ +--- +name: bench-check +description: Run benchmarks against a saved baseline, detect performance regressions, and update the baseline — guards against silent slowdowns +argument-hint: "[--save-baseline | --compare-only | --threshold 15] (default: compare + save)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /bench-check — Performance Regression Check + +Run the project's benchmark suite, compare results against a saved baseline, flag regressions beyond a threshold, and optionally update the baseline. Prevents silent performance degradation between releases. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--save-baseline` — run benchmarks and save as the new baseline (no comparison) + - `--compare-only` — compare against baseline without updating it + - `--threshold N` — regression threshold percentage (default: 15%) + - No arguments — compare against baseline, then update it if no regressions + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root +2. Check that benchmark scripts exist: + - `scripts/benchmark.js` (build speed, query latency) + - `scripts/incremental-benchmark.js` (incremental build tiers) + - `scripts/query-benchmark.js` (query depth scaling) + - `scripts/embedding-benchmark.js` (search recall) — optional, skip if embedding deps missing +3. Parse `$ARGUMENTS`: + - `SAVE_ONLY=true` if `--save-baseline` + - `COMPARE_ONLY=true` if `--compare-only` + - `THRESHOLD=N` from `--threshold N` (default: 15) +4. Check for existing baseline at `generated/bench-check/baseline.json` + - If missing and not `--save-baseline`: warn that this will be an initial baseline run + +## Phase 1 — Run Benchmarks + +Run each benchmark script and collect results. Each script outputs JSON to stdout. + +### 1a. 
Build & Query Benchmark + +```bash +output=$(timeout 300 node scripts/benchmark.js 2>&1) +exit_code=$? +``` + +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. + +Extract: +- `buildTime` (ms) — per engine (native, WASM) +- `queryTime` (ms) — per query type +- `nodeCount`, `edgeCount` — graph size + +### 1b. Incremental Benchmark + +```bash +output=$(timeout 300 node scripts/incremental-benchmark.js 2>&1) +exit_code=$? +``` + +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. + +Extract: +- `noOpRebuild` (ms) — time for no-change rebuild +- `singleFileRebuild` (ms) — time after one file change +- `importResolution` (ms) — resolution throughput + +### 1c. Query Depth Benchmark + +```bash +output=$(timeout 300 node scripts/query-benchmark.js 2>&1) +exit_code=$? +``` + +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. + +Extract: +- `fnDeps` scaling by depth +- `fnImpact` scaling by depth +- `diffImpact` latency + +### 1d. Embedding Benchmark (optional) + +```bash +output=$(timeout 300 node scripts/embedding-benchmark.js 2>&1) +exit_code=$? +``` + +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. + +Extract: +- `embeddingTime` (ms) +- `recall` at Hit@1, Hit@3, Hit@5, Hit@10 + +> **Timeout:** Each benchmark gets 5 minutes max (`timeout 300`). Exit code 124 indicates timeout — record `"timeout"` for that suite and continue. 
+ +> **Errors:** If a benchmark script fails (non-zero exit), record `"error: "` and continue with remaining benchmarks. + +## Phase 2 — Normalize Results + +Build a flat metrics object from all benchmark results: + +```json +{ + "timestamp": "", + "version": "", + "gitRef": "", + "metrics": { + "build.native.ms": 1234, + "build.wasm.ms": 2345, + "query.fnDeps.depth3.ms": 45, + "query.fnImpact.depth3.ms": 67, + "query.diffImpact.ms": 89, + "incremental.noOp.ms": 12, + "incremental.singleFile.ms": 34, + "incremental.importResolution.ms": 56, + "graph.nodes": 500, + "graph.edges": 1200, + "embedding.time.ms": 3000, + "embedding.recall.hit1": 0.85, + "embedding.recall.hit5": 0.95 + } +} +``` + +Adapt the metric keys to match whatever the benchmark scripts actually output — the above are representative. The goal is a flat key→number map for easy comparison. + +## Phase 3 — Compare Against Baseline + +Skip this phase if `SAVE_ONLY=true` or no baseline exists. + +For each metric in the current run: + +1. Look up the same metric in the baseline +2. Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A — baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) +3. Otherwise compute: `delta_pct = ((current - baseline) / baseline) * 100` +4. Classify: + - **Regression**: metric increased by more than `THRESHOLD`% (for time metrics) or decreased by more than `THRESHOLD`% (for recall/quality metrics) + - **Improvement**: metric decreased by more than `THRESHOLD`% (time) or increased (quality) + - **Stable**: within threshold + +> **Direction awareness:** For latency metrics (ms), higher = worse. For recall/quality metrics, higher = better. For count metrics (nodes, edges), changes are informational only — not regressions. 
+ +### Regression table + +| Metric | Baseline | Current | Delta | Status | +|--------|----------|---------|-------|--------| +| build.native.ms | 1200 | 1500 | +25% | REGRESSION | +| query.fnDeps.depth3.ms | 45 | 43 | -4.4% | stable | + +## Phase 4 — Verdict + +Based on comparison results: + +### No regressions found +- Print: `BENCH-CHECK PASSED — no regressions beyond {THRESHOLD}% threshold` +- If not `COMPARE_ONLY`: update baseline with current results + +### Regressions found +- Print: `BENCH-CHECK FAILED — {N} regressions detected` +- List each regression with metric name, baseline value, current value, delta % +- Do NOT update the baseline +- Suggest investigation: + - `git log --oneline ..HEAD` to find what changed + - `codegraph diff-impact -T` to find structural changes + - Re-run individual benchmarks to confirm (not flaky) + +### First run (no baseline) +- If `COMPARE_ONLY` is set: print a warning that no baseline exists and exit without saving +- Otherwise: print `BENCH-CHECK — initial baseline saved` and save current results as baseline + +### Save-baseline with existing baseline (`--save-baseline`) +- Print: `BENCH-CHECK — baseline overwritten (previous: , new: )` +- Save current results as the new baseline (overwrite existing) + +## Phase 5 — Save Baseline + +**Skip this phase if `COMPARE_ONLY` is set.** Compare-only mode never writes or commits baselines. +**Skip this phase if regressions were detected in Phase 4.** The baseline is only updated on a clean run. + +When saving (initial run, `--save-baseline`, or passed comparison): + +Write to `generated/bench-check/baseline.json`: +```json +{ + "savedAt": "", + "version": "", + "gitRef": "", + "threshold": $THRESHOLD, + "metrics": { ... } +} +``` + +Also append a one-line summary to `generated/bench-check/history.ndjson`: +```json +{"timestamp":"...","version":"...","gitRef":"...","metrics":{...}} +``` + +This creates a running log of benchmark results over time. 
+ +After writing both files, commit the baseline so it is a shared reference point: +```bash +git add generated/bench-check/baseline.json generated/bench-check/history.ndjson +git diff --cached --quiet -- generated/bench-check/baseline.json generated/bench-check/history.ndjson || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" +``` + +> `git add` first so that newly created files (first run) are staged; `--cached` then detects them correctly. Without this, `git diff --quiet` ignores untracked files and the baseline is never committed on the first run. + +## Phase 6 — Report + +Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`. + +**If `SAVE_ONLY` is set or no prior baseline existed (first run):** write a shortened report — omit the "Comparison vs Baseline" and "Regressions" sections since no comparison was performed: + +```markdown +# Benchmark Report — + +**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** $THRESHOLD% + +## Verdict: BASELINE SAVED — no comparison performed + +## Raw Results + + +``` + +**Otherwise (comparison was performed):** write the full report with comparison and verdict: + +```markdown +# Benchmark Report — + +**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** $THRESHOLD% + +## Verdict: PASSED / FAILED + +## Comparison vs Baseline + + + +## Regressions (if any) + + + +## Trend (if history.ndjson has 3+ entries) + + + +## Raw Results + + +``` + +## Phase 7 — Cleanup + +1. If report was written, print its path +2. If baseline was updated, print confirmation +3. 
Print one-line summary: `PASSED (0 regressions) | FAILED (N regressions) | BASELINE SAVED` + +## Rules + +- **Never skip a benchmark** — if it fails, record the failure and continue +- **Timeout is 5 minutes per benchmark** — use appropriate timeout flags +- **Don't update baseline on regression** — the user must investigate first +- **Recall/quality metrics are inverted** — a decrease is a regression +- **Count metrics are informational** — graph growing isn't a regression +- **The baseline file is committed to git** — it's a shared reference point; Phase 5 always commits it +- **history.ndjson is append-only** — never truncate or rewrite it +- Generated files go in `generated/bench-check/` — create the directory if needed diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md new file mode 100644 index 00000000..083a8c77 --- /dev/null +++ b/.claude/skills/deps-audit/SKILL.md @@ -0,0 +1,186 @@ +--- +name: deps-audit +description: Audit dependencies for vulnerabilities, staleness, unused packages, and license risks — produce a health report with actionable fixes +argument-hint: "[--fix] (optional — auto-fix safe updates)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /deps-audit — Dependency Health Audit + +Audit the project's dependency tree for security vulnerabilities, outdated packages, unused dependencies, and license compliance. Produce a structured report and optionally auto-fix safe updates. + +## Arguments + +- `$ARGUMENTS` may contain `--fix` to auto-apply safe updates (patch/minor only) + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root (check for `package.json` and `package-lock.json`) +2. Run `node --version` — must be >= 20 +3. Run `npm --version` to capture toolchain info +4. Parse `$ARGUMENTS` — set `AUTO_FIX=true` if `--fix` is present +5. 
**If `AUTO_FIX` is set:** Save the original manifests now, before any npm commands run, so pre-existing unstaged changes are preserved:
+   ```bash
+   STASH_BEFORE=$(git stash list | wc -l)
+   git stash push -m "deps-audit-backup" -- package.json package-lock.json
+   STASH_AFTER=$(git stash list | wc -l)
+   [ "$STASH_AFTER" -gt "$STASH_BEFORE" ]; STASH_CREATED=$?
+   ```
+   Track `STASH_CREATED` — when `0`, a stash entry was actually created; when `1`, the files had no changes so nothing was stashed. (The stash-list count is compared before and after because the exit code of `git stash push` cannot distinguish these cases — it exits `0` even when there is nothing to stash and merely prints "No local changes to save".)
+   If `STASH_CREATED` is `0`, immediately capture the stash ref for later use:
+   ```bash
+   STASH_REF=$(git stash list --format='%gd %s' | grep 'deps-audit-backup' | head -1 | awk '{print $1}')
+   ```
+   Use `$STASH_REF` (not `stash@{0}`) in all later stash drop/pop commands to avoid targeting the wrong entry if other stashes are pushed in the interim.
+
+## Phase 1 — Security Vulnerabilities
+
+Run `npm audit --json` and parse the output:
+
+1. Count vulnerabilities by severity: `critical`, `high`, `moderate`, `low`, `info`
+2. For each `critical` or `high` vulnerability:
+   - Record: package name, severity, CVE/GHSA ID, vulnerable version range, patched version, dependency path (direct vs transitive)
+   - Check if a fix is available (`npm audit fix --dry-run --json`)
+3. Summarize: total vulns, fixable count, breaking-fix count
+
+**If `AUTO_FIX` is set:** Run `npm audit fix` (non-breaking fixes only). Record what changed. Do NOT run `npm audit fix --force` — breaking changes require manual review.
+
+## Phase 2 — Outdated Dependencies
+
+Run `npm outdated --json` and categorize:
+
+### 2a. Direct dependencies (`dependencies` + `devDependencies`)
+
+For each outdated package, record:
+- Package name
+- Current version → Wanted (semver-compatible) → Latest
+- Whether the update is patch, minor, or major
+- If major: check the package's CHANGELOG/release notes for breaking changes relevant to our usage
+
+### 2b. 
Staleness score + +Classify each outdated dep: +| Category | Definition | +|----------|-----------| +| **Fresh** | On latest or within 1 patch | +| **Aging** | 1+ minor versions behind | +| **Stale** | 1+ major versions behind | +| **Abandoned** | No release in 12+ months (check npm registry publish date) | + +For any package classified as **Abandoned**, check if there's a maintained fork or alternative. + +**If `AUTO_FIX` is set:** Run `npm update` to apply semver-compatible updates. Record what changed. + +## Phase 3 — Unused Dependencies + +Detect dependencies declared in `package.json` but never imported: + +1. Read `dependencies` and `devDependencies` from `package.json` +2. For each dependency, search for imports/requires across `src/`, `tests/`, `scripts/`, `mcp/`, `graph/`, `ast-analysis/`, `cli.js`, `index.js`: + - `require('')` or `require('/...')` + - `import ... from ''` or `import ''` + - `import('')` (dynamic imports) +3. Skip known implicit dependencies that don't have direct imports: + - `@anthropic-ai/tokenizer` — peer dependency of `@anthropic-ai/sdk`; the SDK may require it at runtime without an explicit import in our code (verify against package.json before removing) + - `tree-sitter-*` and `web-tree-sitter` — loaded dynamically via WASM + - `@biomejs/biome` — used as CLI tool only + - `commit-and-tag-version` — used as npm script + - `@optave/codegraph-*` — platform-specific optional binaries + - `vitest` — test runner, invoked via CLI + - Anything in `optionalDependencies` +4. For each truly unused dep: recommend removal with `npm uninstall ` + +> **Important:** Some deps are used transitively or via CLI — don't blindly remove. Flag as "likely unused" and let the user decide. + +## Phase 4 — License Compliance + +Check licenses for all direct dependencies: + +1. For each package in `dependencies`, read its `node_modules//package.json` → `license` field +2. 
Classify: + - **Permissive** (MIT, ISC, BSD-2-Clause, BSD-3-Clause, Apache-2.0, 0BSD, Unlicense): OK + - **Weak copyleft** (LGPL-2.1, LGPL-3.0, MPL-2.0): Flag for review + - **Strong copyleft** (GPL-2.0, GPL-3.0, AGPL-3.0): Flag as risk — may conflict with MIT license of codegraph + - **Unknown/UNLICENSED/missing**: Flag for investigation +3. Only flag non-permissive licenses — don't list every MIT dep + +## Phase 5 — Duplicate Packages + +Check for duplicate versions of the same package in the dependency tree: + +1. Run `npm ls --all --json` and look for packages that appear multiple times with different versions +2. Only flag duplicates that add significant bundle weight (> 100KB) or are security-sensitive (crypto, auth, etc.) +3. Suggest deduplication: `npm dedupe` + +## Phase 6 — Report + +Write a report to `generated/deps-audit/DEPS_AUDIT_.md` with this structure: + +```markdown +# Dependency Audit Report — + +## Summary + +| Metric | Value | +|--------|-------| +| Total dependencies (direct) | N | +| Total dependencies (transitive) | N | +| Security vulnerabilities | N critical, N high, N moderate, N low | +| Outdated packages | N stale, N aging, N fresh | +| Unused dependencies | N | +| License risks | N | +| Duplicates | N | +| **Health score** | **X/100** | + +## Health Score Calculation + +- Start at 100 +- -20 per critical vuln, -10 per high vuln, -3 per moderate vuln +- -5 per stale (major behind) dep, -2 per aging dep +- -5 per unused dep +- -10 per copyleft license risk +- Floor at 0 + +## Security Vulnerabilities + + +## Outdated Packages + + +## Unused Dependencies + + +## License Flags + + +## Duplicates + + +## Recommended Actions + +``` + +## Phase 7 — Auto-fix Summary (if `--fix`) + +If `AUTO_FIX` was set: + +Summarize all changes made: +1. List each package updated/fixed +2. Run `npm test` to verify nothing broke +3. 
If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop $STASH_REF`) — the npm changes are good, no rollback needed
+   If tests pass and `STASH_CREATED` is `1`: no action needed — the npm changes are good and no stash entry exists to clean up
+4. If tests fail and `STASH_CREATED` is `0`:
+   - Discard the npm-modified manifests first so the stash applies cleanly (popping directly onto npm-changed files would merge-conflict): `git checkout -- package.json package-lock.json`
+   - Restore the saved manifests: `git stash pop $STASH_REF`
+   - Restore `node_modules/` to match the reverted lock file: `npm ci`
+   - Report what failed
+5. If tests fail and `STASH_CREATED` is `1`:
+   - Discard manifest changes: `git checkout -- package.json package-lock.json`
+   - Restore `node_modules/` to match the reverted lock file: `npm ci`
+   - Report what failed
+
+## Rules
+
+- **Never run `npm audit fix --force`** — breaking changes need human review
+- **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead
+- **Always run tests** after any auto-fix changes
+- **If `--fix` causes test failures**, restore manifests from the saved state (`git checkout -- package.json package-lock.json` then `git stash pop $STASH_REF` if `STASH_CREATED=0`, or `git checkout` alone if stash was a no-op) then run `npm ci` to resync `node_modules/`, and report the failure
+- Treat `optionalDependencies` separately — they're expected to fail on some platforms
+- The report goes in `generated/deps-audit/` — create the directory if it doesn't exist
diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md
new file mode 100644
index 00000000..ef15efb7
--- /dev/null
+++ b/.claude/skills/housekeep/SKILL.md
@@ -0,0 +1,241 @@
+---
+name: housekeep
+description: Local repo maintenance — clean stale worktrees, remove dirt files, sync with main, update codegraph, prune branches, and verify repo health
+argument-hint: "[--full | --dry-run | --skip-update] (default: full cleanup)"
+allowed-tools: Bash, Read, Write, Edit, Glob, Grep
+---
+
+# /housekeep — Local Repository Maintenance
+
+Clean up the local repo: remove stale worktrees, delete dirt/temp files, sync 
with main, update codegraph to latest, prune merged branches, and verify repo health. The "spring cleaning" routine. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--full` — run all phases (default behavior) + - `--dry-run` — show what would be cleaned without actually doing it + - `--skip-update` — skip the codegraph npm update phase + - No arguments — full cleanup + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root (check `package.json` with `"name": "@optave/codegraph"`) +2. Parse `$ARGUMENTS`: + - `DRY_RUN=true` if `--dry-run` + - `SKIP_UPDATE=true` if `--skip-update` +3. Record current branch: `git branch --show-current` +4. Record current git status: `git status --short` +5. Warn the user if there are uncommitted changes — housekeeping works best from a clean state + +## Phase 1 — Clean Stale Worktrees + +### 1a. List all worktrees + +```bash +git worktree list +``` + +### 1b. Identify stale worktrees + +A worktree is stale if: +- Its directory no longer exists on disk (prunable) +- It has no uncommitted changes AND its branch has been merged to main +- Its branch has no commits ahead of `origin/main` AND the branch's last commit is more than 7 days old + (check: `git log -1 --format=%ci ` — `git worktree list` does not expose creation timestamps) + +Check `.claude/worktrees/` for Claude Code worktrees specifically. + +### 1c. Clean up + +For prunable worktrees (missing directory): +```bash +git worktree prune +``` + +For stale worktrees with merged branches: +- List them and **always ask the user for confirmation before removing**, regardless of `--full` +- If confirmed: + ```bash + git worktree remove + git branch -d # only if fully merged + ``` + +**If `DRY_RUN`:** Just list what would be removed, don't do it. + +> **Never force-remove** a worktree with uncommitted changes. List it as "has uncommitted work" and skip. + +## Phase 2 — Delete Dirt Files + +Remove temporary and generated files that accumulate over time: + +### 2a. 
Known dirt patterns + +Search for and remove: +- `*.tmp.*`, `*.bak`, `*.orig` files in the repo (but NOT in `node_modules/`) +- `.DS_Store` files +- `*.log` files in repo root (not in `node_modules/`) +- Empty directories (except `.codegraph/`, `.claude/`, `node_modules/`) +- `coverage/` directory (regenerated by `npm run test:coverage`) +- `.codegraph/graph.db-journal` (SQLite WAL leftovers) +- Stale lock files: `.codegraph/*.lock` older than 1 hour + +### 2b. Large untracked files + +Find untracked files larger than 1MB: +```bash +git ls-files --others --exclude-standard | while read f; do + size=$(stat --format='%s' "$f" 2>/dev/null || stat -f '%z' "$f" 2>/dev/null) + [ -z "$size" ] && continue + if [ "$size" -gt 1048576 ]; then echo "$f ($size bytes)"; fi +done +``` + +Flag these for user review — they might be accidentally untracked binaries. + +### 2c. Clean up + +**If `DRY_RUN`:** List all files that would be removed with their sizes. + +**Otherwise:** +- Remove known dirt patterns automatically +- For large untracked files: list and ask the user + +> **Never delete** files that are tracked by git. Only clean untracked/ignored files. + +## Phase 3 — Sync with Main + +### 3a. Fetch latest + +```bash +git fetch origin +``` + +### 3b. Check main branch status + +```bash +git log HEAD..origin/main --oneline +``` + +If main has new commits: +- If on main: `git pull origin main` +- If on a feature branch: inform the user how many commits behind main they are + - Suggest: `git merge origin/main` (never rebase — per project rules) + +### 3c. Check for diverged branches + +List local branches that have diverged from their remote tracking branch: +```bash +git for-each-ref --format='%(refname:short) %(upstream:track)' refs/heads/ +``` + +Flag any branches marked `[ahead N, behind M]` — these may need attention. + +## Phase 4 — Prune Merged Branches + +### 4a. Find merged branches + +```bash +git branch --merged origin/main +``` + +### 4b. 
Safe to delete + +Branches that are: +- Fully merged into main +- Not `main` itself +- Not the current branch +- Not a worktree branch (check `git worktree list`) + +### 4c. Prune remote tracking refs + +```bash +git remote prune origin +``` + +This removes local refs to branches that no longer exist on the remote. + +### 4d. Clean up + +**If `DRY_RUN`:** List branches that would be deleted. + +**Otherwise:** For each merged branch, ask the user for confirmation before deleting: +``` +Delete merged branch ''? (y/n) +``` +If confirmed, delete the branch: +```bash +git branch -d # safe delete, only if fully merged +``` + +> **Never use `git branch -D`** (force delete). If `-d` fails, the branch has unmerged work — skip it. +> **Always confirm before deleting** — consistent with worktree removal in Phase 1c. + +## Phase 5 — Update Codegraph + +**Skip if `SKIP_UPDATE` is set.** + +> **Source-repo guard:** This phase is only meaningful when codegraph is installed as a *dependency* of a consumer project. Because the pre-flight confirms we are inside the codegraph *source* repo (`"name": "@optave/codegraph"`), comparing the dev version to the published release and running `npm install` would be a no-op — codegraph is not one of its own dependencies. **Skip this entire phase** when running inside the source repo and print: +> `Codegraph: skipped (running inside source repo — update via git pull / branch sync instead)` + +## Phase 6 — Verify Repo Health + +Quick health checks to catch issues: + +### 6a. Graph integrity + +```bash +npx codegraph stats +``` + +If the graph is stale (built from a different commit), rebuild: +```bash +npx codegraph build +``` + +### 6b. Node modules integrity + +```bash +npm ls --depth=0 2>&1 | grep -cE "missing|invalid|WARN" +``` + +If issues found: `npm install` to fix. + +### 6c. Git integrity + +```bash +git fsck --no-dangling 2>&1 | head -20 +``` + +Flag any errors (rare but important). 
+ +## Phase 7 — Report + +Print a summary to the console (no file needed — this is a local maintenance task): + +``` +=== Housekeeping Report === + +Worktrees: removed 2 stale, 1 has uncommitted work (skipped) +Dirt files: cleaned 5 temp files (12KB), 1 large untracked flagged +Branches: pruned 3 merged branches, 2 remote refs +Main sync: up to date (or: 4 commits behind — merge suggested) +Codegraph: v3.1.2 → v3.1.3 updated (or: already latest) +Graph: rebuilt (was stale) (or: fresh) +Node mods: OK (or: fixed 2 missing deps) +Git: OK + +Status: CLEAN ✓ +``` + +**If `DRY_RUN`:** prefix with `[DRY RUN]` and show what would happen without doing it. + +## Rules + +- **Never force-delete** anything — use safe deletes only (`git branch -d`, `git worktree remove`) +- **Never rebase** — sync with main via merge only (per project rules) +- **Never delete tracked files** — only clean untracked/ignored dirt +- **Never delete worktrees with uncommitted changes** — warn and skip +- **Ask before deleting large untracked files** — they might be intentional +- **This is a local-only operation** — no pushes, no remote modifications, no PR creation +- **Idempotent** — running twice should be safe (second run finds nothing to clean) +- **`--dry-run` is sacred** — it must NEVER modify anything, only report diff --git a/.claude/skills/review/SKILL.md b/.claude/skills/review/SKILL.md index b0a7e00d..ce3ef428 100644 --- a/.claude/skills/review/SKILL.md +++ b/.claude/skills/review/SKILL.md @@ -120,7 +120,29 @@ For **each** review comment — including minor suggestions, nits, style feedbac 2. **Read the relevant code** at the file and line referenced. 3. **Make the change.** Even if the comment is marked as "nit" or "suggestion" or "minor" — address it. The goal is zero outstanding comments. 4. **If you disagree** with a suggestion (e.g., it would introduce a bug or contradicts project conventions), do NOT silently ignore it. 
Reply to the comment explaining why you chose a different approach. -5. **Reply to each comment** explaining what you did. The reply mechanism depends on where the comment lives: +5. **If the fix is genuinely out of scope** for this PR (e.g., it affects a different module not touched by this PR, or requires a design decision beyond the PR's purpose), you MUST create a GitHub issue to track it before replying. Never reply with "acknowledged as follow-up" or "noted for later" without a tracked issue — untracked deferrals get lost and nobody will ever revisit them. + + ```bash + # Create a tracking issue for the deferred item + gh issue create \ + --title "follow-up: " \ + --body "$(cat <<'EOF' + Deferred from PR # review. + + **Original reviewer comment:** https://github.com/optave/codegraph/pull/#discussion_r + + **Context:** + EOF + )" \ + --label "follow-up" + ``` + + Then reply to the reviewer comment referencing the issue: + ```bash + gh api repos/optave/codegraph/pulls//comments//replies \ + -f body="Out of scope for this PR — tracked in #" + ``` +6. **Reply to each comment** explaining what you did. The reply mechanism depends on where the comment lives: **For inline PR review comments** (from Claude, Greptile, or humans — these have a `path` and `line`): ```bash @@ -220,3 +242,4 @@ After processing all PRs, output a summary table: - **One concern per commit** — don't lump conflict resolution with code fixes. - **Flag scope creep.** If a PR's diff contains files unrelated to its stated purpose (e.g., a docs PR carrying `src/` or test changes from a merged feature branch), flag it immediately. Split the unrelated changes into a separate branch and PR. Do not proceed with review until the PR is scoped correctly — scope creep is not acceptable. - If a PR is fundamentally broken beyond what review feedback can fix, note it in the summary and skip to the next PR. 
+- **Never defer without tracking.** Do not reply "acknowledged as follow-up", "noted for later", or "tracking for follow-up" to a reviewer comment without creating a GitHub issue first. If you can't fix it now and it's genuinely out of scope, create an issue with the `follow-up` label and include the issue link in your reply. Untracked acknowledgements are the same as ignoring the comment — they will never be revisited. diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md new file mode 100644 index 00000000..4c836586 --- /dev/null +++ b/.claude/skills/test-health/SKILL.md @@ -0,0 +1,261 @@ +--- +name: test-health +description: Audit test suite health — detect flaky tests, dead tests, coverage gaps, and missing assertions — produce a health report with fix suggestions +argument-hint: "[--flaky-runs 5 | --coverage | --quick] (default: full audit)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /test-health — Test Suite Health Audit + +Audit the test suite for flaky tests, dead/trivial tests, coverage gaps on recent changes, missing assertions, and structural issues. Produce a health report with prioritized recommendations. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--flaky-runs N` — number of times to run the suite for flaky detection (default: 5) + - `--coverage` — only run the coverage gap analysis (skip flaky/dead detection) + - `--quick` — skip flaky detection (most time-consuming), run everything else + - No arguments — full audit + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root +2. Verify vitest is available: `npx vitest --version` +3. Parse `$ARGUMENTS`: + - `FLAKY_RUNS=N` from `--flaky-runs N` (default: 5) + - `COVERAGE_ONLY=true` if `--coverage` + - `QUICK=true` if `--quick` +4. Discover all test files: + ```bash + find tests/ \( -name '*.test.js' -o -name '*.test.ts' \) | sort + ``` +5. 
Count total test files and categorize by directory (integration, parsers, graph, search, unit) + +## Phase 1 — Flaky Test Detection + +**Skip if `COVERAGE_ONLY` or `QUICK` is set.** + +Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: + +```bash +RUN_DIR=$(mktemp -d /tmp/test-health-XXXXXX) +for i in $(seq 1 $FLAKY_RUNS); do + timeout 180 npx vitest run --reporter=json > "$RUN_DIR/run-$i.json" 2>"$RUN_DIR/run-$i.err" + if [ $? -eq 124 ]; then + echo '{"timeout":true}' > "$RUN_DIR/run-$i.json" + fi +done +``` + +For each run, parse the JSON reporter output from `$RUN_DIR/run-$i.json` to get per-test results. + +After all runs are parsed and analysis is complete, clean up the temporary directory: +```bash +rm -rf "$RUN_DIR" +``` + +### Analysis + +A test is **flaky** if it passes in some runs and fails in others. + +For each flaky test found: +1. Record: test file, test name, pass count, fail count, failure messages +2. Categorize likely cause: + - **Timing-dependent**: failure message mentions timeout, race condition, or test has `setTimeout`/`sleep` + - **Order-dependent**: only fails when run with other tests (passes in isolation) + - **Resource-dependent**: mentions file system, network, port, or temp directory + - **Non-deterministic**: random/Date.now/Math.random in test or source + +> **Timeout:** Each full suite run gets 3 minutes (`timeout 180`). Exit code 124 indicates timeout — the run is recorded as `{"timeout":true}` and the loop continues. + +## Phase 2 — Dead & Trivial Test Detection + +**Skip if `COVERAGE_ONLY` is set.** + +Scan all test files for problematic patterns: + +### 2a. 
Empty / no-assertion tests + +Search for test bodies that: +- Have no `expect()`, `assert()`, `toBe()`, `toEqual()`, or similar assertion calls +- Only contain `console.log` or comments +- Are skipped: `it.skip(`, `test.skip(`, `xit(`, `xtest(` +- Are TODO: `it.todo(`, `test.todo(` + +``` +Pattern: test bodies with 0 assertions = dead tests +``` + +### 2b. Trivial / tautological tests + +Detect tests that assert on constants or trivially true conditions: +- `expect(true).toBe(true)` +- `expect(1).toBe(1)` +- `expect(result).toBeDefined()` as the ONLY assertion (too weak) + +### 2c. Commented-out tests + +Search for commented-out test blocks: +- `// it(`, `// test(`, `/* it(`, `/* test(` +- Large commented blocks inside `describe` blocks + +### 2d. Orphaned fixtures + +Check if any files in `tests/fixtures/` are not referenced by any test file. + +### 2e. Duplicate test names + +Search for duplicate test descriptions within the same `describe` block — these indicate copy-paste errors. + +## Phase 3 — Coverage Gap Analysis + +Run vitest with coverage and analyze: + +```bash +npx vitest run --coverage --coverage.reporter=json-summary 2>&1 +``` + +### 3a. Overall coverage + +Parse `coverage/coverage-summary.json` and extract: +- Line coverage % +- Branch coverage % +- Function coverage % +- Statement coverage % + +### 3b. Uncovered files + +Find source files in `src/` with 0% coverage (no tests touch them at all). + +### 3c. Low-coverage hotspots + +Find files with < 50% line coverage. For each: +- List uncovered functions (from the detailed coverage data) +- Check if the file is in `domain/` or `features/` (core logic — coverage matters more) +- Check file's complexity with `codegraph complexity -T` — high complexity + low coverage = high risk + +### 3d. Recent changes without coverage + +Compare against `main` branch to find recently changed files: + +```bash +git diff --name-only main...HEAD -- src/ +``` + +For each changed source file, check if: +1. 
It has corresponding test changes +2. Its coverage increased, decreased, or stayed the same +3. New functions/exports were added without test coverage + +> **Note:** If the coverage tool is not configured or fails, skip this phase and note it in the report. Coverage is a vitest plugin — it may need `@vitest/coverage-v8` installed. + +## Phase 4 — Test Structure Analysis + +**Skip if `COVERAGE_ONLY` is set.** + +Analyze the test suite's structural health: + +### 4a. Test-to-source mapping + +For each directory in `src/`: +- Count source files +- Count corresponding test files +- Calculate test coverage ratio (files with tests / total files) +- Flag directories with < 30% test file coverage + +### 4b. Test file size distribution + +- Find oversized test files (> 500 lines) — may need splitting +- Find tiny test files (< 10 lines) — may be stubs or dead + +### 4c. Setup/teardown hygiene + +Check for: +- Tests that create temp files/dirs but don't clean up (`afterEach`/`afterAll` missing) +- Tests that mutate global state without restoration +- Missing `beforeEach` resets in `describe` blocks that share state + +### 4d. Timeout analysis + +- Find tests with custom timeouts: `{ timeout: ... 
}` +- Find tests that exceed the default 30s timeout in recent runs +- High timeouts often indicate tests that should be restructured or are testing too much + +## Phase 5 — Report + +Write report to `generated/test-health/TEST_HEALTH_<date>.md`: + +```markdown +# Test Health Report — <date> + +## Summary + +| Metric | Value | +|--------|-------| +| Total test files | N | +| Total test cases | N | +| Flaky tests | N | +| Dead/trivial tests | N | +| Skipped tests | N | +| Coverage (lines) | X% | +| Coverage (branches) | X% | +| Uncovered source files | N | +| **Health score** | **X/100** | + +## Health Score Calculation + +- Start at 100 +- -10 per flaky test +- -3 per dead/trivial test +- -2 per skipped test (without TODO explaining why) +- -1 per uncovered source file in `domain/` or `features/` +- -(100 - line_coverage) / 5 (coverage penalty) +- Floor at 0 + +## Flaky Tests + +<table: test file · test name · pass/fail counts · likely cause> +## Dead & Trivial Tests + +<table: test file · test name · issue: empty / trivial / skipped / commented-out> +## Coverage Gaps + +<uncovered files, low-coverage hotspots, changed files without test coverage> +## Structural Issues + +<mapping gaps, oversized/tiny files, setup/teardown problems, timeout outliers> +## Recommended Actions + +### Priority 1 — Fix flaky tests + +<per-test fix suggestions> +### Priority 2 — Remove or fix dead tests + +<tests to delete or strengthen> +### Priority 3 — Add coverage for high-risk gaps + +<files/functions most in need of tests> +### Priority 4 — Structural improvements +<refactors and hygiene fixes> +``` + +## Phase 6 — Quick Wins + +After writing the report, identify tests that can be fixed immediately (< 5 min each): + +1. Remove `.skip` from tests that now pass (run them to check) +2. Add missing assertions to empty test bodies (if the intent is clear) +3. Delete commented-out test blocks older than 6 months (check git blame) + +**Do NOT auto-fix** — list these as suggestions in the report. The user decides.
+ +## Rules + +- **Never delete or modify test files** without explicit user approval — this is a read-only audit +- **Flaky detection is slow** — warn the user before running 5+ iterations +- **Coverage requires `@vitest/coverage-v8`** — if missing, skip coverage and note it +- **Order-dependent flakiness** requires running tests both in suite and in isolation — only do this for tests that flaked in Phase 1 +- **Fixture files may be shared** across tests — don't flag as orphaned if used indirectly +- **Skipped tests aren't always bad** — only flag if there's no `TODO` or comment explaining why +- Generated files go in `generated/test-health/` — create the directory if needed +- **This is a diagnostic tool** — it reports problems, it doesn't fix them (unless the user opts in) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index b9664157..4614c75e 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -17,8 +17,8 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**2.5**](#phase-25--analysis-expansion) | Analysis Expansion | Complexity metrics, community detection, flow tracing, co-change, manifesto, boundary rules, check, triage, audit, batch, hybrid search | **Complete** (v2.7.0) | | [**2.7**](#phase-27--deep-analysis--graph-enrichment) | Deep Analysis & Graph Enrichment | Dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, extractors refactoring, CLI consolidation, interactive viewer, exports command, normalizeSymbol | **Complete** (v3.0.0) | | [**3**](#phase-3--architectural-refactoring) | Architectural Refactoring (Vertical Slice) | Unified AST analysis framework, command/query separation, repository pattern, queries.js decomposition, composable MCP, CLI commands, domain errors, builder pipeline, presentation layer, domain grouping, curated API, unified graph model, qualified names, CLI composability | **Complete** (v3.1.5) | -| [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **In Progress** (5 of 6 complete) | -| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration, supply-chain security, CI coverage gates | **In Progress** (2 of 7 complete) | +| [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | +| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (76 of 283 src files migrated, ~27%) | | 
[**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | Planned | | [**7**](#phase-7--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system, DX & onboarding, confidence annotations, shell completion | Planned | | [**8**](#phase-8--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | @@ -994,9 +994,9 @@ src/domain/ --- -## Phase 4 -- Resolution Accuracy +## Phase 4 -- Resolution Accuracy ✅ -> **Status:** In Progress +> **Status:** Complete -- all 6 sub-phases shipped across v3.2.0 → v3.3.1 **Goal:** Close the most impactful gaps in call graph accuracy before investing in type safety or native acceleration. The entire value proposition — blast radius, impact analysis, dependency chains — rests on the call graph. These targeted improvements make the graph trustworthy. @@ -1080,12 +1080,14 @@ npm workspaces (`package.json` `workspaces`), `pnpm-workspace.yaml`, and `lerna. ## Phase 5 -- TypeScript Migration -> **Status:** In Progress +> **Status:** In Progress — 76 of 283 source files migrated (~27%), 207 `.js` files remaining **Goal:** Migrate the codebase from plain JavaScript to TypeScript, leveraging the clean module boundaries established in Phase 3. Incremental module-by-module migration starting from leaf modules inward. **Why after Phase 4:** The resolution accuracy work (Phase 4) operates on the existing JS codebase and produces immediate accuracy gains. TypeScript migration builds on Phase 3's clean module boundaries to add type safety across the entire codebase. 
Every subsequent phase benefits from types: MCP schema auto-generation, API contracts, refactoring safety. The Phase 4 resolution improvements (receiver tracking, interface edges) establish the resolution model that TypeScript types will formalize. +**Note:** `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). PRs #553, #554, #555, #566 migrated a first wave of files across steps 5.3–5.5, but substantial work remains in each step. 13 stale `.js` files have `.ts` counterparts and need deletion. + ### ~~5.1 -- Project Setup~~ ✅ TypeScript project configured with strict mode, ES module output, path aliases, incremental compilation, and `dist/` build output with source maps. Biome configured for `.ts` files. `package.json` `exports` point to compiled output. @@ -1108,50 +1110,61 @@ Comprehensive TypeScript type definitions for the entire domain model — symbol **New file:** `src/types.ts` ([#516](https://github.com/optave/codegraph/pull/516)) -### 5.3 -- Leaf Module Migration +### 5.3 -- Leaf Module Migration (In Progress) + +Migrate modules with no or minimal internal dependencies. 25 migrated, 4 remaining. 
+ +**Migrated (25):** `shared/errors`, `shared/kinds`, `shared/normalize`, `shared/paginate`, `shared/constants`, `shared/file-utils`, `shared/generators`, `shared/hierarchy`, `infrastructure/logger`, `infrastructure/config`, `infrastructure/native`, `infrastructure/registry`, `infrastructure/update-check`, `infrastructure/result-formatter`, `infrastructure/test-filter`, `db/repository/*` (14 files), `domain/analysis/*` (9 files), `presentation/colors`, `presentation/table` — via [#553](https://github.com/optave/codegraph/pull/553), [#566](https://github.com/optave/codegraph/pull/566) -Migrate modules with no internal dependencies first: +**Remaining (4):** | Module | Notes | |--------|-------| -| `src/errors.ts` | Domain error hierarchy (Phase 3.7) | -| `src/logger.ts` | Minimal, no internal deps | -| `src/constants.ts` | Pure data | -| `src/config.ts` | Config types derived from `.codegraphrc.json` schema | -| `src/db/connection.ts` | SQLite connection wrapper | -| `src/db/migrations.ts` | Schema version management | -| `src/formatters/*.ts` | Pure input->string transforms | -| `src/paginate.ts` | Generic pagination helpers | +| `src/db/connection.js` | SQLite connection wrapper | +| `src/db/index.js` | DB barrel/schema entry point | +| `src/db/migrations.js` | Schema version management | +| `src/db/query-builder.js` | Dynamic query builder | -Allow `.js` and `.ts` to coexist during migration (`allowJs: true` in tsconfig). +### 5.4 -- Core Module Migration (In Progress) -### 5.4 -- Core Module Migration +Migrate modules that implement domain logic and Phase 3 interfaces. Some migrated via [#554](https://github.com/optave/codegraph/pull/554), 39 files remaining. 
-Migrate modules that implement Phase 3 interfaces: +**Migrated:** `db/repository/*.ts` (14 files), `domain/parser.ts`, `domain/graph/resolve.ts`, `extractors/*.ts` (11 files), `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `graph/model.ts`, `graph/algorithms/{bfs,centrality,shortest-path,tarjan}.ts`, `graph/algorithms/leiden/rng.ts`, `graph/classifiers/{risk,roles}.ts` -| Module | Key types | -|--------|-----------| -| `src/db/repository.ts` | `Repository` interface, all prepared statements typed | -| `src/parser/engine.ts` | `Engine` interface, native/WASM dispatch | -| `src/parser/registry.ts` | `LanguageEntry` type, extension mapping | -| `src/parser/tree-utils.ts` | Tree-sitter node helpers | -| `src/parser/base-extractor.ts` | `Extractor` interface, handler map | -| `src/parser/extractors/*.ts` | Per-language extractors | -| `src/analysis/*.ts` | Typed analysis results (impact scores, call chains) | -| `src/resolve.ts` | Import resolution with confidence types | +**Remaining (39):** -### 5.5 -- Orchestration & Public API Migration +| Module | Files | Notes | +|--------|-------|-------| +| `domain/graph/builder/stages/` | 9 | All 9 build pipeline stages (collect-files, parse-files, resolve-imports, build-edges, etc.) 
| +| `domain/graph/builder/incremental.js` | 1 | Incremental rebuild logic | +| `domain/graph/{cycles,journal,change-journal}.js` | 3 | Graph utilities | +| `domain/queries.js` | 1 | Core query functions | +| `domain/search/search/` | 6 | Search subsystem (hybrid, semantic, keyword, filters, cli-formatter, prepare) | +| `domain/search/stores/` | 2 | FTS5, SQLite blob stores | +| `domain/search/strategies/` | 3 | Source, structured, text-utils strategies | +| `graph/algorithms/leiden/` | 6 | Leiden community detection (adapter, CPM, modularity, optimiser, partition, index) | +| `graph/algorithms/{louvain,index}.js` | 2 | Louvain + algorithms barrel | +| `graph/builders/` | 4 | Dependency, structure, temporal builders + barrel | +| `graph/classifiers/index.js` + `graph/index.js` | 2 | Barrel exports | -Migrate top-level orchestration and entry points: +### 5.5 -- Orchestration & Public API Migration (In Progress) -| Module | Notes | -|--------|-------| -| `src/builder.ts` | Pipeline stages with typed `PipelineStage` | -| `src/watcher.ts` | File system events + pipeline | -| `src/embeddings/*.ts` | Vector store interface, model registry | -| `src/mcp/*.ts` | Tool schemas, typed handlers | -| `src/cli/*.ts` | Command objects with typed options | -| `src/index.ts` | Curated public API with proper export types | +Migrate top-level orchestration, features, and entry points. Some migrated via [#555](https://github.com/optave/codegraph/pull/555), 159 files remaining. 
+ +**Migrated:** `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `mcp/{index,middleware,server,tool-registry}.ts`, `features/export.ts`, `index.ts` + +**Remaining (159):** + +| Module | Files | Notes | +|--------|-------|-------| +| `cli.js` + `cli/` | 55 | Commander entry point, 43 command handlers (`commands/`), barrel, shared CLI utilities | +| `mcp/tools/` | 36 | Individual MCP tool handlers + barrel | +| `presentation/` | 28 | Presentation formatters (14 files), `queries-cli/` (7 files), sequence-renderer, viewer, export, etc. | +| `features/` | 21 | audit, batch, boundaries, cfg, check, cochange, communities, complexity, dataflow, flow, graph-enrichment, manifesto, owners, sequence, snapshot, structure, triage, ast, branch-compare, `shared/find-nodes` | +| `ast-analysis/` | 18 | AST analysis framework, visitors (4), language-specific rules (9), engine, metrics, shared, visitor-utils | +| `index.js` | 1 | Public API exports (stale — `.ts` exists) | + +**Stale `.js` counterparts to delete (13 files):** `domain/graph/builder.js`, `domain/graph/builder/{context,helpers,pipeline}.js`, `domain/graph/watcher.js`, `domain/search/{generator,index,models}.js`, `features/export.js`, `mcp/{index,middleware,server,tool-registry}.js` — these have `.ts` counterparts already ### 5.6 -- Test Migration @@ -1162,7 +1175,7 @@ Migrate top-level orchestration and entry points: **Verification:** All existing tests pass. `tsc --noEmit` succeeds with zero errors. No `any` escape hatches except at FFI boundaries (napi-rs addon, tree-sitter WASM). 
-**Affected files:** All `src/**/*.js` -> `src/**/*.ts`, all `tests/**/*.js` -> `tests/**/*.ts`, `package.json`, `biome.json` +**Affected files:** All remaining `src/**/*.js` → `src/**/*.ts`, all `tests/**/*.js` → `tests/**/*.ts`, `package.json`, `biome.json` --- diff --git a/src/db/connection.js b/src/db/connection.ts similarity index 70% rename from src/db/connection.js rename to src/db/connection.ts index 59114bbd..cadd04e0 100644 --- a/src/db/connection.js +++ b/src/db/connection.ts @@ -4,11 +4,15 @@ import path from 'node:path'; import Database from 'better-sqlite3'; import { debug, warn } from '../infrastructure/logger.js'; import { DbError } from '../shared/errors.js'; +import type { BetterSqlite3Database } from '../types.js'; import { Repository } from './repository/base.js'; import { SqliteRepository } from './repository/sqlite-repository.js'; -let _cachedRepoRoot; // undefined = not computed, null = not a git repo -let _cachedRepoRootCwd; // cwd at the time the cache was populated +/** DB instance with optional advisory lock path. */ +export type LockedDatabase = BetterSqlite3Database & { __lockPath?: string }; + +let _cachedRepoRoot: string | null | undefined; // undefined = not computed, null = not a git repo +let _cachedRepoRootCwd: string | undefined; // cwd at the time the cache was populated /** * Return the git worktree/repo root for the given directory (or cwd). @@ -17,15 +21,13 @@ let _cachedRepoRootCwd; // cwd at the time the cache was populated * Results are cached per-process when called without arguments. * The cache is keyed on cwd so it invalidates if the working directory changes * (e.g. MCP server serving multiple sessions). 
- * @param {string} [fromDir] - Directory to resolve from (defaults to cwd) - * @returns {string | null} Absolute path to repo root, or null if not in a git repo */ -export function findRepoRoot(fromDir) { +export function findRepoRoot(fromDir?: string): string | null { const dir = fromDir || process.cwd(); if (!fromDir && _cachedRepoRoot !== undefined && _cachedRepoRootCwd === dir) { return _cachedRepoRoot; } - let root = null; + let root: string | null = null; try { const raw = execFileSync('git', ['rev-parse', '--show-toplevel'], { cwd: dir, @@ -38,11 +40,11 @@ export function findRepoRoot(fromDir) { try { root = fs.realpathSync(raw); } catch (e) { - debug(`realpathSync failed for git root "${raw}", using resolve: ${e.message}`); + debug(`realpathSync failed for git root "${raw}", using resolve: ${(e as Error).message}`); root = path.resolve(raw); } } catch (e) { - debug(`git rev-parse failed for "${dir}": ${e.message}`); + debug(`git rev-parse failed for "${dir}": ${(e as Error).message}`); root = null; } if (!fromDir) { @@ -53,22 +55,22 @@ export function findRepoRoot(fromDir) { } /** Reset the cached repo root (for testing). 
*/ -export function _resetRepoRootCache() { +export function _resetRepoRootCache(): void { _cachedRepoRoot = undefined; _cachedRepoRootCwd = undefined; } -function isProcessAlive(pid) { +function isProcessAlive(pid: number): boolean { try { process.kill(pid, 0); return true; } catch (e) { - debug(`PID ${pid} not alive: ${e.code || e.message}`); + debug(`PID ${pid} not alive: ${(e as NodeJS.ErrnoException).code || (e as Error).message}`); return false; } } -function acquireAdvisoryLock(dbPath) { +function acquireAdvisoryLock(dbPath: string): void { const lockPath = `${dbPath}.lock`; try { if (fs.existsSync(lockPath)) { @@ -79,23 +81,23 @@ function acquireAdvisoryLock(dbPath) { } } } catch (e) { - debug(`Advisory lock read failed: ${e.message}`); + debug(`Advisory lock read failed: ${(e as Error).message}`); } try { fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); } catch (e) { - debug(`Advisory lock write failed: ${e.message}`); + debug(`Advisory lock write failed: ${(e as Error).message}`); } } -function releaseAdvisoryLock(lockPath) { +function releaseAdvisoryLock(lockPath: string): void { try { const content = fs.readFileSync(lockPath, 'utf-8').trim(); if (Number(content) === process.pid) { fs.unlinkSync(lockPath); } } catch (e) { - debug(`Advisory lock release failed for ${lockPath}: ${e.message}`); + debug(`Advisory lock release failed for ${lockPath}: ${(e as Error).message}`); } } @@ -104,58 +106,64 @@ function releaseAdvisoryLock(lockPath) { * Handles Windows 8.3 short names (RUNNER~1 vs runneradmin) and macOS * symlinks (/tmp vs /private/tmp) where string comparison fails. 
*/ -function isSameDirectory(a, b) { +function isSameDirectory(a: string, b: string): boolean { if (path.resolve(a) === path.resolve(b)) return true; try { const sa = fs.statSync(a); const sb = fs.statSync(b); return sa.dev === sb.dev && sa.ino === sb.ino; } catch (e) { - debug(`isSameDirectory stat failed: ${e.message}`); + debug(`isSameDirectory stat failed: ${(e as Error).message}`); return false; } } -export function openDb(dbPath) { +export function openDb(dbPath: string): LockedDatabase { const dir = path.dirname(dbPath); if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); acquireAdvisoryLock(dbPath); - const db = new Database(dbPath); + // vendor.d.ts declares Database as a callable; cast through unknown for construct usage + const db = new ( + Database as unknown as new ( + path: string, + opts?: Record<string, unknown>, + ) => LockedDatabase + )(dbPath); db.pragma('journal_mode = WAL'); db.pragma('busy_timeout = 5000'); db.__lockPath = `${dbPath}.lock`; return db; } -export function closeDb(db) { +export function closeDb(db: LockedDatabase): void { db.close(); if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); } -export function findDbPath(customPath) { +export function findDbPath(customPath?: string): string { if (customPath) return path.resolve(customPath); const rawCeiling = findRepoRoot(); // Normalize ceiling with realpathSync to resolve 8.3 short names (Windows // RUNNER~1 → runneradmin) and symlinks (macOS /var → /private/var). // findRepoRoot already applies realpathSync internally, but the git output // may still contain short names on some Windows CI environments. - let ceiling; + let ceiling: string | null; if (rawCeiling) { try { ceiling = fs.realpathSync(rawCeiling); } catch (e) { - debug(`realpathSync failed for ceiling "${rawCeiling}": ${e.message}`); + debug(`realpathSync failed for ceiling "${rawCeiling}": ${(e as Error).message}`); ceiling = rawCeiling; } } else { ceiling = null; } // Resolve symlinks (e.g.
macOS /var → /private/var) so dir matches ceiling from git - let dir; + let dir: string; try { dir = fs.realpathSync(process.cwd()); } catch (e) { - debug(`realpathSync failed for cwd: ${e.message}`); + debug(`realpathSync failed for cwd: ${(e as Error).message}`); dir = process.cwd(); } while (true) { @@ -173,10 +181,8 @@ export function findDbPath(customPath) { return path.join(base, '.codegraph', 'graph.db'); } -/** - * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. - */ -export function openReadonlyOrFail(customPath) { +/** Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. */ +export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { const dbPath = findDbPath(customPath); if (!fs.existsSync(dbPath)) { throw new DbError( @@ -184,7 +190,12 @@ export function openReadonlyOrFail(customPath) { { file: dbPath }, ); } - return new Database(dbPath, { readonly: true }); + return new ( + Database as unknown as new ( + path: string, + opts?: Record<string, unknown>, + ) => BetterSqlite3Database + )(dbPath, { readonly: true }); } /** @@ -192,13 +203,11 @@ export function openReadonlyOrFail(customPath) { * * When `opts.repo` is a Repository instance, returns it directly (no DB opened). * Otherwise opens a readonly SQLite DB and wraps it in SqliteRepository.
- * - * @param {string} [customDbPath] - Path to graph.db (ignored when opts.repo is set) - * @param {object} [opts] - * @param {Repository} [opts.repo] - Pre-built Repository to use instead of SQLite - * @returns {{ repo: Repository, close(): void }} */ -export function openRepo(customDbPath, opts = {}) { +export function openRepo( + customDbPath?: string, + opts: { repo?: Repository } = {}, +): { repo: Repository; close(): void } { if (opts.repo != null) { if (!(opts.repo instanceof Repository)) { throw new TypeError( diff --git a/src/db/index.js b/src/db/index.ts similarity index 96% rename from src/db/index.js rename to src/db/index.ts index 1f657a83..d28e8939 100644 --- a/src/db/index.js +++ b/src/db/index.ts @@ -1,4 +1,6 @@ // Barrel re-export — keeps all existing `import { ... } from '…/db/index.js'` working. + +export type { LockedDatabase } from './connection.js'; export { closeDb, findDbPath, diff --git a/src/db/migrations.js b/src/db/migrations.ts similarity index 90% rename from src/db/migrations.js rename to src/db/migrations.ts index ecafa49e..ade92708 100644 --- a/src/db/migrations.js +++ b/src/db/migrations.ts @@ -1,7 +1,14 @@ import { debug } from '../infrastructure/logger.js'; +import type { BetterSqlite3Database } from '../types.js'; // ─── Schema Migrations ───────────────────────────────────────────────── -export const MIGRATIONS = [ + +interface Migration { + version: number; + up: string; +} + +export const MIGRATIONS: Migration[] = [ { version: 1, up: ` @@ -242,28 +249,43 @@ export const MIGRATIONS = [ }, ]; -function hasColumn(db, table, column) { - const cols = db.pragma(`table_info(${table})`); +interface PragmaColumnInfo { + name: string; + type: string; + notnull: number; + dflt_value: unknown; + pk: number; +} + +function hasColumn(db: BetterSqlite3Database, table: string, column: string): boolean { + const cols = db.pragma(`table_info(${table})`) as PragmaColumnInfo[]; return cols.some((c) => c.name === column); } -function 
hasTable(db, table) { - const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?").get(table); +function hasTable(db: BetterSqlite3Database, table: string): boolean { + const row = db + .prepare<{ '1': number }>("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?") + .get(table); return !!row; } -export function getBuildMeta(db, key) { +export function getBuildMeta(db: BetterSqlite3Database, key: string): string | null { if (!hasTable(db, 'build_meta')) return null; try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + const row = db + .prepare<{ value: string }>('SELECT value FROM build_meta WHERE key = ?') + .get(key); return row ? row.value : null; } catch (e) { - debug(`getBuildMeta failed for key "${key}": ${e.message}`); + debug(`getBuildMeta failed for key "${key}": ${(e as Error).message}`); return null; } } -export function setBuildMeta(db, entries) { +export function setBuildMeta( + db: BetterSqlite3Database, + entries: Record<string, string>, +): void { const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); const tx = db.transaction(() => { for (const [key, value] of Object.entries(entries)) { @@ -273,10 +295,10 @@ export function setBuildMeta(db, entries) { tx(); } -export function initSchema(db) { +export function initSchema(db: BetterSqlite3Database): void { db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); - const row = db.prepare('SELECT version FROM schema_version').get(); + const row = db.prepare<{ version: number }>('SELECT version FROM schema_version').get(); let currentVersion = row ?
row.version : 0; if (!row) { diff --git a/src/db/query-builder.js b/src/db/query-builder.ts similarity index 76% rename from src/db/query-builder.js rename to src/db/query-builder.ts index ae2d11db..66fe5cd9 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.ts @@ -1,5 +1,6 @@ import { DbError } from '../shared/errors.js'; import { DEAD_ROLE_PREFIX, EVERY_EDGE_KIND } from '../shared/kinds.js'; +import type { BetterSqlite3Database } from '../types.js'; // ─── Validation Helpers ───────────────────────────────────────────── @@ -11,19 +12,19 @@ const SAFE_ORDER_TERM_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?\s*(?:asc|de const SAFE_SELECT_TOKEN_RE = /^(?:[a-z_][a-z0-9_]*(?:\.[a-z_*][a-z0-9_]*)?\s*(?:as\s+[a-z_][a-z0-9_]*)?|[a-z_]+\([^)]*\)\s*(?:as\s+[a-z_][a-z0-9_]*)?)$/i; -function validateAlias(alias) { +function validateAlias(alias: string): void { if (!SAFE_ALIAS_RE.test(alias)) { throw new DbError(`Invalid SQL alias: ${alias}`); } } -function validateColumn(column) { +function validateColumn(column: string): void { if (!SAFE_COLUMN_RE.test(column)) { throw new DbError(`Invalid SQL column: ${column}`); } } -function validateOrderBy(clause) { +function validateOrderBy(clause: string): void { const terms = clause.split(',').map((t) => t.trim()); for (const term of terms) { if (!SAFE_ORDER_TERM_RE.test(term)) { @@ -32,8 +33,8 @@ function validateOrderBy(clause) { } } -function splitTopLevelCommas(str) { - const parts = []; +function splitTopLevelCommas(str: string): string[] { + const parts: string[] = []; let depth = 0; let start = 0; for (let i = 0; i < str.length; i++) { @@ -48,7 +49,7 @@ function splitTopLevelCommas(str) { return parts; } -function validateSelectCols(cols) { +function validateSelectCols(cols: string): void { const tokens = splitTopLevelCommas(cols); for (const token of tokens) { if (!SAFE_SELECT_TOKEN_RE.test(token)) { @@ -57,8 +58,8 @@ function validateSelectCols(cols) { } } -function validateEdgeKind(edgeKind) { - if 
(!EVERY_EDGE_KIND.includes(edgeKind)) { +function validateEdgeKind(edgeKind: string): void { + if (!EVERY_EDGE_KIND.includes(edgeKind as never)) { throw new DbError( `Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`, ); @@ -68,17 +69,15 @@ function validateEdgeKind(edgeKind) { // ─── LIKE Escaping ────────────────────────────────────────────────── /** Escape LIKE wildcards in a literal string segment. */ -export function escapeLike(s) { +export function escapeLike(s: string): string { return s.replace(/[%_\\]/g, '\\$&'); } /** * Normalize a file filter value (string, string[], or falsy) into a flat array. * Returns an empty array when the input is falsy. - * @param {string|string[]|undefined|null} file - * @returns {string[]} */ -export function normalizeFileFilter(file) { +export function normalizeFileFilter(file: string | string[] | undefined | null): string[] { if (!file) return []; return Array.isArray(file) ? file : [file]; } @@ -86,19 +85,18 @@ export function normalizeFileFilter(file) { /** * Build a SQL condition + params for a multi-value file LIKE filter. * Returns `{ sql: '', params: [] }` when the filter is empty. - * - * @param {string|string[]} file - One or more partial file paths - * @param {string} [column='file'] - The column name to filter on (e.g. 'n.file', 'a.file') - * @returns {{ sql: string, params: string[] }} */ -export function buildFileConditionSQL(file, column = 'file') { +export function buildFileConditionSQL( + file: string | string[], + column = 'file', +): { sql: string; params: string[] } { validateColumn(column); const files = normalizeFileFilter(file); if (files.length === 0) return { sql: '', params: [] }; if (files.length === 1) { return { sql: ` AND ${column} LIKE ? ESCAPE '\\'`, - params: [`%${escapeLike(files[0])}%`], + params: [`%${escapeLike(files[0] as string)}%`], }; } const clauses = files.map(() => `${column} LIKE ? 
ESCAPE '\\'`); @@ -111,11 +109,8 @@ export function buildFileConditionSQL(file, column = 'file') { /** * Commander option accumulator for repeatable `--file` flag. * Use as: `['-f, --file ', 'Scope to file (partial match, repeatable)', collectFile]` - * @param {string} val - New value from Commander - * @param {string[]} acc - Accumulated values (undefined on first call) - * @returns {string[]} */ -export function collectFile(val, acc) { +export function collectFile(val: string, acc?: string[]): string[] { acc = acc || []; acc.push(val); return acc; @@ -126,10 +121,8 @@ export function collectFile(val, acc) { /** * Return a SQL AND clause that excludes test/spec/stories files. * Returns empty string when disabled. - * @param {string} [column='n.file'] - Column to filter on - * @param {boolean} [enabled=true] - No-op when false */ -export function testFilterSQL(column = 'n.file', enabled = true) { +export function testFilterSQL(column = 'n.file', enabled = true): string { if (!enabled) return ''; validateColumn(column); return `AND ${column} NOT LIKE '%.test.%' @@ -139,12 +132,8 @@ export function testFilterSQL(column = 'n.file', enabled = true) { AND ${column} NOT LIKE '%.stories.%'`; } -/** - * Build IN (?, ?, ?) placeholders and params array for a kind filter. - * @param {string[]} kinds - * @returns {{ placeholders: string, params: string[] }} - */ -export function kindInClause(kinds) { +/** Build IN (?, ?, ?) placeholders and params array for a kind filter. */ +export function kindInClause(kinds: string[]): { placeholders: string; params: string[] } { return { placeholders: kinds.map(() => '?').join(', '), params: [...kinds], @@ -153,10 +142,8 @@ export function kindInClause(kinds) { /** * Return a LEFT JOIN subquery for fan-in (incoming edge count). 
- * @param {string} [edgeKind='calls'] - Edge kind to count - * @param {string} [alias='fi'] - Subquery alias */ -export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { +export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi'): string { validateEdgeKind(edgeKind); validateAlias(alias); return `LEFT JOIN ( @@ -166,10 +153,8 @@ export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { /** * Return a LEFT JOIN subquery for fan-out (outgoing edge count). - * @param {string} [edgeKind='calls'] - Edge kind to count - * @param {string} [alias='fo'] - Subquery alias */ -export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { +export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo'): string { validateEdgeKind(edgeKind); validateAlias(alias); return `LEFT JOIN ( @@ -185,21 +170,21 @@ export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { */ export class NodeQuery { #selectCols = 'n.*'; - #joins = []; - #conditions = []; - #params = []; + #joins: string[] = []; + #conditions: string[] = []; + #params: (string | number)[] = []; #orderByClause = ''; - #limitValue = null; + #limitValue: number | null = null; /** Set SELECT columns (default: `n.*`). */ - select(cols) { + select(cols: string): this { validateSelectCols(cols); this.#selectCols = cols; return this; } /** WHERE n.kind IN (?, ?, ...) */ - kinds(kindArray) { + kinds(kindArray: string[] | undefined | null): this { if (!kindArray || kindArray.length === 0) return this; const { placeholders, params } = kindInClause(kindArray); this.#conditions.push(`n.kind IN (${placeholders})`); @@ -208,7 +193,7 @@ export class NodeQuery { } /** Add 5 NOT LIKE conditions to exclude test files. No-op when enabled is falsy. */ - excludeTests(enabled) { + excludeTests(enabled: boolean | undefined): this { if (!enabled) return this; this.#conditions.push( `n.file NOT LIKE '%.test.%'`, @@ -221,12 +206,12 @@ export class NodeQuery { } /** WHERE n.file LIKE ? (no-op if falsy). 
Accepts a single string or string[]. */ - fileFilter(file) { + fileFilter(file: string | string[] | undefined | null): this { const files = normalizeFileFilter(file); if (files.length === 0) return this; if (files.length === 1) { this.#conditions.push("n.file LIKE ? ESCAPE '\\'"); - this.#params.push(`%${escapeLike(files[0])}%`); + this.#params.push(`%${escapeLike(files[0] as string)}%`); } else { const clauses = files.map(() => "n.file LIKE ? ESCAPE '\\'"); this.#conditions.push(`(${clauses.join(' OR ')})`); @@ -236,7 +221,7 @@ export class NodeQuery { } /** WHERE n.kind = ? (no-op if falsy). */ - kindFilter(kind) { + kindFilter(kind: string | undefined | null): this { if (!kind) return this; this.#conditions.push('n.kind = ?'); this.#params.push(kind); @@ -244,7 +229,7 @@ export class NodeQuery { } /** WHERE n.role = ? (no-op if falsy). 'dead' matches all dead-* sub-roles. */ - roleFilter(role) { + roleFilter(role: string | undefined | null): this { if (!role) return this; if (role === DEAD_ROLE_PREFIX) { this.#conditions.push('n.role LIKE ?'); @@ -257,7 +242,7 @@ export class NodeQuery { } /** WHERE n.name LIKE ? (no-op if falsy). Escapes LIKE wildcards in the value. */ - nameLike(pattern) { + nameLike(pattern: string | undefined | null): this { if (!pattern) return this; this.#conditions.push("n.name LIKE ? ESCAPE '\\'"); this.#params.push(`%${escapeLike(pattern)}%`); @@ -265,54 +250,54 @@ export class NodeQuery { } /** Raw WHERE condition escape hatch. */ - where(sql, ...params) { + where(sql: string, ...params: (string | number)[]): this { this.#conditions.push(sql); this.#params.push(...params); return this; } /** Add fan-in LEFT JOIN subquery. */ - withFanIn(edgeKind = 'calls') { + withFanIn(edgeKind = 'calls'): this { return this._join(fanInJoinSQL(edgeKind)); } /** Add fan-out LEFT JOIN subquery. */ - withFanOut(edgeKind = 'calls') { + withFanOut(edgeKind = 'calls'): this { return this._join(fanOutJoinSQL(edgeKind)); } /** LEFT JOIN function_complexity. 
*/ - withComplexity() { + withComplexity(): this { return this._join('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); } /** LEFT JOIN file_commit_counts. */ - withChurn() { + withChurn(): this { return this._join('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); } - /** @private Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ - _join(sql) { + /** @internal Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ + _join(sql: string): this { this.#joins.push(sql); return this; } /** ORDER BY clause. */ - orderBy(clause) { + orderBy(clause: string): this { validateOrderBy(clause); this.#orderByClause = clause; return this; } /** LIMIT ?. */ - limit(n) { + limit(n: number | undefined | null): this { if (n == null) return this; this.#limitValue = n; return this; } /** Build the SQL and params without executing. */ - build() { + build(): { sql: string; params: (string | number)[] } { const joins = this.#joins.length > 0 ? `\n ${this.#joins.join('\n ')}` : ''; const where = this.#conditions.length > 0 ? `\n WHERE ${this.#conditions.join(' AND ')}` : ''; @@ -330,20 +315,20 @@ export class NodeQuery { } /** Execute and return all rows. */ - all(db) { + all>(db: BetterSqlite3Database): TRow[] { const { sql, params } = this.build(); - return db.prepare(sql).all(...params); + return db.prepare(sql).all(...params) as TRow[]; } /** Execute and return first row. */ - get(db) { + get>(db: BetterSqlite3Database): TRow | undefined { const { sql, params } = this.build(); - return db.prepare(sql).get(...params); + return db.prepare(sql).get(...params) as TRow | undefined; } /** Execute and return an iterator. 
*/ - iterate(db) { + iterate>(db: BetterSqlite3Database): IterableIterator { const { sql, params } = this.build(); - return db.prepare(sql).iterate(...params); + return db.prepare(sql).iterate(...params) as IterableIterator; } } diff --git a/src/domain/analysis/module-map.ts b/src/domain/analysis/module-map.ts index 7e300ea5..35b64331 100644 --- a/src/domain/analysis/module-map.ts +++ b/src/domain/analysis/module-map.ts @@ -170,17 +170,17 @@ function getEmbeddingsInfo(db: BetterSqlite3Database) { | { c: number } | undefined; if (count && count.c > 0) { - const meta: Record = {}; + const meta: { model?: string; dim?: string; built_at?: string } = {}; const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all() as Array<{ key: string; value: string; }>; - for (const r of metaRows) meta[r.key] = r.value; + for (const r of metaRows) (meta as Record)[r.key] = r.value; return { count: count.c, - model: meta['model'] || null, - dim: meta['dim'] ? parseInt(meta['dim'], 10) : null, - builtAt: meta['built_at'] || null, + model: meta.model || null, + dim: meta.dim ? parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, }; } } catch (e: unknown) { @@ -280,13 +280,13 @@ function countRoles(db: BetterSqlite3Database, noTests: boolean) { .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') .all() as Array<{ role: string; c: number }>; } - const roles: Record = {}; + const roles: Record & { dead?: number } = {}; let deadTotal = 0; for (const r of roleRows) { roles[r.role] = r.c; if (r.role.startsWith(DEAD_ROLE_PREFIX)) deadTotal += r.c; } - if (deadTotal > 0) roles['dead'] = deadTotal; + if (deadTotal > 0) roles.dead = deadTotal; return roles; } @@ -354,11 +354,16 @@ export function moduleMapData(customDbPath: string, limit = 20, opts: { noTests? 
coupling: n.in_edges + n.out_edges, })); - const totalNodes = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c; - const totalEdges = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c; - const totalFiles = ( - db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number } - ).c; + const totalNodes = + (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number } | undefined)?.c ?? 0; + const totalEdges = + (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number } | undefined)?.c ?? 0; + const totalFiles = + ( + db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as + | { c: number } + | undefined + )?.c ?? 0; return { limit, topNodes, stats: { totalFiles, totalNodes, totalEdges } }; } finally { diff --git a/src/domain/queries.js b/src/domain/queries.ts similarity index 98% rename from src/domain/queries.js rename to src/domain/queries.ts index 7a3c6207..b35ad981 100644 --- a/src/domain/queries.js +++ b/src/domain/queries.ts @@ -1,5 +1,5 @@ /** - * queries.js — Barrel re-export file. + * queries.ts — Barrel re-export file. * * All query logic lives in the sub-modules under src/analysis/ and src/shared/. 
* This file exists purely for backward compatibility so that all existing diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts index 9129332c..085acbea 100644 --- a/src/domain/search/generator.ts +++ b/src/domain/search/generator.ts @@ -61,7 +61,7 @@ export async function buildEmbeddings( options: BuildEmbeddingsOptions = {}, ): Promise { const strategy = options.strategy || 'structured'; - const dbPath = customDbPath || findDbPath(null); + const dbPath = customDbPath || findDbPath(undefined); if (!fs.existsSync(dbPath)) { throw new DbError( diff --git a/src/graph/algorithms/index.js b/src/graph/algorithms/index.ts similarity index 100% rename from src/graph/algorithms/index.js rename to src/graph/algorithms/index.ts diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js deleted file mode 100644 index c5425a5f..00000000 --- a/src/graph/algorithms/leiden/adapter.js +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Graph adapter that converts a CodeGraph into the dense array format - * expected by the Leiden optimiser. - * - * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph. - */ - -/** - * @param {import('../../model.js').CodeGraph} graph - * @param {object} [opts] - * @param {boolean} [opts.directed] - * @param {(attrs: object) => number} [opts.linkWeight] - extract weight from edge attrs - * @param {(attrs: object) => number} [opts.nodeSize] - extract size from node attrs - * @param {string[]} [opts.baseNodeIds] - */ -export function makeGraphAdapter(graph, opts = {}) { - const linkWeight = - opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); - const nodeSize = - opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); - const directed = !!opts.directed; - const baseNodeIds = opts.baseNodeIds; - - // Build dense node index mapping - const nodeIds = []; - const idToIndex = new Map(); - if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { - for (let i = 0; i < baseNodeIds.length; i++) { - const id = baseNodeIds[i]; - if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`); - idToIndex.set(id, i); - nodeIds.push(id); - } - } else { - for (const [id] of graph.nodes()) { - idToIndex.set(id, nodeIds.length); - nodeIds.push(id); - } - } - const n = nodeIds.length; - - // Storage - const size = new Float64Array(n); - const selfLoop = new Float64Array(n); - const strengthOut = new Float64Array(n); - const strengthIn = new Float64Array(n); - - // Edge list by source for fast iteration - const outEdges = new Array(n); - const inEdges = new Array(n); - for (let i = 0; i < n; i++) { - outEdges[i] = []; - inEdges[i] = []; - } - - // Populate from graph - if (directed) { - for (const [src, tgt, attrs] of graph.edges()) { - const from = idToIndex.get(src); - const to = idToIndex.get(tgt); - if (from == null || to == null) continue; - const w = +linkWeight(attrs) || 0; - if (from === to) { - selfLoop[from] += w; - // Self-loop is intentionally kept in outEdges/inEdges as well. - // partition.js's moveNodeToCommunity (directed path) accounts for this - // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid - // triple-counting (see partition.js moveNodeToCommunity directed block). 
- } - outEdges[from].push({ to, w }); - inEdges[to].push({ from, w }); - strengthOut[from] += w; - strengthIn[to] += w; - } - } else { - // Undirected: symmetrize and average reciprocal pairs - const pairAgg = new Map(); - - for (const [src, tgt, attrs] of graph.edges()) { - const a = idToIndex.get(src); - const b = idToIndex.get(tgt); - if (a == null || b == null) continue; - const w = +linkWeight(attrs) || 0; - if (a === b) { - selfLoop[a] += w; - continue; - } - const i = a < b ? a : b; - const j = a < b ? b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; - } - - for (const [key, rec] of pairAgg.entries()) { - const [iStr, jStr] = key.split(':'); - const i = +iStr; - const j = +jStr; - const dirCount = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); - const w = dirCount > 0 ? rec.sum / dirCount : 0; - if (w === 0) continue; - outEdges[i].push({ to: j, w }); - outEdges[j].push({ to: i, w }); - inEdges[i].push({ from: j, w }); - inEdges[j].push({ from: i, w }); - strengthOut[i] += w; - strengthOut[j] += w; - strengthIn[i] += w; - strengthIn[j] += w; - } - - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.js are written to match this convention, keeping the system self-consistent. 
- for (let v = 0; v < n; v++) { - const w = selfLoop[v]; - if (w !== 0) { - outEdges[v].push({ to: v, w }); - inEdges[v].push({ from: v, w }); - strengthOut[v] += w; - strengthIn[v] += w; - } - } - } - - // Node sizes - for (const [id, attrs] of graph.nodes()) { - const i = idToIndex.get(id); - if (i != null) size[i] = +nodeSize(attrs) || 0; - } - - // Totals - const totalWeight = strengthOut.reduce((a, b) => a + b, 0); - - function forEachNeighbor(i, cb) { - const list = outEdges[i]; - for (let k = 0; k < list.length; k++) cb(list[k].to, list[k].w); - } - - return { - n, - nodeIds, - idToIndex, - size, - selfLoop, - strengthOut, - strengthIn, - outEdges, - inEdges, - directed, - totalWeight, - forEachNeighbor, - }; -} diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts new file mode 100644 index 00000000..5434cee0 --- /dev/null +++ b/src/graph/algorithms/leiden/adapter.ts @@ -0,0 +1,204 @@ +/** + * Graph adapter that converts a CodeGraph into the dense array format + * expected by the Leiden optimiser. + * + * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph. 
+ */ + +import type { CodeGraph, EdgeAttrs, NodeAttrs } from '../../model.js'; + +export interface EdgeEntry { + to: number; + w: number; +} + +export interface InEdgeEntry { + from: number; + w: number; +} + +export interface GraphAdapterOptions { + directed?: boolean; + linkWeight?: (attrs: EdgeAttrs) => number; + nodeSize?: (attrs: NodeAttrs) => number; + baseNodeIds?: string[]; +} + +export interface GraphAdapter { + n: number; + nodeIds: string[]; + idToIndex: Map; + size: Float64Array; + selfLoop: Float64Array; + strengthOut: Float64Array; + strengthIn: Float64Array; + outEdges: EdgeEntry[][]; + inEdges: InEdgeEntry[][]; + directed: boolean; + totalWeight: number; + forEachNeighbor: (i: number, cb: (to: number, w: number) => void) => void; +} + +// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess +// widens the return to `number | undefined`. These helpers wrap compound assignment +// patterns (+=, -=) that appear frequently in this performance-critical code. +function taGet(a: Float64Array, i: number): number { + return a[i] as number; +} + +function taAdd(a: Float64Array, i: number, v: number): void { + a[i] = taGet(a, i) + v; +} + +function taSub(a: Float64Array, i: number, v: number): void { + a[i] = taGet(a, i) - v; +} + +export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { + const linkWeight: (attrs: EdgeAttrs) => number = + opts.linkWeight || + // biome-ignore lint/complexity/useLiteralKeys: index signature requires bracket access + ((attrs) => (attrs && typeof attrs['weight'] === 'number' ? attrs['weight'] : 1)); + const nodeSize: (attrs: NodeAttrs) => number = + // biome-ignore lint/complexity/useLiteralKeys: index signature requires bracket access + opts.nodeSize || ((attrs) => (attrs && typeof attrs['size'] === 'number' ? 
attrs['size'] : 1)); + const directed: boolean = !!opts.directed; + const baseNodeIds: string[] | undefined = opts.baseNodeIds; + + // Build dense node index mapping + const nodeIds: string[] = []; + const idToIndex = new Map(); + if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { + for (let i = 0; i < baseNodeIds.length; i++) { + const id = baseNodeIds[i] as string; + if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`); + idToIndex.set(id, i); + nodeIds.push(id); + } + } else { + for (const [id] of graph.nodes()) { + idToIndex.set(id, nodeIds.length); + nodeIds.push(id); + } + } + const n: number = nodeIds.length; + + // Storage + const size = new Float64Array(n); + const selfLoop = new Float64Array(n); + const strengthOut = new Float64Array(n); + const strengthIn = new Float64Array(n); + + // Edge list by source for fast iteration + const outEdges: EdgeEntry[][] = new Array(n); + const inEdges: InEdgeEntry[][] = new Array(n); + for (let i = 0; i < n; i++) { + outEdges[i] = []; + inEdges[i] = []; + } + + // Populate from graph + if (directed) { + for (const [src, tgt, attrs] of graph.edges()) { + const from = idToIndex.get(src); + const to = idToIndex.get(tgt); + if (from == null || to == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (from === to) { + taAdd(selfLoop, from, w); + // Self-loop is intentionally kept in outEdges/inEdges as well. + // partition.ts's moveNodeToCommunity (directed path) accounts for this + // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid + // triple-counting (see partition.ts moveNodeToCommunity directed block). 
+ } + (outEdges[from] as EdgeEntry[]).push({ to, w }); + (inEdges[to] as InEdgeEntry[]).push({ from, w }); + taAdd(strengthOut, from, w); + taAdd(strengthIn, to, w); + } + } else { + // Undirected: symmetrize and average reciprocal pairs + const pairAgg = new Map(); + + for (const [src, tgt, attrs] of graph.edges()) { + const a = idToIndex.get(src); + const b = idToIndex.get(tgt); + if (a == null || b == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (a === b) { + taAdd(selfLoop, a, w); + continue; + } + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; + } + + for (const [key, rec] of pairAgg.entries()) { + const parts = key.split(':'); + const i = +(parts[0] as string); + const j = +(parts[1] as string); + const dirCount: number = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); + const w: number = dirCount > 0 ? rec.sum / dirCount : 0; + if (w === 0) continue; + (outEdges[i] as EdgeEntry[]).push({ to: j, w }); + (outEdges[j] as EdgeEntry[]).push({ to: i, w }); + (inEdges[i] as InEdgeEntry[]).push({ from: j, w }); + (inEdges[j] as InEdgeEntry[]).push({ from: i, w }); + taAdd(strengthOut, i, w); + taAdd(strengthOut, j, w); + taAdd(strengthIn, i, w); + taAdd(strengthIn, j, w); + } + + // Add self-loops into adjacency and strengths. + // Note: uses single-w convention (not standard 2w) — the modularity formulas in + // modularity.ts are written to match this convention, keeping the system self-consistent. 
+ for (let v = 0; v < n; v++) { + const w: number = taGet(selfLoop, v); + if (w !== 0) { + (outEdges[v] as EdgeEntry[]).push({ to: v, w }); + (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); + taAdd(strengthOut, v, w); + taAdd(strengthIn, v, w); + } + } + } + + // Node sizes + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + + // Totals + const totalWeight: number = strengthOut.reduce((a, b) => a + b, 0); + + function forEachNeighbor(i: number, cb: (to: number, w: number) => void): void { + const list = outEdges[i] as EdgeEntry[]; + for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); + } + + return { + n, + nodeIds, + idToIndex, + size, + selfLoop, + strengthOut, + strengthIn, + outEdges, + inEdges, + directed, + totalWeight, + forEachNeighbor, + }; +} diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js deleted file mode 100644 index b32a2167..00000000 --- a/src/graph/algorithms/leiden/cpm.js +++ /dev/null @@ -1,39 +0,0 @@ -/** - * CPM (Constant Potts Model) quality functions. - * Vendored from ngraph.leiden (MIT) — no external dependencies. - */ - -export function diffCPM(part, g, v, c, gamma = 1.0) { - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - let w_old, w_new; - let selfCorrection = 0; - if (g.directed) { - w_old = - (part.getOutEdgeWeightToCommunity(oldC) || 0) + - (part.getInEdgeWeightFromCommunity(oldC) || 0); - w_new = - c < g.n - ? (part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) - : 0; - // Self-loop weight appears in both out and in arrays for oldC, - // making w_old include 2×selfLoop. Correct to match moveNodeToCommunity. - selfCorrection = 2 * (g.selfLoop[v] || 0); - } else { - w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - w_new = c < g.n ? 
part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; - } - const s_v = g.size[v] || 1; - const S_old = part.communityTotalSize[oldC] || 0; - const S_new = c < part.communityTotalSize.length ? part.communityTotalSize[c] : 0; - return w_new - w_old + selfCorrection - gamma * s_v * (S_new - S_old + s_v); -} - -export function qualityCPM(part, _g, gamma = 1.0) { - let sum = 0; - for (let c = 0; c < part.communityCount; c++) { - const S = part.communityTotalSize[c] || 0; - sum += part.communityInternalEdgeWeight[c] - (gamma * (S * (S - 1))) / 2; - } - return sum; -} diff --git a/src/graph/algorithms/leiden/cpm.ts b/src/graph/algorithms/leiden/cpm.ts new file mode 100644 index 00000000..957a605f --- /dev/null +++ b/src/graph/algorithms/leiden/cpm.ts @@ -0,0 +1,77 @@ +/** + * CPM (Constant Potts Model) quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + */ + +/** + * Minimal view of a partition needed by CPM quality functions. + */ +export interface PartitionView { + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalSize: Float64Array; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; + getNeighborEdgeWeightToCommunity(c: number): number; +} + +/** + * Minimal view of a graph needed by CPM quality functions. 
+ */ +export interface GraphView { + n: number; + directed: boolean; + selfLoop: Float64Array; + size: Float64Array; +} + +// Typed array safe-access helper (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function diffCPM( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + let w_old: number; + let w_new: number; + let selfCorrection: number = 0; + if (g.directed) { + w_old = + (part.getOutEdgeWeightToCommunity(oldC) || 0) + + (part.getInEdgeWeightFromCommunity(oldC) || 0); + w_new = + c < g.n + ? (part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) + : 0; + // Self-loop weight appears in both out and in arrays for oldC, + // making w_old include 2x selfLoop. Correct to match moveNodeToCommunity. + selfCorrection = 2 * (fget(g.selfLoop, v) || 0); + } else { + w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; + } + const s_v: number = fget(g.size, v) || 1; + const S_old: number = fget(part.communityTotalSize, oldC) || 0; + const S_new: number = c < part.communityTotalSize.length ? 
fget(part.communityTotalSize, c) : 0; + return w_new - w_old + selfCorrection - gamma * s_v * (S_new - S_old + s_v); +} + +export function qualityCPM(part: PartitionView, _g: GraphView, gamma: number = 1.0): number { + let sum: number = 0; + for (let c = 0; c < part.communityCount; c++) { + const S: number = fget(part.communityTotalSize, c) || 0; + sum += fget(part.communityInternalEdgeWeight, c) - (gamma * (S * (S - 1))) / 2; + } + return sum; +} diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js deleted file mode 100644 index 4db9a027..00000000 --- a/src/graph/algorithms/leiden/index.js +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Leiden community detection — vendored from ngraph.leiden (MIT). - * Adapted to work directly with CodeGraph (no external graph library dependency). - * - * Original: https://github.com/anvaka/ngraph.leiden - * License: MIT — see LICENSE in this directory. - */ - -import { qualityCPM } from './cpm.js'; -import { qualityModularity } from './modularity.js'; -import { runLouvainUndirectedModularity } from './optimiser.js'; - -/** - * Detect communities in a CodeGraph using the Leiden algorithm. - * - * @param {import('../../model.js').CodeGraph} graph - * @param {object} [options] - * @param {number} [options.randomSeed=42] - * @param {boolean} [options.directed=false] - * @param {boolean} [options.refine=true] - Leiden refinement (set false for plain Louvain) - * @param {string} [options.quality='modularity'] - 'modularity' | 'cpm' - * @param {number} [options.resolution=1.0] - * @param {number} [options.maxCommunitySize] - * @param {Set|Array} [options.fixedNodes] - * @param {string} [options.candidateStrategy] - 'neighbors' | 'all' | 'random' | 'random-neighbor' - * @param {number} [options.refinementTheta=1.0] - Temperature for probabilistic Leiden refinement (Algorithm 3, Traag et al. 2019). Lower → more greedy, higher → more exploratory. 
Deterministic via seeded PRNG - * @returns {{ getClass(id): number, getCommunities(): Map, quality(): number, toJSON(): object }} - * - * **Note on `quality()`:** For modularity, `quality()` always evaluates at γ=1.0 - * (standard Newman-Girvan modularity) regardless of the `resolution` used during - * optimization. This makes quality values comparable across runs with different - * resolutions. For CPM, `quality()` uses the caller-specified resolution since γ - * is intrinsic to the CPM metric. Do not use modularity `quality()` values to - * compare partitions found at different resolutions — they reflect Q at γ=1.0, - * not the objective that was actually optimized. - */ -export function detectClusters(graph, options = {}) { - const { levels, originalToCurrent, originalNodeIds, baseGraph } = runLouvainUndirectedModularity( - graph, - options, - ); - - const idToClass = new Map(); - for (let i = 0; i < originalNodeIds.length; i++) { - const comm = originalToCurrent[i]; - idToClass.set(originalNodeIds[i], comm); - } - - return { - getClass(nodeId) { - return idToClass.get(String(nodeId)); - }, - getCommunities() { - const out = new Map(); - for (const [id, c] of idToClass) { - if (!out.has(c)) out.set(c, []); - out.get(c).push(id); - } - return out; - }, - quality() { - // Compute quality on the original (level-0) graph with the final - // partition mapped back. Computing on the last coarse-level graph - // produces inflated values because the modularity null model depends - // on the degree distribution, which changes after coarsening. - const part = buildOriginalPartition(baseGraph, originalToCurrent); - const q = (options.quality || 'modularity').toLowerCase(); - if (q === 'cpm') { - const gamma = typeof options.resolution === 'number' ? 
options.resolution : 1.0; - return qualityCPM(part, baseGraph, gamma); - } - // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity - return qualityModularity(part, baseGraph, 1.0); - }, - toJSON() { - const membershipObj = {}; - for (const [id, c] of idToClass) membershipObj[id] = c; - return { - membership: membershipObj, - meta: { levels: levels.length, quality: this.quality(), options }, - }; - }, - }; -} - -/** - * Build a minimal partition-like object from the original graph and the - * final community mapping, suitable for qualityModularity / qualityCPM. - */ -function buildOriginalPartition(g, communityMap) { - const n = g.n; - let maxC = 0; - for (let i = 0; i < n; i++) if (communityMap[i] > maxC) maxC = communityMap[i]; - const cc = maxC + 1; - - const internalWeight = new Float64Array(cc); - const totalStr = new Float64Array(cc); - const totalOutStr = new Float64Array(cc); - const totalInStr = new Float64Array(cc); - const totalSize = new Float64Array(cc); - - for (let i = 0; i < n; i++) { - const c = communityMap[i]; - totalSize[c] += g.size[i]; - if (g.directed) { - totalOutStr[c] += g.strengthOut[i]; - totalInStr[c] += g.strengthIn[i]; - } else { - totalStr[c] += g.strengthOut[i]; - } - if (g.selfLoop[i]) internalWeight[c] += g.selfLoop[i]; - } - - if (g.directed) { - for (let i = 0; i < n; i++) { - const ci = communityMap[i]; - const list = g.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (i === j) continue; - if (ci === communityMap[j]) internalWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = communityMap[i]; - const list = g.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (j <= i) continue; - if (ci === communityMap[j]) internalWeight[ci] += w; - } - } - } - - return { - communityCount: cc, - communityInternalEdgeWeight: internalWeight, - communityTotalStrength: totalStr, - communityTotalOutStrength: totalOutStr, - 
communityTotalInStrength: totalInStr, - communityTotalSize: totalSize, - }; -} diff --git a/src/graph/algorithms/leiden/index.ts b/src/graph/algorithms/leiden/index.ts new file mode 100644 index 00000000..fb627951 --- /dev/null +++ b/src/graph/algorithms/leiden/index.ts @@ -0,0 +1,185 @@ +/** + * Leiden community detection — vendored from ngraph.leiden (MIT). + * Adapted to work directly with CodeGraph (no external graph library dependency). + * + * Original: https://github.com/anvaka/ngraph.leiden + * License: MIT — see LICENSE in this directory. + */ + +import type { CodeGraph } from '../../model.js'; +import type { GraphAdapter } from './adapter.js'; +import { qualityCPM } from './cpm.js'; +import { qualityModularity } from './modularity.js'; +import type { LeidenOptions } from './optimiser.js'; +import { runLouvainUndirectedModularity } from './optimiser.js'; + +export type { LeidenOptions } from './optimiser.js'; + +export type DetectClustersOptions = LeidenOptions; + +export interface DetectClustersResult { + getClass(nodeId: string | number): number | undefined; + getCommunities(): Map; + quality(): number; + toJSON(): { + membership: Record; + meta: { levels: number; quality: number; options: DetectClustersOptions }; + }; +} + +// Typed array safe-access helpers (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +/** + * Detect communities in a CodeGraph using the Leiden algorithm. + * + * Note on `quality()`: For modularity, `quality()` always evaluates at gamma=1.0 + * (standard Newman-Girvan modularity) regardless of the `resolution` used during + * optimization. This makes quality values comparable across runs with different + * resolutions. For CPM, `quality()` uses the caller-specified resolution since gamma + * is intrinsic to the CPM metric. 
Do not use modularity `quality()` values to + * compare partitions found at different resolutions — they reflect Q at gamma=1.0, + * not the objective that was actually optimized. + */ +export function detectClusters( + graph: CodeGraph, + options: DetectClustersOptions = {}, +): DetectClustersResult { + const { levels, originalToCurrent, originalNodeIds, baseGraph } = runLouvainUndirectedModularity( + graph, + options, + ); + + const idToClass = new Map(); + for (let i = 0; i < originalNodeIds.length; i++) { + const comm: number = iget(originalToCurrent, i); + idToClass.set(originalNodeIds[i]!, comm); + } + + return { + getClass(nodeId: string | number): number | undefined { + return idToClass.get(String(nodeId)); + }, + getCommunities(): Map { + const out = new Map(); + for (const [id, c] of idToClass) { + if (!out.has(c)) out.set(c, []); + out.get(c)!.push(id); + } + return out; + }, + quality(): number { + // Compute quality on the original (level-0) graph with the final + // partition mapped back. Computing on the last coarse-level graph + // produces inflated values because the modularity null model depends + // on the degree distribution, which changes after coarsening. + const part = buildOriginalPartition(baseGraph, originalToCurrent); + const q: string = (options.quality || 'modularity').toLowerCase(); + if (q === 'cpm') { + const gamma: number = typeof options.resolution === 'number' ? 
options.resolution : 1.0; + return qualityCPM(part, baseGraph, gamma); + } + // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity + return qualityModularity(part, baseGraph, 1.0); + }, + toJSON() { + const membershipObj: Record = {}; + for (const [id, c] of idToClass) membershipObj[id] = c; + return { + membership: membershipObj, + meta: { levels: levels.length, quality: this.quality(), options }, + }; + }, + }; +} + +/** + * Minimal partition-like object built from the original graph and the + * final community mapping, suitable for qualityModularity / qualityCPM. + * + * Implements the subset of PartitionView needed by the quality functions + * (no scratch-space methods needed since this is read-only evaluation). + */ +interface OriginalPartition { + communityCount: number; + nodeCommunity: Int32Array; + communityInternalEdgeWeight: Float64Array; + communityTotalStrength: Float64Array; + communityTotalOutStrength: Float64Array; + communityTotalInStrength: Float64Array; + communityTotalSize: Float64Array; + // Stub methods required by PartitionView but not called by qualityModularity/qualityCPM + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; +} + +function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { + const n: number = g.n; + let maxC: number = 0; + for (let i = 0; i < n; i++) { + const ci = iget(communityMap, i); + if (ci > maxC) maxC = ci; + } + const cc: number = maxC + 1; + + const nodeCommunity = communityMap; + const internalWeight = new Float64Array(cc); + const totalStr = new Float64Array(cc); + const totalOutStr = new Float64Array(cc); + const totalInStr = new Float64Array(cc); + const totalSize = new Float64Array(cc); + + for (let i = 0; i < n; i++) { + const c: number = iget(communityMap, i); + totalSize[c] = fget(totalSize, c) + fget(g.size, i); + if (g.directed) { + totalOutStr[c] = 
fget(totalOutStr, c) + fget(g.strengthOut, i); + totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); + } else { + totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); + } + if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); + } + + if (g.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(communityMap, i); + const list = g.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (i === j) continue; + if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(communityMap, i); + const list = g.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (j <= i) continue; + if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; + } + } + } + + return { + communityCount: cc, + nodeCommunity, + communityInternalEdgeWeight: internalWeight, + communityTotalStrength: totalStr, + communityTotalOutStrength: totalOutStr, + communityTotalInStrength: totalInStr, + communityTotalSize: totalSize, + // Stubs — quality functions only read the aggregate arrays, not these methods + getNeighborEdgeWeightToCommunity: () => 0, + getOutEdgeWeightToCommunity: () => 0, + getInEdgeWeightFromCommunity: () => 0, + }; +} diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js deleted file mode 100644 index 15a5caf0..00000000 --- a/src/graph/algorithms/leiden/modularity.js +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Modularity quality functions. - * Vendored from ngraph.leiden (MIT) — no external dependencies. 
- */ - -export function diffModularity(part, g, v, c, gamma = 1.0) { - if (g.directed) return diffModularityDirected(part, g, v, c, gamma); - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - const k_v = g.strengthOut[v]; - const m2 = g.totalWeight; - const k_v_in_new = part.getNeighborEdgeWeightToCommunity(c) || 0; - const k_v_in_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - const wTot_new = c < part.communityTotalStrength.length ? part.communityTotalStrength[c] : 0; - const wTot_old = part.communityTotalStrength[oldC]; - const gain_remove = -(k_v_in_old / m2 - (gamma * (k_v * wTot_old)) / (m2 * m2)); - const gain_add = k_v_in_new / m2 - (gamma * (k_v * wTot_new)) / (m2 * m2); - return gain_remove + gain_add; -} - -export function diffModularityDirected(part, g, v, c, gamma = 1.0) { - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - const m = g.totalWeight; - const k_out = g.strengthOut[v]; - const k_in = g.strengthIn[v]; - const w_new_in = c < g.n ? part.getInEdgeWeightFromCommunity(c) || 0 : 0; - const w_new_out = c < g.n ? part.getOutEdgeWeightToCommunity(c) || 0 : 0; - const w_old_in = part.getInEdgeWeightFromCommunity(oldC) || 0; - const w_old_out = part.getOutEdgeWeightToCommunity(oldC) || 0; - const T_new = c < part.communityTotalInStrength.length ? part.communityTotalInStrength[c] : 0; - const F_new = c < part.communityTotalOutStrength.length ? part.communityTotalOutStrength[c] : 0; - const T_old = part.communityTotalInStrength[oldC]; - const F_old = part.communityTotalOutStrength[oldC]; - // Self-loop correction: the self-loop edge (v→v) appears in both - // outEdgeWeightToCommunity[oldC] and inEdgeWeightFromCommunity[oldC], - // making w_old include 2×selfLoop. Since the self-loop moves with the - // node, add it back to match moveNodeToCommunity's directed accounting. 
- const selfW = g.selfLoop[v] || 0; - const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out + 2 * selfW) / m; - // The full Δ(F·T) expansion includes a constant 2·k_out·k_in term that - // doesn't depend on the target community but does affect the move-vs-stay - // decision. Without it, coarse-level merges can appear profitable when - // they actually decrease quality. - const deltaExpected = - (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old) + 2 * k_out * k_in)) / (m * m); - return deltaInternal - deltaExpected; -} - -export function qualityModularity(part, g, gamma = 1.0) { - const m2 = g.totalWeight; - let sum = 0; - if (g.directed) { - for (let c = 0; c < part.communityCount; c++) - sum += - part.communityInternalEdgeWeight[c] / m2 - - (gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c])) / - (m2 * m2); - } else { - // communityInternalEdgeWeight counts each undirected edge once (j > i), - // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard - // Newman-Girvan formula is Q = Σ_c [2·L_c/(2m) - γ·(d_c/(2m))²], so - // we multiply lc by 2 to match. - for (let c = 0; c < part.communityCount; c++) { - const lc = part.communityInternalEdgeWeight[c]; - const dc = part.communityTotalStrength[c]; - sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2); - } - } - return sum; -} diff --git a/src/graph/algorithms/leiden/modularity.ts b/src/graph/algorithms/leiden/modularity.ts new file mode 100644 index 00000000..98a9a038 --- /dev/null +++ b/src/graph/algorithms/leiden/modularity.ts @@ -0,0 +1,122 @@ +/** + * Modularity quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + */ + +/** + * Minimal view of a partition needed by modularity quality functions. 
+ */ +export interface PartitionView { + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalStrength: Float64Array; + readonly communityTotalOutStrength: Float64Array; + readonly communityTotalInStrength: Float64Array; + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; +} + +/** + * Minimal view of a graph needed by modularity quality functions. + */ +export interface GraphView { + n: number; + directed: boolean; + totalWeight: number; + strengthOut: Float64Array; + strengthIn: Float64Array; + selfLoop: Float64Array; +} + +// Typed array safe-access helper (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function diffModularity( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + if (g.directed) return diffModularityDirected(part, g, v, c, gamma); + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + const k_v: number = fget(g.strengthOut, v); + const m2: number = g.totalWeight; + const k_v_in_new: number = part.getNeighborEdgeWeightToCommunity(c) || 0; + const k_v_in_old: number = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + const wTot_new: number = + c < part.communityTotalStrength.length ? 
fget(part.communityTotalStrength, c) : 0; + const wTot_old: number = fget(part.communityTotalStrength, oldC); + const gain_remove: number = -(k_v_in_old / m2 - (gamma * (k_v * wTot_old)) / (m2 * m2)); + const gain_add: number = k_v_in_new / m2 - (gamma * (k_v * wTot_new)) / (m2 * m2); + return gain_remove + gain_add; +} + +export function diffModularityDirected( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + const m: number = g.totalWeight; + const k_out: number = fget(g.strengthOut, v); + const k_in: number = fget(g.strengthIn, v); + const w_new_in: number = c < g.n ? part.getInEdgeWeightFromCommunity(c) || 0 : 0; + const w_new_out: number = c < g.n ? part.getOutEdgeWeightToCommunity(c) || 0 : 0; + const w_old_in: number = part.getInEdgeWeightFromCommunity(oldC) || 0; + const w_old_out: number = part.getOutEdgeWeightToCommunity(oldC) || 0; + const T_new: number = + c < part.communityTotalInStrength.length ? fget(part.communityTotalInStrength, c) : 0; + const F_new: number = + c < part.communityTotalOutStrength.length ? fget(part.communityTotalOutStrength, c) : 0; + const T_old: number = fget(part.communityTotalInStrength, oldC); + const F_old: number = fget(part.communityTotalOutStrength, oldC); + // Self-loop correction: the self-loop edge (v->v) appears in both + // outEdgeWeightToCommunity[oldC] and inEdgeWeightFromCommunity[oldC], + // making w_old include 2x selfLoop. Since the self-loop moves with the + // node, add it back to match moveNodeToCommunity's directed accounting. + const selfW: number = fget(g.selfLoop, v) || 0; + const deltaInternal: number = (w_new_in + w_new_out - w_old_in - w_old_out + 2 * selfW) / m; + // The full delta(F*T) expansion includes a constant 2*k_out*k_in term that + // doesn't depend on the target community but does affect the move-vs-stay + // decision. 
Without it, coarse-level merges can appear profitable when + // they actually decrease quality. + const deltaExpected: number = + (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old) + 2 * k_out * k_in)) / (m * m); + return deltaInternal - deltaExpected; +} + +export function qualityModularity(part: PartitionView, g: GraphView, gamma: number = 1.0): number { + const m2: number = g.totalWeight; + let sum: number = 0; + if (g.directed) { + for (let c = 0; c < part.communityCount; c++) + sum += + fget(part.communityInternalEdgeWeight, c) / m2 - + (gamma * + (fget(part.communityTotalOutStrength, c) * fget(part.communityTotalInStrength, c))) / + (m2 * m2); + } else { + // communityInternalEdgeWeight counts each undirected edge once (j > i), + // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard + // Newman-Girvan formula is Q = sum_c [2*L_c/(2m) - gamma*(d_c/(2m))^2], so + // we multiply lc by 2 to match. + for (let c = 0; c < part.communityCount; c++) { + const lc: number = fget(part.communityInternalEdgeWeight, c); + const dc: number = fget(part.communityTotalStrength, c); + sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2); + } + } + return sum; +} diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.ts similarity index 52% rename from src/graph/algorithms/leiden/optimiser.js rename to src/graph/algorithms/leiden/optimiser.ts index 52a5a732..d658b895 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.ts @@ -4,44 +4,100 @@ */ import { CodeGraph } from '../../model.js'; +import type { EdgeEntry, GraphAdapter, GraphAdapterOptions, InEdgeEntry } from './adapter.js'; import { makeGraphAdapter } from './adapter.js'; import { diffCPM } from './cpm.js'; import { diffModularity } from './modularity.js'; +import type { Partition } from './partition.js'; import { makePartition } from './partition.js'; import { createRng } from './rng.js'; // Mirrored in 
DEFAULTS.community (src/infrastructure/config.js) for user override -// via .codegraphrc.json. Callers (e.g. louvain.js) can pass overrides through options. -const DEFAULT_MAX_LEVELS = 50; -const DEFAULT_MAX_LOCAL_PASSES = 20; -const GAIN_EPSILON = 1e-12; +// via .codegraphrc.json. Callers (e.g. louvain.ts) can pass overrides through options. +const DEFAULT_MAX_LEVELS: number = 50; +const DEFAULT_MAX_LOCAL_PASSES: number = 20; +const GAIN_EPSILON: number = 1e-12; const CandidateStrategy = { Neighbors: 0, All: 1, RandomAny: 2, RandomNeighbor: 3, -}; +} as const; + +type CandidateStrategyCode = (typeof CandidateStrategy)[keyof typeof CandidateStrategy]; + +export interface LeidenOptions { + directed?: boolean; + randomSeed?: number; + maxLevels?: number; + maxLocalPasses?: number; + allowNewCommunity?: boolean; + candidateStrategy?: 'neighbors' | 'all' | 'random' | 'random-neighbor'; + quality?: string; + resolution?: number; + refine?: boolean; + preserveLabels?: boolean | Map; + maxCommunitySize?: number; + refinementTheta?: number; + fixedNodes?: Set | string[]; + linkWeight?: GraphAdapterOptions['linkWeight']; + nodeSize?: GraphAdapterOptions['nodeSize']; + baseNodeIds?: string[]; +} + +export interface NormalizedOptions { + directed: boolean; + randomSeed: number; + maxLevels: number; + maxLocalPasses: number; + allowNewCommunity: boolean; + candidateStrategyCode: CandidateStrategyCode; + quality: string; + resolution: number; + refine: boolean; + preserveLabels: boolean | Map | undefined; + maxCommunitySize: number; + refinementTheta: number; + fixedNodes: Set | string[] | undefined; +} + +export interface LevelEntry { + graph: GraphAdapter; + partition: Partition; +} -export function runLouvainUndirectedModularity(graph, optionsInput = {}) { - const options = normalizeOptions(optionsInput); - let currentGraph = graph; - const levels = []; +export interface LouvainResult { + graph: GraphAdapter; + partition: Partition; + levels: LevelEntry[]; + 
originalToCurrent: Int32Array; + originalNodeIds: string[]; + baseGraph: GraphAdapter; +} + +export function runLouvainUndirectedModularity( + graph: CodeGraph, + optionsInput: LeidenOptions = {}, +): LouvainResult { + const options: NormalizedOptions = normalizeOptions(optionsInput); + let currentGraph: CodeGraph = graph; + const levels: LevelEntry[] = []; const rngSource = createRng(options.randomSeed); - const random = () => rngSource.nextDouble(); + const random: () => number = () => rngSource.nextDouble(); - const baseGraphAdapter = makeGraphAdapter(currentGraph, { + const baseGraphAdapter: GraphAdapter = makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput, }); - const origN = baseGraphAdapter.n; + const origN: number = baseGraphAdapter.n; const originalToCurrent = new Int32Array(origN); for (let i = 0; i < origN; i++) originalToCurrent[i] = i; - let fixedNodeMask = null; + let fixedNodeMask: Uint8Array | null = null; if (options.fixedNodes) { const fixed = new Uint8Array(origN); - const asSet = + const asSet: Set = options.fixedNodes instanceof Set ? options.fixedNodes : new Set(options.fixedNodes); for (const id of asSet) { const idx = baseGraphAdapter.idToIndex.get(String(id)); @@ -51,75 +107,77 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } for (let level = 0; level < options.maxLevels; level++) { - const graphAdapter = + const graphAdapter: GraphAdapter = level === 0 ? 
baseGraphAdapter : makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput }); - const partition = makePartition(graphAdapter); + const partition: Partition = makePartition(graphAdapter); partition.graph = graphAdapter; partition.initializeAggregates(); const order = new Int32Array(graphAdapter.n); for (let i = 0; i < graphAdapter.n; i++) order[i] = i; - let improved = true; - let localPasses = 0; - const strategyCode = options.candidateStrategyCode; + let improved: boolean = true; + let localPasses: number = 0; + const strategyCode: CandidateStrategyCode = options.candidateStrategyCode; while (improved) { improved = false; localPasses++; shuffleArrayInPlace(order, random); for (let idx = 0; idx < order.length; idx++) { - const nodeIndex = order[idx]; + const nodeIndex: number = order[idx]!; if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue; - const candidateCount = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); - let bestCommunityId = partition.nodeCommunity[nodeIndex]; - let bestGain = 0; - const maxCommunitySize = options.maxCommunitySize; + const candidateCount: number = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); + let bestCommunityId: number = partition.nodeCommunity[nodeIndex]!; + let bestGain: number = 0; + const maxCommunitySize: number = options.maxCommunitySize; if (strategyCode === CandidateStrategy.All) { for (let communityId = 0; communityId < partition.communityCount; communityId++) { - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if (communityId === partition.nodeCommunity[nodeIndex]!) continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! 
> maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; } } } else if (strategyCode === CandidateStrategy.RandomAny) { - const tries = Math.min(10, Math.max(1, partition.communityCount)); + const tries: number = Math.min(10, Math.max(1, partition.communityCount)); for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId = (random() * partition.communityCount) | 0; - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + const communityId: number = (random() * partition.communityCount) | 0; + if (communityId === partition.nodeCommunity[nodeIndex]!) continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; } } } else if (strategyCode === CandidateStrategy.RandomNeighbor) { - const tries = Math.min(10, Math.max(1, candidateCount)); + const tries: number = Math.min(10, Math.max(1, candidateCount)); for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId = partition.getCandidateCommunityAt((random() * candidateCount) | 0); - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + const communityId: number = partition.getCandidateCommunityAt( + (random() * candidateCount) | 0, + ); + if (communityId === partition.nodeCommunity[nodeIndex]!) 
continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; @@ -127,13 +185,13 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } } else { for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { - const communityId = partition.getCandidateCommunityAt(trialIndex); + const communityId: number = partition.getCandidateCommunityAt(trialIndex); if (maxCommunitySize < Infinity) { - const nextSize = - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]; + const nextSize: number = + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]!; if (nextSize > maxCommunitySize) continue; } - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; @@ -141,14 +199,14 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } } if (options.allowNewCommunity) { - const newCommunityId = partition.communityCount; - const gain = computeQualityGain(partition, nodeIndex, newCommunityId, options); + const newCommunityId: number = partition.communityCount; + const gain: number = computeQualityGain(partition, nodeIndex, newCommunityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = newCommunityId; } } - if (bestCommunityId !== partition.nodeCommunity[nodeIndex] && bestGain > GAIN_EPSILON) { + if (bestCommunityId !== partition.nodeCommunity[nodeIndex]! 
&& bestGain > GAIN_EPSILON) { partition.moveNodeToCommunity(nodeIndex, bestCommunityId); improved = true; } @@ -158,9 +216,9 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { renumberCommunities(partition, options.preserveLabels); - let effectivePartition = partition; + let effectivePartition: Partition = partition; if (options.refine) { - const refined = refineWithinCoarseCommunities( + const refined: Partition = refineWithinCoarseCommunities( graphAdapter, partition, random, @@ -169,7 +227,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { ); // Post-refinement: split any disconnected communities into their // connected components. This is the cheap O(V+E) alternative to - // checking γ-connectedness on every candidate during refinement. + // checking gamma-connectedness on every candidate during refinement. // A disconnected community violates even basic connectivity, so // splitting is always correct. splitDisconnectedCommunities(graphAdapter, refined); @@ -178,15 +236,15 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } levels.push({ graph: graphAdapter, partition: effectivePartition }); - const fineToCoarse = effectivePartition.nodeCommunity; + const fineToCoarse: Int32Array = effectivePartition.nodeCommunity; for (let i = 0; i < originalToCurrent.length; i++) { - originalToCurrent[i] = fineToCoarse[originalToCurrent[i]]; + originalToCurrent[i] = fineToCoarse[originalToCurrent[i]!]!; } - // Terminate when no further coarsening is possible. Check both the + // Terminate when no further coarsening is possible. Check both the // move-phase partition (did the greedy phase find merges?) and the // effective partition that feeds buildCoarseGraph (would coarsening - // actually reduce the graph?). When refine is enabled the refined + // actually reduce the graph?). 
When refine is enabled the refined // partition starts from singletons and may have more communities than // the move phase found, so checking only effectivePartition would // cause premature termination. @@ -198,7 +256,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { currentGraph = buildCoarseGraph(graphAdapter, effectivePartition); } - const last = levels[levels.length - 1]; + const last: LevelEntry = levels[levels.length - 1]!; return { graph: last.graph, partition: last.partition, @@ -209,39 +267,35 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { }; } -/** - * Build a coarse graph where each community becomes a node. - * Uses CodeGraph instead of ngraph.graph. - */ // Build a coarse graph where each community becomes a single node. // Self-loops (g.selfLoop[]) don't need separate handling here because they // are already present in g.outEdges (directed path keeps them in both arrays). // When the coarse graph is fed back to makeGraphAdapter at the next level, // the adapter re-detects cu===cu edges as self-loops and populates selfLoop[]. -function buildCoarseGraph(g, p) { +function buildCoarseGraph(g: GraphAdapter, p: Partition): CodeGraph { const coarse = new CodeGraph({ directed: g.directed }); for (let c = 0; c < p.communityCount; c++) { - coarse.addNode(String(c), { size: p.communityTotalSize[c] }); + coarse.addNode(String(c), { size: p.communityTotalSize[c]! 
}); } - const acc = new Map(); + const acc = new Map(); for (let i = 0; i < g.n; i++) { - const cu = p.nodeCommunity[i]; - const list = g.outEdges[i]; + const cu: number = p.nodeCommunity[i]!; + const list: EdgeEntry[] = g.outEdges[i]!; for (let k = 0; k < list.length; k++) { - const j = list[k].to; - const w = list[k].w; - const cv = p.nodeCommunity[j]; + const j: number = list[k]!.to; + const w: number = list[k]!.w; + const cv: number = p.nodeCommunity[j]!; // Undirected: each non-self edge (i,j) appears in both outEdges[i] and // outEdges[j]. For intra-community edges (cu===cv), skip the reverse to - // avoid inflating the coarse self-loop weight by 2×. + // avoid inflating the coarse self-loop weight by 2x. if (!g.directed && cu === cv && j < i) continue; const key = `${cu}:${cv}`; acc.set(key, (acc.get(key) || 0) + w); } } for (const [key, w] of acc.entries()) { - const [cuStr, cvStr] = key.split(':'); - coarse.addEdge(cuStr, cvStr, { weight: w }); + const parts = key.split(':'); + coarse.addEdge(parts[0]!, parts[1]!, { weight: w }); } return coarse; } @@ -251,32 +305,38 @@ function buildCoarseGraph(g, p) { * * Key properties that distinguish this from Louvain-style refinement: * - * 1. **Singleton start** — each node begins in its own community. - * 2. **Singleton guard** — only nodes still in singleton communities are + * 1. Singleton start — each node begins in its own community. + * 2. Singleton guard — only nodes still in singleton communities are * considered for merging. Once a node joins a non-singleton community * it is locked for the remainder of the pass. This prevents oscillation - * and is essential for the γ-connectedness guarantee. - * 3. **Single pass** — one randomized sweep through all nodes, not an + * and is essential for the gamma-connectedness guarantee. + * 3. Single pass — one randomized sweep through all nodes, not an * iterative loop until convergence (that would be Louvain behavior). - * 4. 
**Probabilistic selection** — candidate communities are sampled from - * a Boltzmann distribution `p(v, C) ∝ exp(ΔH / θ)`, with the "stay - * as singleton" option (ΔH = 0) included in the distribution. This - * means a node may probabilistically choose to remain alone even when - * positive-gain merges exist. + * 4. Probabilistic selection — candidate communities are sampled from + * a Boltzmann distribution p(v, C) proportional to exp(deltaH / theta), + * with the "stay as singleton" option (deltaH = 0) included in the + * distribution. This means a node may probabilistically choose to remain + * alone even when positive-gain merges exist. * - * θ (refinementTheta) controls temperature: lower → more deterministic - * (approaches greedy), higher → more exploratory. Determinism is preserved + * theta (refinementTheta) controls temperature: lower = more deterministic + * (approaches greedy), higher = more exploratory. Determinism is preserved * via the seeded PRNG — same seed produces the same assignments. */ -function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { - const p = makePartition(g); +function refineWithinCoarseCommunities( + g: GraphAdapter, + basePart: Partition, + rng: () => number, + opts: NormalizedOptions, + fixedMask0: Uint8Array | null, +): Partition { + const p: Partition = makePartition(g); p.initializeAggregates(); p.graph = g; - const macro = basePart.nodeCommunity; + const macro: Int32Array = basePart.nodeCommunity; const commMacro = new Int32Array(p.communityCount); - for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]; + for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]!; - const theta = typeof opts.refinementTheta === 'number' ? opts.refinementTheta : 1.0; + const theta: number = typeof opts.refinementTheta === 'number' ? opts.refinementTheta : 1.0; if (theta <= 0) throw new RangeError(`refinementTheta must be > 0 (got ${theta})`); // Single pass in random order (Algorithm 3, step 2). 
@@ -291,27 +351,29 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { const candWeight = new Float64Array(g.n); for (let idx = 0; idx < order.length; idx++) { - const v = order[idx]; + const v: number = order[idx]!; if (fixedMask0?.[v]) continue; // Singleton guard: only move nodes still alone in their community. - if (p.getCommunityNodeCount(p.nodeCommunity[v]) > 1) continue; + if (p.getCommunityNodeCount(p.nodeCommunity[v]!) > 1) continue; - const macroV = macro[v]; - const touchedCount = p.accumulateNeighborCommunityEdgeWeights(v); - const maxSize = Number.isFinite(opts.maxCommunitySize) ? opts.maxCommunitySize : Infinity; + const macroV: number = macro[v]!; + const touchedCount: number = p.accumulateNeighborCommunityEdgeWeights(v); + const maxSize: number = Number.isFinite(opts.maxCommunitySize) + ? opts.maxCommunitySize + : Infinity; // Collect eligible communities and their quality gains. - let candLen = 0; + let candLen: number = 0; for (let t = 0; t < touchedCount; t++) { - const c = p.getCandidateCommunityAt(t); - if (c === p.nodeCommunity[v]) continue; - if (commMacro[c] !== macroV) continue; + const c: number = p.getCandidateCommunityAt(t); + if (c === p.nodeCommunity[v]!) continue; + if (commMacro[c]! !== macroV) continue; if (maxSize < Infinity) { - const nextSize = p.getCommunityTotalSize(c) + g.size[v]; + const nextSize: number = p.getCommunityTotalSize(c) + g.size[v]!; if (nextSize > maxSize) continue; } - const gain = computeQualityGain(p, v, c, opts); + const gain: number = computeQualityGain(p, v, c, opts); if (gain > GAIN_EPSILON) { candC[candLen] = c; candGain[candLen] = gain; @@ -321,30 +383,30 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { if (candLen === 0) continue; - // Probabilistic selection: p(v, C) ∝ exp(ΔH / θ), with the "stay" - // option (ΔH = 0) included per Algorithm 3. 
+ // Probabilistic selection: p(v, C) proportional to exp(deltaH / theta), + // with the "stay" option (deltaH = 0) included per Algorithm 3. // For numerical stability, subtract the max gain before exponentiation. - let maxGain = 0; + let maxGain: number = 0; for (let i = 0; i < candLen; i++) { - if (candGain[i] > maxGain) maxGain = candGain[i]; + if (candGain[i]! > maxGain) maxGain = candGain[i]!; } // "Stay as singleton" weight: exp((0 - maxGain) / theta) - const stayWeight = Math.exp((0 - maxGain) / theta); - let totalWeight = stayWeight; + const stayWeight: number = Math.exp((0 - maxGain) / theta); + let totalWeight: number = stayWeight; for (let i = 0; i < candLen; i++) { - candWeight[i] = Math.exp((candGain[i] - maxGain) / theta); - totalWeight += candWeight[i]; + candWeight[i] = Math.exp((candGain[i]! - maxGain) / theta); + totalWeight += candWeight[i]!; } - const r = rng() * totalWeight; + const r: number = rng() * totalWeight; if (r < stayWeight) continue; // node stays as singleton - let cumulative = stayWeight; - let chosenC = candC[candLen - 1]; // fallback + let cumulative: number = stayWeight; + let chosenC: number = candC[candLen - 1]!; // fallback for (let i = 0; i < candLen; i++) { - cumulative += candWeight[i]; + cumulative += candWeight[i]!; if (r < cumulative) { - chosenC = candC[i]; + chosenC = candC[i]!; break; } } @@ -355,59 +417,59 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { } /** - * Post-refinement connectivity check. For each community, run a BFS on + * Post-refinement connectivity check. For each community, run a BFS on * the subgraph induced by its members (using the adapter's outEdges). * If a community has multiple connected components, assign secondary * components to new community IDs, then reinitialize aggregates once. * * O(V+E) total since communities partition V. 
* - * This replaces the per-candidate γ-connectedness check from the paper + * This replaces the per-candidate gamma-connectedness check from the paper * with a cheaper post-step that catches the most important violation * (disconnected subcommunities). */ -function splitDisconnectedCommunities(g, partition) { - const n = g.n; - const nc = partition.nodeCommunity; - const members = partition.getCommunityMembers(); - let nextC = partition.communityCount; - let didSplit = false; +function splitDisconnectedCommunities(g: GraphAdapter, partition: Partition): void { + const n: number = g.n; + const nc: Int32Array = partition.nodeCommunity; + const members: number[][] = partition.getCommunityMembers(); + let nextC: number = partition.communityCount; + let didSplit: boolean = false; const visited = new Uint8Array(n); const inCommunity = new Uint8Array(n); for (let c = 0; c < members.length; c++) { - const nodes = members[c]; + const nodes: number[] = members[c]!; if (nodes.length <= 1) continue; - for (let i = 0; i < nodes.length; i++) inCommunity[nodes[i]] = 1; + for (let i = 0; i < nodes.length; i++) inCommunity[nodes[i]!] = 1; - let componentCount = 0; + let componentCount: number = 0; for (let i = 0; i < nodes.length; i++) { - const start = nodes[i]; + const start: number = nodes[i]!; if (visited[start]) continue; componentCount++; // BFS within the community subgraph. // For directed graphs, traverse both outEdges and inEdges to check // weak connectivity (reachability ignoring edge direction). 
- const queue = [start]; + const queue: number[] = [start]; visited[start] = 1; - let head = 0; + let head: number = 0; while (head < queue.length) { - const v = queue[head++]; - const out = g.outEdges[v]; + const v: number = queue[head++]!; + const out: EdgeEntry[] = g.outEdges[v]!; for (let k = 0; k < out.length; k++) { - const w = out[k].to; + const w: number = out[k]!.to; if (inCommunity[w] && !visited[w]) { visited[w] = 1; queue.push(w); } } - if (g.directed && g.inEdges) { - const inc = g.inEdges[v]; + if (g.directed) { + const inc: InEdgeEntry[] = g.inEdges[v]!; for (let k = 0; k < inc.length; k++) { - const w = inc[k].from; + const w: number = inc[k]!.from; if (inCommunity[w] && !visited[w]) { visited[w] = 1; queue.push(w); @@ -418,15 +480,15 @@ function splitDisconnectedCommunities(g, partition) { if (componentCount > 1) { // Secondary component — assign new community ID directly. - const newC = nextC++; - for (let q = 0; q < queue.length; q++) nc[queue[q]] = newC; + const newC: number = nextC++; + for (let q = 0; q < queue.length; q++) nc[queue[q]!] = newC; didSplit = true; } } for (let i = 0; i < nodes.length; i++) { - inCommunity[nodes[i]] = 0; - visited[nodes[i]] = 0; + inCommunity[nodes[i]!] = 0; + visited[nodes[i]!] = 0; } } @@ -438,27 +500,35 @@ function splitDisconnectedCommunities(g, partition) { } } -function computeQualityGain(partition, v, c, opts) { - const quality = (opts.quality || 'modularity').toLowerCase(); - const gamma = typeof opts.resolution === 'number' ? opts.resolution : 1.0; +function computeQualityGain( + partition: Partition, + v: number, + c: number, + opts: NormalizedOptions, +): number { + if (!partition.graph) { + throw new Error('partition.graph must be set before computeQualityGain'); + } + const quality: string = (opts.quality || 'modularity').toLowerCase(); + const gamma: number = typeof opts.resolution === 'number' ? 
opts.resolution : 1.0; if (quality === 'cpm') { - return diffCPM(partition, partition.graph || {}, v, c, gamma); + return diffCPM(partition, partition.graph, v, c, gamma); } // diffModularity dispatches to diffModularityDirected internally when g.directed is true - return diffModularity(partition, partition.graph || {}, v, c, gamma); + return diffModularity(partition, partition.graph, v, c, gamma); } -function shuffleArrayInPlace(arr, rng = Math.random) { +function shuffleArrayInPlace(arr: Int32Array, rng: () => number = Math.random): Int32Array { for (let i = arr.length - 1; i > 0; i--) { - const j = Math.floor(rng() * (i + 1)); - const t = arr[i]; - arr[i] = arr[j]; + const j: number = Math.floor(rng() * (i + 1)); + const t: number = arr[i]!; + arr[i] = arr[j]!; arr[j] = t; } return arr; } -function resolveCandidateStrategy(options) { +function resolveCandidateStrategy(options: LeidenOptions): CandidateStrategyCode { const val = options.candidateStrategy; if (typeof val !== 'string') return CandidateStrategy.Neighbors; switch (val) { @@ -475,23 +545,27 @@ function resolveCandidateStrategy(options) { } } -function normalizeOptions(options = {}) { - const directed = !!options.directed; - const randomSeed = Number.isFinite(options.randomSeed) ? options.randomSeed : 42; - const maxLevels = Number.isFinite(options.maxLevels) ? options.maxLevels : DEFAULT_MAX_LEVELS; - const maxLocalPasses = Number.isFinite(options.maxLocalPasses) - ? options.maxLocalPasses +function normalizeOptions(options: LeidenOptions = {}): NormalizedOptions { + const directed: boolean = !!options.directed; + const randomSeed: number = Number.isFinite(options.randomSeed) + ? (options.randomSeed as number) + : 42; + const maxLevels: number = Number.isFinite(options.maxLevels) + ? (options.maxLevels as number) + : DEFAULT_MAX_LEVELS; + const maxLocalPasses: number = Number.isFinite(options.maxLocalPasses) + ? 
(options.maxLocalPasses as number) : DEFAULT_MAX_LOCAL_PASSES; - const allowNewCommunity = !!options.allowNewCommunity; - const candidateStrategyCode = resolveCandidateStrategy(options); - const quality = (options.quality || 'modularity').toLowerCase(); - const resolution = typeof options.resolution === 'number' ? options.resolution : 1.0; - const refine = options.refine !== false; + const allowNewCommunity: boolean = !!options.allowNewCommunity; + const candidateStrategyCode: CandidateStrategyCode = resolveCandidateStrategy(options); + const quality: string = (options.quality || 'modularity').toLowerCase(); + const resolution: number = typeof options.resolution === 'number' ? options.resolution : 1.0; + const refine: boolean = options.refine !== false; const preserveLabels = options.preserveLabels; - const maxCommunitySize = Number.isFinite(options.maxCommunitySize) - ? options.maxCommunitySize + const maxCommunitySize: number = Number.isFinite(options.maxCommunitySize) + ? (options.maxCommunitySize as number) : Infinity; - const refinementTheta = + const refinementTheta: number = typeof options.refinementTheta === 'number' ? options.refinementTheta : 1.0; return { directed, @@ -510,7 +584,10 @@ function normalizeOptions(options = {}) { }; } -function renumberCommunities(partition, preserveLabels) { +function renumberCommunities( + partition: Partition, + preserveLabels: boolean | Map<number, number> | undefined, +): void { if (preserveLabels && preserveLabels instanceof Map) { partition.compactCommunityIds({ preserveMap: preserveLabels }); } else if (preserveLabels === true) { diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js deleted file mode 100644 index 0e39c1e3..00000000 --- a/src/graph/algorithms/leiden/partition.js +++ /dev/null @@ -1,407 +0,0 @@ -/** - * Mutable community assignment with per-community aggregates. - * Vendored from ngraph.leiden (MIT) — no external dependencies. 
- * - * Maintains per-community totals and per-move scratch accumulators so we can - * compute modularity/CPM gains in O(neighborhood) time without rescanning the - * whole graph after each move. - */ - -export function makePartition(graph) { - const n = graph.n; - const nodeCommunity = new Int32Array(n); - for (let i = 0; i < n; i++) nodeCommunity[i] = i; - let communityCount = n; - - let communityTotalSize = new Float64Array(communityCount); - let communityNodeCount = new Int32Array(communityCount); - let communityInternalEdgeWeight = new Float64Array(communityCount); - let communityTotalStrength = new Float64Array(communityCount); - let communityTotalOutStrength = new Float64Array(communityCount); - let communityTotalInStrength = new Float64Array(communityCount); - - const candidateCommunities = new Int32Array(n); - let candidateCommunityCount = 0; - const neighborEdgeWeightToCommunity = new Float64Array(n); - const outEdgeWeightToCommunity = new Float64Array(n); - const inEdgeWeightFromCommunity = new Float64Array(n); - const isCandidateCommunity = new Uint8Array(n); - - function ensureCommCapacity(newCount) { - if (newCount <= communityTotalSize.length) return; - const growTo = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); - communityTotalSize = growFloat(communityTotalSize, growTo); - communityNodeCount = growInt(communityNodeCount, growTo); - communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); - communityTotalStrength = growFloat(communityTotalStrength, growTo); - communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); - communityTotalInStrength = growFloat(communityTotalInStrength, growTo); - } - - function initializeAggregates() { - communityTotalSize.fill(0); - communityNodeCount.fill(0); - communityInternalEdgeWeight.fill(0); - communityTotalStrength.fill(0); - communityTotalOutStrength.fill(0); - communityTotalInStrength.fill(0); - for (let i = 0; i < n; i++) { - const c = nodeCommunity[i]; - 
communityTotalSize[c] += graph.size[i]; - communityNodeCount[c] += 1; - if (graph.directed) { - communityTotalOutStrength[c] += graph.strengthOut[i]; - communityTotalInStrength[c] += graph.strengthIn[i]; - } else { - communityTotalStrength[c] += graph.strengthOut[i]; - } - if (graph.selfLoop[i] !== 0) communityInternalEdgeWeight[c] += graph.selfLoop[i]; - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const neighbors = graph.outEdges[i]; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const neighbors = graph.outEdges[i]; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]; - if (j <= i) continue; - if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; - } - } - } - } - - function resetScratch() { - for (let i = 0; i < candidateCommunityCount; i++) { - const c = candidateCommunities[i]; - isCandidateCommunity[c] = 0; - neighborEdgeWeightToCommunity[c] = 0; - outEdgeWeightToCommunity[c] = 0; - inEdgeWeightFromCommunity[c] = 0; - } - candidateCommunityCount = 0; - } - - function touch(c) { - if (isCandidateCommunity[c]) return; - isCandidateCommunity[c] = 1; - candidateCommunities[candidateCommunityCount++] = c; - } - - function accumulateNeighborCommunityEdgeWeights(v) { - resetScratch(); - const ci = nodeCommunity[v]; - touch(ci); - if (graph.directed) { - const outL = graph.outEdges[v]; - for (let k = 0; k < outL.length; k++) { - const j = outL[k].to; - const w = outL[k].w; - const cj = nodeCommunity[j]; - touch(cj); - outEdgeWeightToCommunity[cj] += w; - } - const inL = graph.inEdges[v]; - for (let k = 0; k < inL.length; k++) { - const i2 = inL[k].from; - const w = inL[k].w; - const ci2 = nodeCommunity[i2]; - 
touch(ci2); - inEdgeWeightFromCommunity[ci2] += w; - } - } else { - const list = graph.outEdges[v]; - for (let k = 0; k < list.length; k++) { - const j = list[k].to; - const w = list[k].w; - const cj = nodeCommunity[j]; - touch(cj); - neighborEdgeWeightToCommunity[cj] += w; - } - } - return candidateCommunityCount; - } - - const twoMUndirected = graph.totalWeight; - function deltaModularityUndirected(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - const strengthV = graph.strengthOut[v]; - const weightToNew = - newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; - const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; - const totalStrengthNew = - newC < communityTotalStrength.length ? communityTotalStrength[newC] : 0; - const totalStrengthOld = communityTotalStrength[oldC]; - const gain_remove = -( - weightToOld / twoMUndirected - - (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) - ); - const gain_add = - weightToNew / twoMUndirected - - (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); - return gain_remove + gain_add; - } - - function deltaModularityDirected(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - const totalEdgeWeight = graph.totalWeight; - const strengthOutV = graph.strengthOut[v]; - const strengthInV = graph.strengthIn[v]; - const inFromNew = - newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; - const outToNew = - newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; - const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; - const outToOld = outEdgeWeightToCommunity[oldC] || 0; - const totalInStrengthNew = - newC < communityTotalInStrength.length ? communityTotalInStrength[newC] : 0; - const totalOutStrengthNew = - newC < communityTotalOutStrength.length ? 
communityTotalOutStrength[newC] : 0; - const totalInStrengthOld = communityTotalInStrength[oldC]; - const totalOutStrengthOld = communityTotalOutStrength[oldC]; - // Self-loop correction + constant term (see modularity.js diffModularityDirected) - const selfW = graph.selfLoop[v] || 0; - const deltaInternal = - (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; - const deltaExpected = - (gamma * - (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + - strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + - 2 * strengthOutV * strengthInV)) / - (totalEdgeWeight * totalEdgeWeight); - return deltaInternal - deltaExpected; - } - - function deltaCPM(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - let w_old, w_new; - let selfCorrection = 0; - if (graph.directed) { - w_old = (outEdgeWeightToCommunity[oldC] || 0) + (inEdgeWeightFromCommunity[oldC] || 0); - w_new = - newC < outEdgeWeightToCommunity.length - ? (outEdgeWeightToCommunity[newC] || 0) + (inEdgeWeightFromCommunity[newC] || 0) - : 0; - // Self-loop correction (see cpm.js diffCPM) - selfCorrection = 2 * (graph.selfLoop[v] || 0); - } else { - w_old = neighborEdgeWeightToCommunity[oldC] || 0; - w_new = - newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; - } - const nodeSize = graph.size[v] || 1; - const sizeOld = communityTotalSize[oldC] || 0; - const sizeNew = newC < communityTotalSize.length ? 
communityTotalSize[newC] : 0; - return w_new - w_old + selfCorrection - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); - } - - function moveNodeToCommunity(v, newC) { - const oldC = nodeCommunity[v]; - if (oldC === newC) return false; - if (newC >= communityCount) { - ensureCommCapacity(newC + 1); - communityCount = newC + 1; - } - const strengthOutV = graph.strengthOut[v]; - const strengthInV = graph.strengthIn[v]; - const selfLoopWeight = graph.selfLoop[v]; - const nodeSize = graph.size[v]; - - communityNodeCount[oldC] -= 1; - communityNodeCount[newC] += 1; - communityTotalSize[oldC] -= nodeSize; - communityTotalSize[newC] += nodeSize; - if (graph.directed) { - communityTotalOutStrength[oldC] -= strengthOutV; - communityTotalOutStrength[newC] += strengthOutV; - communityTotalInStrength[oldC] -= strengthInV; - communityTotalInStrength[newC] += strengthInV; - } else { - communityTotalStrength[oldC] -= strengthOutV; - communityTotalStrength[newC] += strengthOutV; - } - - if (graph.directed) { - const outToOld = outEdgeWeightToCommunity[oldC] || 0; - const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; - const outToNew = - newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; - const inFromNew = - newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; - // outToOld/inFromOld already include the self-loop weight (self-loops are - // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
- communityInternalEdgeWeight[oldC] -= outToOld + inFromOld - selfLoopWeight; - communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; - } else { - const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; - const weightToNew = neighborEdgeWeightToCommunity[newC] || 0; - communityInternalEdgeWeight[oldC] -= 2 * weightToOld + selfLoopWeight; - communityInternalEdgeWeight[newC] += 2 * weightToNew + selfLoopWeight; - } - - nodeCommunity[v] = newC; - return true; - } - - function compactCommunityIds(opts = {}) { - const ids = []; - for (let c = 0; c < communityCount; c++) if (communityNodeCount[c] > 0) ids.push(c); - if (opts.keepOldOrder) { - ids.sort((a, b) => a - b); - } else if (opts.preserveMap instanceof Map) { - ids.sort((a, b) => { - const pa = opts.preserveMap.get(a); - const pb = opts.preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - communityTotalSize[b] - communityTotalSize[a] || - communityNodeCount[b] - communityNodeCount[a] || - a - b - ); - }); - } else { - ids.sort( - (a, b) => - communityTotalSize[b] - communityTotalSize[a] || - communityNodeCount[b] - communityNodeCount[a] || - a - b, - ); - } - const newId = new Int32Array(communityCount).fill(-1); - ids.forEach((c, i) => { - newId[c] = i; - }); - for (let i = 0; i < nodeCommunity.length; i++) nodeCommunity[i] = newId[nodeCommunity[i]]; - const remappedCount = ids.length; - const newTotalSize = new Float64Array(remappedCount); - const newNodeCount = new Int32Array(remappedCount); - const newInternalEdgeWeight = new Float64Array(remappedCount); - const newTotalStrength = new Float64Array(remappedCount); - const newTotalOutStrength = new Float64Array(remappedCount); - const newTotalInStrength = new Float64Array(remappedCount); - for (let i = 0; i < n; i++) { - const c = nodeCommunity[i]; - newTotalSize[c] += graph.size[i]; - newNodeCount[c] += 1; - 
if (graph.directed) { - newTotalOutStrength[c] += graph.strengthOut[i]; - newTotalInStrength[c] += graph.strengthIn[i]; - } else { - newTotalStrength[c] += graph.strengthOut[i]; - } - if (graph.selfLoop[i] !== 0) newInternalEdgeWeight[c] += graph.selfLoop[i]; - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const list = graph.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const list = graph.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (j <= i) continue; - if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; - } - } - } - communityCount = remappedCount; - communityTotalSize = newTotalSize; - communityNodeCount = newNodeCount; - communityInternalEdgeWeight = newInternalEdgeWeight; - communityTotalStrength = newTotalStrength; - communityTotalOutStrength = newTotalOutStrength; - communityTotalInStrength = newTotalInStrength; - } - - function getCommunityMembers() { - const comms = new Array(communityCount); - for (let i = 0; i < communityCount; i++) comms[i] = []; - for (let i = 0; i < n; i++) comms[nodeCommunity[i]].push(i); - return comms; - } - - function getCommunityTotalSize(c) { - return c < communityTotalSize.length ? communityTotalSize[c] : 0; - } - function getCommunityNodeCount(c) { - return c < communityNodeCount.length ? 
communityNodeCount[c] : 0; - } - - return { - n, - get communityCount() { - return communityCount; - }, - nodeCommunity, - get communityTotalSize() { - return communityTotalSize; - }, - get communityNodeCount() { - return communityNodeCount; - }, - get communityInternalEdgeWeight() { - return communityInternalEdgeWeight; - }, - get communityTotalStrength() { - return communityTotalStrength; - }, - get communityTotalOutStrength() { - return communityTotalOutStrength; - }, - get communityTotalInStrength() { - return communityTotalInStrength; - }, - resizeCommunities(newCount) { - ensureCommCapacity(newCount); - communityCount = newCount; - }, - initializeAggregates, - accumulateNeighborCommunityEdgeWeights, - getCandidateCommunityCount: () => candidateCommunityCount, - getCandidateCommunityAt: (i) => candidateCommunities[i], - getNeighborEdgeWeightToCommunity: (c) => neighborEdgeWeightToCommunity[c] || 0, - getOutEdgeWeightToCommunity: (c) => outEdgeWeightToCommunity[c] || 0, - getInEdgeWeightFromCommunity: (c) => inEdgeWeightFromCommunity[c] || 0, - deltaModularityUndirected, - deltaModularityDirected, - deltaCPM, - moveNodeToCommunity, - compactCommunityIds, - getCommunityMembers, - getCommunityTotalSize, - getCommunityNodeCount, - }; -} - -function growFloat(a, to) { - const b = new Float64Array(to); - b.set(a); - return b; -} -function growInt(a, to) { - const b = new Int32Array(to); - b.set(a); - return b; -} diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts new file mode 100644 index 00000000..ffa6c46b --- /dev/null +++ b/src/graph/algorithms/leiden/partition.ts @@ -0,0 +1,479 @@ +/** + * Mutable community assignment with per-community aggregates. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + * + * Maintains per-community totals and per-move scratch accumulators so we can + * compute modularity/CPM gains in O(neighborhood) time without rescanning the + * whole graph after each move. 
+ */ + +import type { GraphAdapter } from './adapter.js'; + +export interface CompactOptions { + keepOldOrder?: boolean; + preserveMap?: Map<number, number>; +} + +export interface Partition { + n: number; + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityTotalSize: Float64Array; + readonly communityNodeCount: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalStrength: Float64Array; + readonly communityTotalOutStrength: Float64Array; + readonly communityTotalInStrength: Float64Array; + resizeCommunities(newCount: number): void; + initializeAggregates(): void; + accumulateNeighborCommunityEdgeWeights(v: number): number; + getCandidateCommunityCount(): number; + getCandidateCommunityAt(i: number): number; + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; + deltaModularityUndirected(v: number, newC: number, gamma?: number): number; + deltaModularityDirected(v: number, newC: number, gamma?: number): number; + deltaCPM(v: number, newC: number, gamma?: number): number; + moveNodeToCommunity(v: number, newC: number): boolean; + compactCommunityIds(opts?: CompactOptions): void; + getCommunityMembers(): number[][]; + getCommunityTotalSize(c: number): number; + getCommunityNodeCount(c: number): number; + /** Attached by optimiser after creation — undefined until set. */ + graph?: GraphAdapter; +} + +// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess +// widens to `number | undefined`. These helpers keep the compound assignment patterns readable. 
+function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} +function u8get(a: Uint8Array, i: number): number { + return a[i] as number; +} + +export function makePartition(graph: GraphAdapter): Partition { + const n: number = graph.n; + const nodeCommunity = new Int32Array(n); + for (let i = 0; i < n; i++) nodeCommunity[i] = i; + let communityCount: number = n; + + let communityTotalSize = new Float64Array(communityCount); + let communityNodeCount = new Int32Array(communityCount); + let communityInternalEdgeWeight = new Float64Array(communityCount); + let communityTotalStrength = new Float64Array(communityCount); + let communityTotalOutStrength = new Float64Array(communityCount); + let communityTotalInStrength = new Float64Array(communityCount); + + const candidateCommunities = new Int32Array(n); + let candidateCommunityCount: number = 0; + const neighborEdgeWeightToCommunity = new Float64Array(n); + const outEdgeWeightToCommunity = new Float64Array(n); + const inEdgeWeightFromCommunity = new Float64Array(n); + const isCandidateCommunity = new Uint8Array(n); + + function ensureCommCapacity(newCount: number): void { + if (newCount <= communityTotalSize.length) return; + const growTo: number = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); + communityTotalSize = growFloat(communityTotalSize, growTo); + communityNodeCount = growInt(communityNodeCount, growTo); + communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); + communityTotalStrength = growFloat(communityTotalStrength, growTo); + communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); + communityTotalInStrength = growFloat(communityTotalInStrength, growTo); + } + + function initializeAggregates(): void { + communityTotalSize.fill(0); + communityNodeCount.fill(0); + communityInternalEdgeWeight.fill(0); + communityTotalStrength.fill(0); + 
communityTotalOutStrength.fill(0); + communityTotalInStrength.fill(0); + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + communityTotalSize[c] = fget(communityTotalSize, c) + fget(graph.size, i); + communityNodeCount[c] = iget(communityNodeCount, c) + 1; + if (graph.directed) { + communityTotalOutStrength[c] = + fget(communityTotalOutStrength, c) + fget(graph.strengthOut, i); + communityTotalInStrength[c] = fget(communityTotalInStrength, c) + fget(graph.strengthIn, i); + } else { + communityTotalStrength[c] = fget(communityTotalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + communityInternalEdgeWeight[c] = + fget(communityInternalEdgeWeight, c) + fget(graph.selfLoop, i); + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; + } + } + } + } + + function resetScratch(): void { + for (let i = 0; i < candidateCommunityCount; i++) { + const c: number = iget(candidateCommunities, i); + isCandidateCommunity[c] = 0; + neighborEdgeWeightToCommunity[c] = 0; + outEdgeWeightToCommunity[c] = 0; + inEdgeWeightFromCommunity[c] = 0; + } + candidateCommunityCount = 0; + } + + function touch(c: number): void { + if (u8get(isCandidateCommunity, c)) return; + isCandidateCommunity[c] = 1; + 
candidateCommunities[candidateCommunityCount++] = c; + } + + function accumulateNeighborCommunityEdgeWeights(v: number): number { + resetScratch(); + const ci: number = iget(nodeCommunity, v); + touch(ci); + if (graph.directed) { + const outL = graph.outEdges[v]!; + for (let k = 0; k < outL.length; k++) { + const j: number = outL[k]!.to; + const w: number = outL[k]!.w; + const cj: number = iget(nodeCommunity, j); + touch(cj); + outEdgeWeightToCommunity[cj] = fget(outEdgeWeightToCommunity, cj) + w; + } + const inL = graph.inEdges[v]!; + for (let k = 0; k < inL.length; k++) { + const i2: number = inL[k]!.from; + const w: number = inL[k]!.w; + const ci2: number = iget(nodeCommunity, i2); + touch(ci2); + inEdgeWeightFromCommunity[ci2] = fget(inEdgeWeightFromCommunity, ci2) + w; + } + } else { + const list = graph.outEdges[v]!; + for (let k = 0; k < list.length; k++) { + const j: number = list[k]!.to; + const w: number = list[k]!.w; + const cj: number = iget(nodeCommunity, j); + touch(cj); + neighborEdgeWeightToCommunity[cj] = fget(neighborEdgeWeightToCommunity, cj) + w; + } + } + return candidateCommunityCount; + } + + const twoMUndirected: number = graph.totalWeight; + function deltaModularityUndirected(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + const strengthV: number = fget(graph.strengthOut, v); + const weightToNew: number = + newC < neighborEdgeWeightToCommunity.length + ? fget(neighborEdgeWeightToCommunity, newC) || 0 + : 0; + const weightToOld: number = fget(neighborEdgeWeightToCommunity, oldC) || 0; + const totalStrengthNew: number = + newC < communityTotalStrength.length ? 
fget(communityTotalStrength, newC) : 0; + const totalStrengthOld: number = fget(communityTotalStrength, oldC); + const gain_remove: number = -( + weightToOld / twoMUndirected - + (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) + ); + const gain_add: number = + weightToNew / twoMUndirected - + (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); + return gain_remove + gain_add; + } + + function deltaModularityDirected(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + const totalEdgeWeight: number = graph.totalWeight; + const strengthOutV: number = fget(graph.strengthOut, v); + const strengthInV: number = fget(graph.strengthIn, v); + const inFromNew: number = + newC < inEdgeWeightFromCommunity.length ? fget(inEdgeWeightFromCommunity, newC) || 0 : 0; + const outToNew: number = + newC < outEdgeWeightToCommunity.length ? fget(outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromOld: number = fget(inEdgeWeightFromCommunity, oldC) || 0; + const outToOld: number = fget(outEdgeWeightToCommunity, oldC) || 0; + const totalInStrengthNew: number = + newC < communityTotalInStrength.length ? fget(communityTotalInStrength, newC) : 0; + const totalOutStrengthNew: number = + newC < communityTotalOutStrength.length ? 
fget(communityTotalOutStrength, newC) : 0; + const totalInStrengthOld: number = fget(communityTotalInStrength, oldC); + const totalOutStrengthOld: number = fget(communityTotalOutStrength, oldC); + // Self-loop correction + constant term (see modularity.ts diffModularityDirected) + const selfW: number = fget(graph.selfLoop, v) || 0; + const deltaInternal: number = + (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; + const deltaExpected: number = + (gamma * + (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + + strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + + 2 * strengthOutV * strengthInV)) / + (totalEdgeWeight * totalEdgeWeight); + return deltaInternal - deltaExpected; + } + + function deltaCPM(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + let w_old: number; + let w_new: number; + let selfCorrection: number = 0; + if (graph.directed) { + w_old = + (fget(outEdgeWeightToCommunity, oldC) || 0) + (fget(inEdgeWeightFromCommunity, oldC) || 0); + w_new = + newC < outEdgeWeightToCommunity.length + ? (fget(outEdgeWeightToCommunity, newC) || 0) + + (fget(inEdgeWeightFromCommunity, newC) || 0) + : 0; + // Self-loop correction (see cpm.ts diffCPM) + selfCorrection = 2 * (fget(graph.selfLoop, v) || 0); + } else { + w_old = fget(neighborEdgeWeightToCommunity, oldC) || 0; + w_new = + newC < neighborEdgeWeightToCommunity.length + ? fget(neighborEdgeWeightToCommunity, newC) || 0 + : 0; + } + const nodeSz: number = fget(graph.size, v) || 1; + const sizeOld: number = fget(communityTotalSize, oldC) || 0; + const sizeNew: number = newC < communityTotalSize.length ? 
fget(communityTotalSize, newC) : 0; + return w_new - w_old + selfCorrection - gamma * nodeSz * (sizeNew - sizeOld + nodeSz); + } + + function moveNodeToCommunity(v: number, newC: number): boolean { + const oldC: number = iget(nodeCommunity, v); + if (oldC === newC) return false; + if (newC >= communityCount) { + ensureCommCapacity(newC + 1); + communityCount = newC + 1; + } + const strengthOutV: number = fget(graph.strengthOut, v); + const strengthInV: number = fget(graph.strengthIn, v); + const selfLoopWeight: number = fget(graph.selfLoop, v); + const nodeSz: number = fget(graph.size, v); + + communityNodeCount[oldC] = iget(communityNodeCount, oldC) - 1; + communityNodeCount[newC] = iget(communityNodeCount, newC) + 1; + communityTotalSize[oldC] = fget(communityTotalSize, oldC) - nodeSz; + communityTotalSize[newC] = fget(communityTotalSize, newC) + nodeSz; + if (graph.directed) { + communityTotalOutStrength[oldC] = fget(communityTotalOutStrength, oldC) - strengthOutV; + communityTotalOutStrength[newC] = fget(communityTotalOutStrength, newC) + strengthOutV; + communityTotalInStrength[oldC] = fget(communityTotalInStrength, oldC) - strengthInV; + communityTotalInStrength[newC] = fget(communityTotalInStrength, newC) + strengthInV; + } else { + communityTotalStrength[oldC] = fget(communityTotalStrength, oldC) - strengthOutV; + communityTotalStrength[newC] = fget(communityTotalStrength, newC) + strengthOutV; + } + + if (graph.directed) { + const outToOld: number = fget(outEdgeWeightToCommunity, oldC) || 0; + const inFromOld: number = fget(inEdgeWeightFromCommunity, oldC) || 0; + const outToNew: number = + newC < outEdgeWeightToCommunity.length ? fget(outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromNew: number = + newC < inEdgeWeightFromCommunity.length ? fget(inEdgeWeightFromCommunity, newC) || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ communityInternalEdgeWeight[oldC] = + fget(communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); + communityInternalEdgeWeight[newC] = + fget(communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); + } else { + const weightToOld: number = fget(neighborEdgeWeightToCommunity, oldC) || 0; + const weightToNew: number = fget(neighborEdgeWeightToCommunity, newC) || 0; + communityInternalEdgeWeight[oldC] = + fget(communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); + communityInternalEdgeWeight[newC] = + fget(communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); + } + + nodeCommunity[v] = newC; + return true; + } + + function compactCommunityIds(opts: CompactOptions = {}): void { + const ids: number[] = []; + for (let c = 0; c < communityCount; c++) if (iget(communityNodeCount, c) > 0) ids.push(c); + if (opts.keepOldOrder) { + ids.sort((a, b) => a - b); + } else if (opts.preserveMap instanceof Map) { + const preserveMap = opts.preserveMap; + ids.sort((a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return ( + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b + ); + }); + } else { + ids.sort( + (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b, + ); + } + const newId = new Int32Array(communityCount).fill(-1); + ids.forEach((c, i) => { + newId[c] = i; + }); + for (let i = 0; i < nodeCommunity.length; i++) + nodeCommunity[i] = iget(newId, iget(nodeCommunity, i)); + const remappedCount: number = ids.length; + const newTotalSize = new Float64Array(remappedCount); + const newNodeCount = new Int32Array(remappedCount); + const newInternalEdgeWeight 
= new Float64Array(remappedCount); + const newTotalStrength = new Float64Array(remappedCount); + const newTotalOutStrength = new Float64Array(remappedCount); + const newTotalInStrength = new Float64Array(remappedCount); + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + newTotalSize[c] = fget(newTotalSize, c) + fget(graph.size, i); + newNodeCount[c] = iget(newNodeCount, c) + 1; + if (graph.directed) { + newTotalOutStrength[c] = fget(newTotalOutStrength, c) + fget(graph.strengthOut, i); + newTotalInStrength[c] = fget(newTotalInStrength, c) + fget(graph.strengthIn, i); + } else { + newTotalStrength[c] = fget(newTotalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + newInternalEdgeWeight[c] = fget(newInternalEdgeWeight, c) + fget(graph.selfLoop, i); + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const list = graph.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + newInternalEdgeWeight[ci] = fget(newInternalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const list = graph.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + newInternalEdgeWeight[ci] = fget(newInternalEdgeWeight, ci) + w; + } + } + } + communityCount = remappedCount; + communityTotalSize = newTotalSize; + communityNodeCount = newNodeCount; + communityInternalEdgeWeight = newInternalEdgeWeight; + communityTotalStrength = newTotalStrength; + communityTotalOutStrength = newTotalOutStrength; + communityTotalInStrength = newTotalInStrength; + } + + function getCommunityMembers(): number[][] { + const comms: number[][] = new Array(communityCount); + for (let i = 0; i < 
communityCount; i++) comms[i] = []; + for (let i = 0; i < n; i++) comms[iget(nodeCommunity, i)]!.push(i); + return comms; + } + + function getCommunityTotalSizeFn(c: number): number { + return c < communityTotalSize.length ? fget(communityTotalSize, c) : 0; + } + function getCommunityNodeCountFn(c: number): number { + return c < communityNodeCount.length ? iget(communityNodeCount, c) : 0; + } + + return { + n, + get communityCount() { + return communityCount; + }, + nodeCommunity, + get communityTotalSize() { + return communityTotalSize; + }, + get communityNodeCount() { + return communityNodeCount; + }, + get communityInternalEdgeWeight() { + return communityInternalEdgeWeight; + }, + get communityTotalStrength() { + return communityTotalStrength; + }, + get communityTotalOutStrength() { + return communityTotalOutStrength; + }, + get communityTotalInStrength() { + return communityTotalInStrength; + }, + resizeCommunities(newCount: number): void { + ensureCommCapacity(newCount); + communityCount = newCount; + }, + initializeAggregates, + accumulateNeighborCommunityEdgeWeights, + getCandidateCommunityCount: (): number => candidateCommunityCount, + getCandidateCommunityAt: (i: number): number => iget(candidateCommunities, i), + getNeighborEdgeWeightToCommunity: (c: number): number => + fget(neighborEdgeWeightToCommunity, c) || 0, + getOutEdgeWeightToCommunity: (c: number): number => fget(outEdgeWeightToCommunity, c) || 0, + getInEdgeWeightFromCommunity: (c: number): number => fget(inEdgeWeightFromCommunity, c) || 0, + deltaModularityUndirected, + deltaModularityDirected, + deltaCPM, + moveNodeToCommunity, + compactCommunityIds, + getCommunityMembers, + getCommunityTotalSize: getCommunityTotalSizeFn, + getCommunityNodeCount: getCommunityNodeCountFn, + graph: undefined, + }; +} + +function growFloat(a: Float64Array, to: number): Float64Array { + const b = new Float64Array(to); + for (let i = 0; i < a.length; i++) b[i] = a[i] as number; + return b; +} +function 
growInt(a: Int32Array, to: number): Int32Array { + const b = new Int32Array(to); + for (let i = 0; i < a.length; i++) b[i] = a[i] as number; + return b; +} diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.ts similarity index 57% rename from src/graph/algorithms/louvain.js rename to src/graph/algorithms/louvain.ts index c4195b60..c8643b93 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.ts @@ -2,23 +2,32 @@ * Community detection via vendored Leiden algorithm. * Maintains backward-compatible API: { assignments: Map, modularity: number } * - * **Note:** Always runs in undirected mode (`directed: false`) regardless of + * Note: Always runs in undirected mode (`directed: false`) regardless of * the input graph's directedness. For direction-aware community detection, * use `detectClusters` from `./leiden/index.js` directly. - * - * @param {import('../model.js').CodeGraph} graph - * @param {{ resolution?: number, maxLevels?: number, maxLocalPasses?: number }} [opts] - * @returns {{ assignments: Map, modularity: number }} */ +import type { CodeGraph } from '../model.js'; +import type { DetectClustersResult } from './leiden/index.js'; import { detectClusters } from './leiden/index.js'; -export function louvainCommunities(graph, opts = {}) { +export interface LouvainOptions { + resolution?: number; + maxLevels?: number; + maxLocalPasses?: number; +} + +export interface LouvainResult { + assignments: Map; + modularity: number; +} + +export function louvainCommunities(graph: CodeGraph, opts: LouvainOptions = {}): LouvainResult { if (graph.nodeCount === 0 || graph.edgeCount === 0) { return { assignments: new Map(), modularity: 0 }; } - const resolution = opts.resolution ?? 1.0; - const result = detectClusters(graph, { + const resolution: number = opts.resolution ?? 
1.0; + const result: DetectClustersResult = detectClusters(graph, { resolution, randomSeed: 42, directed: false, @@ -26,7 +35,7 @@ export function louvainCommunities(graph, opts = {}) { ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), }); - const assignments = new Map(); + const assignments = new Map(); for (const [id] of graph.nodes()) { const cls = result.getClass(id); if (cls != null) assignments.set(id, cls); diff --git a/src/graph/builders/dependency.js b/src/graph/builders/dependency.ts similarity index 63% rename from src/graph/builders/dependency.js rename to src/graph/builders/dependency.ts index 7024f0db..9af53751 100644 --- a/src/graph/builders/dependency.js +++ b/src/graph/builders/dependency.ts @@ -11,17 +11,29 @@ import { Repository, } from '../../db/index.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; +import type { + BetterSqlite3Database, + CallableNodeRow, + CallEdgeRow, + FileNodeRow, + ImportGraphEdgeRow, +} from '../../types.js'; import { CodeGraph } from '../model.js'; +export interface DependencyGraphOptions { + fileLevel?: boolean; + noTests?: boolean; + minConfidence?: number; +} + /** - * @param {object} dbOrRepo - Open better-sqlite3 database (readonly) or a Repository instance - * @param {object} [opts] - * @param {boolean} [opts.fileLevel=true] - File-level (imports) or function-level (calls) - * @param {boolean} [opts.noTests=false] - Exclude test files - * @param {number} [opts.minConfidence] - Minimum edge confidence (function-level only) - * @returns {CodeGraph} + * Build a dependency graph from an open database or Repository instance. + * Supports both file-level (import edges) and function-level (call edges) graphs. 
*/ -export function buildDependencyGraph(dbOrRepo, opts = {}) { +export function buildDependencyGraph( + dbOrRepo: BetterSqlite3Database | Repository, + opts: DependencyGraphOptions = {}, +): CodeGraph { const fileLevel = opts.fileLevel !== false; const noTests = opts.noTests || false; @@ -31,20 +43,23 @@ export function buildDependencyGraph(dbOrRepo, opts = {}) { return buildFunctionLevelGraph(dbOrRepo, noTests, opts.minConfidence); } -function buildFileLevelGraph(dbOrRepo, noTests) { +function buildFileLevelGraph( + dbOrRepo: BetterSqlite3Database | Repository, + noTests: boolean, +): CodeGraph { const graph = new CodeGraph(); const isRepo = dbOrRepo instanceof Repository; - let nodes = isRepo ? dbOrRepo.getFileNodesAll() : getFileNodesAll(dbOrRepo); + let nodes: FileNodeRow[] = isRepo ? dbOrRepo.getFileNodesAll() : getFileNodesAll(dbOrRepo); if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - const nodeIds = new Set(); + const nodeIds = new Set(); for (const n of nodes) { graph.addNode(String(n.id), { label: n.file, file: n.file, dbId: n.id }); nodeIds.add(n.id); } - const edges = isRepo ? dbOrRepo.getImportEdges() : getImportEdges(dbOrRepo); + const edges: ImportGraphEdgeRow[] = isRepo ? dbOrRepo.getImportEdges() : getImportEdges(dbOrRepo); for (const e of edges) { if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; const src = String(e.source_id); @@ -58,14 +73,23 @@ function buildFileLevelGraph(dbOrRepo, noTests) { return graph; } -function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { +interface MinConfidenceEdgeRow { + source_id: number; + target_id: number; +} + +function buildFunctionLevelGraph( + dbOrRepo: BetterSqlite3Database | Repository, + noTests: boolean, + minConfidence?: number, +): CodeGraph { const graph = new CodeGraph(); const isRepo = dbOrRepo instanceof Repository; - let nodes = isRepo ? dbOrRepo.getCallableNodes() : getCallableNodes(dbOrRepo); + let nodes: CallableNodeRow[] = isRepo ? 
dbOrRepo.getCallableNodes() : getCallableNodes(dbOrRepo); if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - const nodeIds = new Set(); + const nodeIds = new Set(); for (const n of nodes) { graph.addNode(String(n.id), { label: n.name, @@ -76,7 +100,7 @@ function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { nodeIds.add(n.id); } - let edges; + let edges: CallEdgeRow[] | MinConfidenceEdgeRow[]; if (minConfidence != null) { if (isRepo) { // Trade-off: Repository.getCallEdges() returns all call edges, so we @@ -88,8 +112,10 @@ function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { .getCallEdges() .filter((e) => e.confidence != null && e.confidence >= minConfidence); } else { - edges = dbOrRepo - .prepare("SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND confidence >= ?") + edges = (dbOrRepo as BetterSqlite3Database) + .prepare( + "SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND confidence >= ?", + ) .all(minConfidence); } } else { diff --git a/src/graph/builders/index.js b/src/graph/builders/index.ts similarity index 100% rename from src/graph/builders/index.js rename to src/graph/builders/index.ts diff --git a/src/graph/builders/structure.js b/src/graph/builders/structure.js deleted file mode 100644 index 10efb110..00000000 --- a/src/graph/builders/structure.js +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Build a containment graph (directory → file) from the SQLite database. 
- */ - -import { CodeGraph } from '../model.js'; - -/** - * @param {object} db - Open better-sqlite3 database (readonly) - * @returns {CodeGraph} Directed graph with directory→file containment edges - */ -export function buildStructureGraph(db) { - const graph = new CodeGraph(); - - const dirs = db.prepare("SELECT id, name FROM nodes WHERE kind = 'directory'").all(); - - for (const d of dirs) { - graph.addNode(String(d.id), { label: d.name, kind: 'directory' }); - } - - const files = db.prepare("SELECT id, name, file FROM nodes WHERE kind = 'file'").all(); - - for (const f of files) { - graph.addNode(String(f.id), { label: f.name, kind: 'file', file: f.file }); - } - - const containsEdges = db - .prepare(` - SELECT e.source_id, e.target_id - FROM edges e - JOIN nodes n ON e.source_id = n.id - WHERE e.kind = 'contains' AND n.kind = 'directory' - `) - .all(); - - for (const e of containsEdges) { - graph.addEdge(String(e.source_id), String(e.target_id), { kind: 'contains' }); - } - - return graph; -} diff --git a/src/graph/builders/structure.ts b/src/graph/builders/structure.ts new file mode 100644 index 00000000..d1114d82 --- /dev/null +++ b/src/graph/builders/structure.ts @@ -0,0 +1,58 @@ +/** + * Build a containment graph (directory -> file) from the SQLite database. + */ + +import type { BetterSqlite3Database } from '../../types.js'; +import { CodeGraph } from '../model.js'; + +interface DirRow { + id: number; + name: string; +} + +interface FileRow { + id: number; + name: string; + file: string; +} + +interface ContainsEdgeRow { + source_id: number; + target_id: number; +} + +/** + * Build a directed graph with directory->file containment edges. 
+ */ +export function buildStructureGraph(db: BetterSqlite3Database): CodeGraph { + const graph = new CodeGraph(); + + const dirs = db.prepare("SELECT id, name FROM nodes WHERE kind = 'directory'").all(); + + for (const d of dirs) { + graph.addNode(String(d.id), { label: d.name, kind: 'directory' }); + } + + const files = db.prepare("SELECT id, name, file FROM nodes WHERE kind = 'file'").all(); + + for (const f of files) { + graph.addNode(String(f.id), { label: f.name, kind: 'file', file: f.file }); + } + + const containsEdges = db + .prepare(` + SELECT e.source_id, e.target_id + FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.kind = 'contains' AND n.kind = 'directory' + `) + .all(); + + for (const e of containsEdges) { + graph.addEdge(String(e.source_id), String(e.target_id), { + kind: 'contains', + }); + } + + return graph; +} diff --git a/src/graph/builders/temporal.js b/src/graph/builders/temporal.js deleted file mode 100644 index c694d47c..00000000 --- a/src/graph/builders/temporal.js +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Build a co-change (temporal) graph weighted by Jaccard similarity. - */ - -import { CodeGraph } from '../model.js'; - -/** - * @param {object} db - Open better-sqlite3 database (readonly) - * @param {{ minJaccard?: number }} [opts] - * @returns {CodeGraph} Undirected graph weighted by Jaccard similarity - */ -export function buildTemporalGraph(db, opts = {}) { - const minJaccard = opts.minJaccard ?? 
0.0; - const graph = new CodeGraph({ directed: false }); - - // Check if co_changes table exists - const tableCheck = db - .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='co_changes'") - .get(); - if (!tableCheck) return graph; - - const rows = db - .prepare('SELECT file_a, file_b, jaccard FROM co_changes WHERE jaccard >= ?') - .all(minJaccard); - - for (const r of rows) { - if (!graph.hasNode(r.file_a)) graph.addNode(r.file_a, { label: r.file_a }); - if (!graph.hasNode(r.file_b)) graph.addNode(r.file_b, { label: r.file_b }); - graph.addEdge(r.file_a, r.file_b, { jaccard: r.jaccard }); - } - - return graph; -} diff --git a/src/graph/builders/temporal.ts b/src/graph/builders/temporal.ts new file mode 100644 index 00000000..43aea31a --- /dev/null +++ b/src/graph/builders/temporal.ts @@ -0,0 +1,51 @@ +/** + * Build a co-change (temporal) graph weighted by Jaccard similarity. + */ + +import type { BetterSqlite3Database } from '../../types.js'; +import { CodeGraph } from '../model.js'; + +export interface TemporalGraphOptions { + minJaccard?: number; +} + +interface TableCheckRow { + name: string; +} + +interface CoChangeRow { + file_a: string; + file_b: string; + jaccard: number; +} + +/** + * Build an undirected graph weighted by Jaccard similarity from the co_changes table. + */ +export function buildTemporalGraph( + db: BetterSqlite3Database, + opts: TemporalGraphOptions = {}, +): CodeGraph { + const minJaccard = opts.minJaccard ?? 
0.0; + const graph = new CodeGraph({ directed: false }); + + // Check if co_changes table exists + const tableCheck = db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='co_changes'", + ) + .get(); + if (!tableCheck) return graph; + + const rows = db + .prepare('SELECT file_a, file_b, jaccard FROM co_changes WHERE jaccard >= ?') + .all(minJaccard); + + for (const r of rows) { + if (!graph.hasNode(r.file_a)) graph.addNode(r.file_a, { label: r.file_a }); + if (!graph.hasNode(r.file_b)) graph.addNode(r.file_b, { label: r.file_b }); + graph.addEdge(r.file_a, r.file_b, { jaccard: r.jaccard }); + } + + return graph; +} diff --git a/src/graph/classifiers/index.js b/src/graph/classifiers/index.ts similarity index 100% rename from src/graph/classifiers/index.js rename to src/graph/classifiers/index.ts diff --git a/src/graph/index.js b/src/graph/index.ts similarity index 100% rename from src/graph/index.js rename to src/graph/index.ts diff --git a/src/vendor.d.ts b/src/vendor.d.ts index e8c49fdf..28c247f6 100644 --- a/src/vendor.d.ts +++ b/src/vendor.d.ts @@ -7,18 +7,21 @@ declare module 'better-sqlite3' { namespace BetterSqlite3 { interface Database { - prepare(sql: string): Statement; + prepare(sql: string): Statement; exec(sql: string): Database; - transaction unknown>(fn: T): T; + // biome-ignore lint/suspicious/noExplicitAny: must match better-sqlite3's generic Transaction + transaction any>(fn: F): F; close(): void; pragma(pragma: string, options?: { simple?: boolean }): unknown; + readonly open: boolean; + readonly name: string; } - interface Statement { + interface Statement { run(...params: unknown[]): RunResult; - get(...params: unknown[]): unknown | undefined; - all(...params: unknown[]): unknown[]; - iterate(...params: unknown[]): IterableIterator; + get(...params: unknown[]): TRow | undefined; + all(...params: unknown[]): TRow[]; + iterate(...params: unknown[]): IterableIterator; raw(toggle?: boolean): this; }