From 6e0ecf830e357713941e863031e10475816149cb Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:37:25 +0300 Subject: [PATCH 1/7] feat(state-dir): resolveStateDir + DB at <state-dir>/index.db (Tracer 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - application/state-dir.ts: resolveStateDir({root, cliFlag, env}) per plan §D7. Constants STATE_DIR_DEFAULT='.codemap', STATE_DB_NAME='index.db'. 10 unit tests cover precedence + relative/absolute paths. - config.ts: ResolvedCodemapConfig gains stateDir; resolveCodemapConfig 3rd arg opts.stateDir; databasePath defaults to <state-dir>/index.db. User-supplied databasePath wins (escape hatch). - config.ts: loadUserConfig reads <state-dir>/config.{ts,js,json} (D8); legacy <root>/codemap.config.* dropped (pre-v1). - runtime.ts: getStateDir() getter. - bootstrap.ts: --state-dir + CODEMAP_STATE_DIR; precedence per D7. - bootstrap-codemap.ts (new): single helper extracts the loadUserConfig+resolveCodemapConfig+initCodemap+configureResolver dance from 9 cmd-* files. Tracer 4's ensureStateDir attaches here. - All 9 cmd-* files refactored; stateDir threaded through interfaces + main.ts dispatch + ServerOpts (mcp/serve). - audit-worktree.ts: cached entries at <sha>/.codemap/index.db (recursive layout — each cached worktree is its own self-contained codemap project). - audit-engine.ts: makeWorktreeReindex stops hard-coding db path; openCodemapDatabase() reads from initialised runtime. - sqlite-db.ts: openCodemapDatabase mkdirs the parent (state-dir may not exist on fresh project). Dogfood: - .codemap/.gitignore (self-managed, blacklist) — codemap repo + fixtures/minimal/ - root .gitignore: dropped .codemap.* and .codemap/audit-cache/ (nested .gitignore handles them) - fixtures/minimal/.codemap.db* removed (stale legacy) 703 tests pass. 
--- .codemap/.gitignore | 7 +++ .gitignore | 2 - fixtures/minimal/.codemap/.gitignore | 7 +++ src/application/audit-engine.ts | 8 +-- src/application/audit-worktree.test.ts | 16 +++--- src/application/audit-worktree.ts | 33 ++++++------- src/application/http-server.ts | 9 +++- src/application/mcp-server.ts | 9 +++- src/application/state-dir.test.ts | 64 ++++++++++++++++++++++++ src/application/state-dir.ts | 39 +++++++++++++++ src/cli/bootstrap-codemap.ts | 26 ++++++++++ src/cli/bootstrap.ts | 9 +++- src/cli/cmd-audit.ts | 10 ++-- src/cli/cmd-context.ts | 10 ++-- src/cli/cmd-impact.ts | 9 ++-- src/cli/cmd-index.ts | 9 ++-- src/cli/cmd-mcp.ts | 2 + src/cli/cmd-query.ts | 20 +++----- src/cli/cmd-serve.ts | 2 + src/cli/cmd-show.ts | 10 ++-- src/cli/cmd-snippet.ts | 10 ++-- src/cli/cmd-validate.ts | 10 ++-- src/cli/cmd-watch.ts | 15 ++---- src/cli/main.ts | 15 +++++- src/cli/query-output-benchmark.test.ts | 2 +- src/config.test.ts | 15 ++++-- src/config.ts | 67 +++++++++++++++++++++----- src/runtime.ts | 4 ++ src/sqlite-db.ts | 4 ++ 29 files changed, 324 insertions(+), 119 deletions(-) create mode 100644 .codemap/.gitignore create mode 100644 fixtures/minimal/.codemap/.gitignore create mode 100644 src/application/state-dir.test.ts create mode 100644 src/application/state-dir.ts create mode 100644 src/cli/bootstrap-codemap.ts diff --git a/.codemap/.gitignore b/.codemap/.gitignore new file mode 100644 index 0000000..4099e2e --- /dev/null +++ b/.codemap/.gitignore @@ -0,0 +1,7 @@ +# codemap-managed — edits will be overwritten by `ensureStateGitignore`. +# Blacklist of generated artifacts; tracked sources (recipes/, config.*) +# default to tracked. Bump alongside any new cache (Rule 9 analogue). 
+index.db +index.db-shm +index.db-wal +audit-cache/ diff --git a/.gitignore b/.gitignore index 8fffa96..6b20ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ node_modules/ -.codemap.* -.codemap/audit-cache/ .DS_Store dist/ *.tgz diff --git a/fixtures/minimal/.codemap/.gitignore b/fixtures/minimal/.codemap/.gitignore new file mode 100644 index 0000000..4099e2e --- /dev/null +++ b/fixtures/minimal/.codemap/.gitignore @@ -0,0 +1,7 @@ +# codemap-managed — edits will be overwritten by `ensureStateGitignore`. +# Blacklist of generated artifacts; tracked sources (recipes/, config.*) +# default to tracked. Bump alongside any new cache (Rule 9 analogue). +index.db +index.db-shm +index.db-wal +audit-cache/ diff --git a/src/application/audit-engine.ts b/src/application/audit-engine.ts index fe444c3..0ca0732 100644 --- a/src/application/audit-engine.ts +++ b/src/application/audit-engine.ts @@ -350,16 +350,18 @@ let _reindexChain: Promise = Promise.resolve(); export function makeWorktreeReindex(): ReindexFn { return (worktreePath: string) => { const next = _reindexChain.then(async () => { - const wtDbPath = `${worktreePath}/.codemap.db`; - const wtDb = openCodemapDatabase(wtDbPath); + // Cached worktree is a self-contained codemap project; DB resolves + // to /.codemap/index.db via the default state-dir. 
const savedConfig = getCodemapConfig(); + let wtDb; try { const wtUser = await loadUserConfig(worktreePath, undefined); initCodemap(resolveCodemapConfig(worktreePath, wtUser)); configureResolver(getProjectRoot(), getTsconfigPath()); + wtDb = openCodemapDatabase(); await runCodemapIndex(wtDb, { mode: "full", quiet: true }); } finally { - wtDb.close(); + wtDb?.close(); initCodemap(savedConfig); configureResolver(getProjectRoot(), getTsconfigPath()); } diff --git a/src/application/audit-worktree.test.ts b/src/application/audit-worktree.test.ts index faea011..e976457 100644 --- a/src/application/audit-worktree.test.ts +++ b/src/application/audit-worktree.test.ts @@ -144,8 +144,10 @@ describe("populateWorktree + lookupCacheEntry", () => { projectRoot, sha: baseSha, reindex: async (worktreePath) => { - // Stand-in for the real reindex — just create an empty .codemap.db. - const db = openCodemapDatabase(join(worktreePath, ".codemap.db")); + // Stand-in for the real reindex — just create an empty cached DB. 
+ const db = openCodemapDatabase( + join(worktreePath, ".codemap", "index.db"), + ); createTables(db); db.close(); }, @@ -162,7 +164,7 @@ describe("populateWorktree + lookupCacheEntry", () => { let reindexCalls = 0; const reindex = async (wp: string) => { reindexCalls += 1; - const db = openCodemapDatabase(join(wp, ".codemap.db")); + const db = openCodemapDatabase(join(wp, ".codemap", "index.db")); createTables(db); db.close(); }; @@ -204,7 +206,9 @@ describe("populateWorktree + lookupCacheEntry", () => { projectRoot, sha: baseSha, reindex: async (worktreePath) => { - const db = openCodemapDatabase(join(worktreePath, ".codemap.db")); + const db = openCodemapDatabase( + join(worktreePath, ".codemap", "index.db"), + ); createTables(db); db.close(); }, @@ -230,11 +234,11 @@ describe("populateWorktree + lookupCacheEntry", () => { describe("runAuditFromRef — end-to-end against a fixture repo", () => { /** * Reindex stub that actually runs the canonical SQL projection by creating - * a `.codemap.db` with the worktree's files seeded into the `files` table. + * a `/index.db` with the worktree's files seeded into the `files` table. * Stand-in for the real `runCodemapIndex` — Tracer 2 wires the real one. */ async function fakeReindex(worktreePath: string): Promise { - const dbPath = join(worktreePath, ".codemap.db"); + const dbPath = join(worktreePath, ".codemap", "index.db"); const db = openCodemapDatabase(dbPath); try { createTables(db); diff --git a/src/application/audit-worktree.ts b/src/application/audit-worktree.ts index 897d315..a800bb8 100644 --- a/src/application/audit-worktree.ts +++ b/src/application/audit-worktree.ts @@ -28,22 +28,15 @@ function gitSpawnEnv(): NodeJS.ProcessEnv { } /** - * Sha-keyed worktree cache for `audit --base `. + * Sha-keyed worktree cache for `audit --base `. Each entry is a + * `git worktree` at `/.codemap/audit-cache//` with its + * own `.codemap/index.db`. Cache-hit detection: that DB exists. 
* - * Each cache entry is a populated `git worktree` at `/.codemap/audit-cache//` - * containing the materialised tree at that commit AND a temp `.codemap.db` - * indexed against it. Cache-hit detection is "does `/.codemap.db` exist?" - * — atomic populate (D11) guarantees the DB only appears after a successful - * reindex, so a cache hit never observes a half-written entry. + * **Concurrency.** Per-pid temp dir + POSIX `rename` to the final `/` + * slot — losers fall through to cache-hit; no lock files. * - * **Concurrency.** Two parallel `audit --base ` invocations resolving to - * the same sha race-safely: each writes to a per-pid temp dir, then POSIX - * `rename` claims the final `/` slot. Whichever rename loses gets EEXIST - * on most platforms — we treat that as "the winner already populated, fall - * through to cache-hit." No lock files needed. - * - * **Eviction.** LRU after 5 entries OR 500 MiB (D2). Computed by directory - * mtime; `git worktree remove --force` then `rm -rf` to clean up. + * **Eviction.** LRU after 5 entries OR 500 MiB (D2); `git worktree remove + * --force` + `rm -rf`. */ const CACHE_DIR_NAME = ".codemap/audit-cache"; @@ -105,16 +98,20 @@ export function isGitRepo(projectRoot: string): boolean { return existsSync(join(projectRoot, ".git")); } +/** Path of the cached DB inside a single cache entry. Mirrors the + * post-consolidation layout (`/.codemap/index.db`) recursively. */ +const CACHE_ENTRY_DB_REL = ".codemap/index.db"; + /** - * Cache-hit fast path. Returns the entry when `/.codemap.db` exists. - * Caller falls back to {@link populateWorktree} on a miss. + * Cache-hit fast path. Returns the entry when `/.codemap/index.db` + * exists. Caller falls back to {@link populateWorktree} on a miss. 
*/ export function lookupCacheEntry( sha: string, opts: WorktreeCacheOpts, ): PopulatedCacheEntry | undefined { const worktreePath = join(opts.projectRoot, CACHE_DIR_NAME, sha); - const dbPath = join(worktreePath, ".codemap.db"); + const dbPath = join(worktreePath, CACHE_ENTRY_DB_REL); if (!existsSync(dbPath)) return undefined; return { worktreePath, @@ -212,7 +209,7 @@ export async function populateWorktree( return { worktreePath: finalPath, - dbPath: join(finalPath, ".codemap.db"), + dbPath: join(finalPath, CACHE_ENTRY_DB_REL), sha: opts.sha, indexedAt: Date.now(), }; diff --git a/src/application/http-server.ts b/src/application/http-server.ts index 9c6fd5a..299438f 100644 --- a/src/application/http-server.ts +++ b/src/application/http-server.ts @@ -63,6 +63,7 @@ export interface HttpServerOpts { version: string; root: string; configFile?: string | undefined; + stateDir?: string | undefined; host: string; port: number; /** Bearer token; if undefined the server skips auth. */ @@ -173,8 +174,12 @@ export async function runHttpServer(opts: HttpServerOpts): Promise { } async function bootstrapForServe(opts: HttpServerOpts): Promise { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); + const user = await loadUserConfig(opts.root, opts.configFile, { + stateDir: opts.stateDir, + }); + initCodemap( + resolveCodemapConfig(opts.root, user, { stateDir: opts.stateDir }), + ); configureResolver(getProjectRoot(), getTsconfigPath()); } diff --git a/src/application/mcp-server.ts b/src/application/mcp-server.ts index 7280129..5fb2c55 100644 --- a/src/application/mcp-server.ts +++ b/src/application/mcp-server.ts @@ -62,6 +62,7 @@ interface ServerOpts { version: string; root: string; configFile?: string | undefined; + stateDir?: string | undefined; /** * If true, boot a co-process file watcher (chokidar via * `runWatchLoop`) so the server's tools always read live data without @@ -362,8 +363,12 @@ function 
registerStaticResource( * pre-initialized stack on every request without re-bootstrapping. */ async function bootstrapForMcp(opts: ServerOpts): Promise { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); + const user = await loadUserConfig(opts.root, opts.configFile, { + stateDir: opts.stateDir, + }); + initCodemap( + resolveCodemapConfig(opts.root, user, { stateDir: opts.stateDir }), + ); configureResolver(getProjectRoot(), getTsconfigPath()); } diff --git a/src/application/state-dir.test.ts b/src/application/state-dir.test.ts new file mode 100644 index 0000000..28d690f --- /dev/null +++ b/src/application/state-dir.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from "bun:test"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +import { + resolveStateDir, + STATE_CONFIG_BASENAMES, + STATE_DB_NAME, + STATE_DIR_DEFAULT, + STATE_GITIGNORE_NAME, +} from "./state-dir"; + +const ROOT = resolve(tmpdir(), "codemap-state-dir-test"); + +describe("resolveStateDir — precedence", () => { + it("defaults to /.codemap when nothing supplied", () => { + expect(resolveStateDir({ root: ROOT })).toBe(join(ROOT, ".codemap")); + }); + + it("uses env when set", () => { + expect(resolveStateDir({ root: ROOT, env: ".cm" })).toBe(join(ROOT, ".cm")); + }); + + it("uses cliFlag when set, ignoring env (flag wins)", () => { + expect( + resolveStateDir({ root: ROOT, cliFlag: ".override", env: ".cm" }), + ).toBe(join(ROOT, ".override")); + }); + + it("treats absolute cliFlag as-is (no resolve against root)", () => { + const abs = "/tmp/elsewhere/codemap-state"; + expect(resolveStateDir({ root: ROOT, cliFlag: abs })).toBe(abs); + }); + + it("treats absolute env as-is", () => { + const abs = "/var/cache/codemap"; + expect(resolveStateDir({ root: ROOT, env: abs })).toBe(abs); + }); + + it("nested relative path resolves against root", () => { + expect(resolveStateDir({ root: ROOT, cliFlag: 
"build/codemap" })).toBe( + join(ROOT, "build/codemap"), + ); + }); +}); + +describe("constants", () => { + it("default state-dir name is '.codemap'", () => { + expect(STATE_DIR_DEFAULT).toBe(".codemap"); + }); + it("DB name is 'index.db'", () => { + expect(STATE_DB_NAME).toBe("index.db"); + }); + it("gitignore name is '.gitignore'", () => { + expect(STATE_GITIGNORE_NAME).toBe(".gitignore"); + }); + it("config basenames are tried in ts → js → json order", () => { + expect([...STATE_CONFIG_BASENAMES]).toEqual([ + "config.ts", + "config.js", + "config.json", + ]); + }); +}); diff --git a/src/application/state-dir.ts b/src/application/state-dir.ts new file mode 100644 index 0000000..7ba9528 --- /dev/null +++ b/src/application/state-dir.ts @@ -0,0 +1,39 @@ +import { isAbsolute, resolve } from "node:path"; + +/** + * Default name of the codemap state directory under ``. + * Holds every codemap-managed file: `index.db` (+ WAL/SHM), `audit-cache/`, + * `recipes/`, `config.{ts,js,json}`, `.gitignore` (self-managed). + */ +export const STATE_DIR_DEFAULT = ".codemap"; + +/** Filename of the SQLite index inside `/`. */ +export const STATE_DB_NAME = "index.db"; + +/** Filename of the codemap-managed `.gitignore` inside `/`. */ +export const STATE_GITIGNORE_NAME = ".gitignore"; + +/** Config-file basename probed (in this order) inside `/`. */ +export const STATE_CONFIG_BASENAMES = [ + "config.ts", + "config.js", + "config.json", +] as const; + +export interface ResolveStateDirOpts { + root: string; + /** From `--state-dir ` CLI flag. */ + cliFlag?: string | undefined; + /** From `CODEMAP_STATE_DIR` env var. */ + env?: string | undefined; +} + +/** + * Resolve the absolute `` per plan §D7. Precedence: + * (1) `--state-dir `, (2) `CODEMAP_STATE_DIR`, (3) `/.codemap`. + * Relative paths resolve against `root`. Returns absolute. + */ +export function resolveStateDir(opts: ResolveStateDirOpts): string { + const raw = opts.cliFlag ?? opts.env ?? 
STATE_DIR_DEFAULT; + return isAbsolute(raw) ? raw : resolve(opts.root, raw); +} diff --git a/src/cli/bootstrap-codemap.ts b/src/cli/bootstrap-codemap.ts new file mode 100644 index 0000000..ec59b6e --- /dev/null +++ b/src/cli/bootstrap-codemap.ts @@ -0,0 +1,26 @@ +import { loadUserConfig, resolveCodemapConfig } from "../config"; +import { configureResolver } from "../resolver"; +import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; + +/** + * Per-command bootstrap: load user config, init runtime singletons, + * configure the resolver. Single attachment point for the self-healing + * reconcilers added in Tracer 4 (`ensureStateDir` will fan out from here). + */ +export interface BootstrapCodemapOpts { + root: string; + configFile: string | undefined; + stateDir?: string | undefined; +} + +export async function bootstrapCodemap( + opts: BootstrapCodemapOpts, +): Promise { + const user = await loadUserConfig(opts.root, opts.configFile, { + stateDir: opts.stateDir, + }); + initCodemap( + resolveCodemapConfig(opts.root, user, { stateDir: opts.stateDir }), + ); + configureResolver(getProjectRoot(), getTsconfigPath()); +} diff --git a/src/cli/bootstrap.ts b/src/cli/bootstrap.ts index 5248f8c..11dfd87 100644 --- a/src/cli/bootstrap.ts +++ b/src/cli/bootstrap.ts @@ -112,6 +112,7 @@ export function parseBootstrapArgs(argv: string[]) { const envRoot = process.env.CODEMAP_ROOT ?? process.env.CODEMAP_TEST_BENCH; let root = envRoot ? 
resolve(envRoot) : undefined; let configFile: string | undefined; + let stateDir: string | undefined; const rest: string[] = []; for (let i = 0; i < argv.length; i++) { const a = argv[i]; @@ -123,8 +124,14 @@ export function parseBootstrapArgs(argv: string[]) { configFile = resolve(argv[++i]); continue; } + if (a === "--state-dir" && argv[i + 1]) { + stateDir = argv[++i]; + continue; + } rest.push(a); } if (!root) root = process.cwd(); - return { root, configFile, rest }; + // --state-dir wins over CODEMAP_STATE_DIR (precedence per plan §D7). + if (!stateDir) stateDir = process.env.CODEMAP_STATE_DIR; + return { root, configFile, stateDir, rest }; } diff --git a/src/cli/cmd-audit.ts b/src/cli/cmd-audit.ts index cc65eb7..daa67dc 100644 --- a/src/cli/cmd-audit.ts +++ b/src/cli/cmd-audit.ts @@ -7,10 +7,9 @@ import { } from "../application/audit-engine"; import type { AuditEnvelope } from "../application/audit-engine"; import { runCodemapIndex } from "../application/run-index"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; // Per-delta CLI flag → delta key. Generated from V1_DELTAS so adding a delta // in the engine surfaces a `---baseline` flag automatically. 
@@ -259,6 +258,7 @@ Examples: export async function runAuditCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; baselinePrefix: string | undefined; base: string | undefined; perDelta: Record; @@ -267,9 +267,7 @@ export async function runAuditCmd(opts: { noIndex: boolean; }): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); try { diff --git a/src/cli/cmd-context.ts b/src/cli/cmd-context.ts index cf07282..adde228 100644 --- a/src/cli/cmd-context.ts +++ b/src/cli/cmd-context.ts @@ -1,13 +1,13 @@ import { buildContextEnvelope } from "../application/context-engine"; import type { ContextEnvelope } from "../application/context-engine"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface ContextOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; compact: boolean; intent: string | null; } @@ -82,9 +82,7 @@ export function parseContextRest( */ export async function runContextCmd(opts: ContextOpts): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); let envelope: ContextEnvelope; try { diff --git a/src/cli/cmd-impact.ts b/src/cli/cmd-impact.ts index 3bee1af..bba5975 100644 --- a/src/cli/cmd-impact.ts +++ b/src/cli/cmd-impact.ts @@ -4,14 +4,13 @@ import type { ImpactDirection, ImpactResult, } from "../application/impact-engine"; 
-import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface ImpactOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; target: string; direction: ImpactDirection; via: ImpactBackend; @@ -225,9 +224,7 @@ export function parseImpactRest(rest: string[]): */ export async function runImpactCmd(opts: ImpactOpts): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); let result: ImpactResult; diff --git a/src/cli/cmd-index.ts b/src/cli/cmd-index.ts index 575e776..542f93b 100644 --- a/src/cli/cmd-index.ts +++ b/src/cli/cmd-index.ts @@ -2,19 +2,16 @@ import { extname } from "node:path"; import { VALID_EXTENSIONS } from "../application/index-engine"; import { runCodemapIndex } from "../application/run-index"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; export async function runIndexCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; rest: string[]; }): Promise { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const args = opts.rest; const db = openDb(); diff --git a/src/cli/cmd-mcp.ts b/src/cli/cmd-mcp.ts index c708c11..59f6c4e 100644 --- a/src/cli/cmd-mcp.ts +++ 
b/src/cli/cmd-mcp.ts @@ -125,6 +125,7 @@ With --watch, the file watcher is drained before the server exits. export async function runMcpCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; watch: boolean; debounceMs: number; }): Promise { @@ -132,6 +133,7 @@ export async function runMcpCmd(opts: { version: CODEMAP_VERSION, root: opts.root, configFile: opts.configFile, + stateDir: opts.stateDir, watch: opts.watch, debounceMs: opts.debounceMs, }); diff --git a/src/cli/cmd-query.ts b/src/cli/cmd-query.ts index 246a42a..912f753 100644 --- a/src/cli/cmd-query.ts +++ b/src/cli/cmd-query.ts @@ -16,7 +16,6 @@ import { listQueryRecipeIds, QUERY_RECIPES, } from "../application/query-recipes"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, deleteQueryBaseline, @@ -37,8 +36,8 @@ import { loadCodeowners, makePackageBucketizer, } from "../group-by"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; /** * Parse `argv` after the global bootstrap: `rest[0]` must be `"query"`. @@ -630,6 +629,7 @@ Examples: export async function runQueryCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; sql: string; json?: boolean; /** @@ -655,9 +655,7 @@ export async function runQueryCmd(opts: { opts.format ?? (opts.json === true ? 
"json" : "text"); const isJson = effectiveFormat === "json"; try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); let changedFiles: Set | undefined; if (opts.changedSince !== undefined) { @@ -743,12 +741,11 @@ export async function runQueryCmd(opts: { export async function runListBaselinesCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; json: boolean; }): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); try { const rows = listQueryBaselines(db); @@ -772,13 +769,12 @@ export async function runListBaselinesCmd(opts: { export async function runDropBaselineCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; name: string; json: boolean; }): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); try { const dropped = deleteQueryBaseline(db, opts.name); diff --git a/src/cli/cmd-serve.ts b/src/cli/cmd-serve.ts index 0812326..58b9dd6 100644 --- a/src/cli/cmd-serve.ts +++ b/src/cli/cmd-serve.ts @@ -211,6 +211,7 @@ The server runs until SIGINT/SIGTERM (drains in-flight + closes listener). 
export async function runServeCmd(opts: { root: string; configFile: string | undefined; + stateDir?: string | undefined; host: string; port: number; token: string | undefined; @@ -221,6 +222,7 @@ export async function runServeCmd(opts: { version: CODEMAP_VERSION, root: opts.root, configFile: opts.configFile, + stateDir: opts.stateDir, host: opts.host, port: opts.port, token: opts.token, diff --git a/src/cli/cmd-show.ts b/src/cli/cmd-show.ts index 53b13a5..73ea9bd 100644 --- a/src/cli/cmd-show.ts +++ b/src/cli/cmd-show.ts @@ -1,14 +1,14 @@ import { buildShowResult, findSymbolsByName } from "../application/show-engine"; import type { ShowResult, SymbolMatch } from "../application/show-engine"; import { toProjectRelative } from "../application/validate-engine"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface ShowOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; name: string; kind: string | undefined; inPath: string | undefined; @@ -134,9 +134,7 @@ export function parseShowRest(rest: string[]): */ export async function runShowCmd(opts: ShowOpts): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const projectRoot = getProjectRoot(); const inPath = diff --git a/src/cli/cmd-snippet.ts b/src/cli/cmd-snippet.ts index 7b4072a..19172a9 100644 --- a/src/cli/cmd-snippet.ts +++ b/src/cli/cmd-snippet.ts @@ -4,14 +4,14 @@ import { } from "../application/show-engine"; import type { SnippetResult, SymbolMatch } from "../application/show-engine"; import { toProjectRelative } from 
"../application/validate-engine"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface SnippetOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; name: string; kind: string | undefined; inPath: string | undefined; @@ -145,9 +145,7 @@ export function parseSnippetRest(rest: string[]): */ export async function runSnippetCmd(opts: SnippetOpts): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const projectRoot = getProjectRoot(); const inPath = diff --git a/src/cli/cmd-validate.ts b/src/cli/cmd-validate.ts index 6b99c89..1dc9f47 100644 --- a/src/cli/cmd-validate.ts +++ b/src/cli/cmd-validate.ts @@ -1,13 +1,13 @@ import { computeValidateRows } from "../application/validate-engine"; import type { ValidateRow } from "../application/validate-engine"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; import { closeDb, openDb } from "../db"; -import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface ValidateOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; paths: string[]; json?: boolean; } @@ -82,9 +82,7 @@ export function parseValidateRest( export async function runValidateCmd(opts: ValidateOpts): Promise { const json = opts.json === true; try { - const user = await loadUserConfig(opts.root, opts.configFile); - 
initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const db = openDb(); let rows: ValidateRow[]; try { diff --git a/src/cli/cmd-watch.ts b/src/cli/cmd-watch.ts index 91bf95d..f6169da 100644 --- a/src/cli/cmd-watch.ts +++ b/src/cli/cmd-watch.ts @@ -4,18 +4,13 @@ import { DEFAULT_DEBOUNCE_MS, runWatchLoop, } from "../application/watcher"; -import { loadUserConfig, resolveCodemapConfig } from "../config"; -import { configureResolver } from "../resolver"; -import { - getExcludeDirNames, - getProjectRoot, - getTsconfigPath, - initCodemap, -} from "../runtime"; +import { getExcludeDirNames, getProjectRoot } from "../runtime"; +import { bootstrapCodemap } from "./bootstrap-codemap"; interface WatchOpts { root: string; configFile: string | undefined; + stateDir?: string | undefined; debounceMs: number; quiet: boolean; } @@ -123,9 +118,7 @@ file watcher). Tracer 4 lands an optimization: when watcher is active, */ export async function runWatchCmd(opts: WatchOpts): Promise { try { - const user = await loadUserConfig(opts.root, opts.configFile); - initCodemap(resolveCodemapConfig(opts.root, user)); - configureResolver(getProjectRoot(), getTsconfigPath()); + await bootstrapCodemap(opts); const root = getProjectRoot(); if (!opts.quiet) { diff --git a/src/cli/main.ts b/src/cli/main.ts index 2b6f909..2c0beb1 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -12,7 +12,7 @@ import { */ export async function main(): Promise { const argv = process.argv.slice(2); - const { root, configFile, rest } = parseBootstrapArgs(argv); + const { root, configFile, stateDir, rest } = parseBootstrapArgs(argv); if (rest[0] === "--help" || rest[0] === "-h") { printCliUsage(); @@ -81,6 +81,7 @@ Copies bundled agent templates into .agents/ under the project root. 
await runContextCmd({ root, configFile, + stateDir, compact: parsed.compact, intent: parsed.intent, }); @@ -102,6 +103,7 @@ Copies bundled agent templates into .agents/ under the project root. await runValidateCmd({ root, configFile, + stateDir, paths: parsed.paths, json: parsed.json, }); @@ -123,6 +125,7 @@ Copies bundled agent templates into .agents/ under the project root. await runShowCmd({ root, configFile, + stateDir, name: parsed.name, kind: parsed.kindFilter, inPath: parsed.inPath, @@ -146,6 +149,7 @@ Copies bundled agent templates into .agents/ under the project root. await runSnippetCmd({ root, configFile, + stateDir, name: parsed.name, kind: parsed.kindFilter, inPath: parsed.inPath, @@ -169,6 +173,7 @@ Copies bundled agent templates into .agents/ under the project root. await runMcpCmd({ root, configFile, + stateDir, watch: parsed.watch, debounceMs: parsed.debounceMs, }); @@ -190,6 +195,7 @@ Copies bundled agent templates into .agents/ under the project root. await runWatchCmd({ root, configFile, + stateDir, debounceMs: parsed.debounceMs, quiet: parsed.quiet, }); @@ -211,6 +217,7 @@ Copies bundled agent templates into .agents/ under the project root. await runServeCmd({ root, configFile, + stateDir, host: parsed.host, port: parsed.port, token: parsed.token, @@ -235,6 +242,7 @@ Copies bundled agent templates into .agents/ under the project root. await runImpactCmd({ root, configFile, + stateDir, target: parsed.target, direction: parsed.direction, via: parsed.via, @@ -261,6 +269,7 @@ Copies bundled agent templates into .agents/ under the project root. await runAuditCmd({ root, configFile, + stateDir, baselinePrefix: parsed.baselinePrefix, base: parsed.base, perDelta: parsed.perDelta, @@ -308,6 +317,7 @@ Copies bundled agent templates into .agents/ under the project root. await runDropBaselineCmd({ root, configFile, + stateDir, name: parsed.name, json: parsed.json, }); @@ -316,6 +326,7 @@ Copies bundled agent templates into .agents/ under the project root. 
await runQueryCmd({ root, configFile, + stateDir, sql: parsed.sql, json: parsed.json, format: parsed.format, @@ -330,5 +341,5 @@ Copies bundled agent templates into .agents/ under the project root. } const { runIndexCmd } = await import("./cmd-index.js"); - await runIndexCmd({ root, configFile, rest }); + await runIndexCmd({ root, configFile, stateDir, rest }); } diff --git a/src/cli/query-output-benchmark.test.ts b/src/cli/query-output-benchmark.test.ts index 6a190e9..13eb78f 100644 --- a/src/cli/query-output-benchmark.test.ts +++ b/src/cli/query-output-benchmark.test.ts @@ -18,7 +18,7 @@ async function runCli( } const root = join(import.meta.dir, "..", ".."); -const hasDb = existsSync(join(root, ".codemap.db")); +const hasDb = existsSync(join(root, ".codemap", "index.db")); const describeIntegration = hasDb ? describe : describe.skip; describeIntegration("query default vs --json (integration)", () => { diff --git a/src/config.test.ts b/src/config.test.ts index 14e3888..24f76eb 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it, beforeEach, afterEach } from "bun:test"; -import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -57,7 +57,8 @@ describe("resolveCodemapConfig", () => { it("defaults database path and include patterns", () => { const r = resolveCodemapConfig(dir, undefined); expect(r.root).toBe(dir); - expect(r.databasePath).toBe(join(dir, ".codemap.db")); + expect(r.stateDir).toBe(join(dir, ".codemap")); + expect(r.databasePath).toBe(join(dir, ".codemap", "index.db")); expect(r.include.length).toBe(DEFAULT_INCLUDE_PATTERNS.length); expect(r.excludeDirNames.has("node_modules")).toBe(true); }); @@ -114,9 +115,11 @@ describe("loadUserConfig", () => { rmSync(dir, { recursive: true, force: true }); }); - it("loads codemap.config.json from project root", async 
() => { + it("loads /config.json", async () => { + const stateDir = join(dir, ".codemap"); + mkdirSync(stateDir, { recursive: true }); writeFileSync( - join(dir, "codemap.config.json"), + join(stateDir, "config.json"), JSON.stringify({ include: ["**/*.ts"] }), ); const cfg = await loadUserConfig(dir); @@ -136,8 +139,10 @@ describe("loadUserConfig", () => { }); it("invalid JSON config throws when resolved", async () => { + const stateDir = join(dir, ".codemap"); + mkdirSync(stateDir, { recursive: true }); writeFileSync( - join(dir, "codemap.config.json"), + join(stateDir, "config.json"), JSON.stringify({ include: [1, 2] }), ); const cfg = await loadUserConfig(dir); diff --git a/src/config.ts b/src/config.ts index 33bd977..c085494 100644 --- a/src/config.ts +++ b/src/config.ts @@ -5,6 +5,12 @@ import { pathToFileURL } from "node:url"; import { z } from "zod"; +import { + resolveStateDir, + STATE_CONFIG_BASENAMES, + STATE_DB_NAME, +} from "./application/state-dir"; + async function readJsonFile(filePath: string): Promise { if (typeof Bun !== "undefined") { return Bun.file(filePath).json(); @@ -103,7 +109,14 @@ function formatCodemapConfigError(error: z.ZodError): string { export interface ResolvedCodemapConfig { /** Absolute project root (from CLI `--root`, env, or `process.cwd()`). */ readonly root: string; - /** Absolute path to the SQLite database file (default `/.codemap.db`). */ + /** + * Absolute path to the codemap state directory (`/.codemap` by default). + * Overridable via `--state-dir ` or `CODEMAP_STATE_DIR`. Holds every + * codemap-managed file: `index.db` (+ WAL/SHM), `audit-cache/`, `recipes/`, + * `config.{ts,js,json}`, `.gitignore` (self-managed). + */ + readonly stateDir: string; + /** Absolute path to the SQLite database file (default `/index.db`). */ readonly databasePath: string; /** Glob patterns relative to `root`; either user-supplied or {@link DEFAULT_INCLUDE_PATTERNS}. 
*/ readonly include: readonly string[]; @@ -135,18 +148,37 @@ export function defineConfig(config: CodemapUserConfig): CodemapUserConfig { return parseCodemapUserConfig(config); } +export interface ResolveCodemapConfigOpts { + /** + * Pre-resolved state-dir (from CLI `--state-dir` or `CODEMAP_STATE_DIR`). + * When omitted the default `/.codemap` is used. Resolved at the + * bootstrap layer (NOT via the user config — the config file lives + * inside `/` so we'd hit a chicken-and-egg). + */ + stateDir?: string | undefined; +} + /** * Merge user config with defaults (absolute paths, default DB location, tsconfig discovery). + * + * Three-arg form (`opts.stateDir`) lets the bootstrap pass the resolved + * state directory through; legacy two-arg call sites keep working with the + * default `/.codemap`. User-supplied `databasePath` (escape hatch for + * non-standard layouts) wins over the state-dir derivation. */ export function resolveCodemapConfig( root: string, user: CodemapUserConfig | undefined, + opts: ResolveCodemapConfigOpts = {}, ): ResolvedCodemapConfig { const parsed = user !== undefined ? parseCodemapUserConfig(user) : undefined; const absRoot = resolve(root); + const stateDir = opts.stateDir + ? resolve(opts.stateDir) + : resolveStateDir({ root: absRoot }); const databasePath = parsed?.databasePath ? resolve(absRoot, parsed.databasePath) - : join(absRoot, ".codemap.db"); + : join(stateDir, STATE_DB_NAME); const include = parsed?.include?.length ? [...parsed.include] : [...DEFAULT_INCLUDE_PATTERNS]; @@ -167,6 +199,7 @@ export function resolveCodemapConfig( return { root: absRoot, + stateDir, databasePath, include, excludeDirNames, @@ -175,12 +208,18 @@ export function resolveCodemapConfig( } /** - * Load optional `codemap.config.ts` / `codemap.config.json` from the project root, - * or from `explicitPath` (CLI `--config`). + * Load `/config.{ts,js,json}` (D8 order) — or `explicitPath` + * when CLI `--config` is set. 
Pre-v1: legacy `/codemap.config.{ts,json}` + * paths are not searched; the changelog notes the one-line move. + * + * Three-arg form (`opts.stateDir`) lets the bootstrap pass the resolved + * state directory through; legacy two-arg form (`loadUserConfig(root)`) + * defaults to `/.codemap/`. */ export async function loadUserConfig( root: string, explicitPath?: string, + opts: { stateDir?: string | undefined } = {}, ): Promise { const tryImport = async ( file: string, @@ -207,14 +246,18 @@ export async function loadUserConfig( return tryImport(explicitPath); } - const tsConfig = join(root, "codemap.config.ts"); - const fromTs = await tryImport(tsConfig); - if (fromTs) return fromTs; - - const jsonPath = join(root, "codemap.config.json"); - if (existsSync(jsonPath)) { - const raw = await readJsonFile(jsonPath); - return raw as CodemapUserConfig; + const stateDir = opts.stateDir ?? resolveStateDir({ root }); + for (const basename of STATE_CONFIG_BASENAMES) { + const candidate = join(stateDir, basename); + if (basename.endsWith(".json")) { + if (existsSync(candidate)) { + const raw = await readJsonFile(candidate); + return raw as CodemapUserConfig; + } + continue; + } + const fromImport = await tryImport(candidate); + if (fromImport) return fromImport; } return undefined; diff --git a/src/runtime.ts b/src/runtime.ts index 523f574..a7c66a3 100644 --- a/src/runtime.ts +++ b/src/runtime.ts @@ -23,6 +23,10 @@ export function getProjectRoot(): string { return getCodemapConfig().root; } +export function getStateDir(): string { + return getCodemapConfig().stateDir; +} + export function getDatabasePath(): string { return getCodemapConfig().databasePath; } diff --git a/src/sqlite-db.ts b/src/sqlite-db.ts index bcf05ff..2a2e2ed 100644 --- a/src/sqlite-db.ts +++ b/src/sqlite-db.ts @@ -1,4 +1,6 @@ +import { mkdirSync } from "node:fs"; import { createRequire } from "node:module"; +import { dirname } from "node:path"; import { getDatabasePath } from "./runtime"; @@ -136,6 +138,8 @@ 
function wrap(inner: SqliteInner): CodemapDatabase { export function openCodemapDatabase(path?: string): CodemapDatabase { const p = path ?? getDatabasePath(); + // Auto-create parent dir — may not exist on first run. + if (p !== ":memory:") mkdirSync(dirname(p), { recursive: true }); const db = wrap(openRaw(p)); db.run("PRAGMA journal_mode = WAL"); From 05f96983d348018583b5347e62c7b0447f6e1175 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:39:25 +0300 Subject: [PATCH 2/7] =?UTF-8?q?feat(state-dir):=20ensureStateGitignore=20r?= =?UTF-8?q?econciler=20=E2=80=94=20self-healing=20(Tracer=202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - STATE_GITIGNORE_BODY constant — single source of truth for the canonical blacklist. - ensureStateGitignore(stateDir) — pure-shape return ({before, after, written}); idempotent (no write on steady state); auto-mkdir; user-edits rewritten back per D11 (file is codemap-managed; header line declares it). - 5 tests cover: fresh write, idempotent, user-modified, older-version self-heal, returned shape matches disk. Bumping STATE_GITIGNORE_BODY in a future PR is the entire migration — every consumer's project repairs itself on next codemap run. 
--- src/application/state-dir.test.ts | 70 ++++++++++++++++++++++++++++++- src/application/state-dir.ts | 50 +++++++++++++++++++++- 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/src/application/state-dir.test.ts b/src/application/state-dir.test.ts index 28d690f..caa0e72 100644 --- a/src/application/state-dir.test.ts +++ b/src/application/state-dir.test.ts @@ -1,12 +1,21 @@ -import { describe, expect, it } from "bun:test"; +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; import { tmpdir } from "node:os"; import { join, resolve } from "node:path"; import { + ensureStateGitignore, resolveStateDir, STATE_CONFIG_BASENAMES, STATE_DB_NAME, STATE_DIR_DEFAULT, + STATE_GITIGNORE_BODY, STATE_GITIGNORE_NAME, } from "./state-dir"; @@ -62,3 +71,62 @@ describe("constants", () => { ]); }); }); + +describe("ensureStateGitignore — self-healing reconciler (D11)", () => { + let stateDir: string; + + beforeEach(() => { + stateDir = mkdtempSync(join(tmpdir(), "codemap-state-")) + "/.codemap"; + }); + + afterEach(() => { + rmSync(stateDir + "/..", { recursive: true, force: true }); + }); + + it("creates the file when absent (and mkdirs the state-dir)", () => { + expect(existsSync(stateDir)).toBe(false); + const r = ensureStateGitignore(stateDir); + expect(r).toEqual({ + before: undefined, + after: STATE_GITIGNORE_BODY, + written: true, + }); + expect(readFileSync(join(stateDir, ".gitignore"), "utf-8")).toBe( + STATE_GITIGNORE_BODY, + ); + }); + + it("steady-state run is a no-op (drift-detect)", () => { + ensureStateGitignore(stateDir); + const r = ensureStateGitignore(stateDir); + expect(r.written).toBe(false); + expect(r.before).toBe(STATE_GITIGNORE_BODY); + expect(r.after).toBe(STATE_GITIGNORE_BODY); + }); + + it("rewrites a user-modified file back to canonical (overwrite by design)", () => { + ensureStateGitignore(stateDir); + 
writeFileSync(join(stateDir, ".gitignore"), "rogue content\n", "utf-8"); + const r = ensureStateGitignore(stateDir); + expect(r.written).toBe(true); + expect(r.before).toBe("rogue content\n"); + expect(r.after).toBe(STATE_GITIGNORE_BODY); + }); + + it("self-heals when an older codemap version's content is missing today's entries", () => { + // Older shape — pre-audit-cache: only the DB lines. + const olderBody = + "# old codemap-managed file\nindex.db\nindex.db-shm\nindex.db-wal\n"; + const { mkdirSync } = require("node:fs") as typeof import("node:fs"); + mkdirSync(stateDir, { recursive: true }); + writeFileSync(join(stateDir, ".gitignore"), olderBody, "utf-8"); + const r = ensureStateGitignore(stateDir); + expect(r.written).toBe(true); + expect(r.after).toContain("audit-cache/"); + }); + + it("returned `after` matches the file on disk", () => { + const r = ensureStateGitignore(stateDir); + expect(r.after).toBe(readFileSync(join(stateDir, ".gitignore"), "utf-8")); + }); +}); diff --git a/src/application/state-dir.ts b/src/application/state-dir.ts index 7ba9528..e20d88d 100644 --- a/src/application/state-dir.ts +++ b/src/application/state-dir.ts @@ -1,4 +1,5 @@ -import { isAbsolute, resolve } from "node:path"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { isAbsolute, join, resolve } from "node:path"; /** * Default name of the codemap state directory under ``. @@ -37,3 +38,50 @@ export function resolveStateDir(opts: ResolveStateDirOpts): string { const raw = opts.cliFlag ?? opts.env ?? STATE_DIR_DEFAULT; return isAbsolute(raw) ? raw : resolve(opts.root, raw); } + +/** + * Canonical contents of `/.gitignore` — codemap-managed + * blacklist. Bumping this constant IS the migration: every consumer's + * project repairs itself on the next `codemap` run via {@link ensureStateGitignore}. 
+ * + * Kept as one string (not an array of patterns) so the ENTIRE file is + * the source of truth — header, blank lines, and ordering all reproduce + * verbatim. Add new generated artifacts in the same PR that introduces them. + */ +export const STATE_GITIGNORE_BODY = `# codemap-managed — edits will be overwritten by \`ensureStateGitignore\`. +# Blacklist of generated artifacts; tracked sources (recipes/, config.*) +# default to tracked. Bump alongside any new cache (Rule 9 analogue). +index.db +index.db-shm +index.db-wal +audit-cache/ +`; + +export interface EnsureStateGitignoreResult { + /** Content present before the call (`undefined` when the file didn't exist). */ + before: string | undefined; + /** Content written (or that would have been written if it had drifted). */ + after: string; + /** True when the file was created or rewritten; false on the steady-state hit. */ + written: boolean; +} + +/** + * Self-healing reconciler for `/.gitignore` (D11). Idempotent: + * read → compare to {@link STATE_GITIGNORE_BODY} → write only on drift. + * Auto-creates `/` if absent. Pure shape (`{before, after, + * written}`) so callers can unit-test the decision separately from the + * filesystem effect. + */ +export function ensureStateGitignore( + stateDir: string, +): EnsureStateGitignoreResult { + const path = join(stateDir, STATE_GITIGNORE_NAME); + const before = existsSync(path) ? 
readFileSync(path, "utf-8") : undefined; + if (before === STATE_GITIGNORE_BODY) { + return { before, after: STATE_GITIGNORE_BODY, written: false }; + } + mkdirSync(stateDir, { recursive: true }); + writeFileSync(path, STATE_GITIGNORE_BODY, "utf-8"); + return { before, after: STATE_GITIGNORE_BODY, written: true }; +} From b50bae6f5b2cd4d5fdb2069a862d4a50942ac98b Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:42:30 +0300 Subject: [PATCH 3/7] feat(state-dir): ensureStateConfig reconciler (Tracer 3) --- src/application/state-config.test.ts | 103 +++++++++++++++++++++++++++ src/application/state-config.ts | 97 +++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 src/application/state-config.test.ts create mode 100644 src/application/state-config.ts diff --git a/src/application/state-config.test.ts b/src/application/state-config.test.ts new file mode 100644 index 0000000..d13117d --- /dev/null +++ b/src/application/state-config.test.ts @@ -0,0 +1,103 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { ensureStateConfig } from "./state-config"; + +let stateDir: string; + +beforeEach(() => { + stateDir = mkdtempSync(join(tmpdir(), "codemap-cfg-")); +}); + +afterEach(() => { + rmSync(stateDir, { recursive: true, force: true }); +}); + +describe("ensureStateConfig — self-healing JSON reconciler (D8 + D11)", () => { + it("no-op when no config file exists", () => { + const r = ensureStateConfig(stateDir); + expect(r).toEqual({ found: undefined, written: false, warnings: [] }); + }); + + it("steady-state: well-formed JSON with sorted keys is not rewritten", () => { + const body = `${JSON.stringify( + { include: ["**/*.ts"], tsconfigPath: "tsconfig.json" }, + null, + 2, + )}\n`; + writeFileSync(join(stateDir, "config.json"), body, "utf-8"); + const 
r = ensureStateConfig(stateDir); + expect(r.written).toBe(false); + expect(r.warnings).toEqual([]); + expect(readFileSync(join(stateDir, "config.json"), "utf-8")).toBe(body); + }); + + it("normalises key order alphabetically (drift → write)", () => { + const unsorted = `${JSON.stringify( + { tsconfigPath: "tsconfig.json", include: ["**/*.ts"] }, + null, + 2, + )}\n`; + writeFileSync(join(stateDir, "config.json"), unsorted, "utf-8"); + const r = ensureStateConfig(stateDir); + expect(r.written).toBe(true); + const after = readFileSync(join(stateDir, "config.json"), "utf-8"); + expect(after.indexOf("include")).toBeLessThan( + after.indexOf("tsconfigPath"), + ); + }); + + it("prunes unknown keys with a warning", () => { + writeFileSync( + join(stateDir, "config.json"), + JSON.stringify({ include: ["**/*.ts"], bogus: 1 }, null, 2), + "utf-8", + ); + const r = ensureStateConfig(stateDir); + expect(r.written).toBe(true); + expect(r.warnings.some((w) => w.includes("bogus"))).toBe(true); + const after = JSON.parse( + readFileSync(join(stateDir, "config.json"), "utf-8"), + ) as Record; + expect("bogus" in after).toBe(false); + expect(after.include).toEqual(["**/*.ts"]); + }); + + it("warns + leaves file alone on invalid JSON", () => { + writeFileSync(join(stateDir, "config.json"), "{not json", "utf-8"); + const r = ensureStateConfig(stateDir); + expect(r.written).toBe(false); + expect(r.warnings[0]).toContain("invalid JSON"); + expect(readFileSync(join(stateDir, "config.json"), "utf-8")).toBe( + "{not json", + ); + }); + + it("warns on schema violation (e.g. 
wrong type) without writing", () => { + writeFileSync( + join(stateDir, "config.json"), + JSON.stringify({ include: "not-an-array" }, null, 2), + "utf-8", + ); + const r = ensureStateConfig(stateDir); + expect(r.written).toBe(false); + expect(r.warnings.some((w) => w.includes("include"))).toBe(true); + }); + + it("TS config path: validate-only — never rewrites user code", () => { + const userCode = `export default { include: ["**/*.ts"] }\n`; + writeFileSync(join(stateDir, "config.ts"), userCode, "utf-8"); + const r = ensureStateConfig(stateDir); + expect(r).toMatchObject({ found: "config.ts", written: false }); + expect(readFileSync(join(stateDir, "config.ts"), "utf-8")).toBe(userCode); + }); + + it("config.ts wins over config.json (D8 search order)", () => { + writeFileSync(join(stateDir, "config.ts"), `export default {}\n`, "utf-8"); + writeFileSync(join(stateDir, "config.json"), `{}`, "utf-8"); + const r = ensureStateConfig(stateDir); + expect(r.found).toBe("config.ts"); + }); +}); diff --git a/src/application/state-config.ts b/src/application/state-config.ts new file mode 100644 index 0000000..b4fc204 --- /dev/null +++ b/src/application/state-config.ts @@ -0,0 +1,97 @@ +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +import { codemapUserConfigSchema } from "../config"; +import type { CodemapUserConfig } from "../config"; +import { STATE_CONFIG_BASENAMES } from "./state-dir"; + +export interface EnsureStateConfigResult { + /** Found basename (e.g. `config.json`) or undefined when no config file exists. */ + found: (typeof STATE_CONFIG_BASENAMES)[number] | undefined; + /** True only for JSON drift; TS/JS configs are validate-only and never rewritten. */ + written: boolean; + /** Validation errors collected during reconciliation (each logged via `console.warn`). */ + warnings: string[]; +} + +/** + * Self-healing reconciler for `/config.{ts,js,json}` (D8 + D11). 
+ * No-op when no config file exists (codemap's defaults cover everything). + * + * **JSON path** — parse, validate against {@link codemapUserConfigSchema} + * (passthrough so we can detect+prune unknown keys), key-sort the + * validated subset alphabetically, write back only on drift. Bumping + * the schema in v2 IS the migration: every consumer's `config.json` is + * normalised on next codemap run. + * + * **TS/JS path** — validate-only at load time (handled by `loadUserConfig`); + * never rewritten. User code is sacred. + */ +export function ensureStateConfig(stateDir: string): EnsureStateConfigResult { + for (const basename of STATE_CONFIG_BASENAMES) { + const path = join(stateDir, basename); + if (!existsSync(path)) continue; + + if (basename !== "config.json") { + // TS/JS — validation happens at load time; nothing to write. + return { found: basename, written: false, warnings: [] }; + } + + const raw = readFileSync(path, "utf-8"); + const warnings: string[] = []; + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch (err) { + warnings.push( + `${path}: invalid JSON, leaving file alone (${err instanceof Error ? err.message : String(err)})`, + ); + for (const w of warnings) console.warn(w); + return { found: basename, written: false, warnings }; + } + + // Passthrough validation lets us spot+prune unknown keys; strict + // schema rejection only happens at downstream `parseCodemapUserConfig` + // (config.ts) which we leave authoritative for runtime errors. 
+ const result = codemapUserConfigSchema.passthrough().safeParse(parsed); + if (!result.success) { + for (const issue of result.error.issues) { + warnings.push( + `${path}: ${issue.path.join(".") || "(root)"} — ${issue.message}`, + ); + } + for (const w of warnings) console.warn(w); + return { found: basename, written: false, warnings }; + } + + const known: CodemapUserConfig = pickKnown(result.data); + const droppedKeys = Object.keys(result.data).filter((k) => !(k in known)); + for (const k of droppedKeys) { + warnings.push(`${path}: unknown key "${k}" pruned`); + } + const sorted = sortKeys(known); + const next = `${JSON.stringify(sorted, null, 2)}\n`; + if (next === raw) { + return { found: basename, written: false, warnings }; + } + for (const w of warnings) console.warn(w); + writeFileSync(path, next, "utf-8"); + return { found: basename, written: true, warnings }; + } + return { found: undefined, written: false, warnings: [] }; +} + +function pickKnown(obj: Record): CodemapUserConfig { + const known = new Set(Object.keys(codemapUserConfigSchema.shape)); + const out: Record = {}; + for (const [k, v] of Object.entries(obj)) { + if (known.has(k)) out[k] = v; + } + return out as CodemapUserConfig; +} + +function sortKeys>(obj: T): T { + return Object.fromEntries( + Object.entries(obj).sort(([a], [b]) => a.localeCompare(b)), + ) as T; +} From 929433ea055e97676029fc8d2f353aba8904d610 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:48:58 +0300 Subject: [PATCH 4/7] feat(state-dir): bootstrap orchestrator + agents-init delegation (Tracer 4) --- src/agents-init.test.ts | 60 +++++++++--------------------------- src/agents-init.ts | 52 +++++++------------------------ src/cli/bootstrap-codemap.ts | 39 +++++++++++++++++------ 3 files changed, 55 insertions(+), 96 deletions(-) diff --git a/src/agents-init.test.ts b/src/agents-init.test.ts index 8411f24..2ae2aa9 100644 --- a/src/agents-init.test.ts +++ b/src/agents-init.test.ts @@ -102,71 +102,41 @@ 
describe("runAgentsInit", () => { expect(p.endsWith("/templates/agents")).toBe(true); }); - it("ensureGitignoreCodemapPattern appends .codemap.* when .gitignore exists", () => { + it("ensureGitignoreCodemapPattern writes /.gitignore (root untouched)", () => { const dir = mkdtempSync(join(tmpdir(), "codemap-agents-")); try { - mkdirSync(join(dir, ".git"), { recursive: true }); - const gi = join(dir, ".gitignore"); - writeFileSync(gi, "node_modules/\n", "utf-8"); + const rootGi = join(dir, ".gitignore"); + writeFileSync(rootGi, "node_modules/\n", "utf-8"); ensureGitignoreCodemapPattern(dir); - expect(readFileSync(gi, "utf-8")).toContain(".codemap.*"); - ensureGitignoreCodemapPattern(dir); - const lines = readFileSync(gi, "utf-8").split("\n").filter(Boolean); - expect(lines.filter((l) => l === ".codemap.*").length).toBe(1); + expect(readFileSync(rootGi, "utf-8")).toBe("node_modules/\n"); + const stateGi = join(dir, ".codemap", ".gitignore"); + expect(existsSync(stateGi)).toBe(true); + expect(readFileSync(stateGi, "utf-8")).toContain("index.db"); + expect(readFileSync(stateGi, "utf-8")).toContain("audit-cache/"); } finally { rmSync(dir, { recursive: true, force: true }); } }); - it("ensureGitignoreCodemapPattern no-ops when not a Git repo", () => { + it("ensureGitignoreCodemapPattern is idempotent (no rewrite on steady state)", () => { const dir = mkdtempSync(join(tmpdir(), "codemap-agents-")); try { ensureGitignoreCodemapPattern(dir); - expect(existsSync(join(dir, ".gitignore"))).toBe(false); - } finally { - rmSync(dir, { recursive: true, force: true }); - } - }); - - it("ensureGitignoreCodemapPattern creates .gitignore when Git repo has none", () => { - const dir = mkdtempSync(join(tmpdir(), "codemap-agents-")); - try { - mkdirSync(join(dir, ".git"), { recursive: true }); + const stateGi = join(dir, ".codemap", ".gitignore"); + const before = readFileSync(stateGi, "utf-8"); ensureGitignoreCodemapPattern(dir); - expect(readFileSync(join(dir, ".gitignore"), 
"utf-8")).toBe( - ".codemap.*\n.codemap/audit-cache/\n", - ); - ensureGitignoreCodemapPattern(dir); - expect(readFileSync(join(dir, ".gitignore"), "utf-8")).toBe( - ".codemap.*\n.codemap/audit-cache/\n", - ); + expect(readFileSync(stateGi, "utf-8")).toBe(before); } finally { rmSync(dir, { recursive: true, force: true }); } }); - it("runAgentsInit updates .gitignore when present", () => { + it("runAgentsInit calls the reconciler — produces /.gitignore", () => { const dir = mkdtempSync(join(tmpdir(), "codemap-agents-")); try { - mkdirSync(join(dir, ".git"), { recursive: true }); - writeFileSync(join(dir, ".gitignore"), "dist/\n", "utf-8"); expect(runAgentsInit({ projectRoot: dir, force: true })).toBe(true); - expect(readFileSync(join(dir, ".gitignore"), "utf-8")).toContain( - ".codemap.*", - ); - } finally { - rmSync(dir, { recursive: true, force: true }); - } - }); - - it("runAgentsInit creates .gitignore in Git repo without one", () => { - const dir = mkdtempSync(join(tmpdir(), "codemap-agents-")); - try { - mkdirSync(join(dir, ".git"), { recursive: true }); - expect(runAgentsInit({ projectRoot: dir, force: true })).toBe(true); - expect(readFileSync(join(dir, ".gitignore"), "utf-8")).toBe( - ".codemap.*\n.codemap/audit-cache/\n", - ); + const stateGi = join(dir, ".codemap", ".gitignore"); + expect(existsSync(stateGi)).toBe(true); } finally { rmSync(dir, { recursive: true, force: true }); } diff --git a/src/agents-init.ts b/src/agents-init.ts index 9548730..eccdd74 100644 --- a/src/agents-init.ts +++ b/src/agents-init.ts @@ -1,5 +1,4 @@ import { - appendFileSync, copyFileSync, existsSync, mkdirSync, @@ -13,6 +12,8 @@ import { import { dirname, join, relative } from "node:path"; import { fileURLToPath } from "node:url"; +import { ensureStateGitignore, resolveStateDir } from "./application/state-dir"; + /** * Directory containing `rules/` and `skills/` (next to `dist/` in published packages). 
*/ @@ -121,12 +122,6 @@ function removeBundledPathsIfExist(destBase: string, relPaths: string[]): void { } } -/** Default DB basename `.codemap` plus SQLite sidecars (`.db`, `-wal`, `-shm`, …). */ -const GITIGNORE_CODEMAP_PATTERN = ".codemap.*"; -// `.codemap/audit-cache/` ignored separately because `.codemap.*` doesn't -// match the directory shape AND `.codemap/recipes/` is git-tracked. -const GITIGNORE_AUDIT_CACHE_PATTERN = ".codemap/audit-cache/"; - /** * Optional integrations after canonical `.agents/` is written. * - Symlink/copy: `cursor`, `windsurf`, `continue`, `cline`, `amazon-q` (per-file symlinks or copies from `.agents/rules`; Cursor also `.agents/skills`). @@ -291,43 +286,18 @@ export interface AgentsInitOptions { } /** - * Ensure codemap-managed paths are listed in `.gitignore` when the project - * uses Git. Adds `.codemap.*` (matches `.codemap.db` etc.) AND - * `.codemap/audit-cache/` (the audit-base worktree cache; `.codemap/recipes/` - * stays tracked, so we can't ignore the whole `.codemap/` dir). - * - * - If `/.git` exists and there is no `.gitignore`, create one - * with both patterns. - * - If `.gitignore` exists, append each pattern once when missing. - * - If there is no `.git`, do nothing (not a Git working tree). + * Reconcile the self-managed `/.gitignore` (per plan §D11 + + * Tracer 2). Idempotent: writes only on drift; logs only on actual change. + * Replaces the per-feature root `.gitignore` patching the old version did + * — root is no longer touched (nested `.gitignore` covers every artifact). */ export function ensureGitignoreCodemapPattern(projectRoot: string): void { - const gitDir = join(projectRoot, ".git"); - const gitignorePath = join(projectRoot, ".gitignore"); - if (!existsSync(gitDir)) { - return; + const stateDir = resolveStateDir({ root: projectRoot }); + const result = ensureStateGitignore(stateDir); + if (result.written) { + const verb = result.before === undefined ? 
"Created" : "Updated"; + console.log(` ${verb} ${stateDir}/.gitignore`); } - const patterns = [GITIGNORE_CODEMAP_PATTERN, GITIGNORE_AUDIT_CACHE_PATTERN]; - if (!existsSync(gitignorePath)) { - writeFileSync(gitignorePath, `${patterns.join("\n")}\n`, "utf-8"); - console.log( - ` Created .gitignore with ${patterns.join(" + ")} (Git repo, no .gitignore yet)`, - ); - return; - } - const content = readFileSync(gitignorePath, "utf-8"); - const lines = content.split(/\r?\n/); - const missing = patterns.filter( - (p) => !lines.some((line) => line.trim() === p), - ); - if (missing.length === 0) return; - const needsLeadingNewline = content.length > 0 && !content.endsWith("\n"); - appendFileSync( - gitignorePath, - `${needsLeadingNewline ? "\n" : ""}${missing.join("\n")}\n`, - "utf-8", - ); - console.log(` Appended ${missing.join(" + ")} to .gitignore`); } function removePathForRewrite( diff --git a/src/cli/bootstrap-codemap.ts b/src/cli/bootstrap-codemap.ts index ec59b6e..3ba7069 100644 --- a/src/cli/bootstrap-codemap.ts +++ b/src/cli/bootstrap-codemap.ts @@ -1,11 +1,20 @@ +import { ensureStateConfig } from "../application/state-config"; +import { resolveStateDir } from "../application/state-dir"; +import { ensureStateGitignore } from "../application/state-dir"; import { loadUserConfig, resolveCodemapConfig } from "../config"; import { configureResolver } from "../resolver"; -import { getProjectRoot, getTsconfigPath, initCodemap } from "../runtime"; +import { + getProjectRoot, + getStateDir, + getTsconfigPath, + initCodemap, +} from "../runtime"; /** - * Per-command bootstrap: load user config, init runtime singletons, - * configure the resolver. Single attachment point for the self-healing - * reconcilers added in Tracer 4 (`ensureStateDir` will fan out from here). + * Per-command bootstrap: resolve the state-dir, run the self-healing + * reconcilers (D11), load user config, init runtime singletons, configure + * the resolver. 
Adding a new self-healing file is a one-line addition + * after `ensureStateConfig` below. */ export interface BootstrapCodemapOpts { root: string; @@ -16,11 +25,21 @@ export interface BootstrapCodemapOpts { export async function bootstrapCodemap( opts: BootstrapCodemapOpts, ): Promise { - const user = await loadUserConfig(opts.root, opts.configFile, { - stateDir: opts.stateDir, - }); - initCodemap( - resolveCodemapConfig(opts.root, user, { stateDir: opts.stateDir }), - ); + // Reconcile state-dir BEFORE config load so a freshly-created + // /config.json from `ensureStateConfig` doesn't miss a + // first-run consumer's read. State-dir is resolved upfront via the + // same precedence config will use. + const stateDir = resolveStateDir({ root: opts.root, cliFlag: opts.stateDir }); + ensureStateGitignore(stateDir); + ensureStateConfig(stateDir); + + const user = await loadUserConfig(opts.root, opts.configFile, { stateDir }); + initCodemap(resolveCodemapConfig(opts.root, user, { stateDir })); configureResolver(getProjectRoot(), getTsconfigPath()); + // Sanity: getStateDir() must mirror what we passed into resolveCodemapConfig. 
+ if (getStateDir() !== stateDir) { + throw new Error( + `bootstrap: state-dir mismatch (resolved ${stateDir}, runtime got ${getStateDir()})`, + ); + } } From 51af9059679ab7a419995a0fcf3e9676e62cec59 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:54:01 +0300 Subject: [PATCH 5/7] docs(state-dir): sync README + architecture + glossary + agent rule/skill + changeset (Tracer 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README.md: --state-dir flag, config-file location, self-healing callout - docs/architecture.md: state-dir resolver in src/config.ts intro; bulk .codemap.db → .codemap/index.db; User config section rewritten with self-healing details (ensureStateGitignore + ensureStateConfig from src/application/, single attachment point in cli/bootstrap-codemap.ts) - docs/glossary.md: new entries for '.codemap/' / <state-dir> / CODEMAP_STATE_DIR; '.codemap/index.db'; '.codemap/.gitignore' / self-healing files - docs/roadmap.md: drop the consolidated-dir backlog item (it shipped) - docs/research/fallow.md: Adjacent-shipped bullet referencing PR #53 (plan) + #54 (impl) - .agents/rules/codemap.md + templates/agents/rules/codemap.md: bulk .codemap.db → .codemap/index.db; one-paragraph addition explaining state-dir + self-healing .gitignore (Rule 10 lockstep) - .agents/skills/codemap/SKILL.md + templates/agents/skills/codemap/SKILL.md: bulk path update (Rule 10 lockstep) - .changeset/codemap-dir-consolidation.md: minor — full design rationale + cleanup steps - docs/plans/codemap-dir-consolidation.md: deleted per docs-governance Rule 3 --- .agents/rules/codemap.md | 4 +- .agents/skills/codemap/SKILL.md | 6 +- .changeset/codemap-dir-consolidation.md | 38 +++++++++ README.md | 30 ++++--- docs/architecture.md | 28 +++--- docs/glossary.md | 12 ++- docs/plans/codemap-dir-consolidation.md | 104 ----------------------- docs/research/fallow.md | 1 + docs/roadmap.md | 1 - templates/agents/rules/codemap.md | 4 +- 
templates/agents/skills/codemap/SKILL.md | 6 +- 11 files changed, 89 insertions(+), 145 deletions(-) create mode 100644 .changeset/codemap-dir-consolidation.md delete mode 100644 docs/plans/codemap-dir-consolidation.md diff --git a/.agents/rules/codemap.md b/.agents/rules/codemap.md index 353a667..a23562f 100644 --- a/.agents/rules/codemap.md +++ b/.agents/rules/codemap.md @@ -6,7 +6,7 @@ alwaysApply: true > **STOP.** Before you call Grep, Glob, SemanticSearch, or Read to answer a **structural** question about this repository — query the Codemap SQLite index first. This is not optional when the question matches a trigger pattern below. -A local database (default **`.codemap.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. +A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir ` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** (`ensureStateGitignore`) — bumping its canonical body in a PR auto-applies on every consumer's next run. **This file** is for **developing Codemap** in this clone. **End users** of the published package get the agent rule from **`templates/agents/`** (via **`codemap agents init`**). **Generic defaults:** SQL and triggers stay project-agnostic — **edit** this rule for repo-specific paths and queries. @@ -51,7 +51,7 @@ actions: Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. 
-**Baselines** (`query_baselines` table inside `.codemap.db`, no parallel JSON files): `--save-baseline[=]` snapshots a result set; `--baseline[=]` diffs the current result against it (added / removed rows; identity = `JSON.stringify(row)`). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Survives `--full` and SCHEMA bumps. +**Baselines** (`query_baselines` table inside `.codemap/index.db`, no parallel JSON files): `--save-baseline[=]` snapshots a result set; `--baseline[=]` diffs the current result against it (added / removed rows; identity = `JSON.stringify(row)`). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Survives `--full` and SCHEMA bumps. **Audit (`bun src/index.ts audit`)**: structural-drift command; emits `{head, deltas: {files, dependencies, deprecated}}` (each delta carries its own `base` metadata). Three mutually-exclusive snapshot sources: `--base ` materialises a git committish via `git worktree add` to a sha-keyed cache under `.codemap/audit-cache/`, reindexes a temp DB, then diffs (sub-100ms second run; requires git; `base.source: "ref"`); `--baseline ` auto-resolves `-files` / `-dependencies` / `-deprecated` from saved `query_baselines` entries (`base.source: "baseline"`); `---baseline ` is the explicit per-delta override (composes with both). v1 ships no `verdict` / threshold config — consumers compose `--json` + `jq` for CI exit codes. Auto-runs an incremental index before the diff (use `--no-index` to skip for frozen-DB CI). diff --git a/.agents/skills/codemap/SKILL.md b/.agents/skills/codemap/SKILL.md index 3b962b4..44aa604 100644 --- a/.agents/skills/codemap/SKILL.md +++ b/.agents/skills/codemap/SKILL.md @@ -41,11 +41,11 @@ Replace placeholders (`'...'`) with your module path, file glob, or symbol name. - **`--summary`** — counts only. With **`--json`**: **`{"count": N}`**. With **`--group-by`**: **`{"group_by": "", "groups": [{key, count}]}`**. 
- **`--changed-since `** — post-filter rows by **`path`** / **`file_path`** / **`from_path`** / **`to_path`** / **`resolved_path`** against **`git diff --name-only ...HEAD ∪ git status --porcelain`**. Rows with no recognised path column pass through. - **`--group-by owner|directory|package`** — partition into buckets and emit **`{"group_by", "groups": [{key, count, rows}]}`**. **`owner`** reads CODEOWNERS (last matching rule wins); **`directory`** is the first path segment; **`package`** uses **`package.json`** **`workspaces`** or **`pnpm-workspace.yaml`**. **Mutually exclusive with `--save-baseline` / `--baseline`.** -- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. +- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap/index.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. - **`--baseline[=]`** — diff the current result against the saved baseline. Output `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (with `--json`) or a two-section terminal dump. Identity = per-row multiset equality (canonical `JSON.stringify` keyed frequency map; duplicates preserved). Pair with `--summary` for `{baseline:{...}, current_row_count, added: N, removed: N}`. **Mutually exclusive with `--group-by`.** - **`--baselines`** lists saved baselines (no `rows_json` payload); **`--drop-baseline `** deletes one. Both reject every other flag — they're list-only / drop-only operations. 
- **Per-row recipe `actions`** — recipes that define an **`actions: [{type, auto_fixable?, description?}]`** template append it to every row in **`--json`** output (recipe-only; ad-hoc SQL never carries actions). Under `--baseline`, actions attach to the **`added`** rows only (the rows the agent should act on). Inspect via **`--recipes-json`**. -- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. +- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. 
`.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. **Audit (`bun src/index.ts audit`)** — separate top-level command for structural-drift verdicts. Composes B.6 baselines into a per-delta `{head, deltas}` envelope; v1 ships `files` / `dependencies` / `deprecated`. Two snapshot-source shapes: @@ -79,7 +79,7 @@ Each emitted delta carries its own `base` metadata so mixed-baseline audits are - **`codemap://recipes`** — full catalog JSON (same as `--recipes-json`). Each entry carries `source: "bundled" | "project"` and `shadows: true` on project entries that override a bundled recipe id. Read this at session start so you know when a `--recipe foo` call will run a project override instead of the documented bundled version. - **`codemap://recipes/{id}`** — single recipe `{id, description, body?, sql, actions?, source, shadows?}`. Replaces `--print-sql `. -- **`codemap://schema`** — DDL of every table in `.codemap.db` (queried live from `sqlite_schema`). +- **`codemap://schema`** — DDL of every table in `.codemap/index.db` (queried live from `sqlite_schema`). - **`codemap://skill`** — full text of bundled `templates/agents/skills/codemap/SKILL.md`. Agents that don't preload the skill at session start can fetch it here. **Implementation:** `src/cli/cmd-mcp.ts` (CLI shell — argv + lifecycle) + `src/application/mcp-server.ts` (transport — SDK glue). 
Tool bodies live in `src/application/tool-handlers.ts` (pure transport-agnostic — same handlers `codemap serve` dispatches over HTTP); resource fetchers in `src/application/resource-handlers.ts`. Mirrors the `cmd-audit.ts ↔ audit-engine.ts` seam. `--changed-since` git lookups are memoised per `(root, ref)` pair across batch items so a `query_batch` of N items sharing the same ref does one git invocation, not N. diff --git a/.changeset/codemap-dir-consolidation.md b/.changeset/codemap-dir-consolidation.md new file mode 100644 index 0000000..6bd989e --- /dev/null +++ b/.changeset/codemap-dir-consolidation.md @@ -0,0 +1,38 @@ +--- +"@stainless-code/codemap": minor +--- + +`.codemap/` directory consolidation + self-healing files. Every codemap-managed path lives under a single configurable state directory (default `.codemap/`, override via `--state-dir ` or `CODEMAP_STATE_DIR`). Cleans up the dual-pattern surface (`/.codemap.db` + `/.codemap//`) that's been growing with every cache PR; collapses the user `.gitignore` patching surface to zero. + +**New layout:** + +``` +/ +└── .codemap/ ← override via --state-dir / CODEMAP_STATE_DIR + ├── .gitignore ← codemap-managed (self-healing); tracked + ├── config.{ts,js,json} ← was /codemap.config.*; tracked + ├── recipes/ ← user-authored SQL; tracked (existing) + ├── index.db ← was .codemap.db + ├── index.db-shm ← was .codemap.db-shm + ├── index.db-wal ← was .codemap.db-wal + └── audit-cache/ ← was .codemap/audit-cache/ (existing) +``` + +**Self-healing files (D11):** `/.gitignore` and `/config.json` are owned by idempotent `ensure*` reconcilers (`src/application/state-dir.ts`, `src/application/state-config.ts`) that run on every codemap boot — read → validate → reconcile → write only on drift. **The setup logic IS the migration**: future codemap versions add new generated artifacts to `STATE_GITIGNORE_BODY` (or extend the Zod schema), and every consumer's project repairs itself on the next `codemap` invocation. 
No more per-feature `.gitignore` patching in `agents-init.ts`. +**Pre-v1 — no migration shim:** + +- `<root>/.codemap.db` → `<state-dir>/index.db` (rename basename) +- `<root>/codemap.config.{ts,json}` → `<state-dir>/config.{ts,js,json}` (move file) +- Existing dev clones: `rm .codemap.db .codemap.db-shm .codemap.db-wal` once and re-index; move `codemap.config.*` into `.codemap/` (or set `--config <path>` to keep using the legacy location explicitly). + +**New flags + env:** + +- `--state-dir <path>` — override the state directory (resolves relative to project root). +- `CODEMAP_STATE_DIR` — same, env-var form. + +**Internal refactor:** new `src/cli/bootstrap-codemap.ts` extracts the `loadUserConfig + resolveCodemapConfig + initCodemap + configureResolver` dance from 9 cmd-\* files into one helper that also runs the self-healing reconcilers. Adding a new self-healing file is now a one-line addition there. + +Inspired by flowbite-react's `.flowbite-react/.gitignore` + `setup-*` pattern; expressed in codemap's own conventions (`ensure*` reconcilers, Zod schema as `z.infer` source of truth, pure `{before, after, written}` return shapes for testability). + +Plan: PR #53 (merged). Implementation: PR #54. 
diff --git a/README.md b/README.md index d3693d1..9c35d59 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ codemap agents init # scaffold .agents/ ### Full reference ```bash -# Index project root (optional codemap.config.ts / codemap.config.json) +# Index project root (optional /config.{ts,js,json}; --state-dir overrides .codemap/) codemap # Version (also: codemap --version, codemap -V) @@ -85,7 +85,7 @@ codemap query --json --summary --changed-since HEAD~5 "SELECT file_path FROM sym codemap query --json --summary --group-by directory -r fan-in codemap query --json --group-by owner -r deprecated-symbols codemap query --json --summary --group-by package "SELECT file_path FROM symbols" -# Snapshot a result, refactor, then diff (saved inside .codemap.db, no JSON files) +# Snapshot a result, refactor, then diff (saved inside .codemap/index.db, no JSON files) codemap query --save-baseline -r visibility-tags # save under name "visibility-tags" codemap query --json --baseline -r visibility-tags # full diff: {baseline, current_row_count, added, removed} codemap query --json --summary --baseline -r visibility-tags # counts only: {baseline, current_row_count, added: N, removed: N} @@ -171,7 +171,9 @@ codemap mcp # JSON-RPC on st codemap --root /path/to/repo --full # Explicit config -codemap --config /path/to/codemap.config.json --full +codemap --config /path/to/config.json --full +# Override the state directory (default `.codemap/`): +codemap --state-dir .cm --full # or: CODEMAP_STATE_DIR=.cm codemap --full # Re-index only given paths (relative to project root) codemap --files src/a.ts src/b.tsx @@ -184,7 +186,7 @@ codemap agents init --interactive # -i; IDE wiring + symlink vs copy **Environment / flags:** `--root` overrides **`CODEMAP_ROOT`** / **`CODEMAP_TEST_BENCH`**, then **`process.cwd()`**. Indexing a project outside this clone: [docs/benchmark.md § Indexing another project](docs/benchmark.md#indexing-another-project). 
-**Configuration:** optional **`codemap.config.ts`** (default export object or async factory) or **`codemap.config.json`**. Shape: [codemap.config.example.json](codemap.config.example.json). Runtime validation (**Zod**, strict keys) and API surface: [docs/architecture.md § User config](docs/architecture.md#user-config). When developing inside this repo you can use `defineConfig` from `@stainless-code/codemap` or `./src/config`. If you set **`include`**, it **replaces** the default glob list entirely. +**Configuration:** optional **`/config.{ts,js,json}`** (default `.codemap/config.*`; default export object or async factory). Shape: [codemap.config.example.json](codemap.config.example.json). Runtime validation (**Zod**, strict keys) and API surface: [docs/architecture.md § User config](docs/architecture.md#user-config). When developing inside this repo you can use `defineConfig` from `@stainless-code/codemap` or `./src/config`. If you set **`include`**, it **replaces** the default glob list entirely. **Self-healing files (D11):** `/.gitignore` is rewritten to canonical on every codemap boot; JSON config gets unknown-key pruning + key-sort drift; TS/JS configs are validate-only. --- @@ -210,16 +212,16 @@ const rows = cm.query("SELECT name FROM symbols LIMIT 5"); Tooling: **Oxfmt**, **Oxlint**, **tsgo** (`@typescript/native-preview`). 
-| Command | Purpose | -| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `bun run dev` | Run the CLI from source (same as `bun src/index.ts`) | -| `bun run check` | Build, format check, lint, tests, typecheck — run before pushing | -| `bun run fix` | Apply lint fixes, then format | -| `bun run test` / `bun run typecheck` | Focused checks | -| `bun run test:golden` | SQL snapshot regression on `fixtures/minimal` (included in `check`) | -| `bun run test:golden:external` | Tier B: local tree via `CODEMAP_*` / `--root` (not in default `check`) | -| `bun run benchmark:query` | Compare `console.table` vs `--json` stdout size (needs local `.codemap.db`; [docs/benchmark.md § Query stdout](docs/benchmark.md#query-stdout-table-vs-json-benchmarkquery)) | -| `bun run qa:external` | Index + sanity checks + benchmark on `CODEMAP_ROOT` / `CODEMAP_TEST_BENCH` | +| Command | Purpose | +| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `bun run dev` | Run the CLI from source (same as `bun src/index.ts`) | +| `bun run check` | Build, format check, lint, tests, typecheck — run before pushing | +| `bun run fix` | Apply lint fixes, then format | +| `bun run test` / `bun run typecheck` | Focused checks | +| `bun run test:golden` | SQL snapshot regression on `fixtures/minimal` (included in `check`) | +| `bun run test:golden:external` | Tier B: local tree via `CODEMAP_*` / `--root` (not in default `check`) | +| `bun run benchmark:query` | Compare `console.table` vs `--json` stdout size (needs local `.codemap/index.db`; [docs/benchmark.md § Query stdout](docs/benchmark.md#query-stdout-table-vs-json-benchmarkquery)) | +| `bun run qa:external` | Index + 
sanity checks + benchmark on `CODEMAP_ROOT` / `CODEMAP_TEST_BENCH` | ```bash bun install diff --git a/docs/architecture.md b/docs/architecture.md index 77858d9..727297e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -4,13 +4,13 @@ See [documentation index](./README.md). ## Overview -A local SQLite database (`.codemap.db`) indexes the project tree and stores structural metadata (symbols, imports, exports, components, dependencies, CSS tokens, markers) for SQL queries instead of repeated full-tree scans. +A local SQLite database (`.codemap/index.db`) indexes the project tree and stores structural metadata (symbols, imports, exports, components, dependencies, CSS tokens, markers) for SQL queries instead of repeated full-tree scans. ### Runtime and database **`src/sqlite-db.ts`:** Node uses **`better-sqlite3`**; Bun uses **`bun:sqlite`**. Same schema everywhere. **`better-sqlite3`** allows **one SQL statement per `prepare()`**; **`bun:sqlite`** accepts **multiple statements** in one `run()`. On Node, **`runSql()`** splits multi-statement strings on **`;`** and runs each fragment. Do **not** put **`;`** inside **`--` line comments** in **`db.ts`** DDL strings (naive split would break). Details: [packaging.md § Node vs Bun](./packaging.md#node-vs-bun). -**`src/worker-pool.ts`:** Bun `Worker` or Node `worker_threads`. **`src/glob-sync.ts`:** Bun **`Glob`** or **`tinyglobby`** for include patterns. **`src/config.ts`:** loads **`codemap.config.json`** / **`codemap.config.ts`** (JSON read path: **`Bun.file`** on Bun, **`readFile` + `JSON.parse`** on Node — [packaging.md § Node vs Bun](./packaging.md#node-vs-bun)), then validates with **Zod** (`codemapUserConfigSchema`). Details: [User config](#user-config). +**`src/worker-pool.ts`:** Bun `Worker` or Node `worker_threads`. **`src/glob-sync.ts`:** Bun **`Glob`** or **`tinyglobby`** for include patterns. 
**`src/config.ts`:** loads **`<state-dir>/config.{ts,js,json}`** (JSON read path: **`Bun.file`** on Bun, **`readFile` + `JSON.parse`** on Node — [packaging.md § Node vs Bun](./packaging.md#node-vs-bun)), then validates with **Zod** (`codemapUserConfigSchema`). Details: [User config](#user-config). State directory resolved via **`src/application/state-dir.ts`** (`resolveStateDir`); precedence `--state-dir <path>` > `CODEMAP_STATE_DIR` > `.codemap/`. **Shipped artifact:** **`dist/`** — `package.json` **`bin`** and **`exports`** both point at **`dist/index.mjs`** ([packaging.md](./packaging.md)); tsdown also emits **lazy CLI chunks** (`cmd-index`, `cmd-query`, `cmd-agents`, …) loaded via **`import()`** from **`src/cli/main.ts`**. @@ -50,7 +50,7 @@ A local SQLite database (`.codemap.db`) indexes the project tree and stores stru │ ├─ CREATE INDEX (single sorted pass) │ - └─ .codemap.db + └─ .codemap/index.db ``` ### Incremental / targeted (sequential) @@ -63,7 +63,7 @@ A local SQLite database (`.codemap.db`) indexes the project tree and stores stru ├─ for each changed file: │ read → parse → resolve → INSERT │ - └─ .codemap.db + └─ .codemap/index.db ``` ### Parser stack @@ -117,13 +117,13 @@ A local SQLite database (`.codemap.db`) indexes the project tree and stores stru **Commands and flags** (index, query, **`codemap agents init`**, **`--root`**, **`--config`**, environment): [../README.md § CLI](../README.md#cli) — **do not duplicate** flag lists here; this section only adds implementation notes. From this repository: **`bun run dev`** or **`bun src/index.ts`** (same flags). 
-**Query wiring:** **`src/cli/cmd-query.ts`** (argv, **`printQueryResult`**, `--recipe` / `-r` alias, **`--summary`**, **`--changed-since`**, **`--group-by`**, **`--save-baseline`** / **`--baseline`** / **`--baselines`** / **`--drop-baseline`**), **`src/application/query-recipes.ts`** (**`QUERY_RECIPES`** — bundled SQL only source; optional **`actions: RecipeAction[]`** per recipe), **`src/cli/main.ts`** (**`--recipes-json`** / **`--print-sql`** exit before config/DB). With **`--json`**, errors use **`{"error":"…"}`** on stdout for SQL failures, DB open, and bootstrap (same shape); **`runQueryCmd`** sets **`process.exitCode`** instead of **`process.exit`**. Friendlier "no `.codemap.db`" — `no such table: ` and `no such column: ` errors are rewritten in **`enrichQueryError`** to point at `codemap` / `codemap --full`. **`--summary`** filters output only — the SQL still executes against the index; output collapses to `{"count": N}` (with `--json`) or `count: N`. **`--changed-since `** post-filters result rows by `path` / `file_path` / `from_path` / `to_path` / `resolved_path` against `git diff --name-only ...HEAD ∪ git status --porcelain` (helper: **`src/git-changed.ts`** — `getFilesChangedSince`, `filterRowsByChangedFiles`, `PATH_COLUMNS`); rows with no recognised path column pass through. **`--group-by `** (`owner` | `directory` | `package`) routes through **`runGroupedQuery`** in `cmd-query.ts` and emits `{"group_by": "", "groups": [{key, count, rows}]}` (or `[{key, count}]` with `--summary`); helpers in **`src/group-by.ts`** (`groupRowsBy`, `firstDirectory`, `loadCodeowners`, `discoverWorkspaceRoots`, `makePackageBucketizer`, `codeownersGlobToRegex`). CODEOWNERS lookup is last-match-wins (GitHub semantics); workspace discovery reads `package.json` `workspaces` and `pnpm-workspace.yaml` `packages:`. 
**`--save-baseline[=]`** snapshots the result to the **`query_baselines`** table inside `.codemap.db` (no parallel JSON files; survives `--full` / SCHEMA bumps because the table is intentionally absent from `dropAll()`); name defaults to `--recipe` id, ad-hoc SQL needs an explicit name. **`--baseline[=]`** replays the SQL, fetches the saved row set, and emits `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (or `{baseline:{...}, current_row_count, added: N, removed: N}` with `--summary`); identity is per-row multiset equality (canonical `JSON.stringify` keyed frequency map — duplicate rows are tracked, not collapsed). No fuzzy "changed" category in v1. **`--group-by` is mutually exclusive** with both `--save-baseline` and `--baseline` (different output shapes). **`--baselines`** (read-only list) and **`--drop-baseline `** complete the surface; helpers in **`src/db.ts`** (`upsertQueryBaseline`, `getQueryBaseline`, `listQueryBaselines`, `deleteQueryBaseline`). **Per-row recipe `actions`** are appended only when the user runs **`--recipe `** with **`--json`** AND the recipe defines an `actions` template — programmatic `cm.query(sql)` and ad-hoc CLI SQL never carry actions; under `--baseline`, actions attach to `added` rows only (the rows the agent should act on). The **`components-by-hooks`** recipe ranks by hook count with a **comma-based tally** on **`hooks_used`** (no SQLite JSON1). Shipped **`templates/agents/`** documents **`codemap query --json`** as the primary agent example ([README § CLI](../README.md#cli)). 
+**Query wiring:** **`src/cli/cmd-query.ts`** (argv, **`printQueryResult`**, `--recipe` / `-r` alias, **`--summary`**, **`--changed-since`**, **`--group-by`**, **`--save-baseline`** / **`--baseline`** / **`--baselines`** / **`--drop-baseline`**), **`src/application/query-recipes.ts`** (**`QUERY_RECIPES`** — bundled SQL only source; optional **`actions: RecipeAction[]`** per recipe), **`src/cli/main.ts`** (**`--recipes-json`** / **`--print-sql`** exit before config/DB). With **`--json`**, errors use **`{"error":"…"}`** on stdout for SQL failures, DB open, and bootstrap (same shape); **`runQueryCmd`** sets **`process.exitCode`** instead of **`process.exit`**. Friendlier "no `.codemap/index.db`" — `no such table: ` and `no such column: ` errors are rewritten in **`enrichQueryError`** to point at `codemap` / `codemap --full`. **`--summary`** filters output only — the SQL still executes against the index; output collapses to `{"count": N}` (with `--json`) or `count: N`. **`--changed-since `** post-filters result rows by `path` / `file_path` / `from_path` / `to_path` / `resolved_path` against `git diff --name-only ...HEAD ∪ git status --porcelain` (helper: **`src/git-changed.ts`** — `getFilesChangedSince`, `filterRowsByChangedFiles`, `PATH_COLUMNS`); rows with no recognised path column pass through. **`--group-by `** (`owner` | `directory` | `package`) routes through **`runGroupedQuery`** in `cmd-query.ts` and emits `{"group_by": "", "groups": [{key, count, rows}]}` (or `[{key, count}]` with `--summary`); helpers in **`src/group-by.ts`** (`groupRowsBy`, `firstDirectory`, `loadCodeowners`, `discoverWorkspaceRoots`, `makePackageBucketizer`, `codeownersGlobToRegex`). CODEOWNERS lookup is last-match-wins (GitHub semantics); workspace discovery reads `package.json` `workspaces` and `pnpm-workspace.yaml` `packages:`. 
**`--save-baseline[=]`** snapshots the result to the **`query_baselines`** table inside `.codemap/index.db` (no parallel JSON files; survives `--full` / SCHEMA bumps because the table is intentionally absent from `dropAll()`); name defaults to `--recipe` id, ad-hoc SQL needs an explicit name. **`--baseline[=]`** replays the SQL, fetches the saved row set, and emits `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (or `{baseline:{...}, current_row_count, added: N, removed: N}` with `--summary`); identity is per-row multiset equality (canonical `JSON.stringify` keyed frequency map — duplicate rows are tracked, not collapsed). No fuzzy "changed" category in v1. **`--group-by` is mutually exclusive** with both `--save-baseline` and `--baseline` (different output shapes). **`--baselines`** (read-only list) and **`--drop-baseline `** complete the surface; helpers in **`src/db.ts`** (`upsertQueryBaseline`, `getQueryBaseline`, `listQueryBaselines`, `deleteQueryBaseline`). **Per-row recipe `actions`** are appended only when the user runs **`--recipe `** with **`--json`** AND the recipe defines an `actions` template — programmatic `cm.query(sql)` and ad-hoc CLI SQL never carry actions; under `--baseline`, actions attach to `added` rows only (the rows the agent should act on). The **`components-by-hooks`** recipe ranks by hook count with a **comma-based tally** on **`hooks_used`** (no SQLite JSON1). Shipped **`templates/agents/`** documents **`codemap query --json`** as the primary agent example ([README § CLI](../README.md#cli)). 
**Output formatters:** **`src/application/output-formatters.ts`** — pure transport-agnostic; **`formatSarif`** emits SARIF 2.1.0 (auto-detected location columns: `file_path` / `path` / `to_path` / `from_path` priority + optional `line_start` / `line_end` region; `rule.id = codemap.` for `--recipe`, `codemap.adhoc` for ad-hoc SQL; aggregate recipes without locations → `results: []` + stderr warning); **`formatAnnotations`** emits `::notice file=…,line=…::msg` GitHub Actions workflow commands (one line per locatable row; messages collapsed to a single line because the GH parser stops at the first newline). Wired into both **`src/cli/cmd-query.ts`** (`--format `; `--format` overrides `--json`; `sarif` / `annotations` reject `--summary` / `--group-by` / baseline at parse time) and the MCP **`query`** / **`query_recipe`** tools (`format: "sarif" | "annotations"` with the same incompatibility guard). Per-recipe `sarifLevel` / `sarifMessage` / `sarifRuleId` overrides via frontmatter on `.md` deferred to v1.x. **Validate wiring:** **`src/cli/cmd-validate.ts`** (argv + render) + **`src/application/validate-engine.ts`** (engine — **`computeValidateRows`** + **`toProjectRelative`**). `computeValidateRows` is a pure function over `(db, projectRoot, paths)` returning `{path, status}` rows where `status ∈ stale | missing | unindexed`. CLI wraps it with read-once-and-print + exits **1** on any drift (git-status semantics). Path normalization: **`toProjectRelative`** converts CLI input to POSIX-style relative keys matching the `files.path` storage format (Windows backslash → forward slash); same convention as `lint-staged.config.js`. Also reused by `cmd-show.ts` / `cmd-snippet.ts` and the MCP show/snippet handlers — single canonical implementation. 
-**Audit wiring:** **`src/cli/cmd-audit.ts`** (argv, `--baseline ` auto-resolve sugar, `---baseline ` per-delta explicit overrides, `--base ` git-ref baseline, `--json`, `--summary`, `--no-index`) + **`src/application/audit-engine.ts`** (delta registry + diff). Mirrors the `cmd-index.ts ↔ application/index-engine.ts` seam — CLI parses + dispatches; engine does the diff. **`runAudit({db, baselines})`** iterates the per-delta baseline map; deltas absent from the map don't run. Each entry in **`V1_DELTAS`** pins a canonical SQL projection (`files`: `SELECT path FROM files`; `dependencies`: `SELECT from_path, to_path FROM dependencies`; `deprecated`: `SELECT name, kind, file_path FROM symbols WHERE doc_comment LIKE '%@deprecated%'`) plus a `requiredColumns` list. **`computeDelta`** validates baseline column-set membership, projects baseline rows down to the canonical column subset (extras dropped — schema-drift-resilient), runs the canonical SQL via the caller's DB connection, and set-diffs via the existing **`src/diff-rows.ts`** multiset helper (shared with `query --baseline`). Each emitted delta carries its own **`base`** metadata so mixed-baseline audits (e.g. `--baseline base --dependencies-baseline override`) are first-class. **`runAuditCmd`** runs an auto-incremental-index prelude (`runCodemapIndex({mode: "incremental", quiet: true})`) before the diff so `head` reflects the current source — `--no-index` opts out for frozen-DB CI scenarios. **`resolveAuditBaselines({db, baselinePrefix, perDelta})`** composes the baseline map: auto-resolves `-` for slots that exist (silently absent otherwise) and lets per-delta flags override individual slots. v1 ships no `verdict` / threshold config / non-zero exit codes — consumers compose `--json` + `jq` for CI exit codes; v1.x still tracks `verdict` + `codemap.config.audit` thresholds. 
**`--base ` (shipped):** **`runAuditFromRef({db, ref, perDeltaOverrides, projectRoot, reindex})`** materialises the ref via **`application/audit-worktree.ts`** — `git rev-parse --verify "^{commit}"` → resolved sha → cache lookup at `/.codemap/audit-cache//`. Cache miss: per-pid temp dir (`.tmp...`) gets `git worktree add --detach`, the injected `reindex` callback (`makeWorktreeReindex` in production — re-inits the runtime singletons against the worktree path, runs `runCodemapIndex({mode: "full"})`, restores) writes `.codemap.db` inside, then POSIX `rename` claims the final `/` slot. **Atomic populate** — concurrent processes resolving the same sha race-safely without lock files (loser's rename fails with EEXIST → falls through to cache hit). Eviction: hardcoded LRU 5 entries / 500 MiB; `git worktree remove --force` then `rm -rf` for each victim; orphan `.tmp.*` dirs older than 10 min get swept too. Per-delta `base` metadata gains a discriminator: existing baseline-source remains `{source: "baseline", name, sha, indexed_at}`; new ref-source is `{source: "ref", ref, sha, indexed_at}`. `--base` is mutually exclusive with `--baseline ` (parser + handler both guard); composes orthogonally with per-delta `---baseline name` overrides. Hard error on non-git projects (`existsSync(/.git)` check before any spawn). All git spawns in `audit-worktree.ts` strip inherited `GIT_*` env vars so a containing git operation (e.g. running codemap inside a husky hook) doesn't route worktree calls at the wrong index. +**Audit wiring:** **`src/cli/cmd-audit.ts`** (argv, `--baseline ` auto-resolve sugar, `---baseline ` per-delta explicit overrides, `--base ` git-ref baseline, `--json`, `--summary`, `--no-index`) + **`src/application/audit-engine.ts`** (delta registry + diff). Mirrors the `cmd-index.ts ↔ application/index-engine.ts` seam — CLI parses + dispatches; engine does the diff. **`runAudit({db, baselines})`** iterates the per-delta baseline map; deltas absent from the map don't run. 
Each entry in **`V1_DELTAS`** pins a canonical SQL projection (`files`: `SELECT path FROM files`; `dependencies`: `SELECT from_path, to_path FROM dependencies`; `deprecated`: `SELECT name, kind, file_path FROM symbols WHERE doc_comment LIKE '%@deprecated%'`) plus a `requiredColumns` list. **`computeDelta`** validates baseline column-set membership, projects baseline rows down to the canonical column subset (extras dropped — schema-drift-resilient), runs the canonical SQL via the caller's DB connection, and set-diffs via the existing **`src/diff-rows.ts`** multiset helper (shared with `query --baseline`). Each emitted delta carries its own **`base`** metadata so mixed-baseline audits (e.g. `--baseline base --dependencies-baseline override`) are first-class. **`runAuditCmd`** runs an auto-incremental-index prelude (`runCodemapIndex({mode: "incremental", quiet: true})`) before the diff so `head` reflects the current source — `--no-index` opts out for frozen-DB CI scenarios. **`resolveAuditBaselines({db, baselinePrefix, perDelta})`** composes the baseline map: auto-resolves `-` for slots that exist (silently absent otherwise) and lets per-delta flags override individual slots. v1 ships no `verdict` / threshold config / non-zero exit codes — consumers compose `--json` + `jq` for CI exit codes; v1.x still tracks `verdict` + `codemap.config.audit` thresholds. **`--base ` (shipped):** **`runAuditFromRef({db, ref, perDeltaOverrides, projectRoot, reindex})`** materialises the ref via **`application/audit-worktree.ts`** — `git rev-parse --verify "^{commit}"` → resolved sha → cache lookup at `/.codemap/audit-cache//`. Cache miss: per-pid temp dir (`.tmp...`) gets `git worktree add --detach`, the injected `reindex` callback (`makeWorktreeReindex` in production — re-inits the runtime singletons against the worktree path, runs `runCodemapIndex({mode: "full"})`, restores) writes `.codemap/index.db` inside, then POSIX `rename` claims the final `/` slot. 
**Atomic populate** — concurrent processes resolving the same sha race-safely without lock files (loser's rename fails with EEXIST → falls through to cache hit). Eviction: hardcoded LRU 5 entries / 500 MiB; `git worktree remove --force` then `rm -rf` for each victim; orphan `.tmp.*` dirs older than 10 min get swept too. Per-delta `base` metadata gains a discriminator: existing baseline-source remains `{source: "baseline", name, sha, indexed_at}`; new ref-source is `{source: "ref", ref, sha, indexed_at}`. `--base` is mutually exclusive with `--baseline ` (parser + handler both guard); composes orthogonally with per-delta `---baseline name` overrides. Hard error on non-git projects (`existsSync(/.git)` check before any spawn). All git spawns in `audit-worktree.ts` strip inherited `GIT_*` env vars so a containing git operation (e.g. running codemap inside a husky hook) doesn't route worktree calls at the wrong index. **Context wiring:** **`src/cli/cmd-context.ts`** (argv + render) + **`src/application/context-engine.ts`** (engine — **`buildContextEnvelope`**, **`classifyIntent`**, `ContextEnvelope` type). `buildContextEnvelope` composes the JSON envelope from existing recipes (`fan-in` for `hubs`, `markers` SELECT for `sample_markers`, `QUERY_RECIPES` map for the catalog). **`classifyIntent`** maps `--for ""` to one of `refactor | debug | test | feature | explore | other` via regex against the trimmed input; whitespace-only intents are rejected. `--compact` drops `hubs` + `sample_markers` and emits one-line JSON; otherwise pretty-prints with 2-space indent. 
@@ -131,7 +131,7 @@ A local SQLite database (`.codemap.db`) indexes the project tree and stores stru **Show / snippet wiring:** **`src/cli/cmd-show.ts`** + **`src/cli/cmd-snippet.ts`** — sibling CLI verbs sharing the same parser shape (`` + `--kind` + `--in ` + `--json`) and the pure engine **`src/application/show-engine.ts`** (`findSymbolsByName({db, name, kind?, inPath?})` for the lookup; `readSymbolSource({match, projectRoot, indexedContentHash?})` + `getIndexedContentHash(db, filePath)` for the snippet-side FS read; **`buildShowResult`** + **`buildSnippetResult`** envelope builders — same engine the MCP show/snippet tools call). Both verbs return the same `{matches, disambiguation?}` envelope per plan § 4 uniformity — single match → `{matches: [{...}]}`; multi-match adds `{n, by_kind, files, hint}`. Snippet matches add `source` / `stale` / `missing` fields (additive — no shape divergence). **`--in `** is normalized through `toProjectRelative(projectRoot, p)` (from **`src/application/validate-engine.ts`**) so `--in ./src/cli/`, `--in src/cli`, and `--in src/cli/cmd-show.ts` all resolve identically. Stale-file behavior on `snippet`: `hashContent` (from **`src/hash.ts`** — same primitive `cmd-validate.ts` uses) compares the on-disk content_hash against `files.content_hash`; mismatch sets `stale: true` but the source IS still returned (read tool, no auto-reindex side-effects). MCP tools `show` and `snippet` register parallel to the CLI surface (see [§ MCP wiring](#cli-usage)). -**Recipes wiring:** **`src/application/recipes-loader.ts`** (pure transport-agnostic loader) + **`src/application/query-recipes.ts`** (cache + public API — `getQueryRecipeSql` / `getQueryRecipeActions` / `listQueryRecipeIds` / `listQueryRecipeCatalog` / `getQueryRecipeCatalogEntry`, shared by CLI + MCP). Recipes live as file pairs: **`.sql`** + optional **`.md`**. 
The loader reads `templates/recipes/` (bundled, ships in npm package next to `templates/agents/`) and `/.codemap/recipes/` (project-local — root-only resolution per the registry plan, no walk-up). Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** in the catalog so agents reading `codemap://recipes` at session start see when a recipe behaves differently from the documented bundled version. Per-row **`actions`** templates (kebab-case verb + description) live in YAML frontmatter on each `.md` — uniform shape across bundled + project. Hand-rolled YAML parser scoped to `actions: [{type, auto_fixable?, description?}]` only (no `js-yaml` dep). Load-time validation rejects empty SQL and DML / DDL keywords (`INSERT` / `UPDATE` / `DELETE` / `DROP` / `CREATE` / `ALTER` / `ATTACH` / `DETACH` / `REPLACE` / `TRUNCATE` / `VACUUM` / `PRAGMA`) with recipe-aware error messages — defence in depth alongside the runtime `PRAGMA query_only=1` backstop in `query-engine.ts` (PR #35). `.codemap.db` is gitignored; `.codemap/recipes/` is NOT (verified via `git check-ignore`) — recipes are git-tracked source code authored for human review. +**Recipes wiring:** **`src/application/recipes-loader.ts`** (pure transport-agnostic loader) + **`src/application/query-recipes.ts`** (cache + public API — `getQueryRecipeSql` / `getQueryRecipeActions` / `listQueryRecipeIds` / `listQueryRecipeCatalog` / `getQueryRecipeCatalogEntry`, shared by CLI + MCP). Recipes live as file pairs: **`.sql`** + optional **`.md`**. The loader reads `templates/recipes/` (bundled, ships in npm package next to `templates/agents/`) and `/.codemap/recipes/` (project-local — root-only resolution per the registry plan, no walk-up). Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** in the catalog so agents reading `codemap://recipes` at session start see when a recipe behaves differently from the documented bundled version. 
Per-row **`actions`** templates (kebab-case verb + description) live in YAML frontmatter on each `.md` — uniform shape across bundled + project. Hand-rolled YAML parser scoped to `actions: [{type, auto_fixable?, description?}]` only (no `js-yaml` dep). Load-time validation rejects empty SQL and DML / DDL keywords (`INSERT` / `UPDATE` / `DELETE` / `DROP` / `CREATE` / `ALTER` / `ATTACH` / `DETACH` / `REPLACE` / `TRUNCATE` / `VACUUM` / `PRAGMA`) with recipe-aware error messages — defence in depth alongside the runtime `PRAGMA query_only=1` backstop in `query-engine.ts` (PR #35). `.codemap/index.db` is gitignored; `.codemap/recipes/` is NOT (verified via `git check-ignore`) — recipes are git-tracked source code authored for human review. **Tool / resource handlers (transport-agnostic):** **`src/application/tool-handlers.ts`** + **`src/application/resource-handlers.ts`** — pure functions that take the args object an MCP tool / resource URI accepts and return a discriminated **`ToolResult`** (`{ok: true, format: 'json'|'sarif'|'annotations', payload}` / `{ok: false, error}`) or a **`ResourcePayload`** (`{mimeType, text}`). MCP and HTTP both wrap the same handlers — MCP translates to `{content: [{type: "text", text}]}`, HTTP translates to `(status, body)` with the right `Content-Type`. Engine layer untouched; transport changes don't ripple into the SQL. @@ -161,13 +161,13 @@ The npm package exports **`createCodemap`**, **`Codemap`** (`query`, `index`), * 1. **`await createCodemap({ root, configFile?, config? })`** — loads `codemap.config.*`, calls **`initCodemap`** and **`configureResolver`**. 2. **`await cm.index({ mode, files?, quiet? })`** — same pipeline as the CLI (incremental / full / targeted). -3. **`cm.query(sql)`** — read-only SQL against `.codemap.db` (opens the DB per call). +3. **`cm.query(sql)`** — read-only SQL against `.codemap/index.db` (opens the DB per call). 
**Constraint:** `initCodemap` is global to the process; only one active indexed project at a time. ### User config -Optional **`codemap.config.ts`** (default export: object or async factory) or **`codemap.config.json`** at the project root; **`--config`** points at either. Example shape: [`codemap.config.example.json`](../codemap.config.example.json). +Optional **`<state-dir>/config.{ts,js,json}`** (default `.codemap/config.*`; default export: object or async factory). **`--config <path>`** overrides with an explicit file (absolute or relative to cwd). Example shape: [`codemap.config.example.json`](../codemap.config.example.json). **Self-healing (D11):** `<state-dir>/.gitignore` is reconciled to canonical on every codemap boot via **`ensureStateGitignore`** (`src/application/state-dir.ts`); JSON config is reconciled via **`ensureStateConfig`** (`src/application/state-config.ts` — prunes unknown keys with a warning, sorts alphabetically, write-only-on-drift). TS/JS configs are validate-only at load time. Bumping the canonical `STATE_GITIGNORE_BODY` constant or the Zod schema IS the migration — every consumer's project repairs itself on next boot. Single attachment point: **`src/cli/bootstrap-codemap.ts`** runs the reconcilers before `loadUserConfig`. **Validation:** **`codemapUserConfigSchema`** ([Zod](https://zod.dev)) — strict object (unknown keys are rejected). **`defineConfig({ ... })`**, **`parseCodemapUserConfig`**, and **`resolveCodemapConfig`** (CLI and merged `createCodemap({ config })`) all go through the same schema. Invalid config throws **`TypeError`** with a short path/message list. @@ -177,7 +177,7 @@ Optional **`codemap.config.ts`** (default export: object or async factory) or ** **Fingerprints:** incremental runs compare **`files.content_hash`** — SHA-256 hex of raw file bytes from [`src/hash.ts`](../src/hash.ts) (same on Node and Bun). Details in the **`files`** table below. 
-**Fresh database:** the default CLI **`codemap`** (incremental) calls **`createSchema()`** in **`runCodemapIndex`** before **`getChangedFiles()`**, so the **`meta`** table exists before **`getMeta(..., "last_indexed_commit")`** runs on an empty **`.codemap.db`**. +**Fresh database:** the default CLI **`codemap`** (incremental) calls **`createSchema()`** in **`runCodemapIndex`** before **`getChangedFiles()`**, so the **`meta`** table exists before **`getMeta(..., "last_indexed_commit")`** runs on an empty **`.codemap/index.db`**. Current schema version: **5** — see [Schema Versioning](#schema-versioning) for details. @@ -410,11 +410,11 @@ The indexer uses git to detect changes since the last indexed commit: Running the indexer produces up to three files in the project root, all gitignored: -### `.codemap.db` +### `.codemap/index.db` The main SQLite database file. Contains all tables and indexes. This is the persistent data store that survives between runs. Typically 2-5 MB for this project. -### `.codemap.db-wal` (Write-Ahead Log) +### `.codemap/index.db-wal` (Write-Ahead Log) Created automatically because the database uses `PRAGMA journal_mode = WAL`. Instead of writing changes directly to the main `.db` file, SQLite appends them to this WAL file first. This enables: @@ -422,9 +422,9 @@ Created automatically because the database uses `PRAGMA journal_mode = WAL`. Ins - **Crash safety** — if the process dies mid-write, the WAL is replayed on next open - **Better write performance** — sequential appends to WAL are faster than random writes to the B-tree -The WAL gets **checkpointed** (merged back into `.codemap.db`) periodically by SQLite or when the last connection closes cleanly. After a clean close, this file may be empty (0 bytes) or absent. +The WAL gets **checkpointed** (merged back into `.codemap/index.db`) periodically by SQLite or when the last connection closes cleanly. After a clean close, this file may be empty (0 bytes) or absent. 
-### `.codemap.db-shm` (Shared Memory) +### `.codemap/index.db-shm` (Shared Memory) A memory-mapped index into the WAL file. Allows multiple processes to coordinate concurrent read access to the WAL without locking the main database file. Contains a hash table mapping page numbers to WAL frame locations. diff --git a/docs/glossary.md b/docs/glossary.md index 3ebe7b5..1cb0461 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -75,9 +75,17 @@ TS shape for one row of the `calls` table. Maps 1:1 to the SQLite columns. The extraction path a file took during parsing. One of `ts`, `css`, or `text`. Stored on `ParsedFile.category`, not on a SQLite table. See `ParsedFile`. -### `.codemap.db` +### `.codemap/` / `<state-dir>` / `CODEMAP_STATE_DIR` -The on-disk SQLite database file at `/.codemap.db`. Always accompanied by `.codemap.db-wal` and `.codemap.db-shm` while open (WAL mode). Gitignored via the `.codemap.*` pattern that `codemap agents init` ensures. +The codemap state directory under `<projectRoot>` — holds every codemap-managed file: `index.db` (+ WAL / SHM), `audit-cache/<sha>/`, project-local `recipes/`, `config.{ts,js,json}`, and the self-managed `.gitignore` (per plan §D7 + D11). Default name `.codemap/`; override via `--state-dir <path>` CLI or `CODEMAP_STATE_DIR` env (relative paths resolve against `<projectRoot>`). Resolved at bootstrap, not via the config file (chicken-and-egg). Engine: `src/application/state-dir.ts` (`resolveStateDir`). + +### `.codemap/index.db` (the index) + +The on-disk SQLite database file at `<state-dir>/index.db` (default `<projectRoot>/.codemap/index.db`). Always accompanied by `index.db-wal` and `index.db-shm` while open (WAL mode). Gitignored by the self-managed `<state-dir>/.gitignore` written by `ensureStateGitignore`. + +### `.codemap/.gitignore` / self-healing files + +Codemap-managed `.gitignore` inside `<state-dir>/` (blacklist of generated artifacts; tracked sources `recipes/` + `config.*` default to tracked). 
Reconciled on every codemap boot by `ensureStateGitignore` (`src/application/state-dir.ts`) — read → compare to canonical → write only on drift. **Bumping the canonical body in a future PR IS the migration**: every consumer's project repairs itself on next codemap run. Same self-healing pattern (`ensure*` reconciler, idempotent, drift-detect) governs `/config.json` (`ensureStateConfig` in `src/application/state-config.ts` — prunes unknown keys, sorts keys, never touches user-authored TS/JS configs). Inspired by flowbite-react's `setup-*` shape; expressed in codemap's own conventions per plan §D11. ### `codemap context` diff --git a/docs/plans/codemap-dir-consolidation.md b/docs/plans/codemap-dir-consolidation.md deleted file mode 100644 index 349dfc4..0000000 --- a/docs/plans/codemap-dir-consolidation.md +++ /dev/null @@ -1,104 +0,0 @@ -# `.codemap/` directory consolidation — single root, self-managed `.gitignore` - -> **Status:** in design (no code) · **Backlog:** roadmap entry to be added in this PR. Delete this file when shipped (per [`docs/README.md` Rule 3](../README.md)). - -## Goal - -Consolidate every codemap-managed path under a single state directory (``, configurable, default `.codemap/`) and ship a self-managed `/.gitignore` so future codemap features never require user `.gitignore` edits. - -Today the user-facing surface has **three patterns**: - -- `/.codemap.db` (+ `-wal` / `-shm`) — root-level SQLite files; matched by user's `.gitignore: .codemap.*`. -- `/codemap.config.{ts,json}` — root-level config file; tracked. -- `/.codemap/recipes/` (tracked) + `/.codemap/audit-cache/` (untracked) — under `.codemap/`; matched by `.codemap/audit-cache/` in user's `.gitignore`. - -Every new cache or persistent state we add (audit-cache shipped in PR #52, future ones for impact-graph caching, query-result caching, telemetry, etc.) requires another line in the user's `.gitignore` via `agents-init.ts`. 
The `flowbite-react` precedent (`.flowbite-react/.gitignore` shipping a blacklist of generated artifacts) collapses that surface to one self-managed file. - -## Why - -- **Future-proof.** New codemap state lives under `.codemap/`; bumping the blacklist in `.codemap/.gitignore` is automatic on `codemap` boot, not a user-visible change. -- **Single-dir convention.** Matches `.git/`, `.next/`, `.turbo/`, `.vercel/`, `.flowbite-react/` — every modern tool that owns project state ships one root. -- **Cleaner root.** Project listings (file explorers, IDE sidebars, `ls`) show one `.codemap/` entry instead of `.codemap.db` + `.codemap.db-wal` + `.codemap.db-shm` + `.codemap/`. -- **Self-managed `.gitignore`.** Reader of `.codemap/.gitignore` immediately sees what's machine-written; adding a new tracked source (e.g. future `config.json`) doesn't need a `.gitignore` change. -- **Closes the per-feature `agents-init.ts` `.gitignore` patching surface.** PR #52 added `.codemap/audit-cache/`; PR #X would add `.codemap//`. Done. - -## Sketched layout - -```text -/ -├── .codemap/ ← default; overridable via --state-dir / CODEMAP_STATE_DIR -│ ├── .gitignore ← codemap-managed; tracked -│ ├── config.ts ← was /codemap.config.ts; tracked (D8) -│ ├── recipes/ ← user-authored SQL; tracked (existing) -│ │ ├── big-files.sql -│ │ └── big-files.md -│ ├── index.db ← was .codemap.db; ignored -│ ├── index.db-wal ← was .codemap.db-wal; ignored -│ ├── index.db-shm ← was .codemap.db-shm; ignored -│ └── audit-cache/ ← per PR #52; ignored -│ └── /... -└── (root .gitignore — codemap entries no longer needed) -``` - -`.codemap/.gitignore` (blacklist, mirrors flowbite-react's `class-list.json` / `pid` shape): - -```gitignore -index.db -index.db-wal -index.db-shm -audit-cache/ -``` - -User's root `.gitignore` no longer needs **any** codemap entries — git respects the nested `.gitignore`. `agents init` stops touching the root `.gitignore` for codemap and instead writes `.codemap/.gitignore` on first init. 
- -## Decisions - -| # | Decision | -| --- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| D1 | **Blacklist over whitelist.** `/.gitignore` lists each generated artifact explicitly (per flowbite-react). Adding a new tracked source needs no change; adding a new generated artifact bumps the blacklist in the same PR that introduces it (mirrors how docs-governance Rule 9 ties new domain nouns to glossary updates). | -| D2 | **No migration shim.** Codemap is pre-v1; the implementation will move `.codemap.db` → `/index.db` cleanly with no compat code. Existing dev clones run `rm .codemap.db` once and re-index. Same for `/codemap.config.{ts,json}` → `/config.{ts,json}`. Changelog will note the one-line cleanup. 
| -| D3 | **`agents init` writes both the nested `.gitignore` and a root entry.** On every `codemap agents init` run: (a) write/update `/.gitignore` to the canonical content; (b) ensure root `.gitignore` contains `/` ONCE as a safety net (covers tools that disable nested `.gitignore` lookup). Both writes are idempotent. Pre-existing entries like `.codemap.*` are left alone — pre-v1, users can clean up manually if they care. | -| D4 | **`/.gitignore` is regenerated on every `codemap` boot, not just `agents init`.** flowbite-react's `setupGitIgnore` runs on every CLI invocation; same shape here. Idempotent (read-compare-write; only fires on drift). New generated paths land in user's checkouts the first time they run `codemap` after upgrading — no out-of-band `agents init` re-run needed. | -| D5 | **Recipes stay where they are.** `/recipes/` is the documented location (PR #37) and stays. The blacklist doesn't mention it — defaults to tracked. | -| D6 | **`audit-cache/` move?** Stays at `/audit-cache//` — already correctly placed per PR #52. The blacklist gains a literal `audit-cache/` line. | -| D7 | **State directory is configurable; default `.codemap/`.** Resolved at bootstrap (NOT via the config file — chicken-and-egg) in this order: (1) `--state-dir ` CLI arg, (2) `CODEMAP_STATE_DIR` env var, (3) default `/.codemap/`. Resolves relative paths against `projectRoot`. The dir name flows through every codemap-managed path uniformly — DB at `/index.db`, gitignore at `/.gitignore`, config at `/config.{ts,json}`, audit cache at `/audit-cache/`, etc. | -| D8 | **Config file moves into `/config.{ts,js,json}`.** Replaces `/codemap.config.{ts,json}`. Bootstrap order: (1) `--config ` (CLI override; absolute / relative-to-cwd path, no implicit `/` prefix), (2) `/config.ts`, (3) `/config.js`, (4) `/config.json`. Pre-v1 → no back-compat for the legacy root paths; doc the one-line move in the changelog. 
The config file is **tracked** (it's user-authored source, not generated) — no entry in the blacklist. | -| D9 | **Env vars.** `CODEMAP_ROOT` continues to point at the project root (unchanged). `CODEMAP_STATE_DIR` (new) overrides the default `/.codemap/` location. No `CODEMAP_DATABASE_PATH` — `--state-dir` IS the escape hatch for non-standard layouts; if one of the few existing users had `CODEMAP_DATABASE_PATH` set, the changelog notes the rename. | -| D10 | **Self-managed `.gitignore` is itself tracked.** `/.gitignore` is committed to the user's repo; codemap rewrites it idempotently. Same pattern flowbite-react uses. User can edit it manually but codemap will overwrite back to canonical on next boot — consistent with `package.json` write-on-install behaviors. | -| D11 | **Self-healing files — the setup logic IS the migration.** Every codemap-managed file in `/` is owned by an idempotent `ensure` engine in `application/` (matches the existing `ensureGitignoreCodemapPattern` shape) that runs on every codemap boot: read existing content if any → validate against the canonical shape → reconcile (fill missing defaults, prune unknowns, normalise key order) → write only on drift → log one line. No separate "migrator" code path; bumping the canonical shape in v2 IS the migration — every consumer's project repairs itself on next `codemap` run. Config validation reuses **Zod** (already in deps via `tool-handlers.ts`) so the schema doubles as the runtime-typecheck and the TS source via `z.infer`. The `.gitignore` reconciler enforces the canonical blacklist verbatim — the file is owned by codemap (header line `# codemap-managed — edits will be overwritten`), no merge logic, no marker block. Means upgrade UX is `bun update` and consumers' files just become correct; cost to us is one-line bump per new artifact, not a per-PR `agents-init.ts` patch. 
| - -## Tracers - -| # | Slice | Acceptance | -| --- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| 1 | **State-dir resolver + path constants** in `src/application/state-dir.ts` (new). `resolveStateDir({root, cliFlag, env})` returns the absolute `` per D7 ordering. `getDatabasePath()` (existing in `runtime.ts`) reads from the resolved state-dir. Pure helpers, no side effects. Unit tests cover: default, `--state-dir`, env var, env+flag (flag wins), relative-path resolution against `projectRoot`. | Resolver deterministic across all four sources. | -| 2 | **`ensureStateGitignore` reconciler (self-healing, D11)** in `src/application/state-dir.ts`. Pure-function-where-possible shape (`{ before, after, written }` return value for testability); side effect at the edge. Owns one canonical constant `STATE_GITIGNORE_BODY` (the blacklist) that includes a `# codemap-managed — edits will be overwritten` header per D11. Unit tests cover: fresh write, idempotent re-run (no write), older-version content missing today's entry (rewrites), user-edited content (rewrites back), absent file (creates). 
| Drift detection works; no write on the steady-state common case. | -| 3 | **Config loader move + `ensureStateConfig` reconciler (D8 + D11)** — `loadUserConfig({stateDir, explicitPath?})` reads `/config.{ts,js,json}` (D8 order). For JSON configs: `ensureStateConfig({stateDir})` uses a Zod schema (one source of truth — type via `z.infer`, runtime validation via `safeParse`) to fill missing defaults, prune unknown keys with `console.warn`, normalise key order, write only on drift. For TS/JS configs: validate-only mode — runtime apply defaults + warn on type mismatches; never rewrite user code. `--config ` continues to take an explicit path; legacy `/codemap.config.{ts,json}` paths dropped (D2). Tests cover: fresh JSON, missing-field fill, unknown-field prune, key-sort normalisation, TS validate-only, drift-no-write, schema-evolution (older config gains new defaults). | JSON configs self-heal across versions; TS configs validated-only. | -| 4 | **Bootstrap wiring** — every CLI command's bootstrap path (`cli/bootstrap.ts` or its caller) calls `ensureStateDir({stateDir})` once before `loadUserConfig`. `ensureStateDir` is a thin orchestrator calling `ensureStateGitignore` + `ensureStateConfig` (and any future `ensure*`). `agents-init.ts`'s `ensureGitignoreCodemapPattern` is rewritten — drops the per-feature pattern list, adds `/` to root `.gitignore` once as a defensive entry per D3, then delegates to `ensureStateDir`. Existing legacy root entries left alone. Tests updated. | Bootstrap fans out to all reconcilers; `agents init` delegates correctly. 
| -| 5 | **Doc + changeset + plan deletion** — README (new layout, `--state-dir` flag, config-file move, "self-healing files" callout), `docs/architecture.md` § Persistence wiring (state-dir resolver, reconciler orchestrator, self-healing principle, Zod-as-config-schema), `docs/glossary.md` (``, `CODEMAP_STATE_DIR`, "self-healing files / `ensure*` reconciler" entries), `.agents/` + `templates/agents/` rule + skill (Rule 10 lockstep). Repo's own root `.gitignore` slimmed to drop `.codemap.*` + `.codemap/audit-cache/`; `.codemap/.gitignore` checked in. Repo's own `codemap.config.*` (if any) moved into `.codemap/config.*`. Minor changeset with the one-line cleanup instructions for existing devs. Plan deleted per Rule 3. | All docs consistent; repo dogfoods the new layout. | - -## Performance considerations - -- **State-dir resolution** — three `process.env`/argv reads + one `path.resolve`; sub-µs. -- **`/.gitignore` write** — read-compare-write per boot; sub-ms when content matches (the common case). -- **Nested `.gitignore` lookup cost** — git already walks for nested ignores everywhere; one extra file is irrelevant. -- **Config file lookup** — three `existsSync` calls (ts → js → json) per boot; sub-ms. - -## Alternatives considered - -| Candidate | Why not | -| -------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Whitelist `/.gitignore` (`*` + `!recipes/**`)\*\* | Safer for newcomers (default ignored, opt-in) but harder to read — the user can't tell which files are generated by glancing at the `.gitignore`. flowbite-react picked blacklist for the same reason. 
| -| **Keep `.codemap.db` at root, only consolidate caches under `/`** | Avoids the move but keeps the dual-pattern surface forever — every cache PR still patches the user's root `.gitignore` (just once instead of twice). The whole point of this refactor is collapsing that surface. | -| **Self-managed root-level `.gitignore` block (between `# codemap-managed start` / `# codemap-managed end` markers)** | More fragile than a separate file; easy for users to break the markers; doesn't survive merge conflicts well. The flowbite-react pattern (separate file under the tool's own dir) sidesteps all of this. | -| **Migration shim with deprecation timeline** | Pre-v1 — see D2. Two existing dev clones (codemap repo itself + `CODEMAP_TEST_BENCH` projects) can each `rm .codemap.db` once. | -| **State-dir name configurable via the config file (not just CLI/env)** | Chicken-and-egg: codemap needs to know `` to find the config that says where `` is. Bootstrap via CLI/env only (D7); config file controls everything else. | -| **Move recipes too (`/state/recipes/`)** | Recipes are user-authored source; nesting them under `state/` blurs "I wrote this" vs "the tool generated this". Kept at `/recipes/` (top-level under `/`) for clarity. | - -## Out of scope - -- **Telemetry / analytics state** — speculative; if added, lands under `//` and gets a blacklist line. -- **Backward-compat for `/codemap.config.{ts,json}`** — pre-v1, dropped cleanly per D2/D8. Changelog notes the move. -- **Backward-compat for `/.codemap.db`** — pre-v1, dropped cleanly per D2. `rm .codemap.db && codemap` re-indexes from scratch. -- **Auto-cleanup of root `.gitignore` codemap entries** — per D3, leave existing lines alone. A dedicated `codemap agents cleanup-gitignore` verb is a v1.x+ concern. -- **`CODEMAP_TEST_BENCH` env semantics** — continues pointing at a project root; the resolved `` is derived per D7. 
diff --git a/docs/research/fallow.md b/docs/research/fallow.md index f5aeae1..e722cdc 100644 --- a/docs/research/fallow.md +++ b/docs/research/fallow.md @@ -34,6 +34,7 @@ Adoption-candidate ship status. The tier tables in § 1 are preserved as the ori - **Doc-governance Rule 10** added during PR [#29](https://github.com/stainless-code/codemap/pull/29) — every core-surface change must update both `templates/agents/` (ships to npm) and `.agents/` (this clone) in lockstep. - **`cli/*` → `application/*` engine lift (internal)** — PR [#41](https://github.com/stainless-code/codemap/pull/41) closed the last layer-reversal imports `application/mcp-server.ts` had on `cli/*` (called out in the PR #35 self-audit). New engines `context-engine` / `validate-engine`; `query-recipes` moved to `application/`; envelope builders + helpers consolidated in `audit-engine` / `show-engine`. Pure refactor — no behavior or public API change — but unblocks the HTTP transport (B-tier `serve`) since that engine reuse is now clean. - **`codemap serve` HTTP API** — PR [#44](https://github.com/stainless-code/codemap/pull/44). Same tool taxonomy as `codemap mcp` over `POST /tool/{name}` for non-MCP consumers (CI scripts, simple `curl`, IDE plugins). Loopback default (`127.0.0.1:7878`); optional `--token` for Bearer auth. Bare `node:http` (no Express/Fastify dep). Tool bodies + resource fetchers live in shared `application/{tool,resource}-handlers.ts` — both transports dispatch the same pure handlers. CSRF + DNS-rebinding guard rejects `Sec-Fetch-Site: cross-site|same-site`, mismatched `Host` (loopback bind), and any `Origin` header — defends against malicious local webpages `fetch`-ing the API while the dev browses. Per-tool Zod validation at the HTTP boundary; ToolResult error arm carries `status?: 400|404|500` so unknown recipe / baseline → 404 and engine throws → 500. 
+- **`.codemap/` directory consolidation + self-healing files** — PR [#54](https://github.com/stainless-code/codemap/pull/54), planned in PR [#53](https://github.com/stainless-code/codemap/pull/53). Single state directory under `/.codemap/` (default; overridable via `--state-dir` / `CODEMAP_STATE_DIR`). `.codemap.db` → `.codemap/index.db`; `/codemap.config.{ts,json}` → `.codemap/config.{ts,js,json}`. Self-managed `.codemap/.gitignore` (blacklist of generated artifacts) — codemap reconciles it (and JSON config) on every boot via `ensureStateGitignore` / `ensureStateConfig`. Bumping the canonical content IS the migration: every consumer's project repairs itself on next codemap run, no per-feature `agents-init.ts` `.gitignore` patching forever after. Pattern inspired by flowbite-react's `setup-*` shape, expressed in codemap's own conventions (`ensure*` reconcilers, Zod schema as single source of truth via `z.infer`, pure `{before, after, written}` return shapes for testability). - **`codemap audit --base ` (git-ref baseline)** — PR [#52](https://github.com/stainless-code/codemap/pull/52), planned in PR [#51](https://github.com/stainless-code/codemap/pull/51). Closes the highest-frequency post-watch agent loop: "what changed structurally between this branch and origin/main?". Worktree+reindex against any git committish to a sha-keyed cache under `.codemap/audit-cache/`; cache hit on second run is sub-100ms. Atomic populate via per-pid temp dir + POSIX `rename` — concurrent CI matrix runs safe by construction. `AuditBase` discriminated union — existing `{source: "baseline", ...}` rows untouched, new `{source: "ref", ref, sha, ...}` arm. Mutually exclusive with `--baseline `; per-delta `---baseline` overrides compose orthogonally. Hard error on non-git projects (no graceful fallback — there's no meaningful "ref" without git). MCP `audit` tool gains `base?: string` arg + HTTP `POST /tool/audit` lights up automatically via the existing dispatcher. 
- **`codemap impact` (blast-radius walker)** — PR [#50](https://github.com/stainless-code/codemap/pull/50), planned in PR [#49](https://github.com/stainless-code/codemap/pull/49). Replaces the "agent composes `WITH RECURSIVE` by hand" tax — single verb walks the calls / dependencies / imports graphs (callers, callees, dependents, dependencies), depth- and limit-bounded, cycle-detected. Same pure-engine pattern as `show` / `snippet`: `application/impact-engine.ts` reused by CLI / MCP `impact` / HTTP `POST /tool/impact` via the existing `tool-handlers.ts` dispatcher. Symbol vs file targets walk compatible backends automatically; mismatched explicit `--via` choices land in `skipped_backends`. Output envelope `{target, matches, summary: {nodes, terminated_by}}` — `--summary` trims `matches` for cheap CI-gate consumption (`jq '.summary.nodes'`). - **`codemap watch` (live reindex)** — PR [#47](https://github.com/stainless-code/codemap/pull/47), planned in PR [#46](https://github.com/stainless-code/codemap/pull/46). The biggest agent-UX win in the roadmap: eliminates the "is the index stale?" friction every CLI / MCP / HTTP query rides on today. Three shapes: standalone `codemap watch`, plus killer combos `codemap mcp --watch` and `codemap serve --watch` (also `CODEMAP_WATCH=1`). Chokidar v5 backend (selected via 6-watcher audit on PR #46 — pure JS, no Bun N-API quirks, identical on Bun + Node). Sliding-window debouncer (default 250 ms) + path-segment exclude scan + project-local recipe glob. Optional `onPrime` opt runs an incremental catch-up BEFORE flipping `isWatchActive()` true so `handleAudit` only skips its prelude when the index is genuinely fresh. Stop drains in-flight reindex (serialized via inFlight chain) before close so SIGINT/SIGTERM never leaves a half-written DB. Backend errors clear the active flag so a dying chokidar re-enables the audit prelude immediately. 
diff --git a/docs/roadmap.md b/docs/roadmap.md index 5dea152..d6ebfd1 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -35,7 +35,6 @@ Codemap stays a structural-index primitive that other tools can consume. Out of ## Backlog -- [ ] **`.codemap/` directory consolidation** — single root + self-managed `.codemap/.gitignore` (blacklist of generated artifacts, mirrors flowbite-react pattern). Move `.codemap.db` → `.codemap/index.db`; move `/codemap.config.{ts,json}` → `/config.{ts,js,json}`; collapse the per-feature `agents-init.ts` `.gitignore` patching to a one-time `.codemap/` defensive entry. Self-healing files (`ensure*` reconcilers run every codemap boot — setup logic IS the migration). Plan: [`plans/codemap-dir-consolidation.md`](./plans/codemap-dir-consolidation.md). - [ ] **`codemap audit` verdict + thresholds** (v1.x) — `verdict: "pass" | "warn" | "fail"` driven by `codemap.config.audit.deltas[].{added_max, action}`. Triggers: two consumers ship `jq`-based threshold scripts with similar shapes, OR one consumer asks with a concrete config sketch. Until then, raw deltas + consumer-side `jq` is the CI exit-code idiom. - [ ] **Monorepo / workspace awareness** — discover workspaces from `pnpm-workspace.yaml` / `package.json` and index per-workspace dependency graphs - [ ] **Cross-agent handoff artifact** — _speculative_; layered prefix/delta JSON written on session-stop, read on session-start. Complementary to indexing rather than core to it; revisit if user demand emerges diff --git a/templates/agents/rules/codemap.md b/templates/agents/rules/codemap.md index a79d43f..9c83ec2 100644 --- a/templates/agents/rules/codemap.md +++ b/templates/agents/rules/codemap.md @@ -6,7 +6,7 @@ alwaysApply: true > **STOP.** Before you call Grep, Glob, SemanticSearch, or Read to answer a **structural** question about this repository — query the Codemap SQLite index first. This is not optional when the question matches a trigger pattern below. 
-A local database (default **`.codemap.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. +A local database (default **`.codemap/index.db`**) indexes structure: symbols, imports, exports, components, dependencies, markers, CSS variables, CSS classes, CSS keyframes. The `.codemap/` directory holds every codemap-managed file (`index.db` + WAL/SHM, `audit-cache/`, project `recipes/`, `config.{ts,js,json}`, self-managed `.gitignore`); override the dir with `--state-dir <path>` or `CODEMAP_STATE_DIR`. The `.codemap/.gitignore` is **codemap-managed and reconciled on every boot** — codemap version bumps auto-apply on next run, no manual cleanup needed. **Generic defaults:** This rule is **project-agnostic**. After **`codemap agents init`** (or copying these files into **`.agents/`**), **edit your copy** to add app-specific triggers and SQL — upstream text is only a baseline. @@ -58,7 +58,7 @@ actions: Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. -**Baselines** (`query_baselines` table inside `.codemap.db`, no parallel JSON files): `--save-baseline[=<name>]` snapshots a result set; `--baseline[=<name>]` diffs the current result against it (added / removed rows; identity = `JSON.stringify(row)`). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=<name>`. Survives `--full` and SCHEMA bumps. +**Baselines** (`query_baselines` table inside `.codemap/index.db`, no parallel JSON files): `--save-baseline[=<name>]` snapshots a result set; `--baseline[=<name>]` diffs the current result against it (added / removed rows; identity = `JSON.stringify(row)`). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=<name>`. Survives `--full` and SCHEMA bumps. 
**Audit (`codemap audit`)**: structural-drift command; emits `{head, deltas: {files, dependencies, deprecated}}` (each delta carries its own `base` metadata). Three mutually-exclusive snapshot sources: `--base ` materialises a git committish via `git worktree add` to a sha-keyed cache under `.codemap/audit-cache/`, reindexes a temp DB, then diffs (sub-100ms second run; requires git; `base.source: "ref"`); `--baseline ` auto-resolves `-files` / `-dependencies` / `-deprecated` from saved `query_baselines` entries (`base.source: "baseline"`); `---baseline ` is the explicit per-delta override (composes with both). v1 ships no `verdict` / threshold config — consumers compose `--json` + `jq` for CI exit codes. Auto-runs an incremental index before the diff (use `--no-index` to skip for frozen-DB CI). diff --git a/templates/agents/skills/codemap/SKILL.md b/templates/agents/skills/codemap/SKILL.md index 140310a..5b58885 100644 --- a/templates/agents/skills/codemap/SKILL.md +++ b/templates/agents/skills/codemap/SKILL.md @@ -41,11 +41,11 @@ Replace placeholders (`'...'`) with your module path, file glob, or symbol name. - **`--summary`** — counts only. With **`--json`**: **`{"count": N}`**. With **`--group-by`**: **`{"group_by": "", "groups": [{key, count}]}`**. - **`--changed-since `** — post-filter rows by **`path`** / **`file_path`** / **`from_path`** / **`to_path`** / **`resolved_path`** against **`git diff --name-only ...HEAD ∪ git status --porcelain`**. Rows with no recognised path column pass through. - **`--group-by owner|directory|package`** — partition into buckets and emit **`{"group_by", "groups": [{key, count, rows}]}`**. **`owner`** reads CODEOWNERS (last matching rule wins); **`directory`** is the first path segment; **`package`** uses **`package.json`** **`workspaces`** or **`pnpm-workspace.yaml`**. 
**Mutually exclusive with `--save-baseline` / `--baseline`.** -- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. +- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap/index.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. - **`--baseline[=]`** — diff the current result against the saved baseline. Output `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (with `--json`) or a two-section terminal dump. Identity = per-row multiset equality (canonical `JSON.stringify` keyed frequency map; duplicates preserved). Pair with `--summary` for `{baseline:{...}, current_row_count, added: N, removed: N}`. **Mutually exclusive with `--group-by`.** - **`--baselines`** lists saved baselines (no `rows_json` payload); **`--drop-baseline `** deletes one. Both reject every other flag — they're list-only / drop-only operations. - **Per-row recipe `actions`** — recipes that define an **`actions: [{type, auto_fixable?, description?}]`** template append it to every row in **`--json`** output (recipe-only; ad-hoc SQL never carries actions). Under `--baseline`, actions attach to the **`added`** rows only (the rows the agent should act on). Inspect via **`--recipes-json`**. -- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. 
Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. +- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. **Audit (`codemap audit`)** — separate top-level command for structural-drift verdicts. 
Composes B.6 baselines into a per-delta `{head, deltas}` envelope; v1 ships `files` / `dependencies` / `deprecated`. Two snapshot-source shapes: @@ -79,7 +79,7 @@ Each emitted delta carries its own `base` metadata so mixed-baseline audits are - **`codemap://recipes`** — full catalog JSON (same as `--recipes-json`). Each entry carries `source: "bundled" | "project"` and `shadows: true` on project entries that override a bundled recipe id. Read this at session start so you know when a `--recipe foo` call will run a project override instead of the documented bundled version. - **`codemap://recipes/{id}`** — single recipe `{id, description, body?, sql, actions?, source, shadows?}`. Replaces `--print-sql `. -- **`codemap://schema`** — DDL of every table in `.codemap.db` (queried live from `sqlite_schema`). +- **`codemap://schema`** — DDL of every table in `.codemap/index.db` (queried live from `sqlite_schema`). - **`codemap://skill`** — full text of this skill file. Agents that don't preload the skill at session start can fetch it here. **Launching:** point your agent host at `codemap mcp` as the stdio command. Most hosts (Claude Code, Cursor, Codex) accept `{command: "codemap", args: ["mcp"], cwd: "/path/to/project"}`. The server inherits `cwd` as the project root unless `--root` overrides it. From 5df0e2f9de5e2126d9626bc37048600fc5425b74 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 18:58:12 +0300 Subject: [PATCH 6/7] docs(state-dir): refresh stale path refs across docs/ + slim self-authored comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doc staleness sweep (after Tracer 5): - docs/glossary.md, docs/agents.md, docs/benchmark.md, docs/why-codemap.md, docs/research/competitive-scan-2026-04.md, docs/research/fallow.md (B.6 row): bulk `.codemap.db` → `.codemap/index.db` everywhere except the intentional 'old → new' migration callouts. 
- docs/architecture.md, docs/research/fallow.md, docs/packaging.md: `codemap.config.{ts,json}` → `<state-dir>/config.{ts,js,json}`. - docs/agents.md § Git: rewritten to describe the self-managed <state-dir>/.gitignore reconciler instead of root-.gitignore patching. - docs/benchmark.md: 'where the DB lives' updated; manual .gitignore note dropped (reconciler handles it). Concise-comments sweep on this turn's authored comments: - src/application/state-config.ts: 2 inline comments slimmed (TS/JS-validation-only and passthrough-rationale). --- docs/README.md | 2 +- docs/agents.md | 7 ++----- docs/architecture.md | 4 ++-- docs/benchmark.md | 16 ++++++++-------- docs/glossary.md | 14 +++++++------- docs/packaging.md | 2 +- docs/research/competitive-scan-2026-04.md | 4 ++-- docs/research/fallow.md | 4 ++-- docs/why-codemap.md | 4 ++-- src/application/state-config.ts | 7 +++---- 10 files changed, 30 insertions(+), 34 deletions(-) diff --git a/docs/README.md b/docs/README.md index f43061b..07192f7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -53,7 +53,7 @@ Cross-cutting topics that span multiple files. 
Each has exactly one canonical ho | Topic | Canonical doc | Elsewhere | | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Runtime splits (SQLite, workers, globs, JSON config I/O) | [packaging § Node vs Bun](./packaging.md#node-vs-bun) — **the table lives here** | [architecture § Runtime](./architecture.md#runtime-and-database) links here; do not copy the table | -| **`codemap.config.*`** shape / Zod validation | [architecture § User config](./architecture.md#user-config) | Root [README § Configuration](../README.md#configuration) points here | +| **`/config.{ts,js,json}`** shape / Zod validation | [architecture § User config](./architecture.md#user-config) | Root [README § Configuration](../README.md#configuration) points here | | **`codemap agents init`**: **`--force`** on **`.agents/`** in **consumer projects** (template file paths only), IDE matrix, per-file symlink/copy, **`templates/agents`** | [agents.md](./agents.md) | Link here; do not paste the integration table into README or packaging | | **`CLAUDE.md` / `AGENTS.md` / `GEMINI.md` / Copilot** — managed **`codemap-pointer`** sections, merge vs **`--force`** | [agents.md § Pointer files](./agents.md#pointer-files) | Link here; do not duplicate the situation table | | End-user CLI (index, **`query --json`**, **`query --recipe`**, **`query --recipes-json`**, **`query --print-sql`**, agents, flags, env) — query has no row cap; use SQL **`LIMIT`**; **`--json`** errors include SQL, DB 
open, and bootstrap failures; bundled **`templates/agents/`** examples default to **`--json`** | [../README.md § CLI](../README.md#cli) | [architecture § CLI usage](./architecture.md#cli-usage) summarizes and links back; [agents.md](./agents.md) | diff --git a/docs/agents.md b/docs/agents.md index f86f81e..657b96a 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -21,12 +21,9 @@ codemap agents init --interactive # or -i; requires a TTY ## Git and `.gitignore` -If **`/.git`** exists, Codemap ensures **`.codemap.*`** is listed so SQLite artifacts (e.g. **`.codemap.db`**, WAL/SHM) stay untracked: +Codemap maintains its own self-managed **`/.gitignore`** (default `.codemap/.gitignore`) — a blacklist of generated artifacts (`index.db` + WAL/SHM, `audit-cache/`) reconciled to canonical on every codemap boot via `ensureStateGitignore` (`src/application/state-dir.ts`). Project-tracked sources (`recipes/`, `config.{ts,js,json}`) default to tracked. -- No **`.gitignore`** → create one containing **`.codemap.*`**. -- **`.gitignore`** exists → append **`.codemap.*`** once if missing. - -If the project is **not** a Git working tree, **`.gitignore`** is not created. +The user's root **`.gitignore`** is no longer touched by `codemap agents init`. Future codemap versions can add new generated artifacts to the canonical blacklist; every consumer's project repairs itself on the next `codemap` invocation. **The setup logic IS the migration** (per plan §D11). 
## Optional IDE / tool wiring diff --git a/docs/architecture.md b/docs/architecture.md index 727297e..dd3e93a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -111,7 +111,7 @@ A local SQLite database (`.codemap/index.db`) indexes the project tree and store | `parsed-types.ts` | Shared `ParsedFile` shape for workers and adapters | | `agents-init.ts` / `agents-init-interactive.ts` | `codemap agents init` — see [agents.md](./agents.md) (granular template + IDE writes, pointer upsert, **`--interactive`**, `.gitignore`) | | `benchmark.ts` (+ `benchmark-default-scenarios.ts`, `benchmark-config.ts`, `benchmark-common.ts`) | SQL vs traditional timing; optional **`CODEMAP_BENCHMARK_CONFIG`** JSON — [benchmark.md § Custom scenarios](./benchmark.md#custom-scenarios-codemap_benchmark_config) | -| `config.ts` | `codemap.config.*` load path, **Zod** user schema (`codemapUserConfigSchema`), `resolveCodemapConfig` | +| `config.ts` | `/config.{ts,js,json}` load path, **Zod** user schema (`codemapUserConfigSchema`), `resolveCodemapConfig` | ## CLI usage @@ -159,7 +159,7 @@ When specific file paths are passed via `--files`, the indexer skips git diff, g The npm package exports **`createCodemap`**, **`Codemap`** (`query`, `index`), **`runCodemapIndex`** (advanced), **`codemapUserConfigSchema`**, **`parseCodemapUserConfig`**, **`defineConfig`**, **`CodemapDatabase`** (type), adapter types (`LanguageAdapter`, `getAdapterForExtension`, …), and **`ParsedFile`** — see **`src/api.ts`** / **`src/index.ts`** and **`dist/index.d.mts`**. Typical flow: -1. **`await createCodemap({ root, configFile?, config? })`** — loads `codemap.config.*`, calls **`initCodemap`** and **`configureResolver`**. +1. **`await createCodemap({ root, configFile?, config? })`** — loads `/config.{ts,js,json}`, calls **`initCodemap`** and **`configureResolver`**. 2. **`await cm.index({ mode, files?, quiet? })`** — same pipeline as the CLI (incremental / full / targeted). 3. 
**`cm.query(sql)`** — read-only SQL against `.codemap/index.db` (opens the DB per call). diff --git a/docs/benchmark.md b/docs/benchmark.md index 39d7cdd..20202f2 100644 --- a/docs/benchmark.md +++ b/docs/benchmark.md @@ -4,11 +4,11 @@ **Two topics — pick the row that matches what you need:** -| You want to… | Read | -| -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -| **Point Codemap at another directory** (large app clone, QA target) while hacking in **this** repo — `CODEMAP_*`, `.env`, where `.codemap.db` goes | [§ Indexing another project](#indexing-another-project) | -| **Measure SQL vs glob+read+regex** after an index exists — `src/benchmark.ts`, scenarios, fixtures | [§ The benchmark script](#the-benchmark-script) | -| **Compare `codemap query` table vs `--json` stdout** (lines/bytes) on an existing index | [§ Query stdout (`benchmark:query`)](#query-stdout-table-vs-json-benchmarkquery) | +| You want to… | Read | +| -------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| **Point Codemap at another directory** (large app clone, QA target) while hacking in **this** repo — `CODEMAP_*`, `.env`, where `.codemap/index.db` goes | [§ Indexing another project](#indexing-another-project) | +| **Measure SQL vs glob+read+regex** after an index exists — `src/benchmark.ts`, scenarios, fixtures | [§ The benchmark script](#the-benchmark-script) | +| **Compare `codemap query` table vs `--json` stdout** (lines/bytes) on an existing index | [§ Query stdout (`benchmark:query`)](#query-stdout-table-vs-json-benchmarkquery) | --- @@ -33,7 +33,7 @@ CODEMAP_TEST_BENCH=/absolute/path/to/your-app bun src/index.ts --full 
Use **`CODEMAP_ROOT`** instead of **`CODEMAP_TEST_BENCH`** if you prefer; behavior is the same. -**Where `.codemap.db` lives:** defaults to **`<root>/.codemap.db`**, not inside the Codemap repo — add `.codemap.db` to that project’s `.gitignore` if needed. +**Where `.codemap/index.db` lives:** defaults to **`<root>/.codemap/index.db`**, not inside the Codemap repo. The codemap-managed `<state-dir>/.gitignore` (written by the reconciler on first boot) ignores it automatically; no manual `.gitignore` edits needed. **Agents:** Work in the **stainless-code/codemap** window with [`.agents/rules/codemap.md`](../.agents/rules/codemap.md) and the [skill](../.agents/skills/codemap/SKILL.md). Queries resolve against whatever **`CODEMAP_*`** / **`--root`** selected. @@ -45,7 +45,7 @@ Use **`CODEMAP_ROOT`** instead of **`CODEMAP_TEST_BENCH`** if you prefer; behavi ### Overview -1. **Indexed** — single SQL query against `.codemap.db` +1. **Indexed** — single SQL query against `.codemap/index.db` 2. **Traditional** — glob (same implementation as the indexer — [packaging.md § Node vs Bun](./packaging.md#node-vs-bun)) → **`readFileSync`** → regex match (simulates what AI agent tools like Grep/Read/Glob do) For **repeatable** numbers, use **`fixtures/minimal/`** ([Fixtures](#fixtures)) or index your own app with **`CODEMAP_ROOT`** before running the script. @@ -74,7 +74,7 @@ bun src/benchmark.ts --verbose ### Query stdout: table vs JSON (`benchmark:query`) -After **`bun src/index.ts`** (or **`codemap`**) has created **`.codemap.db`** in the project you are measuring: +After **`bun src/index.ts`** (or **`codemap`**) has created **`.codemap/index.db`** in the project you are measuring: ```bash bun run benchmark:query diff --git a/docs/glossary.md b/docs/glossary.md index 1cb0461..2878de0 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -11,7 +11,7 @@ Alphabetical, lowercase. Disambiguation pairs link to each other. ## Conventions - **TS shape** = a TypeScript interface or type alias. 
-- **SQLite table** = an actual on-disk table in `.codemap.db`. +- **SQLite table** = an actual on-disk table in `.codemap/index.db`. - **Recipe** = a cataloged SQL recipe loaded by `src/application/recipes-loader.ts` from `templates/recipes/.{sql,md}` (bundled) or `/.codemap/recipes/.{sql,md}` (project-local). Exposed via `codemap query --recipe ` and the `codemap://recipes` MCP resource. See [§ R recipe](#recipe). - **Query** = any SQL run against the index (recipe or ad-hoc). @@ -33,11 +33,11 @@ A `.agents/rules/.md` file with YAML frontmatter. Distinct from a **skill* ### audit -Two-snapshot structural-drift command: `codemap audit` diffs the live `.codemap.db` against a base snapshot and emits `{head, deltas}` where each `deltas[]` carries `{base, added, removed}`. v1 ships three deltas: `files`, `dependencies`, `deprecated`. Each delta pins a canonical SQL projection (in `V1_DELTAS`) and a required-columns list — projects baseline rows down to that subset before diffing so schema bumps that add columns don't break pre-bump baselines. Three mutually-exclusive top-level snapshot sources: `--baseline ` (auto-resolve `-files` / `-dependencies` / `-deprecated` from `query_baselines`), `---baseline ` (explicit per-delta — composes with the others), and `--base ` (worktree + reindex against a git committish — see § A `audit --base`). Distinct from `codemap query --baseline` (that's one query, one diff; audit composes multiple per-delta diffs into one envelope). Distinct from `fallow audit` (that runs code-quality verdicts — dead code, dupes, complexity — which are explicit non-goals per [`roadmap.md` § Non-goals (v1)](./roadmap.md#non-goals-v1); codemap audit stays structural). +Two-snapshot structural-drift command: `codemap audit` diffs the live `.codemap/index.db` against a base snapshot and emits `{head, deltas}` where each `deltas[]` carries `{base, added, removed}`. v1 ships three deltas: `files`, `dependencies`, `deprecated`. 
Each delta pins a canonical SQL projection (in `V1_DELTAS`) and a required-columns list — projects baseline rows down to that subset before diffing so schema bumps that add columns don't break pre-bump baselines. Three mutually-exclusive top-level snapshot sources: `--baseline ` (auto-resolve `-files` / `-dependencies` / `-deprecated` from `query_baselines`), `---baseline ` (explicit per-delta — composes with the others), and `--base ` (worktree + reindex against a git committish — see § A `audit --base`). Distinct from `codemap query --baseline` (that's one query, one diff; audit composes multiple per-delta diffs into one envelope). Distinct from `fallow audit` (that runs code-quality verdicts — dead code, dupes, complexity — which are explicit non-goals per [`roadmap.md` § Non-goals (v1)](./roadmap.md#non-goals-v1); codemap audit stays structural). ### `audit --base ` / git-ref baseline -Ad-hoc audit snapshot from any git committish (`origin/main`, `HEAD~5`, ``, tag, …). `git worktree add` materialises `` to `/.codemap/audit-cache//`, codemap reindexes into the worktree's `.codemap.db`, then per-delta canonical SQL runs on that DB vs the live one. Cache key is the **resolved sha** (`git rev-parse --verify`), so `--base origin/main` and `--base ` (when they point at the same commit) share one cache entry. **Atomic populate** — per-pid temp dir + POSIX `rename`; concurrent processes resolving the same sha race-safely without lock files. Eviction: hardcoded LRU 5 entries / 500 MiB. Per-delta `base.source` is `"ref"` (vs `"baseline"`) and the delta carries `base.ref` (user-supplied string) + `base.sha` (resolved). Mutually exclusive with `--baseline `; composes orthogonally with per-delta `---baseline ` overrides. Hard error on non-git projects (no graceful fallback — there's no meaningful "ref" without git). Both transports (MCP `audit` tool's `base?` arg, HTTP `POST /tool/audit`) call the same `runAuditFromRef` engine in `application/audit-engine.ts`. 
+Ad-hoc audit snapshot from any git committish (`origin/main`, `HEAD~5`, ``, tag, …). `git worktree add` materialises `` to `/.codemap/audit-cache//`, codemap reindexes into the worktree's `.codemap/index.db`, then per-delta canonical SQL runs on that DB vs the live one. Cache key is the **resolved sha** (`git rev-parse --verify`), so `--base origin/main` and `--base ` (when they point at the same commit) share one cache entry. **Atomic populate** — per-pid temp dir + POSIX `rename`; concurrent processes resolving the same sha race-safely without lock files. Eviction: hardcoded LRU 5 entries / 500 MiB. Per-delta `base.source` is `"ref"` (vs `"baseline"`) and the delta carries `base.ref` (user-supplied string) + `base.sha` (resolved). Mutually exclusive with `--baseline `; composes orthogonally with per-delta `---baseline ` overrides. Hard error on non-git projects (no graceful fallback — there's no meaningful "ref" without git). Both transports (MCP `audit` tool's `base?` arg, HTTP `POST /tool/audit`) call the same `runAuditFromRef` engine in `application/audit-engine.ts`. --- @@ -333,11 +333,11 @@ A managed root-level file (`CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, `.github/copil ### query -Any SQL run against `.codemap.db` — either a **recipe** (bundled SQL) or ad-hoc. Distinct from **query-recipes.ts** (the file that holds bundled recipe SQL strings). +Any SQL run against `.codemap/index.db` — either a **recipe** (bundled SQL) or ad-hoc. Distinct from **query-recipes.ts** (the file that holds bundled recipe SQL strings). ### query baseline -A snapshot of a query result set saved by `codemap query --save-baseline[=]` and replayed by `codemap query --baseline[=]` for added/removed diffs. Stored in the `query_baselines` table inside `.codemap.db` (no parallel JSON files; survives `--full` and `SCHEMA_VERSION` rebuilds because the table is intentionally absent from `dropAll()`). Default name = `--recipe` id; ad-hoc SQL must pass an explicit name. 
Diff identity is per-row `JSON.stringify` equality — exact match, no fuzzy "changed" category in v1. +A snapshot of a query result set saved by `codemap query --save-baseline[=]` and replayed by `codemap query --baseline[=]` for added/removed diffs. Stored in the `query_baselines` table inside `.codemap/index.db` (no parallel JSON files; survives `--full` and `SCHEMA_VERSION` rebuilds because the table is intentionally absent from `dropAll()`). Default name = `--recipe` id; ad-hoc SQL must pass an explicit name. Diff identity is per-row `JSON.stringify` equality — exact match, no fuzzy "changed" category in v1. ### query recipe @@ -412,7 +412,7 @@ Long-running process that subscribes to filesystem changes via [chokidar v5](htt ### `codemap serve` / HTTP server -Long-running HTTP server exposing the same tool taxonomy as `codemap mcp` over `POST /tool/{name}` for non-MCP consumers (CI scripts, simple `curl`, IDE plugins that don't speak MCP). Default bind **`127.0.0.1:7878`** (loopback only — refuse `0.0.0.0` unless explicitly opted in via `--host 0.0.0.0`); optional `--token ` requires `Authorization: Bearer ` on every request. Output shape matches `codemap query --json` (NOT MCP's `{content: [...]}` wrapper — HTTP doesn't need that transport artifact); `format: "sarif"` payloads ship as `application/sarif+json`, `format: "annotations"` as `text/plain`. Routes: `POST /tool/{name}` (every MCP tool), `GET /resources/{encoded-uri}` (mirror of `codemap://recipes` / `schema` / `skill`), `GET /health` (auth-exempt liveness probe), `GET /tools` / `GET /resources` (catalogs). Pure transport — same `tool-handlers.ts` / `resource-handlers.ts` MCP uses; no engine duplication. Errors → `{"error": "..."}` with HTTP status 400 / 401 / 403 / 404 / 500. SIGINT / SIGTERM → graceful drain. Every response carries `X-Codemap-Version: `. 
**CSRF + DNS-rebinding guard:** every request (including auth-exempt `/health`) is evaluated against `Sec-Fetch-Site` / `Origin` / `Host` when present — modern browsers send `Sec-Fetch-Site` and `Origin` on cross-origin fetches (header presence varies by request type, browser, and privacy settings), so the guard rejects browser-driven cross-origin requests like a malicious local webpage `fetch`-ing `http://127.0.0.1:7878/tool/save_baseline` to mutate `.codemap.db`. `Host` mismatch on a loopback bind blocks DNS rebinding (an attacker resolving `evil.com` to `127.0.0.1` post-load). Non-browser clients (curl, fetch from Node, MCP hosts, CI scripts) typically omit these headers and pass through. Implementation: `src/cli/cmd-serve.ts` (CLI shell) + `src/application/http-server.ts` (transport). See [`architecture.md` § HTTP wiring](./architecture.md#cli-usage). +Long-running HTTP server exposing the same tool taxonomy as `codemap mcp` over `POST /tool/{name}` for non-MCP consumers (CI scripts, simple `curl`, IDE plugins that don't speak MCP). Default bind **`127.0.0.1:7878`** (loopback only — refuse `0.0.0.0` unless explicitly opted in via `--host 0.0.0.0`); optional `--token ` requires `Authorization: Bearer ` on every request. Output shape matches `codemap query --json` (NOT MCP's `{content: [...]}` wrapper — HTTP doesn't need that transport artifact); `format: "sarif"` payloads ship as `application/sarif+json`, `format: "annotations"` as `text/plain`. Routes: `POST /tool/{name}` (every MCP tool), `GET /resources/{encoded-uri}` (mirror of `codemap://recipes` / `schema` / `skill`), `GET /health` (auth-exempt liveness probe), `GET /tools` / `GET /resources` (catalogs). Pure transport — same `tool-handlers.ts` / `resource-handlers.ts` MCP uses; no engine duplication. Errors → `{"error": "..."}` with HTTP status 400 / 401 / 403 / 404 / 500. SIGINT / SIGTERM → graceful drain. Every response carries `X-Codemap-Version: `. 
**CSRF + DNS-rebinding guard:** every request (including auth-exempt `/health`) is evaluated against `Sec-Fetch-Site` / `Origin` / `Host` when present — modern browsers send `Sec-Fetch-Site` and `Origin` on cross-origin fetches (header presence varies by request type, browser, and privacy settings), so the guard rejects browser-driven cross-origin requests like a malicious local webpage `fetch`-ing `http://127.0.0.1:7878/tool/save_baseline` to mutate `.codemap/index.db`. `Host` mismatch on a loopback bind blocks DNS rebinding (an attacker resolving `evil.com` to `127.0.0.1` post-load). Non-browser clients (curl, fetch from Node, MCP hosts, CI scripts) typically omit these headers and pass through. Implementation: `src/cli/cmd-serve.ts` (CLI shell) + `src/application/http-server.ts` (transport). See [`architecture.md` § HTTP wiring](./architecture.md#cli-usage). ### SARIF @@ -476,7 +476,7 @@ A JSDoc tag controlling export visibility — `@public`, `@internal`, `@private` ### WAL -Write-Ahead Log mode. Set by `PRAGMA journal_mode = WAL` on every `openDb()`. Why `.codemap.db-wal` and `.codemap.db-shm` files exist alongside `.codemap.db`. Allows concurrent readers during writes. +Write-Ahead Log mode. Set by `PRAGMA journal_mode = WAL` on every `openDb()`. Why `.codemap/index.db-wal` and `.codemap/index.db-shm` files exist alongside `.codemap/index.db`. Allows concurrent readers during writes. ### `WITHOUT ROWID` diff --git a/docs/packaging.md b/docs/packaging.md index c6f2398..35d9c5b 100644 --- a/docs/packaging.md +++ b/docs/packaging.md @@ -11,7 +11,7 @@ How **@stainless-code/codemap** is built and published. **Doc index:** [README.m Published tarballs match **`package.json` `files`**: **`CHANGELOG.md`**, **`dist/`**, **`templates/`** (no `src/`). **`bun run pack`**, then point the consumer at **`file:…/stainless-code-codemap-*.tgz`**, or use **`file:/path/to/repo`** after build, or **`bun link`**. 
If **`better-sqlite3`** fails in the consumer, **`npm rebuild better-sqlite3`** (native addon must match that Node). -**Engines** (`package.json`): **Node** `^20.19.0 || >=22.12.0` (matches **`oxc-parser`**; **`better-sqlite3`** is prebuilt for current Node majors only). **Bun** `>=1.0.0`. **Native bindings:** `better-sqlite3`, `lightningcss`, `oxc-parser`, `oxc-resolver` (NAPI); **`tinyglobby`** and **`zod`** are JS-only. **`zod`** validates `codemap.config.*` at runtime (**`codemapUserConfigSchema`** in **`src/config.ts`**); see [architecture.md § User config](./architecture.md#user-config). +**Engines** (`package.json`): **Node** `^20.19.0 || >=22.12.0` (matches **`oxc-parser`**; **`better-sqlite3`** is prebuilt for current Node majors only). **Bun** `>=1.0.0`. **Native bindings:** `better-sqlite3`, `lightningcss`, `oxc-parser`, `oxc-resolver` (NAPI); **`tinyglobby`** and **`zod`** are JS-only. **`zod`** validates `<state-dir>/config.{ts,js,json}` at runtime (**`codemapUserConfigSchema`** in **`src/config.ts`**); see [architecture.md § User config](./architecture.md#user-config). 
## Node vs Bun diff --git a/docs/research/competitive-scan-2026-04.md b/docs/research/competitive-scan-2026-04.md index 5d338ef..01faa54 100644 --- a/docs/research/competitive-scan-2026-04.md +++ b/docs/research/competitive-scan-2026-04.md @@ -26,7 +26,7 @@ Sources: | Axis | **us** | fallow | AZidan/codemap | JordanCoin/codemap | | ----------------- | --------------------------------------- | ------------------------------------------------------ | ------------------------------------ | -------------------------------------------- | | Lang of impl | TS (Bun/Node) | Rust | Python | Go | -| Storage | SQLite (`.codemap.db`) | in-process; SARIF/JSON outputs | distributed JSON (`.codemap/*.json`) | per-project `.codemap/` JSON artifacts | +| Storage | SQLite (`.codemap/index.db`) | in-process; SARIF/JSON outputs | distributed JSON (`.codemap/*.json`) | per-project `.codemap/` JSON artifacts | | Query surface | **SQL** (full power) | CLI subcommands, `--format json` | `find`/`show`/`stats` CLI | `tree`/`--diff`/`--deps`/`context`/MCP/HTTP | | What's indexed | symbols, imports/exports, deps, CSS, … | module graph, dupe clones, complexity, boundary rules | symbols + line ranges + hash | tree + dep flow + hubs + working set | | Agent integration | rules/skills via `agents init` | MCP, LSP, VSCode ext, `--format json` w/ fix `actions` | Claude plugin, MCP planned | hooks, MCP, HTTP, "context envelope", skills | @@ -52,7 +52,7 @@ Sources: | `barrel-files` recipe (top files by export count) | `src/application/query-recipes.ts` | own derivation from JordanCoin "hubs" framing | | `files-hashes` recipe powering `validate` | `src/application/query-recipes.ts` | AZidan | | `-r` short alias for `--recipe`, cleaner `--help` | `src/cli/cmd-query.ts` | own UX polish | -| Friendlier "no `.codemap.db`" error | `src/application/index-engine.ts` | own UX polish | +| Friendlier "no `.codemap/index.db`" error | `src/application/index-engine.ts` | own UX polish | | Anti-pitch — "What Codemap 
is not" | [why-codemap.md § What Codemap is not](../why-codemap.md#what-codemap-is-not) | AZidan | | Scenario-keyed token-savings table | [why-codemap.md § Across a Typical Session](../why-codemap.md#across-a-typical-session) | AZidan | | "Grep/Read vs Codemap" capability table | [README.md § What you get](../../README.md#what-you-get) | fallow ("Linter vs Fallow") | diff --git a/docs/research/fallow.md b/docs/research/fallow.md index e722cdc..7f37b82 100644 --- a/docs/research/fallow.md +++ b/docs/research/fallow.md @@ -18,7 +18,7 @@ Adoption-candidate ship status. The tier tables in § 1 are preserved as the ori | A | A.3 | `--group-by owner\|directory\|package` | ✅ Shipped | PR [#26](https://github.com/stainless-code/codemap/pull/26) | | A | A.4 | `--summary` flag | ✅ Shipped | PR [#26](https://github.com/stainless-code/codemap/pull/26) | | B | B.5 | `codemap audit` (structural-drift) | ⚠️ Partial — v1 + v1.x shipped; verdict deferred | v1 in PR [#33](https://github.com/stainless-code/codemap/pull/33) (`--baseline ` reusing B.6 baselines). v1.x `--base ` worktree+reindex shipped in PR [#52](https://github.com/stainless-code/codemap/pull/52) (planned PR [#51](https://github.com/stainless-code/codemap/pull/51)) — closes the per-PR structural-diff loop. `verdict` / threshold config still deferred to v1.x+ — trigger: 2 consumers ship `jq`-based threshold scripts with similar shapes. Schema landed on `symbols` (not `exports`) per actual usage. | -| B | B.6 | `--save-baseline` / `--baseline` on `query` | ✅ Shipped | PR [#30](https://github.com/stainless-code/codemap/pull/30). Implemented as a `query_baselines` table inside `.codemap.db` (not parallel JSON files) — survives `--full` and SCHEMA bumps because the table is intentionally absent from `dropAll()`. | +| B | B.6 | `--save-baseline` / `--baseline` on `query` | ✅ Shipped | PR [#30](https://github.com/stainless-code/codemap/pull/30). 
Implemented as a `query_baselines` table inside `.codemap/index.db` (not parallel JSON files) — survives `--full` and SCHEMA bumps because the table is intentionally absent from `dropAll()`. | | B | B.7 | `symbols.visibility` column | ✅ Shipped | PR [#28](https://github.com/stainless-code/codemap/pull/28). Landed on `symbols` (not `exports`) — `visibility` is a property of the symbol's docstring, not its export status. | | B | B.8 | `--format sarif` + `--format annotations` | ✅ Shipped | PR [#43](https://github.com/stainless-code/codemap/pull/43). `codemap query --format sarif\|annotations` (also on MCP `query` / `query_recipe` tools as `format: "sarif"\|"annotations"`); `rule.id = codemap.` (`codemap.adhoc` for ad-hoc SQL); auto-detects `file_path` / `path` / `to_path` / `from_path`; aggregate recipes (`index-summary`, `markers-by-kind`) emit `results: []` + stderr warning. Per-recipe `sarifLevel` / `sarifMessage` / `sarifRuleId` overrides via frontmatter deferred to v1.x. | | C | C.9 | Framework plugin layer | ❌ Open | Big surface; worth a `plans/.md` before any code. | @@ -160,7 +160,7 @@ Don't trade these away for fallow parity — they're the moat. The Tier A / B ca - **Should `actions` (A.1) live in recipe definitions or be derived?** Two shapes: (a) recipe author hand-writes the `actions` template alongside the SQL — predictable, every row gets the same actions; (b) a small action-derivation layer keyed off recipe id + row shape — less code, less control. Bias toward (a) for the first pass. - **How invasive should the framework plugin layer (C.9) be?** Two extreme shapes: (i) plugins only contribute entry-point globs (`is_entry: true` annotation on `files`); (ii) plugins can contribute arbitrary `dependencies` edges (e.g. "Next.js's `next/link` href references this route"). (i) keeps the surface small and composable; (ii) catches more but explodes the contract surface and risks plugin drift. Bias toward (i) — see how far it gets us before reaching for (ii). 
-- **`codemap audit` (B.5) verdict threshold defaults.** Fallow defaults to `pass / warn / fail` with reasonable thresholds. Codemap's structural deltas don't have obvious thresholds yet ("how many new dependency edges is too many?" depends entirely on the project). First pass probably exposes raw deltas only and lets the consumer set thresholds in `codemap.config.*`. +- **`codemap audit` (B.5) verdict threshold defaults.** Fallow defaults to `pass / warn / fail` with reasonable thresholds. Codemap's structural deltas don't have obvious thresholds yet ("how many new dependency edges is too many?" depends entirely on the project). First pass probably exposes raw deltas only and lets the consumer set thresholds in `<state-dir>/config.{ts,js,json}`. - **Coverage ingestion (C.11) — column on `symbols` or separate `coverage` table?** Putting `coverage_pct` on `symbols` keeps queries simple but couples schema bumps to coverage shape changes. A `coverage` table with `(symbol_id, coverage_pct, last_updated)` is more flexible but forces every coverage query to JOIN. Probably worth prototyping both before committing. --- diff --git a/docs/why-codemap.md b/docs/why-codemap.md index 4f6400d..2add59c 100644 --- a/docs/why-codemap.md +++ b/docs/why-codemap.md @@ -28,7 +28,7 @@ What Codemap **is**: a deterministic, AST-backed SQLite index of structural fact ## The Solution -A pre-built SQLite index (`.codemap.db`) that extracts and structures code metadata at index time. Agents query it with SQL instead of scanning files. Timings, scenarios, and methodology: [benchmark.md](./benchmark.md). +A pre-built SQLite index (`.codemap/index.db`) that extracts and structures code metadata at index time. Agents query it with SQL instead of scanning files. Timings, scenarios, and methodology: [benchmark.md](./benchmark.md). 
## Bundled CLI recipes @@ -113,7 +113,7 @@ Other "AI-friendly code intelligence" tools occupy different points in the desig | Output shape | Result rows from a SQL query | SARIF / JSON findings, fix `actions` | Markdown / token-budgeted text | LSP messages over stdio | | Decides relevance | The agent (via SQL) | The tool (via static rules) | The tool (PageRank-style) | The editor | | Scope | Structural facts (definitions, edges) | Static analysis verdicts | Whole-repo summary | One file at a time | -| Storage | Local SQLite (`.codemap.db`) | In-process; emits findings | In-prompt context | In-process index | +| Storage | Local SQLite (`.codemap/index.db`) | In-process; emits findings | In-prompt context | In-process index | | Token cost | Per-query; tiny result rows | Per-run; finding lists | Upfront; bounded by token budget | None (editor-side) | | Best for | Targeted "where / what / who" lookups | "Did this PR introduce dead code / dupes / complexity drift?" | First-touch context priming | Editor-time refactoring | | Worst for | Whole-file semantic understanding | Granular structural lookups (different shape) | Targeted line-range reads | Cross-cutting graph queries | diff --git a/src/application/state-config.ts b/src/application/state-config.ts index b4fc204..71b27e2 100644 --- a/src/application/state-config.ts +++ b/src/application/state-config.ts @@ -32,8 +32,8 @@ export function ensureStateConfig(stateDir: string): EnsureStateConfigResult { const path = join(stateDir, basename); if (!existsSync(path)) continue; + // TS/JS validation happens at load time (loadUserConfig); never rewrite. if (basename !== "config.json") { - // TS/JS — validation happens at load time; nothing to write. 
return { found: basename, written: false, warnings: [] }; } @@ -50,9 +50,8 @@ export function ensureStateConfig(stateDir: string): EnsureStateConfigResult { return { found: basename, written: false, warnings }; } - // Passthrough validation lets us spot+prune unknown keys; strict - // schema rejection only happens at downstream `parseCodemapUserConfig` - // (config.ts) which we leave authoritative for runtime errors. + // Passthrough so we can prune unknown keys; strict rejection lives + // in `parseCodemapUserConfig` (config.ts), authoritative for runtime. const result = codemapUserConfigSchema.passthrough().safeParse(parsed); if (!result.success) { for (const issue of result.error.issues) { From c36ad422409f18ca7827bcc7558d28b69b925208 Mon Sep 17 00:00:00 2001 From: Sutu Sebastian Date: Sun, 3 May 2026 19:27:47 +0300 Subject: [PATCH 7/7] fix(state-dir): address CodeRabbit findings (1 inline + 4 outside-diff + 2 nitpicks) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real bug: - main.ts: runListBaselinesCmd was being called without stateDir — `codemap query --baselines` would fall back to the default DB instead of the caller-selected one. Fixed. Stale doc refs: - audit-worktree.ts: 3 JSDoc strings still said `.codemap.db` after Tracer 1's CACHE_ENTRY_DB_REL move; bumped to `.codemap/index.db`. - bootstrap.ts: printCliUsage() had two `.codemap.db` refs + missing --state-dir/CODEMAP_STATE_DIR docs in Environment+Options. Fixed. - config.ts: Zod databasePath.describe() said default was `/.codemap.db`; corrected to `/index.db`. - .agents/skills + templates skills: 2 hard-coded `.codemap/` refs reworded to `/` with `(default .codemap/)` callout (state-dir is configurable). Nitpicks applied: - state-dir.test.ts: dropped redundant `require('node:fs')` for mkdirSync (already imported). - bootstrap-codemap.ts: consolidated two single-import lines from state-dir into one statement. 
Nitpicks declined: - changeset code-fence missing 'text' lang — purely cosmetic. - cmd-index.ts JSDoc on runIndexCmd — 'all public APIs need JSDoc' is a fabricated rule (sibling cmds inconsistent; same hallucination rejected on PR #50). --- .agents/skills/codemap/SKILL.md | 4 ++-- src/application/audit-worktree.ts | 6 +++--- src/application/state-dir.test.ts | 2 +- src/cli/bootstrap-codemap.ts | 6 ++++-- src/cli/bootstrap.ts | 7 ++++--- src/cli/main.ts | 7 ++++++- src/config.ts | 2 +- templates/agents/skills/codemap/SKILL.md | 4 ++-- 8 files changed, 23 insertions(+), 15 deletions(-) diff --git a/.agents/skills/codemap/SKILL.md b/.agents/skills/codemap/SKILL.md index 44aa604..b78ab04 100644 --- a/.agents/skills/codemap/SKILL.md +++ b/.agents/skills/codemap/SKILL.md @@ -41,11 +41,11 @@ Replace placeholders (`'...'`) with your module path, file glob, or symbol name. - **`--summary`** — counts only. With **`--json`**: **`{"count": N}`**. With **`--group-by`**: **`{"group_by": "", "groups": [{key, count}]}`**. - **`--changed-since `** — post-filter rows by **`path`** / **`file_path`** / **`from_path`** / **`to_path`** / **`resolved_path`** against **`git diff --name-only ...HEAD ∪ git status --porcelain`**. Rows with no recognised path column pass through. - **`--group-by owner|directory|package`** — partition into buckets and emit **`{"group_by", "groups": [{key, count, rows}]}`**. **`owner`** reads CODEOWNERS (last matching rule wins); **`directory`** is the first path segment; **`package`** uses **`package.json`** **`workspaces`** or **`pnpm-workspace.yaml`**. **Mutually exclusive with `--save-baseline` / `--baseline`.** -- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap/index.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. 
+- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `/index.db` (default `.codemap/index.db`; no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. - **`--baseline[=]`** — diff the current result against the saved baseline. Output `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (with `--json`) or a two-section terminal dump. Identity = per-row multiset equality (canonical `JSON.stringify` keyed frequency map; duplicates preserved). Pair with `--summary` for `{baseline:{...}, current_row_count, added: N, removed: N}`. **Mutually exclusive with `--group-by`.** - **`--baselines`** lists saved baselines (no `rows_json` payload); **`--drop-baseline `** deletes one. Both reject every other flag — they're list-only / drop-only operations. - **Per-row recipe `actions`** — recipes that define an **`actions: [{type, auto_fixable?, description?}]`** template append it to every row in **`--json`** output (recipe-only; ad-hoc SQL never carries actions). Under `--baseline`, actions attach to the **`added`** rows only (the rows the agent should act on). Inspect via **`--recipes-json`**. -- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. 
`.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. +- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/recipes/`** (default `/.codemap/recipes/`) to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. **Audit (`bun src/index.ts audit`)** — separate top-level command for structural-drift verdicts. Composes B.6 baselines into a per-delta `{head, deltas}` envelope; v1 ships `files` / `dependencies` / `deprecated`. 
Two snapshot-source shapes: diff --git a/src/application/audit-worktree.ts b/src/application/audit-worktree.ts index a800bb8..d0a01f1 100644 --- a/src/application/audit-worktree.ts +++ b/src/application/audit-worktree.ts @@ -50,7 +50,7 @@ export interface WorktreeCacheOpts { export interface PopulatedCacheEntry { /** Absolute path to the cached worktree dir. */ worktreePath: string; - /** Absolute path to the `.codemap.db` inside that worktree. */ + /** Absolute path to the cached `.codemap/index.db` inside that worktree. */ dbPath: string; /** Resolved sha this entry was created against. */ sha: string; @@ -123,7 +123,7 @@ export function lookupCacheEntry( export interface PopulateOpts extends WorktreeCacheOpts { sha: string; - /** Reindex callback — receives the worktree path, must build `.codemap.db` inside it. */ + /** Reindex callback — receives the worktree path, must build `.codemap/index.db` inside it. */ reindex: (worktreePath: string) => Promise; } @@ -131,7 +131,7 @@ export interface PopulateOpts extends WorktreeCacheOpts { * Populate a cache entry atomically (D11): * 1. mkdir per-pid temp dir under the cache root * 2. `git worktree add ` - * 3. caller's `reindex()` builds `.codemap.db` + * 3. caller's `reindex()` builds `.codemap/index.db` * 4. `rename(, )` — POSIX-atomic; if the final slot already exists * (raced with a concurrent populate), discard the temp and use the winner. * diff --git a/src/application/state-dir.test.ts b/src/application/state-dir.test.ts index caa0e72..5ca42cb 100644 --- a/src/application/state-dir.test.ts +++ b/src/application/state-dir.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, it } from "bun:test"; import { existsSync, + mkdirSync, mkdtempSync, readFileSync, rmSync, @@ -117,7 +118,6 @@ describe("ensureStateGitignore — self-healing reconciler (D11)", () => { // Older shape — pre-audit-cache: only the DB lines. 
const olderBody = "# old codemap-managed file\nindex.db\nindex.db-shm\nindex.db-wal\n"; - const { mkdirSync } = require("node:fs") as typeof import("node:fs"); mkdirSync(stateDir, { recursive: true }); writeFileSync(join(stateDir, ".gitignore"), olderBody, "utf-8"); const r = ensureStateGitignore(stateDir); diff --git a/src/cli/bootstrap-codemap.ts b/src/cli/bootstrap-codemap.ts index 3ba7069..66c49ca 100644 --- a/src/cli/bootstrap-codemap.ts +++ b/src/cli/bootstrap-codemap.ts @@ -1,6 +1,8 @@ import { ensureStateConfig } from "../application/state-config"; -import { resolveStateDir } from "../application/state-dir"; -import { ensureStateGitignore } from "../application/state-dir"; +import { + ensureStateGitignore, + resolveStateDir, +} from "../application/state-dir"; import { loadUserConfig, resolveCodemapConfig } from "../config"; import { configureResolver } from "../resolver"; import { diff --git a/src/cli/bootstrap.ts b/src/cli/bootstrap.ts index 11dfd87..664d5a9 100644 --- a/src/cli/bootstrap.ts +++ b/src/cli/bootstrap.ts @@ -8,7 +8,7 @@ import { CODEMAP_VERSION } from "../version"; export function printCliUsage(): void { console.log(`Usage: codemap [options] [command] -Index (default): update .codemap.db for the project root (\`--root\` or cwd). +Index (default): update .codemap/index.db for the project root (\`--root\` or cwd). 
codemap [--root DIR] [--config FILE] [--full] codemap [--root DIR] [--config FILE] --files @@ -31,7 +31,7 @@ MCP server (Model Context Protocol — for agent hosts): HTTP server (for non-MCP consumers — CI scripts, curl, IDE plugins): codemap serve [--host 127.0.0.1] [--port 7878] [--token ] -Watch mode (long-running; keeps .codemap.db fresh on file edits): +Watch mode (long-running; keeps .codemap/index.db fresh on file edits): codemap watch [--debounce 250] [--quiet] codemap mcp --watch · codemap serve --watch # killer combo @@ -46,10 +46,11 @@ Other: codemap version codemap --version, -V -Environment: CODEMAP_ROOT (same as --root) +Environment: CODEMAP_ROOT (same as --root), CODEMAP_STATE_DIR (same as --state-dir) Options: --full Full rebuild + --state-dir DIR State directory for codemap-managed files (default .codemap/ under root) --performance Print per-phase timing breakdown + top-10 slowest files (full rebuild only) --help, -h Show this help diff --git a/src/cli/main.ts b/src/cli/main.ts index 2c0beb1..8730ea0 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -310,7 +310,12 @@ Copies bundled agent templates into .agents/ under the project root. return; } if (parsed.kind === "listBaselines") { - await runListBaselinesCmd({ root, configFile, json: parsed.json }); + await runListBaselinesCmd({ + root, + configFile, + stateDir, + json: parsed.json, + }); return; } if (parsed.kind === "dropBaseline") { diff --git a/src/config.ts b/src/config.ts index c085494..5777ec9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -64,7 +64,7 @@ export const codemapUserConfigSchema = z .string() .optional() .describe( - "SQLite database path, relative to root or absolute. Default: `/.codemap.db`.", + "SQLite database path, relative to root or absolute. Default: `/index.db` (i.e. 
`.codemap/index.db`).", ), include: z .array(z.string()) diff --git a/templates/agents/skills/codemap/SKILL.md b/templates/agents/skills/codemap/SKILL.md index 5b58885..265c7e9 100644 --- a/templates/agents/skills/codemap/SKILL.md +++ b/templates/agents/skills/codemap/SKILL.md @@ -41,11 +41,11 @@ Replace placeholders (`'...'`) with your module path, file glob, or symbol name. - **`--summary`** — counts only. With **`--json`**: **`{"count": N}`**. With **`--group-by`**: **`{"group_by": "", "groups": [{key, count}]}`**. - **`--changed-since `** — post-filter rows by **`path`** / **`file_path`** / **`from_path`** / **`to_path`** / **`resolved_path`** against **`git diff --name-only ...HEAD ∪ git status --porcelain`**. Rows with no recognised path column pass through. - **`--group-by owner|directory|package`** — partition into buckets and emit **`{"group_by", "groups": [{key, count, rows}]}`**. **`owner`** reads CODEOWNERS (last matching rule wins); **`directory`** is the first path segment; **`package`** uses **`package.json`** **`workspaces`** or **`pnpm-workspace.yaml`**. **Mutually exclusive with `--save-baseline` / `--baseline`.** -- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `.codemap/index.db` (no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. +- **`--save-baseline[=]`** — snapshot the result rows to the **`query_baselines`** table inside `/index.db` (default `.codemap/index.db`; no parallel JSON files; survives `--full` and SCHEMA bumps). Name defaults to the `--recipe` id; ad-hoc SQL needs an explicit `=`. Re-saving with the same name overwrites in place. - **`--baseline[=]`** — diff the current result against the saved baseline. Output `{baseline:{...}, current_row_count, added: [...], removed: [...]}` (with `--json`) or a two-section terminal dump. 
Identity = per-row multiset equality (canonical `JSON.stringify` keyed frequency map; duplicates preserved). Pair with `--summary` for `{baseline:{...}, current_row_count, added: N, removed: N}`. **Mutually exclusive with `--group-by`.** - **`--baselines`** lists saved baselines (no `rows_json` payload); **`--drop-baseline `** deletes one. Both reject every other flag — they're list-only / drop-only operations. - **Per-row recipe `actions`** — recipes that define an **`actions: [{type, auto_fixable?, description?}]`** template append it to every row in **`--json`** output (recipe-only; ad-hoc SQL never carries actions). Under `--baseline`, actions attach to the **`added`** rows only (the rows the agent should act on). Inspect via **`--recipes-json`**. -- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/.codemap/recipes/`** to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. 
+- **Project-local recipes** — drop **`.sql`** (and optional **`.md`** for description body + actions) into **`/recipes/`** (default `/.codemap/recipes/`) to make team-internal SQL a first-class CLI verb. `--recipes-json` and the `codemap://recipes` MCP resource list project recipes alongside bundled ones with **`source: "bundled" | "project"`** discriminating them. Project recipes win on id collision; entries that override a bundled id carry **`shadows: true`** so agents reading the catalog at session start know when a recipe behaves differently from the documented bundled version. `.md` supports YAML frontmatter for the per-row action template — **block-list shape only** (loader's hand-rolled parser; no inline-flow `[{...}]`): `---\nactions:\n - type: my-verb\n auto_fixable: false\n description: "..."\n---`. Validation: SQL is rejected at load time if it starts with DML/DDL (DELETE/DROP/UPDATE/etc.); the runtime `PRAGMA query_only=1` is the parser-proof backstop. `.codemap/index.db` is gitignored; **`.codemap/recipes/` is NOT** — recipes are git-tracked source code authored for human review. **Audit (`codemap audit`)** — separate top-level command for structural-drift verdicts. Composes B.6 baselines into a per-delta `{head, deltas}` envelope; v1 ships `files` / `dependencies` / `deprecated`. Two snapshot-source shapes: