Skip to content

Commit 5e8c41b

Browse files
feat: add TTL-based registry pruning for idle entries
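Track lastAccessedAt on registry entries (updated on build and MCP query); re-registering a repo now preserves its original addedAt. pruneRegistry now removes entries not accessed within a configurable TTL (default 30 days) in addition to entries whose directories are missing. CLI `registry prune --ttl <days>` exposes the TTL parameter. This commit also changes the default embedding model from `minilm` to `jina-code` (src/cli.js, src/config.js, src/embedder.js).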
1 parent e1222df commit 5e8c41b

5 files changed

Lines changed: 169 additions & 18 deletions

File tree

src/cli.js

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,16 @@ registry
261261

262262
registry
263263
.command('prune')
264-
.description('Remove registry entries whose directories no longer exist')
265-
.action(() => {
266-
const pruned = pruneRegistry();
264+
.description('Remove stale registry entries (missing directories or idle beyond TTL)')
265+
.option('--ttl <days>', 'Days of inactivity before pruning (default: 30)', '30')
266+
.action((opts) => {
267+
const pruned = pruneRegistry(undefined, parseInt(opts.ttl, 10));
267268
if (pruned.length === 0) {
268269
console.log('No stale entries found.');
269270
} else {
270271
for (const entry of pruned) {
271-
console.log(`Pruned "${entry.name}" (${entry.path})`);
272+
const tag = entry.reason === 'expired' ? 'expired' : 'missing';
273+
console.log(`Pruned "${entry.name}" (${entry.path}) [${tag}]`);
272274
}
273275
console.log(`\nRemoved ${pruned.length} stale ${pruned.length === 1 ? 'entry' : 'entries'}.`);
274276
}
@@ -282,7 +284,7 @@ program
282284
.action(() => {
283285
console.log('\nAvailable embedding models:\n');
284286
for (const [key, config] of Object.entries(MODELS)) {
285-
const def = key === 'minilm' ? ' (default)' : '';
287+
const def = key === 'jina-code' ? ' (default)' : '';
286288
console.log(` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${config.desc}${def}`);
287289
}
288290
console.log('\nUsage: codegraph embed --model <name>');
@@ -296,8 +298,8 @@ program
296298
)
297299
.option(
298300
'-m, --model <name>',
299-
'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
300-
'minilm',
301+
'Embedding model: minilm, jina-small, jina-base, jina-code (default), nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
302+
'jina-code',
301303
)
302304
.action(async (dir, opts) => {
303305
const root = path.resolve(dir || '.');

src/config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export const DEFAULTS = {
1919
defaultDepth: 3,
2020
defaultLimit: 20,
2121
},
22-
embeddings: { model: 'minilm', llmProvider: null },
22+
embeddings: { model: 'jina-code', llmProvider: null },
2323
llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
2424
search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
2525
ci: { failOnCycles: false, impactThreshold: null },

src/embedder.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ export const MODELS = {
5555
},
5656
};
5757

58-
export const DEFAULT_MODEL = 'minilm';
58+
export const DEFAULT_MODEL = 'jina-code';
5959
const BATCH_SIZE_MAP = {
6060
minilm: 32,
6161
'jina-small': 16,

src/registry.js

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import { debug, warn } from './logger.js';
55

66
export const REGISTRY_PATH = path.join(os.homedir(), '.codegraph', 'registry.json');
77

8+
/** Default TTL: entries not accessed within 30 days are pruned. */
9+
export const DEFAULT_TTL_DAYS = 30;
10+
811
/**
912
* Load the registry from disk.
1013
* Returns `{ repos: {} }` on missing or corrupt file.
@@ -69,10 +72,12 @@ export function registerRepo(rootDir, name, registryPath = REGISTRY_PATH) {
6972
}
7073
}
7174

75+
const now = new Date().toISOString();
7276
registry.repos[repoName] = {
7377
path: absRoot,
7478
dbPath: path.join(absRoot, '.codegraph', 'graph.db'),
75-
addedAt: new Date().toISOString(),
79+
addedAt: registry.repos[repoName]?.addedAt || now,
80+
lastAccessedAt: now,
7681
};
7782

7883
saveRegistry(registry, registryPath);
@@ -102,6 +107,7 @@ export function listRepos(registryPath = REGISTRY_PATH) {
102107
path: entry.path,
103108
dbPath: entry.dbPath,
104109
addedAt: entry.addedAt,
110+
lastAccessedAt: entry.lastAccessedAt || entry.addedAt,
105111
}))
106112
.sort((a, b) => a.name.localeCompare(b.name));
107113
}
@@ -118,21 +124,31 @@ export function resolveRepoDbPath(name, registryPath = REGISTRY_PATH) {
118124
warn(`Registry: database missing for "${name}" at ${entry.dbPath}`);
119125
return undefined;
120126
}
127+
// Touch lastAccessedAt on successful resolution
128+
entry.lastAccessedAt = new Date().toISOString();
129+
saveRegistry(registry, registryPath);
121130
return entry.dbPath;
122131
}
123132

124133
/**
125-
* Remove registry entries whose repo directory no longer exists on disk.
126-
* Only checks the repo directory (not the DB file — a missing DB is normal pre-build state).
127-
* Returns an array of `{ name, path }` for each pruned entry.
134+
* Remove registry entries whose repo directory no longer exists on disk,
135+
* or whose last access (`lastAccessedAt`, falling back to `addedAt`) is more than `ttlDays` days ago.
136+
* Returns an array with one `{ name, path, reason }` object per pruned entry; `reason` is `'missing'` or `'expired'`.
128137
*/
129-
export function pruneRegistry(registryPath = REGISTRY_PATH) {
138+
export function pruneRegistry(registryPath = REGISTRY_PATH, ttlDays = DEFAULT_TTL_DAYS) {
130139
const registry = loadRegistry(registryPath);
131140
const pruned = [];
141+
const cutoff = Date.now() - ttlDays * 24 * 60 * 60 * 1000;
132142

133143
for (const [name, entry] of Object.entries(registry.repos)) {
134144
if (!fs.existsSync(entry.path)) {
135-
pruned.push({ name, path: entry.path });
145+
pruned.push({ name, path: entry.path, reason: 'missing' });
146+
delete registry.repos[name];
147+
continue;
148+
}
149+
const lastAccess = Date.parse(entry.lastAccessedAt || entry.addedAt);
150+
if (lastAccess < cutoff) {
151+
pruned.push({ name, path: entry.path, reason: 'expired' });
136152
delete registry.repos[name];
137153
}
138154
}

tests/unit/registry.test.js

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import os from 'node:os';
33
import path from 'node:path';
44
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
55
import {
6+
DEFAULT_TTL_DAYS,
67
listRepos,
78
loadRegistry,
89
pruneRegistry,
@@ -142,12 +143,24 @@ describe('registerRepo', () => {
142143
expect(Object.keys(reg.repos)).toHaveLength(1);
143144
});
144145

145-
it('sets addedAt as ISO string', () => {
146+
it('sets addedAt and lastAccessedAt as ISO strings', () => {
146147
const dir = path.join(tmpDir, 'proj');
147148
fs.mkdirSync(dir, { recursive: true });
148149

149150
const { entry } = registerRepo(dir, 'proj', registryPath);
150151
expect(entry.addedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
152+
expect(entry.lastAccessedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
153+
});
154+
155+
it('preserves original addedAt on re-registration', () => {
156+
const dir = path.join(tmpDir, 'proj');
157+
fs.mkdirSync(dir, { recursive: true });
158+
159+
const { entry: first } = registerRepo(dir, 'proj', registryPath);
160+
const originalAddedAt = first.addedAt;
161+
const { entry: second } = registerRepo(dir, 'proj', registryPath);
162+
163+
expect(second.addedAt).toBe(originalAddedAt);
151164
});
152165

153166
it('auto-suffixes when basename collides with different path', () => {
@@ -239,7 +252,7 @@ describe('listRepos', () => {
239252
expect(repos).toEqual([]);
240253
});
241254

242-
it('returns repos sorted by name', () => {
255+
it('returns repos sorted by name with lastAccessedAt', () => {
243256
const dirA = path.join(tmpDir, 'aaa');
244257
const dirZ = path.join(tmpDir, 'zzz');
245258
const dirM = path.join(tmpDir, 'mmm');
@@ -253,6 +266,9 @@ describe('listRepos', () => {
253266

254267
const repos = listRepos(registryPath);
255268
expect(repos.map((r) => r.name)).toEqual(['aaa', 'mmm', 'zzz']);
269+
for (const r of repos) {
270+
expect(r.lastAccessedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
271+
}
256272
});
257273
});
258274

@@ -289,7 +305,7 @@ describe('resolveRepoDbPath', () => {
289305
// ─── pruneRegistry ─────────────────────────────────────────────────
290306

291307
describe('pruneRegistry', () => {
292-
it('removes entries whose directories no longer exist', () => {
308+
it('removes entries whose directories no longer exist (reason: missing)', () => {
293309
const dir1 = path.join(tmpDir, 'exists');
294310
const dir2 = path.join(tmpDir, 'gone');
295311
fs.mkdirSync(dir1, { recursive: true });
@@ -305,12 +321,96 @@ describe('pruneRegistry', () => {
305321
expect(pruned).toHaveLength(1);
306322
expect(pruned[0].name).toBe('gone');
307323
expect(pruned[0].path).toBe(dir2);
324+
expect(pruned[0].reason).toBe('missing');
308325

309326
const reg = loadRegistry(registryPath);
310327
expect(reg.repos.exists).toBeDefined();
311328
expect(reg.repos.gone).toBeUndefined();
312329
});
313330

331+
it('removes entries idle beyond TTL (reason: expired)', () => {
332+
const dir = path.join(tmpDir, 'old-project');
333+
fs.mkdirSync(dir, { recursive: true });
334+
335+
// Manually write a registry entry with an old lastAccessedAt
336+
const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString(); // 60 days ago
337+
const registry = {
338+
repos: {
339+
'old-project': {
340+
path: dir,
341+
dbPath: path.join(dir, '.codegraph', 'graph.db'),
342+
addedAt: oldDate,
343+
lastAccessedAt: oldDate,
344+
},
345+
},
346+
};
347+
saveRegistry(registry, registryPath);
348+
349+
const pruned = pruneRegistry(registryPath, 30);
350+
expect(pruned).toHaveLength(1);
351+
expect(pruned[0].name).toBe('old-project');
352+
expect(pruned[0].reason).toBe('expired');
353+
});
354+
355+
it('keeps entries within TTL window', () => {
356+
const dir = path.join(tmpDir, 'fresh');
357+
fs.mkdirSync(dir, { recursive: true });
358+
registerRepo(dir, 'fresh', registryPath);
359+
360+
const pruned = pruneRegistry(registryPath, 30);
361+
expect(pruned).toEqual([]);
362+
363+
const reg = loadRegistry(registryPath);
364+
expect(reg.repos.fresh).toBeDefined();
365+
});
366+
367+
it('falls back to addedAt when lastAccessedAt is missing', () => {
368+
const dir = path.join(tmpDir, 'legacy');
369+
fs.mkdirSync(dir, { recursive: true });
370+
371+
const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString();
372+
const registry = {
373+
repos: {
374+
legacy: {
375+
path: dir,
376+
dbPath: path.join(dir, '.codegraph', 'graph.db'),
377+
addedAt: oldDate,
378+
},
379+
},
380+
};
381+
saveRegistry(registry, registryPath);
382+
383+
const pruned = pruneRegistry(registryPath, 30);
384+
expect(pruned).toHaveLength(1);
385+
expect(pruned[0].reason).toBe('expired');
386+
});
387+
388+
it('respects custom TTL', () => {
389+
const dir = path.join(tmpDir, 'project');
390+
fs.mkdirSync(dir, { recursive: true });
391+
392+
// 10 days ago
393+
const recentDate = new Date(Date.now() - 10 * 24 * 60 * 60 * 1000).toISOString();
394+
const registry = {
395+
repos: {
396+
project: {
397+
path: dir,
398+
dbPath: path.join(dir, '.codegraph', 'graph.db'),
399+
addedAt: recentDate,
400+
lastAccessedAt: recentDate,
401+
},
402+
},
403+
};
404+
saveRegistry(registry, registryPath);
405+
406+
// 30-day TTL: should keep
407+
expect(pruneRegistry(registryPath, 30)).toEqual([]);
408+
// 7-day TTL: should prune
409+
const pruned = pruneRegistry(registryPath, 7);
410+
expect(pruned).toHaveLength(1);
411+
expect(pruned[0].reason).toBe('expired');
412+
});
413+
314414
it('returns empty array when nothing to prune', () => {
315415
const dir = path.join(tmpDir, 'healthy');
316416
fs.mkdirSync(dir, { recursive: true });
@@ -336,3 +436,36 @@ describe('pruneRegistry', () => {
336436
expect(pruned).toEqual([]);
337437
});
338438
});
439+
440+
// ─── DEFAULT_TTL_DAYS ──────────────────────────────────────────────
441+
442+
describe('DEFAULT_TTL_DAYS', () => {
443+
it('is 30 days', () => {
444+
expect(DEFAULT_TTL_DAYS).toBe(30);
445+
});
446+
});
447+
448+
// ─── resolveRepoDbPath lastAccessedAt ──────────────────────────────
449+
450+
describe('resolveRepoDbPath updates lastAccessedAt', () => {
451+
it('touches lastAccessedAt on successful resolve', () => {
452+
const dir = path.join(tmpDir, 'proj');
453+
const dbDir = path.join(dir, '.codegraph');
454+
const dbFile = path.join(dbDir, 'graph.db');
455+
fs.mkdirSync(dbDir, { recursive: true });
456+
fs.writeFileSync(dbFile, '');
457+
458+
registerRepo(dir, 'proj', registryPath);
459+
460+
// Manually backdate lastAccessedAt
461+
const reg = loadRegistry(registryPath);
462+
reg.repos.proj.lastAccessedAt = '2025-01-01T00:00:00.000Z';
463+
saveRegistry(reg, registryPath);
464+
465+
resolveRepoDbPath('proj', registryPath);
466+
467+
const updated = loadRegistry(registryPath);
468+
expect(updated.repos.proj.lastAccessedAt).not.toBe('2025-01-01T00:00:00.000Z');
469+
expect(new Date(updated.repos.proj.lastAccessedAt).getFullYear()).toBeGreaterThanOrEqual(2026);
470+
});
471+
});

0 commit comments

Comments
 (0)