Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/adversarial_tests.zig
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ test "adversarial: sparse index finds query spanning word boundaries" {
// ════════════════════════════════════════════════════════════════════════════

test "adversarial: searchContent finds match even when sparse and trigram disagree" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

// Index multiple files containing the same query string
Expand Down Expand Up @@ -121,7 +121,7 @@ test "adversarial: searchContent finds match even when sparse and trigram disagr
}

test "adversarial: searchContent finds all matches across many files" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

// 10 files, all containing the search term in different contexts
Expand Down Expand Up @@ -431,7 +431,7 @@ test "adversarial: regexMatch with 50 alternation branches does not crash" {
// ════════════════════════════════════════════════════════════════════════════

test "adversarial: rebuildTrigrams populates sparse_ngram_index" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

// Index files with skip_trigram=true (simulates cache-hit startup)
Expand All @@ -456,7 +456,7 @@ test "adversarial: rebuildTrigrams populates sparse_ngram_index" {
// ════════════════════════════════════════════════════════════════════════════

test "adversarial: Explorer searchContent finds query embedded in larger code" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

try exp.indexFile("server.go", "package main\n\nfunc handleHTTPRequest(w http.ResponseWriter, r *http.Request) {\n\tlog.Println(\"handling request\")\n}\n");
Expand All @@ -475,7 +475,7 @@ test "adversarial: Explorer searchContent finds query embedded in larger code" {
}

test "adversarial: Explorer searchContent case-insensitive substring" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

try exp.indexFile("readme.md", "# Getting Started\nThis project uses DatabaseManager to handle connections.");
Expand All @@ -495,7 +495,7 @@ test "adversarial: Explorer searchContent case-insensitive substring" {
}

test "adversarial: Explorer searchContentRegex with {n,m} finds correct results" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

try exp.indexFile("data.txt", "aaa bbb abbbbc ccc");
Expand Down Expand Up @@ -693,7 +693,7 @@ test "adversarial: setFrequencyTable changes pairWeight output" {
// ════════════════════════════════════════════════════════════════════════════

test "adversarial: indexed search is sound — never misses a brute-force match" {
var exp = Explorer.init(testing.allocator);
var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer exp.deinit();

const files = [_]struct { name: []const u8, content: []const u8 }{
Expand Down
4 changes: 2 additions & 2 deletions src/bench.zig
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub fn main(init: std.process.Init.Minimal) !void {
var store = Store.init(allocator);
defer store.deinit();

var explorer = Explorer.init(allocator);
var explorer = Explorer.init(allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer explorer.deinit();

var agents = AgentRegistry.init(allocator);
Expand All @@ -82,7 +82,7 @@ pub fn main(init: std.process.Init.Minimal) !void {

try watcher.initialScan(io, &store, &explorer, tmp_root, allocator, false);

var bench_ctx = mcp.BenchContext.init(allocator, tmp_root);
var bench_ctx = mcp.BenchContext.init(allocator, tmp_root, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer bench_ctx.deinit();

var telem_off = telemetry.Telemetry{ .enabled = false };
Expand Down
2 changes: 1 addition & 1 deletion src/benchmark.zig
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ pub fn main(init: std.process.Init.Minimal) !void {
// Index
var store = Store.init(alloc);
defer store.deinit();
var explorer = Explorer.init(alloc);
var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer explorer.deinit();

var t0 = try cio.Timer.start();
Expand Down
8 changes: 4 additions & 4 deletions src/config.zig
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ const std = @import("std");
pub const Config = struct {
/// Cap per-file version history in the Store. Default 100.
max_versions: usize = 100,
/// Cap on files kept in the Explorer's in-memory content cache. Default 1000.
max_cached: u32 = 1000,
/// Cap on files kept in the Explorer's in-memory content cache. Default 16384.
max_cached: u32 = 16384,
/// When true, append one JSON line per searchContent invocation to
/// <data_dir>/rerank-traces.jsonl. v0 logger for offline rerank-tuning
/// experiments. Off by default — opt in via .codedbrc.
Expand Down Expand Up @@ -110,13 +110,13 @@ const testing = std.testing;
test "config: defaults" {
const cfg = Config.default;
try testing.expectEqual(@as(usize, 100), cfg.max_versions);
try testing.expectEqual(@as(u32, 1000), cfg.max_cached);
try testing.expectEqual(@as(u32, 16384), cfg.max_cached);
}

test "config: parse single key" {
const cfg = try Config.parse("max_versions = 42\n");
try testing.expectEqual(@as(usize, 42), cfg.max_versions);
try testing.expectEqual(@as(u32, 1000), cfg.max_cached);
try testing.expectEqual(@as(u32, 16384), cfg.max_cached);
}

test "config: parse both keys with comments and whitespace" {
Expand Down
12 changes: 5 additions & 7 deletions src/explore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -513,24 +513,22 @@ pub const Explorer = struct {
mu: cio.RwLock = .{},
root_dir: ?std.Io.Dir = null,
io: ?std.Io = null,
/// Max files kept in the in-memory content cache. Configurable via
/// .codedbrc (#102). Beyond this threshold, readContentForSearch falls
/// back to disk reads.
content_cache_limit: u32 = 1000,
/// When non-null, append one JSON line per searchContent invocation
/// to this path (v0 rerank-trace experiment). Borrowed; caller owns
/// the slice for the Explorer's lifetime.
rerank_trace_path: ?[]const u8 = null,

pub const DEFAULT_CONTENT_CACHE_CAPACITY: u32 = 16384;

pub fn setRoot(self: *Explorer, io: std.Io, root_path: []const u8) void {
self.io = io;
self.root_dir = std.Io.Dir.cwd().openDir(io, root_path, .{}) catch null;
}
pub fn init(allocator: std.mem.Allocator) Explorer {
pub fn init(allocator: std.mem.Allocator, content_cache_capacity: u32) Explorer {
return .{
.outlines = std.StringHashMap(FileOutline).init(allocator),
.dep_graph = DependencyGraph.init(allocator),
.contents = ContentCache.init(allocator, 16384),
.contents = ContentCache.init(allocator, content_cache_capacity),
.symbol_index = std.StringHashMap(std.ArrayList(SymbolLocation)).init(allocator),
.word_index = WordIndex.init(allocator),
.trigram_index = .{ .heap = TrigramIndex.init(allocator) },
Expand Down Expand Up @@ -1062,7 +1060,7 @@ pub const Explorer = struct {
}

pub fn parseContentForIndexing(allocator: std.mem.Allocator, path: []const u8, content: []const u8) !ParsedFile {
var parser = Explorer.init(allocator);
var parser = Explorer.init(allocator, DEFAULT_CONTENT_CACHE_CAPACITY);
defer parser.deinit();
var parsed_outline = try parseOutlineWithParser(&parser, path, content);
defer parsed_outline.deinit();
Expand Down
1 change: 1 addition & 0 deletions src/hot_cache.zig
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub const ContentCache = struct {

/// capacity must be >= 1. Panics if the allocator cannot provide the slot array.
pub fn init(allocator: std.mem.Allocator, capacity: u32) ContentCache {
std.debug.assert(capacity >= 1);
const slots = allocator.alloc(Slot, capacity) catch
std.debug.panic("ContentCache.init: OOM allocating {d} slots", .{capacity});
@memset(slots, empty_slot);
Expand Down
5 changes: 2 additions & 3 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,7 @@ fn mainImpl() !void {
std.log.warn("could not open data log at {s}: {}", .{ data_log_path, err });
};

var explorer = Explorer.init(allocator);
explorer.content_cache_limit = cfg.max_cached;
var explorer = Explorer.init(allocator, cfg.max_cached);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid building trigram indexes from a capped cache

When a user sets `max_cached` in `.codedbrc` below the repo's file count, this line now bounds `explorer.contents`. However, the cold non-search path still builds and persists trigrams only from that cache (main.zig:351-361), and `Explorer.rebuildTrigrams` also iterates only `self.contents`. As a result, the persisted or rebuilt trigram index can cover only the cached subset rather than every scanned outline. That degrades later searches to slow skip/full scans and can omit matches once earlier cached candidates fill `max_results`. To fix this, build trigrams from disk, or avoid the cache-only path when `contents.count() != outlines.count()`.

Useful? React with 👍 / 👎.


const rerank_trace_path: ?[]u8 = if (cfg.rerank_trace)
(std.fmt.allocPrint(allocator, "{s}/rerank-traces.jsonl", .{data_dir}) catch null)
Expand Down Expand Up @@ -731,7 +730,7 @@ fn mainImpl() !void {

std.log.info("codedb mcp: root={s} files={d} data={s} scan={s}", .{ abs_root, store.currentSeq(), data_dir, mcp_server.getScanState().name() });

mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, &telem, maybe_deferred);
mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, cfg.max_cached, &telem, maybe_deferred);

shutdown.store(true, .release);
if (scan_thread) |st| st.join();
Expand Down
43 changes: 23 additions & 20 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,16 @@ const ProjectCache = struct {
entries: [MAX_CACHED]?*Entry,
default_path: []const u8,
default_snapshot_cache: SnapshotCache,
content_cache_capacity: u32,

fn init(alloc_: std.mem.Allocator, default_path_: []const u8) ProjectCache {
fn init(alloc_: std.mem.Allocator, default_path_: []const u8, content_cache_capacity_: u32) ProjectCache {
return .{
.mu = .{},
.alloc = alloc_,
.entries = [_]?*Entry{null} ** MAX_CACHED,
.default_path = default_path_,
.default_snapshot_cache = .{},
.content_cache_capacity = content_cache_capacity_,
};
}

Expand Down Expand Up @@ -305,7 +307,7 @@ const ProjectCache = struct {
self.alloc.destroy(new_entry);
return error.OutOfMemory;
};
new_entry.explorer = Explorer.init(self.alloc);
new_entry.explorer = Explorer.init(self.alloc, self.content_cache_capacity);
new_entry.explorer.setRoot(io, p);
new_entry.store = Store.init(self.alloc);
new_entry.snapshot_cache = .{};
Expand Down Expand Up @@ -383,9 +385,9 @@ const ProjectCache = struct {
pub const BenchContext = struct {
cache: ProjectCache,

pub fn init(alloc: std.mem.Allocator, default_path: []const u8) BenchContext {
pub fn init(alloc: std.mem.Allocator, default_path: []const u8, content_cache_capacity: u32) BenchContext {
return .{
.cache = ProjectCache.init(alloc, default_path),
.cache = ProjectCache.init(alloc, default_path, content_cache_capacity),
};
}

Expand Down Expand Up @@ -724,14 +726,15 @@ pub fn run(
explorer: *Explorer,
agents: *AgentRegistry,
default_path: []const u8,
content_cache_capacity: u32,
telem: *telemetry_mod.Telemetry,
deferred_scan: ?*DeferredScan,
) void {
const stdout = cio.File.stdout();
const stdin = std.Io.File.stdin();
last_activity.store(cio.milliTimestamp(), .release);

var cache = ProjectCache.init(alloc, default_path);
var cache = ProjectCache.init(alloc, default_path, content_cache_capacity);
defer cache.deinit();

// Build the `tools/list` payload. The discriminated `oneOf` on the
Expand Down Expand Up @@ -3803,7 +3806,7 @@ test "issue-258: cached project reads use the project root after contents are re
const project_path_len = try tmp.dir.realPathFile(io, ".", &project_path_buf);
const project_path = project_path_buf[0..project_path_len];

var snapshot_src = Explorer.init(testing.allocator);
var snapshot_src = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer snapshot_src.deinit();
snapshot_src.setRoot(io, project_path);
try snapshot_src.indexFile("src/main.zig", "const project = \"secondary\";\n");
Expand All @@ -3816,12 +3819,12 @@ test "issue-258: cached project reads use the project root after contents are re
const default_path_len = try std.Io.Dir.cwd().realPathFile(io, ".", &default_path_buf);
const default_path = default_path_buf[0..default_path_len];

var default_explorer = Explorer.init(testing.allocator);
var default_explorer = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer default_explorer.deinit();
var default_store = Store.init(testing.allocator);
defer default_store.deinit();

var cache = ProjectCache.init(testing.allocator, default_path);
var cache = ProjectCache.init(testing.allocator, default_path, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer cache.deinit();

const ctx = try cache.get(io, project_path, &default_explorer, &default_store);
Expand Down Expand Up @@ -3864,7 +3867,7 @@ test "ProjectCache loads project from central snapshot cache" {
std.Io.Dir.cwd().deleteDir(io, data_dir) catch {};
}

var snapshot_src = Explorer.init(testing.allocator);
var snapshot_src = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer snapshot_src.deinit();
snapshot_src.setRoot(io, project_path);
try snapshot_src.indexFile("src/main.zig", "pub fn cachedProject() void {}\n");
Expand All @@ -3880,12 +3883,12 @@ test "ProjectCache loads project from central snapshot cache" {
const default_path_len = try std.Io.Dir.cwd().realPathFile(io, ".", &default_path_buf);
const default_path = default_path_buf[0..default_path_len];

var default_explorer = Explorer.init(testing.allocator);
var default_explorer = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer default_explorer.deinit();
var default_store = Store.init(testing.allocator);
defer default_store.deinit();

var cache = ProjectCache.init(testing.allocator, default_path);
var cache = ProjectCache.init(testing.allocator, default_path, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer cache.deinit();

const ctx = try cache.get(io, project_path, &default_explorer, &default_store);
Expand Down Expand Up @@ -3919,19 +3922,19 @@ test "issue-353: explicit default project loads snapshot when default explorer i
std.Io.Dir.cwd().deleteDir(io, data_dir) catch {};
}

var snapshot_src = Explorer.init(testing.allocator);
var snapshot_src = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer snapshot_src.deinit();
snapshot_src.setRoot(io, project_path);
try snapshot_src.indexFile("src/main.zig", "pub fn issue353() void {}\n");
try snapshot_mod.writeProjectCacheSnapshot(io, &snapshot_src, project_path, testing.allocator);

var default_explorer = Explorer.init(testing.allocator);
var default_explorer = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer default_explorer.deinit();
default_explorer.setRoot(io, project_path);
var default_store = Store.init(testing.allocator);
defer default_store.deinit();

var cache = ProjectCache.init(testing.allocator, project_path);
var cache = ProjectCache.init(testing.allocator, project_path, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer cache.deinit();

const ctx = try cache.get(io, project_path, &default_explorer, &default_store);
Expand Down Expand Up @@ -3963,24 +3966,24 @@ test "issue-353: project cache invalidation reloads newly written snapshots" {
std.Io.Dir.cwd().deleteDir(io, data_dir) catch {};
}

var snapshot_src = Explorer.init(testing.allocator);
var snapshot_src = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer snapshot_src.deinit();
snapshot_src.setRoot(io, project_path);
try snapshot_src.indexFile("src/old.zig", "pub fn oldSymbol() void {}\n");
try snapshot_mod.writeProjectCacheSnapshot(io, &snapshot_src, project_path, testing.allocator);

var default_explorer = Explorer.init(testing.allocator);
var default_explorer = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer default_explorer.deinit();
var default_store = Store.init(testing.allocator);
defer default_store.deinit();

var cache = ProjectCache.init(testing.allocator, "/Users/example/default");
var cache = ProjectCache.init(testing.allocator, "/Users/example/default", explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer cache.deinit();

const old_ctx = try cache.get(io, project_path, &default_explorer, &default_store);
try testing.expect(old_ctx.explorer.outlines.contains("src/old.zig"));

var snapshot_next = Explorer.init(testing.allocator);
var snapshot_next = Explorer.init(testing.allocator, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer snapshot_next.deinit();
snapshot_next.setRoot(io, project_path);
try snapshot_next.indexFile("src/new.zig", "pub fn newSymbol() void {}\n");
Expand All @@ -3997,7 +4000,7 @@ test "codedb_snapshot cache reuses output until store seq changes" {
const io = testing.io;
const alloc = testing.allocator;

var explorer = Explorer.init(alloc);
var explorer = Explorer.init(alloc, explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer explorer.deinit();
try explorer.indexFile("src/main.zig", "pub fn main() void {}\n");

Expand All @@ -4009,7 +4012,7 @@ test "codedb_snapshot cache reuses output until store seq changes" {
defer agents.deinit();
_ = try agents.register("__filesystem__");

var bench_ctx = BenchContext.init(alloc, ".");
var bench_ctx = BenchContext.init(alloc, ".", explore_mod.Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
defer bench_ctx.deinit();

var parsed = try std.json.parseFromSlice(std.json.Value, alloc, "{}", .{});
Expand Down
Loading
Loading