Skip to content

Commit 1a1f093

Browse files
authored
feat(kb-scope): default-scope search corpus to overlay + required-baseline (E0008.5) (#153)
Implements canon klappy://canon/constraints/core-governance-baseline §Search-Corpus Boundary (klappy/klappy.dev #155 + #156).

When knowledge_base_url is set, the search corpus now defaults to overlay + required-baseline-only. Callers opt in to the legacy merged corpus via include_full_baseline: true. When knowledge_base_url is unset, the parameter is a no-op.

Affected: oddkit_search, oddkit_catalog, oddkit_preflight (and the unified oddkit action). Other tools unchanged — they read governance via the per-file resolver, not the search index.

RV-gate (klappy://canon/constraints/release-validation-gate, tier 1) cleared:
- Cursor Bugbot: completed/success (one autofix landed and verified, 8e88a9f)
- Independent Sonnet 4.6 read-only validator: APPROVE_WITH_NOTES (full DOLCHE in PR #153 comment)
- Smoke against preview URL: 260 passed, 0 failed
- Typecheck: 0 errors
- Workers Builds, Creed Freshness, Version Sync, Test CF Preview: all green

Version bump 0.26.0 → 0.27.0.
1 parent 77856e7 commit 1a1f093

7 files changed

Lines changed: 233 additions & 17 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "oddkit",
3-
"version": "0.26.0",
3+
"version": "0.27.0",
44
"description": "Agent-first CLI for ODD-governed repos. Epistemic terrain rendering with portable baseline.",
55
"type": "module",
66
"bin": {

workers/baseline/MANIFEST.json

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
{
2+
"$schema": "https://klappy.dev/canon/constraints/core-governance-baseline",
3+
"comment": "Required-baseline manifest. The six files every knowledge-base-driven oddkit tool needs to function. Canon source: klappy://canon/constraints/core-governance-baseline §'Required in Baseline'. When knowledge_base_url is set and include_full_baseline is unset/false, the search corpus indexes the project KB plus only these files from the default baseline.",
4+
"version": 1,
5+
"epoch": "E0008.5",
6+
"canon_uri": "klappy://canon/constraints/core-governance-baseline",
7+
"required_paths": [
8+
"canon/values/orientation.md",
9+
"canon/values/axioms.md",
10+
"canon/meta/writing-canon.md",
11+
"canon/constraints/definition-of-done.md",
12+
"canon/constraints/telemetry-governance.md",
13+
"odd/challenge/stakes-calibration.md"
14+
]
15+
}

workers/package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "oddkit-mcp-worker",
3-
"version": "0.26.0",
3+
"version": "0.27.0",
44
"private": true,
55
"type": "module",
66
"scripts": {

workers/src/index.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,7 @@ Use when:
222222
knowledge_base_url: z.string().optional().describe("Optional GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier rather than silently substituting from the default knowledge base."),
223223
result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("For action='search' or 'preflight': controls how overlay (knowledge_base) and baseline results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate overlay_hits/baseline_hits arrays in response."),
224224
include_metadata: z.boolean().optional().describe("When true, search/get responses include a metadata object with full parsed frontmatter. Default: false."),
225+
include_full_baseline: z.boolean().optional().describe("Search-Corpus Boundary opt-in (E0008.5). When knowledge_base_url is set, the search corpus defaults to overlay + required-baseline only. Pass true to restore the legacy merged corpus (overlay + full baseline). When knowledge_base_url is unset, this parameter is a no-op. Authority: klappy://canon/constraints/core-governance-baseline §'Search-Corpus Boundary'."),
225226
section: z.string().optional().describe("For action='get': extract only the named ## section from the document. Returns section content or available sections if not found."),
226227
sort_by: z.enum(["date", "path"]).optional().describe("For action='catalog': sort articles. 'date' returns newest first (requires frontmatter). 'path' returns all docs alphabetically, including undated."),
227228
limit: z.number().min(1).max(500).optional().describe("For action='catalog': max articles to return when sort_by is provided. Default: 10, max: 500."),
@@ -244,6 +245,7 @@ Use when:
244245
knowledge_base_url: args.knowledge_base_url,
245246
result_grouping: args.result_grouping,
246247
include_metadata: args.include_metadata,
248+
include_full_baseline: args.include_full_baseline,
247249
section: args.section,
248250
sort_by: args.sort_by,
249251
limit: args.limit,
@@ -325,6 +327,7 @@ Use when:
325327
knowledge_base_url: z.string().optional().describe("Optional: GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier."),
326328
result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("Controls how overlay (knowledge_base) and baseline results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate overlay_hits/baseline_hits arrays in response."),
327329
include_metadata: z.boolean().optional().describe("When true, each hit includes a metadata object with full parsed frontmatter. Default: false."),
330+
include_full_baseline: z.boolean().optional().describe("Search-Corpus Boundary opt-in (E0008.5). When knowledge_base_url is set, the search corpus defaults to overlay + required-baseline only. Pass true to restore the legacy merged corpus (overlay + full baseline). When knowledge_base_url is unset, this is a no-op. Authority: klappy://canon/constraints/core-governance-baseline §'Search-Corpus Boundary'."),
328331
},
329332
annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
330333
},
@@ -370,6 +373,7 @@ Use when:
370373
limit: z.number().min(1).max(500).optional().describe("Max articles to return when sort_by is provided. Default: 10, max: 500."),
371374
offset: z.number().min(0).optional().describe("Skip this many articles before returning results. Use with limit for pagination. Default: 0."),
372375
filter_epoch: z.string().optional().describe("Filter to articles with this epoch value in frontmatter (e.g. 'E0007')."),
376+
include_full_baseline: z.boolean().optional().describe("Search-Corpus Boundary opt-in (E0008.5). When knowledge_base_url is set, the catalog reflects overlay + required-baseline only. Pass true to restore the legacy merged catalog (overlay + full baseline). Authority: klappy://canon/constraints/core-governance-baseline §'Search-Corpus Boundary'."),
373377
},
374378
annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
375379
},
@@ -390,6 +394,7 @@ Use when:
390394
input: z.string().describe("Description of what you're about to implement."),
391395
knowledge_base_url: z.string().optional().describe("Optional: GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier."),
392396
result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("Controls how overlay (knowledge_base) and baseline start_here results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate start_here_overlay/start_here_baseline arrays."),
397+
include_full_baseline: z.boolean().optional().describe("Search-Corpus Boundary opt-in (E0008.5). When knowledge_base_url is set, the preflight corpus defaults to overlay + required-baseline only. Pass true to restore the legacy merged corpus (overlay + full baseline). When knowledge_base_url is unset, this is a no-op. Authority: klappy://canon/constraints/core-governance-baseline §'Search-Corpus Boundary'."),
393398
},
394399
annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
395400
},
@@ -438,6 +443,7 @@ Use when:
438443
knowledge_base_url: args.knowledge_base_url as string | undefined,
439444
result_grouping: args.result_grouping as "merged" | "overlay_first" | "grouped" | undefined,
440445
include_metadata: args.include_metadata as boolean | undefined,
446+
include_full_baseline: args.include_full_baseline as boolean | undefined,
441447
section: args.section as string | undefined,
442448
sort_by: args.sort_by as string | undefined,
443449
limit: args.limit as number | undefined,

workers/src/orchestrate.ts

Lines changed: 51 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import {
1515
type Env,
1616
type BaselineIndex,
1717
type IndexEntry,
18+
type SearchScope,
1819
type SectionResult,
1920
} from "./zip-baseline-fetcher";
2021
import { buildBM25Index, searchBM25, tokenize, type BM25Index } from "./bm25";
@@ -235,6 +236,14 @@ export interface UnifiedParams {
235236
knowledge_base_url?: string;
236237
result_grouping?: ResultGrouping;
237238
include_metadata?: boolean;
239+
/**
240+
* Search-Corpus Boundary opt-in (E0008.5). When `knowledge_base_url` is set,
241+
* the search corpus defaults to overlay + required-baseline-manifest only.
242+
* Set this to true to restore the legacy merged corpus (overlay + full
243+
* baseline). When `knowledge_base_url` is unset, this parameter is a no-op.
244+
* Authority: klappy://canon/constraints/core-governance-baseline §"Search-Corpus Boundary".
245+
*/
246+
include_full_baseline?: boolean;
238247
section?: string;
239248
sort_by?: string;
240249
limit?: number;
@@ -1348,9 +1357,10 @@ async function runSearch(
13481357
state?: OddkitState,
13491358
includeMetadata?: boolean,
13501359
resolvedGrouping: ResultGrouping = "merged",
1360+
searchScope: SearchScope = "merged",
13511361
): Promise<ActionResult> {
13521362
const startMs = Date.now();
1353-
const index = await fetcher.getIndex(knowledgeBaseUrl);
1363+
const index = await fetcher.getIndex(knowledgeBaseUrl, searchScope);
13541364
const bm25 = getBM25Index(index.entries);
13551365

13561366
// Issue #150 fix-forward: when grouping is active, retrieve a wider candidate
@@ -1412,6 +1422,9 @@ async function runSearch(
14121422
baseline_url: index.baseline_url,
14131423
knowledge_base_url: knowledgeBaseUrl,
14141424
search_index_size: bm25.N,
1425+
search_scope: index.search_scope,
1426+
overlay_doc_count: index.stats.canon,
1427+
baseline_doc_count: index.stats.baseline_indexed ?? index.stats.baseline,
14151428
result_grouping: resolvedGrouping,
14161429
duration_ms: Date.now() - startMs,
14171430
generated_at: new Date().toISOString(),
@@ -1499,6 +1512,9 @@ async function runSearch(
14991512
baseline_url: index.baseline_url,
15001513
knowledge_base_url: knowledgeBaseUrl,
15011514
search_index_size: bm25.N,
1515+
search_scope: index.search_scope,
1516+
overlay_doc_count: index.stats.canon,
1517+
baseline_doc_count: index.stats.baseline_indexed ?? index.stats.baseline,
15021518
result_grouping: resolvedGrouping,
15031519
duration_ms: Date.now() - startMs,
15041520
generated_at: new Date().toISOString(),
@@ -2247,9 +2263,10 @@ async function runCatalog(
22472263
knowledgeBaseUrl?: string,
22482264
state?: OddkitState,
22492265
options?: { sort_by?: string; limit?: number; offset?: number; filter_epoch?: string },
2266+
searchScope: SearchScope = "merged",
22502267
): Promise<ActionResult> {
22512268
const startMs = Date.now();
2252-
const index = await fetcher.getIndex(knowledgeBaseUrl);
2269+
const index = await fetcher.getIndex(knowledgeBaseUrl, searchScope);
22532270
const { sort_by, limit: rawLimit, offset: rawOffset, filter_epoch } = options || {};
22542271
const effectiveLimit = Math.min(Math.max(rawLimit || 10, 1), 500);
22552272
const effectiveOffset = Math.max(rawOffset || 0, 0);
@@ -2315,10 +2332,16 @@ async function runCatalog(
23152332
}));
23162333
}
23172334

2335+
const baselineCount = index.stats.baseline_indexed ?? index.stats.baseline;
2336+
const scopeNote =
2337+
index.search_scope === "kb_with_required_baseline"
2338+
? ` [scoped: required-baseline only; pass include_full_baseline=true to merge]`
2339+
: "";
2340+
23182341
const assistantTextParts = [
23192342
`ODD Documentation Catalog`,
23202343
``,
2321-
`Total: ${index.stats.total} docs (${index.stats.canon} canon, ${index.stats.baseline} baseline)`,
2344+
`Total: ${index.stats.total} docs (${index.stats.canon} canon, ${baselineCount} baseline)${scopeNote}`,
23222345
knowledgeBaseUrl ? `Canon override: ${knowledgeBaseUrl}` : "",
23232346
``,
23242347
`Start here:`,
@@ -2352,7 +2375,8 @@ async function runCatalog(
23522375
const result: Record<string, unknown> = {
23532376
total: index.stats.total,
23542377
canon: index.stats.canon,
2355-
baseline: index.stats.baseline,
2378+
baseline: baselineCount,
2379+
baseline_total: index.stats.baseline,
23562380
categories: Object.keys(byTag),
23572381
start_here: startHere.map((e) => e.path),
23582382
};
@@ -2376,6 +2400,9 @@ async function runCatalog(
23762400
debug: {
23772401
knowledge_base_url: knowledgeBaseUrl,
23782402
baseline_url: index.baseline_url,
2403+
search_scope: index.search_scope,
2404+
overlay_doc_count: index.stats.canon,
2405+
baseline_doc_count: index.stats.baseline_indexed ?? index.stats.baseline,
23792406
generated_at: new Date().toISOString(), // response time — consistent with all other handlers
23802407
index_built_at: index.generated_at, // preserve cache-freshness diagnostic under accurate name
23812408
duration_ms: Date.now() - startMs,
@@ -2389,9 +2416,10 @@ async function runPreflight(
23892416
knowledgeBaseUrl?: string,
23902417
state?: OddkitState,
23912418
resolvedGrouping: ResultGrouping = "merged",
2419+
searchScope: SearchScope = "merged",
23922420
): Promise<ActionResult> {
23932421
const startMs = Date.now();
2394-
const index = await fetcher.getIndex(knowledgeBaseUrl);
2422+
const index = await fetcher.getIndex(knowledgeBaseUrl, searchScope);
23952423
const topic = message.replace(/^preflight:\s*/i, "").trim();
23962424

23972425
// Score all entries, then apply partition before slicing
@@ -2453,6 +2481,9 @@ async function runPreflight(
24532481
debug: {
24542482
docs_considered: index.entries.length,
24552483
knowledge_base_url: knowledgeBaseUrl,
2484+
search_scope: index.search_scope,
2485+
overlay_doc_count: index.stats.canon,
2486+
baseline_doc_count: index.stats.baseline_indexed ?? index.stats.baseline,
24562487
result_grouping: resolvedGrouping,
24572488
duration_ms: Date.now() - startMs,
24582489
generated_at: new Date().toISOString(),
@@ -3333,14 +3364,24 @@ const VALID_ACTIONS = [
33333364
] as const;
33343365

33353366
export async function handleUnifiedAction(params: UnifiedParams): Promise<OddkitEnvelope> {
3336-
const { action, input, context, mode, knowledge_base_url, result_grouping, include_metadata, section, sort_by, limit, offset, filter_epoch, state, env, tracer } = params;
3367+
const { action, input, context, mode, knowledge_base_url, result_grouping, include_metadata, include_full_baseline, section, sort_by, limit, offset, filter_epoch, state, env, tracer } = params;
33373368

33383369
// Conditional default: when knowledge_base_url is set and caller didn't
33393370
// specify result_grouping, default to "overlay_first" (the fix for #150).
33403371
// When KB is unset, default to "merged" (no behavior change).
33413372
const resolvedGrouping: ResultGrouping =
33423373
result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged");
33433374

3375+
// Search-Corpus Boundary (E0008.5): when knowledge_base_url is set, the
3376+
// search corpus defaults to overlay + required-baseline only. Callers opt
3377+
// in to the legacy merged corpus via include_full_baseline=true. When
3378+
// knowledge_base_url is unset, the parameter is a no-op and scope is
3379+
// forced to "merged" (the baseline IS the canon — there is nothing to
3380+
// scope away). Authority: klappy://canon/constraints/core-governance-baseline
3381+
// §"Search-Corpus Boundary".
3382+
const resolvedScope: SearchScope =
3383+
knowledge_base_url && !include_full_baseline ? "kb_with_required_baseline" : "merged";
3384+
33443385
if (!VALID_ACTIONS.includes(action as (typeof VALID_ACTIONS)[number])) {
33453386
return {
33463387
action: "error",
@@ -3371,7 +3412,7 @@ export async function handleUnifiedAction(params: UnifiedParams): Promise<Oddkit
33713412
result = await runEncodeAction(input, context, fetcher, knowledge_base_url, state);
33723413
break;
33733414
case "search":
3374-
result = await runSearch(input, fetcher, knowledge_base_url, state, include_metadata, resolvedGrouping);
3415+
result = await runSearch(input, fetcher, knowledge_base_url, state, include_metadata, resolvedGrouping, resolvedScope);
33753416
break;
33763417
case "get":
33773418
result = await runGet(input, fetcher, knowledge_base_url, state, include_metadata, section);
@@ -3383,13 +3424,13 @@ export async function handleUnifiedAction(params: UnifiedParams): Promise<Oddkit
33833424
result = await runAudit(input, fetcher, knowledge_base_url, state);
33843425
break;
33853426
case "catalog":
3386-
result = await runCatalog(fetcher, knowledge_base_url, state, { sort_by, limit, offset, filter_epoch });
3427+
result = await runCatalog(fetcher, knowledge_base_url, state, { sort_by, limit, offset, filter_epoch }, resolvedScope);
33873428
break;
33883429
case "validate":
33893430
result = await runValidate(input, state);
33903431
break;
33913432
case "preflight":
3392-
result = await runPreflight(input, fetcher, knowledge_base_url, state, resolvedGrouping);
3433+
result = await runPreflight(input, fetcher, knowledge_base_url, state, resolvedGrouping, resolvedScope);
33933434
break;
33943435
case "version":
33953436
result = runVersion(env);
@@ -3398,7 +3439,7 @@ export async function handleUnifiedAction(params: UnifiedParams): Promise<Oddkit
33983439
result = await runCleanupStorage(fetcher, knowledge_base_url);
33993440
break;
34003441
default:
3401-
result = await runSearch(input, fetcher, knowledge_base_url, state, undefined, resolvedGrouping);
3442+
result = await runSearch(input, fetcher, knowledge_base_url, state, undefined, resolvedGrouping, resolvedScope);
34023443
}
34033444

34043445
// Inject trace into debug envelope (E0008.1)

0 commit comments

Comments
 (0)