kryptobaseddev · kryptobaseddev · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -61,6 +61,80 @@ If you genuinely need a doc-kind not yet listed:
 2. Add a routing entry to `.cleo/canon.yml`.
 3. Re-run `pnpm --filter @cleocode/cleo run build` and the gate stays green.
 
+## Skill Maintenance Discipline (Saga T9799 · Epic T9960)
+
+Canonical `ct-*` skills under `packages/skills/skills/` describe how CLEO
+works to every spawned agent. When core systems change but the skill text
+does not, agents act on stale instructions. The T9540 release-system
+rewrite is the canonical example — `ct-release-orchestrator` still
+described the deleted `cleo release ship` monolith for weeks.
+
+**Rule**: when you edit a path declared in the coverage map, you MUST
+update the corresponding skill in the same PR — or, for non-Tier-0 skills,
+acknowledge the deferral explicitly via the `Skill-Drift-Acknowledged` trailer.
+
+### Coverage map (internal-only — never ships)
+
+`packages/skills/internal/skill-coverage.yml` maps each canonical skill
+to the code paths it documents. The file is listed in
+`packages/skills/.npmignore` so it never lands in the published
+`@cleocode/skills` bundle. Sibling tooling under `packages/skills/internal/`
+(drift-check.mjs, the git-hook runners) is excluded the same way.
+
+### Shipped per-skill metadata (in SKILL.md frontmatter)
+
+Every canonical SKILL.md MUST carry a `metadata:` block. These fields
+DO ship — they are documentation, not enforcement, and they let
+consumers and the curator daemon reason about freshness:
+
+```yaml
+metadata:
+  version: 2.0.0           # bump on every material change
+  lastReviewed: 2026-05-21 # ISO date — set by the human/agent who reviewed
+  stability: stable        # experimental | stable | deprecated
+```
+
+### Enforcement (T9960 — in progress)
+
+- **Pre-commit hook**: regenerates `packages/skills/skills.json` from
+  SKILL.md frontmatter. Drift between frontmatter and `skills.json` fails
+  the hook.
+- **CI gate `Skill Drift Check`**: scans the PR diff against the coverage
+  map. If a covered path is touched but the matching SKILL.md is not, the
+  PR fails with `E_SKILL_DRIFT_UNACKNOWLEDGED`.
+- **Trailer override**: a commit trailer
+  `Skill-Drift-Acknowledged: <reason>` bypasses the gate AND auto-files a
+  sentient follow-up task for retroactive skill update.
+- **Tier-0 skills get NO override** — `ct-cleo`, `ct-orchestrator`,
+  `ct-task-executor`, `ct-dev-workflow`, `ct-documentor`, and
+  `CLEO-INJECTION.md` must be kept current in the same PR. The trailer
+  is rejected for these.
+
+### Tier-0 core skills (strict — no drift tolerated)
+
+These define the agent protocol surface. Edit the matching code path,
+edit the skill in the same PR. Period.
+
+- `ct-cleo` — CLI protocol + session lifecycle
+- `ct-orchestrator` — spawn/delegation contract
+- `ct-task-executor` — worker contract
+- `ct-dev-workflow` — commit / branch / release flow
+- `ct-documentor` — docs SSoT routing
+- `CLEO-INJECTION.md` (template, not a skill folder) — protocol injected
+  into every spawn prompt
+
+### Tier-1 LOOM-stage skills (trailer override permitted)
+
+There is one skill per LOOM stage in `packages/core/src/validation/protocols/`.
+The same-PR rule applies; a trailer override is allowed for non-blocking deferrals.
+
+### Internal-only validator
+
+`ct-skill-validator` declares `disable-model-invocation: true` and is
+listed in `packages/skills/.npmignore`, so it is never published to consumers.
+It is the developer-side toolchain that drives the drift check, depth
+audit, and quality evals.
+
 ## Worktree Location (ADR-055 · Saga T9800 · Decision D009)
 
 ALL git worktrees provisioned for agent tasks MUST live under the canonical

diff --git a/packages/cleo/src/dispatch/domains/__tests__/docs-slug-type-project.test.ts b/packages/cleo/src/dispatch/domains/__tests__/docs-slug-type-project.test.ts
@@ -290,4 +290,112 @@ describe('docs dispatch — slug/type/project (T9636/T9637/T9638)', () => {
     const fetched = await handler.query('fetch', { attachmentRef: prefix });
     expect(fetched.success).toBe(true);
   });
+
+  // ────────────────────────────────────────────────────────────────────────
+  // T9965 — docs.fetch round-trip: slug + type + kind non-null regression
+  //
+  // Guards against regression where docs.fetch returned all-null payload
+  // ({slug:null, type:null, kind:null}) when resolving by slug or uuid.
+  // ────────────────────────────────────────────────────────────────────────
+
+  it('T9965 RT-1: add → list → fetch by slug: slug/type/kind all non-null in metadata', async () => {
+    const handler = new DocsHandler();
+
+    // AC1/AC2: attach fixtureA to owner T9965 via docs.add with an explicit slug + type
+    const add = await handler.mutate('add', {
+      ownerId: 'T9965',
+      file: fixtureA,
+      slug: 'sg-arch-solid-session-1-handoff',
+      type: 'handoff',
+    });
+    expect(add.success, `docs.add failed: ${JSON.stringify(add.error)}`).toBe(true);
+    const addData = add.data as { attachmentId: string; slug: string; type: string };
+    expect(addData.slug).toBe('sg-arch-solid-session-1-handoff');
+    expect(addData.type).toBe('handoff');
+
+    // docs.list for that owner must return exactly one row carrying slug, type, and kind
+    const list = await handler.query('list', { task: 'T9965' });
+    expect(list.success).toBe(true);
+    const listData = list.data as {
+      attachments: Array<{ id: string; slug?: string; type?: string; kind: string }>;
+    };
+    expect(listData.attachments).toHaveLength(1);
+    const listedRow = listData.attachments[0];
+    expect(listedRow?.slug).toBe('sg-arch-solid-session-1-handoff');
+    expect(listedRow?.type).toBe('handoff');
+    expect(listedRow?.kind).toBe('local-file');
+
+    // AC1: docs.fetch with the slug as attachmentRef must return populated slug/type/kind
+    const fetchBySlug = await handler.query('fetch', {
+      attachmentRef: 'sg-arch-solid-session-1-handoff',
+    });
+    expect(
+      fetchBySlug.success,
+      `docs.fetch by slug failed: ${JSON.stringify(fetchBySlug.error)}`,
+    ).toBe(true);
+    const slugData = fetchBySlug.data as {
+      metadata: { id: string; slug?: string; type?: string; kind: string };
+      sizeBytes: number;
+      inlined: boolean;
+    };
+    expect(slugData.metadata.id).toBe(addData.attachmentId);
+    // Regression assertions: these metadata fields came back null before the T9965 fix
+    expect(slugData.metadata.slug, 'metadata.slug must not be null after fetch by slug').toBe(
+      'sg-arch-solid-session-1-handoff',
+    );
+    expect(slugData.metadata.type, 'metadata.type must not be null after fetch by slug').toBe(
+      'handoff',
+    );
+    expect(slugData.metadata.kind, 'metadata.kind must not be null after fetch by slug').toBe(
+      'local-file',
+    );
+    expect(slugData.sizeBytes).toBeGreaterThan(0);
+  });
+
+  it('T9965 RT-2: add → fetch by uuid: slug/type/kind all non-null in metadata', async () => {
+    const handler = new DocsHandler();
+
+    // AC2: attach fixtureB via docs.add with slug + type; the returned attachmentId drives the fetch
+    const add = await handler.mutate('add', {
+      ownerId: 'T9965-uuid',
+      file: fixtureB,
+      slug: 'handoff-for-uuid-test',
+      type: 'handoff',
+    });
+    expect(add.success, `docs.add failed: ${JSON.stringify(add.error)}`).toBe(true);
+    const addData = add.data as {
+      attachmentId: string;
+      sha256: string;
+      slug: string;
+      type: string;
+    };
+    expect(addData.slug).toBe('handoff-for-uuid-test');
+    expect(addData.type).toBe('handoff');
+
+    // AC2: docs.fetch by UUID (the attachmentId from add) must return populated slug/type/kind
+    const fetchByUuid = await handler.query('fetch', {
+      attachmentRef: addData.attachmentId,
+    });
+    expect(
+      fetchByUuid.success,
+      `docs.fetch by uuid failed: ${JSON.stringify(fetchByUuid.error)}`,
+    ).toBe(true);
+    const uuidData = fetchByUuid.data as {
+      metadata: { id: string; slug?: string; type?: string; kind: string };
+      sizeBytes: number;
+      inlined: boolean;
+    };
+    expect(uuidData.metadata.id).toBe(addData.attachmentId);
+    // Regression assertions: these metadata fields came back null before the T9965 fix
+    expect(uuidData.metadata.slug, 'metadata.slug must not be null after fetch by uuid').toBe(
+      'handoff-for-uuid-test',
+    );
+    expect(uuidData.metadata.type, 'metadata.type must not be null after fetch by uuid').toBe(
+      'handoff',
+    );
+    expect(uuidData.metadata.kind, 'metadata.kind must not be null after fetch by uuid').toBe(
+      'local-file',
+    );
+    expect(uuidData.sizeBytes).toBeGreaterThan(0);
+  });
 });
diff --git a/packages/cleo/vitest.config.ts b/packages/cleo/vitest.config.ts
@@ -262,6 +262,22 @@ export default defineConfig({
       ).pathname,
       '@cleocode/core': new URL('../../packages/core/src/index.ts', import.meta.url).pathname,
       '@cleocode/lafs': new URL('../../packages/lafs/src/index.ts', import.meta.url).pathname,
+      // T9965: @a2a-js/sdk is a dep of @cleocode/lafs; in worktrees it resolves
+      // through lafs/node_modules rather than root node_modules.
+      '@a2a-js/sdk': new URL(
+        '../../packages/lafs/node_modules/@a2a-js/sdk/dist/index.js',
+        import.meta.url,
+      ).pathname,
+      // T9965: js-yaml + @iarna/toml are deps of @cleocode/caamp; in worktrees
+      // they resolve through caamp/node_modules rather than root node_modules.
+      '@iarna/toml': new URL(
+        '../../packages/caamp/node_modules/@iarna/toml/toml.js',
+        import.meta.url,
+      ).pathname,
+      'js-yaml': new URL(
+        '../../packages/caamp/node_modules/js-yaml/index.js',
+        import.meta.url,
+      ).pathname,
       // T1113: nexus code sub-path exports — legacy dist-path imports used in nexus.ts
       '@cleocode/nexus/dist/src/code/unfold.js': new URL(
         '../../packages/nexus/src/code/unfold.ts',

diff --git a/packages/skills/skills/ct-skill-validator/SKILL.md b/packages/skills/skills/ct-skill-validator/SKILL.md
@@ -34,14 +34,18 @@ python ${CLAUDE_SKILL_DIR}/scripts/validate.py <skill-dir> --json
 # Deep body quality audit (optional, run alongside validate.py):
 python ${CLAUDE_SKILL_DIR}/scripts/audit_body.py <skill-dir>
 
-# Manifest alignment check:
-python ${CLAUDE_SKILL_DIR}/scripts/check_manifest.py <skill-dir> <manifest.json>
+# Manifest alignment check: bundled into validate.py Tier 4. Use:
+#   python ${CLAUDE_SKILL_DIR}/scripts/validate.py <skill-dir> --manifest <manifest.json> --dispatch-config <dispatch-config.json>
 
 # Progressive-disclosure depth check (T9684 — CI gate):
 python ${CLAUDE_SKILL_DIR}/scripts/check_depth.py <skill-dir>
 
 # Repo-wide depth sweep:
 python ${CLAUDE_SKILL_DIR}/scripts/check_depth.py <repo-root> --all
+
+# Allowlist audit (CI / cron — exit 1 on findings):
+python ${CLAUDE_SKILL_DIR}/scripts/check_depth.py <skill-dir> --audit-allowlist
+python ${CLAUDE_SKILL_DIR}/scripts/check_depth.py <skill-dir> --audit-allowlist --json
 ```
 
 **Depth rule (T9684):** A skill PASSES when ANY of:
@@ -54,6 +58,13 @@ Pre-existing stubs are allowlisted with follow-up task IDs in
 `scripts/check_depth.py::ALLOWLIST`. Gold-standard skills:
 `ct-orchestrator` (9 refs) and `ct-skill-creator` (7 refs).
 
+**Allowlist hygiene:** every entry carries `last_reviewed: YYYY-MM-DD HH:MM:SS`.
+`check_depth.py` runs a silent background audit on every invocation and emits
+WARNs to stderr for malformed or stale (> 30 days) entries. Use
+`--audit-allowlist` for an explicit pass that exits 1 on any finding —
+suitable for a CI cron job. The threshold is tunable via
+`ALLOWLIST_STALE_DAYS` at the top of `check_depth.py`.
+
 The depth check runs on every PR touching `packages/skills/skills/**`
 via `.github/workflows/skills-depth-check.yml`.
 
@@ -119,40 +130,68 @@ Repeat until verdict is `PASS` or `PASS_WITH_WARNINGS`. WARN is acceptable; ERRO
 ## Phase 3: Quality A/B Eval
 
 Tests whether the skill actually improves agent output quality vs. no skill context.
-Uses the eval infrastructure from ct-skill-creator.
+Phase 3 is **delegated** — `ct-skill-validator` does static analysis; runtime
+quality evals live in a dedicated skill (`skill-evaluator` preferred,
+`ct-skill-creator` as legacy fallback).
+
+> **Scope boundary:** `ct-skill-validator` is *static* — it checks structure,
+> frontmatter, body, manifest, depth, ecosystem fit. For deep runtime A/B
+> benchmarking, regression detection, and auto-improvement, the dispatcher
+> below routes to `skill-evaluator`, which owns that workflow end-to-end.
+
+The two eval files in `evals/` serve different purposes:
+- `evals/trigger_queries.json` — trigger queries (does the description activate correctly?)
+- `evals/quality_evals.json`   — output-quality scenarios (does the validator produce the right report?)
+
+### Dispatch (no hardcoded cross-skill paths)
+
+`scripts/run_quality_eval.py` uses `_skill_finder.py` to dynamically locate
+the eval skill at runtime. It searches:
+
+1. `$SKILL_FINDER_PATH` (colon-separated override)
+2. Direct sibling of this skill
+3. `<this-skill>/../../skills/<name>/` (CLEO / awesome-skills layouts)
+4. Walk-up ancestors + their project-shaped children (cross-project)
+5. `~/.claude/skills/<name>/`
+
+Show what would be used (without running anything):
+```bash
+python ${CLAUDE_SKILL_DIR}/scripts/run_quality_eval.py --list
+```
 
 **Trigger accuracy** — does the skill description trigger correctly?
 ```bash
-python ${CLAUDE_SKILL_DIR}/../ct-skill-creator/scripts/run_eval.py \
-  --eval-set ${CLAUDE_SKILL_DIR}/evals/eval_set.json \
-  --skill-path ${CLAUDE_SKILL_DIR}
+python ${CLAUDE_SKILL_DIR}/scripts/run_quality_eval.py <skill-dir> \
+  --trigger --evals ${CLAUDE_SKILL_DIR}/evals/trigger_queries.json
 ```
 
-**Optimize description** (if trigger accuracy < 80%):
+**Quality eval** (with/without skill A/B + grading + blind comparison):
 ```bash
-python ${CLAUDE_SKILL_DIR}/../ct-skill-creator/scripts/run_loop.py \
-  --eval-set ${CLAUDE_SKILL_DIR}/evals/eval_set.json \
-  --skill-path ${CLAUDE_SKILL_DIR} \
-  --model claude-sonnet-4-6 \
-  --max-iterations 5
+python ${CLAUDE_SKILL_DIR}/scripts/run_quality_eval.py <skill-dir> \
+  --runs 3 --executor api \
+  --evals ${CLAUDE_SKILL_DIR}/evals/quality_evals.json
 ```
-`run_loop.py` opens a live HTML accuracy report in the browser automatically.
-
-**Quality eval** (with/without skill A/B):
-1. Spawn two agents in the SAME turn: one WITH skill context loaded, one WITHOUT (baseline)
-2. Give both the same task prompt from [evals/evals.json](evals/evals.json)
-3. Grade each with the grader agent → `grading.json`:
-   `${CLAUDE_SKILL_DIR}/../ct-skill-creator/agents/grader.md`
-4. Blind A/B comparison with the comparator agent → `comparison.json`:
-   `${CLAUDE_SKILL_DIR}/../ct-skill-creator/agents/comparator.md`
-5. Post-hoc analysis with the analyzer agent → `analysis.json`:
-   `${CLAUDE_SKILL_DIR}/../ct-skill-creator/agents/analyzer.md`
-6. Serve the full eval review:
-   `python ${CLAUDE_SKILL_DIR}/../ct-skill-creator/eval-viewer/generate_review.py <workspace-dir>`
-   (Opens browser at localhost:3117)
-
-See [references/validation-rules.md](references/validation-rules.md) and
-`${CLAUDE_SKILL_DIR}/../ct-skill-creator/references/schemas.md` for JSON output schemas.
+
+When `skill-evaluator` is the resolved target, the wrapper drives its full
+loop: generate → run → grade → aggregate → analyze → detect-regression →
+propose. See `skill-evaluator/SKILL.md` for the workflow it actually
+executes.
+
+When `ct-skill-creator` is the resolved fallback, the wrapper invokes its
+`run_eval.py` with the same arguments translated to its CLI shape.
+
+### Manual A/B (if you want to drive runs yourself)
+
+If you need direct control of how runs are spawned (e.g. inside a real
+Claude Code session with subagent isolation), invoke the resolved eval
+skill's scripts directly — locate them with:
+
+```bash
+EVAL_SKILL=$(python ${CLAUDE_SKILL_DIR}/scripts/_skill_finder.py skill-evaluator)
+```
+
+then drive that skill's documented workflow without any further hardcoded
+paths in this file.
 
 ---
 

diff --git a/...kills/ct-skill-validator/evals/evals.json → ...-skill-validator/evals/quality_evals.json b/...kills/ct-skill-validator/evals/evals.json → ...-skill-validator/evals/quality_evals.json
@@ -42,7 +42,7 @@
       "prompt": "Run the manifest alignment check for ct-skill-validator against the CLEO manifest",
       "expected_output": "Manifest alignment results showing whether ct-skill-validator is registered correctly in manifest.json and dispatch-config.json",
       "expectations": [
-        "Claude passes --manifest to validate.py or check_manifest.py",
+        "Claude passes --manifest to validate.py (Tier 4 check)",
         "The manifest.json path is correctly resolved",
         "The output shows Tier 4 CLEO Integration results",
         "Claude reports whether the skill is found in manifest.json"

diff --git a/...ls/ct-skill-validator/evals/eval_set.json → ...kill-validator/evals/trigger_queries.json b/...ls/ct-skill-validator/evals/eval_set.json → ...kill-validator/evals/trigger_queries.json