diff --git a/package.json b/package.json index 2d40ae1..bef3edf 100644 --- a/package.json +++ b/package.json @@ -58,7 +58,7 @@ "format": "biome format --write src tests" }, "dependencies": { - "@tangle-network/agent-eval": "^0.23.0", + "@tangle-network/agent-eval": "^0.28.0", "zod": "^4.3.6" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 01fa51b..be48ae6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@tangle-network/agent-eval': - specifier: ^0.23.0 - version: 0.23.0(typescript@5.9.3) + specifier: ^0.28.0 + version: 0.28.0(typescript@5.9.3) zod: specifier: ^4.3.6 version: 4.4.2 @@ -445,8 +445,8 @@ packages: '@scure/bip39@2.2.0': resolution: {integrity: sha512-T/Bj/YvYMNkIPq6EENO6/rcs2e7qTNuyoUXf0KBFDmp0ZDu0H2X4Lq6yC3i0c8PcWkov5EbW+yQZZbdMmk154A==} - '@tangle-network/agent-eval@0.23.0': - resolution: {integrity: sha512-YY4J2v1epvTBJ3HeNAYs4AaeurgUZCTfmooGrmDbKeAfWSD6Xzv8RC33xChd1Tge/IGDz1ILRTfpLqyuhNU2aQ==} + '@tangle-network/agent-eval@0.28.0': + resolution: {integrity: sha512-voK3QL2qdjQ86PyAiy6n2RFYPXmS9sRMIfuo77YV7Wl4tFmdiUBaxXuBvSkWCgkeco0QJA/P7WxvwUokX2J62Q==} engines: {node: '>=20'} hasBin: true @@ -1198,7 +1198,7 @@ snapshots: '@noble/hashes': 2.2.0 '@scure/base': 2.2.0 - '@tangle-network/agent-eval@0.23.0(typescript@5.9.3)': + '@tangle-network/agent-eval@0.28.0(typescript@5.9.3)': dependencies: '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.2) '@ax-llm/ax': 19.0.45(zod@4.4.2) diff --git a/src/index.ts b/src/index.ts index 0dda8a2..7d94aee 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,7 @@ export * from './kb-store' export * from './lint' export * from './optimization' export * from './proposals' +export * from './propose-from-finding' export * from './release' export * from './research-loop' export * from './schemas' diff --git a/src/propose-from-finding.test.ts b/src/propose-from-finding.test.ts new file mode 100644 index 0000000..098b36e --- /dev/null +++ 
b/src/propose-from-finding.test.ts @@ -0,0 +1,135 @@ +import type { AnalystFinding } from '@tangle-network/agent-eval' +import { describe, expect, it } from 'vitest' +import { + KnowledgeProposalParseError, + proposeFromFinding, + proposeFromFindings, +} from './propose-from-finding' + +function f(subject: string | undefined, partial: Partial = {}): AnalystFinding { + return { + schema_version: '1.0.0', + finding_id: 'f_test', + analyst_id: 'knowledge-gap', + produced_at: '2026-05-19T00:00:00Z', + severity: 'high', + area: 'knowledge-gap', + claim: 'wiki page missing', + confidence: 0.85, + evidence_refs: [{ kind: 'span', uri: 'span://t/s', excerpt: 'quote' }], + rationale: 'agent asked clarifying question', + recommended_action: 'Create wiki page with claim: invoice rows have two shapes', + subject, + ...partial, + } +} + +describe('proposeFromFinding', () => { + it('returns null for non-knowledge subjects', () => { + expect(proposeFromFinding(f('websearch:outdated:invoices'))).toBeNull() + expect(proposeFromFinding(f('tool-doc:queryTraces'))).toBeNull() + expect(proposeFromFinding(f('system-prompt:request-classification'))).toBeNull() + expect(proposeFromFinding(f('memory:prior-decision-on-vendor'))).toBeNull() + }) + + it('returns null when subject is missing', () => { + expect(proposeFromFinding(f(undefined))).toBeNull() + }) + + it('emits a create-page proposal for agent-knowledge:wiki:', () => { + const p = proposeFromFinding(f('agent-knowledge:wiki:invoice-line-items')) + expect(p).not.toBeNull() + expect(p?.kind).toBe('create-page') + expect(p?.locus).toBe('invoice-line-items') + expect(p?.writeBlocks).toHaveLength(1) + expect(p?.writeBlocks[0]?.path).toBe('knowledge/invoice-line-items.md') + expect(p?.writeBlocks[0]?.content).toMatch(/^---\ntitle: Invoice Line Items/) + expect(p?.writeBlocks[0]?.content).toContain('## Rationale') + expect(p?.writeBlocks[0]?.content).toContain('## Recommended action') + expect(p?.metadata.severity).toBe('high') + 
expect(p?.sourceFindingId).toBe('f_test') + }) + + it('emits an append-section proposal for wiki:#', () => { + const p = proposeFromFinding(f('agent-knowledge:wiki:invoice-line-items#Two-shapes')) + expect(p?.kind).toBe('append-section') + expect(p?.locus).toBe('invoice-line-items') + expect(p?.writeBlocks[0]?.path).toBe('knowledge/invoice-line-items.md') + expect(p?.writeBlocks[0]?.content).toMatch(/^## Two-shapes/) + expect(p?.writeBlocks[0]?.content).toContain('### Rationale') + }) + + it('emits a draft claim for agent-knowledge:claim:', () => { + const p = proposeFromFinding(f('agent-knowledge:claim:invoice-shape-A')) + expect(p?.kind).toBe('create-claim') + expect(p?.writeBlocks).toHaveLength(0) + expect(p?.claim).toBeDefined() + expect(p?.claim?.status).toBe('draft') + expect(p?.claim?.text).toBe('Create wiki page with claim: invoice rows have two shapes') + expect(p?.claim?.refs).toEqual([ + { sourceId: 'analyst-finding:f_test', anchorId: 'span://t/s', quote: 'quote' }, + ]) + expect(p?.claim?.metadata?.topic).toBe('invoice-shape-A') + }) + + it('emits a lift-raw proposal for agent-knowledge:raw:', () => { + const p = proposeFromFinding(f('agent-knowledge:raw:vendor-2026-Q1-pdf')) + expect(p?.kind).toBe('lift-raw') + expect(p?.writeBlocks[0]?.path).toBe('knowledge/vendor-2026-q1-pdf.md') + expect(p?.writeBlocks[0]?.content).toContain('source: vendor-2026-Q1-pdf') + expect(p?.writeBlocks[0]?.content).toContain('## Why this page exists') + }) + + it('emits a mark-stale proposal for agent-knowledge:stale:', () => { + const p = proposeFromFinding( + f('agent-knowledge:stale:legacy-invoice-schema', { + analyst_id: 'knowledge-poisoning', + severity: 'critical', + confidence: 0.95, + }), + ) + expect(p?.kind).toBe('mark-stale') + expect(p?.writeBlocks[0]?.path).toBe('knowledge/legacy-invoice-schema.stale.md') + expect(p?.writeBlocks[0]?.content).toContain('status: superseded') + expect(p?.writeBlocks[0]?.content).toContain('superseded_by_finding: f_test') + 
expect(p?.metadata.severity).toBe('critical') + }) + + it('throws on malformed knowledge subject (missing locus)', () => { + expect(() => proposeFromFinding(f('agent-knowledge:wiki:'))).toThrow( + KnowledgeProposalParseError, + ) + }) + + it('throws on unknown knowledge kind', () => { + expect(() => proposeFromFinding(f('agent-knowledge:embedding:x'))).toThrow( + KnowledgeProposalParseError, + ) + }) + + it('proposal id is stable per finding so cross-run diffs hold', () => { + const p1 = proposeFromFinding(f('agent-knowledge:wiki:x')) + const p2 = proposeFromFinding(f('agent-knowledge:wiki:x')) + expect(p1?.id).toBe(p2?.id) + }) + + it('sanitises slugs with unsafe characters', () => { + const p = proposeFromFinding(f('agent-knowledge:wiki:Invoice/Line Items')) + expect(p?.writeBlocks[0]?.path).toBe('knowledge/invoice-line-items.md') + }) +}) + +describe('proposeFromFindings (batch)', () => { + it('partitions findings into proposals / skipped / errors', () => { + const out = proposeFromFindings([ + f('agent-knowledge:wiki:foo'), + f('websearch:outdated:bar'), // skipped + f('agent-knowledge:wiki:', { finding_id: 'f_bad' }), // error + f('agent-knowledge:claim:baz'), + ]) + expect(out.proposals.map((p) => p.kind)).toEqual(['create-page', 'create-claim']) + expect(out.skipped).toBe(1) + expect(out.errors).toHaveLength(1) + expect(out.errors[0]?.findingId).toBe('f_bad') + }) +}) diff --git a/src/propose-from-finding.ts b/src/propose-from-finding.ts new file mode 100644 index 0000000..5835479 --- /dev/null +++ b/src/propose-from-finding.ts @@ -0,0 +1,330 @@ +/** + * Bridge from `AnalystFinding` (agent-eval) to knowledge proposals. + * + * Closes the failure → wiki side of the recursive-self-improvement + * loop: a knowledge-gap or knowledge-poisoning finding produced by an + * analyst becomes a concrete proposal an operator (or auto-merge bot) + * can review and apply. 
/**
 * Bridge contract: `AnalystFinding` (agent-eval) → knowledge proposal.
 *
 * The bridge fails loud: a finding whose subject lives in the
 * `agent-knowledge:` namespace but cannot be classified makes
 * `proposeFromFinding` THROW a `KnowledgeProposalParseError` rather than
 * skip it silently, so the loop never accepts an underspecified edit.
 *
 * Subject grammar this bridge understands (analyst-side convention,
 * stamped in the kind prompts):
 *
 *   agent-knowledge:wiki:<page-slug>             create / update page
 *   agent-knowledge:wiki:<page-slug>#<heading>   insert section under page
 *   agent-knowledge:claim:<topic>                draft claim row
 *   agent-knowledge:raw:<source-id>              lift raw → curated
 *   agent-knowledge:stale:<page-slug>            mark page superseded
 *
 * Anything else (websearch:outdated:*, tool-doc:*, system-prompt:*,
 * memory:*) is NOT a knowledge-base concern and returns `null` so the
 * loop's improvement-applier handles it.
 */

import type { AnalystFinding, AnalystSeverity } from '@tangle-network/agent-eval'
import type { ClaimRef, KnowledgeClaim, KnowledgeWriteBlock } from './types'

/** Namespace prefix that marks a finding subject as a knowledge-base concern. */
const KNOWLEDGE_PREFIX = 'agent-knowledge:'

export interface KnowledgeProposal {
  /**
   * Stable id derived from the finding so cross-run diffs share an
   * identity. Re-proposing the same finding produces the same id.
   */
  id: string
  /** The finding that generated this proposal — useful for audit + revert. */
  sourceFindingId: string
  /**
   * What the proposal does. Note: nothing in this module currently emits
   * `'update-page'`; the member is kept so existing consumers still type-check.
   */
  kind:
    | 'create-page'
    | 'update-page'
    | 'append-section'
    | 'create-claim'
    | 'lift-raw'
    | 'mark-stale'
  /** Locus on disk (page slug or claim topic), exactly as written in the subject. */
  locus: string
  /**
   * Page write blocks the standard `applyKnowledgeWriteBlocks` consumer
   * accepts. Empty for proposals that don't change page text (e.g.
   * `create-claim` produces a `claim` field instead).
   */
  writeBlocks: KnowledgeWriteBlock[]
  /**
   * Granular claim draft for proposals whose unit-of-change is a claim
   * row rather than a whole page. `status: 'draft'` until reviewed.
   */
  claim?: KnowledgeClaim
  /** Per-proposal metadata: severity, confidence, source span. */
  metadata: {
    severity: AnalystSeverity
    confidence: number
    evidence_uri?: string
    analyst_id: string
  }
}

/**
 * Raised when a finding's subject is in the `agent-knowledge:` namespace
 * but does not match the grammar this bridge understands. Carries the
 * offending finding id and subject so batch callers can report them.
 */
export class KnowledgeProposalParseError extends Error {
  constructor(
    public readonly findingId: string,
    public readonly subject: string,
    message: string,
  ) {
    super(`proposeFromFinding(${findingId}, subject=${subject}): ${message}`)
    this.name = 'KnowledgeProposalParseError'
  }
}

/**
 * Convert one `AnalystFinding` into a knowledge proposal.
 *
 * @param finding - The analyst finding to translate.
 * @returns The proposal, or `null` when the finding's subject is missing
 *   or is not in the `agent-knowledge:` namespace (`websearch:outdated:*`,
 *   `tool-doc:*`, `system-prompt:*`, `memory:*`, …).
 * @throws KnowledgeProposalParseError when the subject IS in the
 *   `agent-knowledge:` namespace but is malformed (missing kind or locus,
 *   unknown kind, empty wiki page slug or heading) — that's a bug in the
 *   analyst prompt and should fail loud.
 *
 * Caller convention: feed the function the analyst's full findings
 * list and filter out the `null`s; the orchestrator passes the
 * remaining proposals to the existing review / apply pipeline.
 */
export function proposeFromFinding(finding: AnalystFinding): KnowledgeProposal | null {
  const subject = finding.subject
  if (!subject) return null
  if (!subject.startsWith(KNOWLEDGE_PREFIX)) return null

  // Only the first `:` after the prefix separates kind from locus; any
  // further colons belong to the locus and are re-joined verbatim.
  const [kindPart, ...locusParts] = subject.slice(KNOWLEDGE_PREFIX.length).split(':')
  const locus = locusParts.join(':')
  if (!kindPart || !locus) {
    throw new KnowledgeProposalParseError(
      finding.finding_id,
      subject,
      'expected `agent-knowledge:<kind>:<locus>` shape',
    )
  }

  const baseMeta: KnowledgeProposal['metadata'] = {
    severity: finding.severity,
    confidence: finding.confidence,
    evidence_uri: finding.evidence_refs[0]?.uri,
    analyst_id: finding.analyst_id,
  }

  switch (kindPart) {
    case 'wiki':
      return wikiProposal(finding, locus, baseMeta)
    case 'claim':
      return claimProposal(finding, locus, baseMeta)
    case 'raw':
      return liftRawProposal(finding, locus, baseMeta)
    case 'stale':
      return markStaleProposal(finding, locus, baseMeta)
    default:
      throw new KnowledgeProposalParseError(
        finding.finding_id,
        subject,
        `unknown kind "${kindPart}" (expected one of: wiki | claim | raw | stale)`,
      )
  }
}

/**
 * Build a `create-page` proposal (locus `<page-slug>`) or an
 * `append-section` proposal (locus `<page-slug>#<heading>`).
 *
 * @throws KnowledgeProposalParseError when the page slug before `#` is
 *   empty, or when a `#` is present but no heading follows it. Both used
 *   to be accepted silently (writing `knowledge/untitled.md`, or turning a
 *   section request into a page creation).
 */
function wikiProposal(
  finding: AnalystFinding,
  locus: string,
  metadata: KnowledgeProposal['metadata'],
): KnowledgeProposal {
  const hashIdx = locus.indexOf('#')
  const pageSlug = hashIdx >= 0 ? locus.slice(0, hashIdx) : locus
  const heading = hashIdx >= 0 ? locus.slice(hashIdx + 1) : null

  if (!pageSlug.trim()) {
    throw new KnowledgeProposalParseError(
      finding.finding_id,
      finding.subject ?? '',
      'wiki subject is missing the page slug before `#`',
    )
  }
  if (heading !== null && !heading.trim()) {
    throw new KnowledgeProposalParseError(
      finding.finding_id,
      finding.subject ?? '',
      'wiki subject has `#` but no section heading after it',
    )
  }

  const path = `knowledge/${ensureSlug(pageSlug)}.md`
  const body = renderWikiBody(finding, pageSlug, heading)
  return {
    id: `prop-${finding.finding_id}`,
    sourceFindingId: finding.finding_id,
    kind: heading ? 'append-section' : 'create-page',
    locus: pageSlug,
    writeBlocks: [{ path, content: body }],
    metadata,
  }
}

/**
 * Build a `create-claim` proposal: no page text changes, only a draft
 * `KnowledgeClaim` whose refs point back at the finding's evidence.
 */
function claimProposal(
  finding: AnalystFinding,
  locus: string,
  metadata: KnowledgeProposal['metadata'],
): KnowledgeProposal {
  // Evidence refs without a URI cannot be anchored, so they are dropped.
  const refs: ClaimRef[] = finding.evidence_refs
    .filter((r) => r.uri)
    .map((r) => ({
      sourceId: `analyst-finding:${finding.finding_id}`,
      anchorId: r.uri,
      quote: r.excerpt,
    }))
  // An empty / whitespace-only recommended action is treated as absent
  // (same truthiness rule the page renderers use) so the claim text is
  // never blank.
  const text = finding.recommended_action?.trim() ? finding.recommended_action : finding.claim
  const claim: KnowledgeClaim = {
    id: `claim-${finding.finding_id}`,
    text,
    refs,
    confidence: finding.confidence,
    status: 'draft',
    metadata: {
      analyst_id: finding.analyst_id,
      source_finding_id: finding.finding_id,
      topic: locus,
    },
  }
  return {
    id: `prop-${finding.finding_id}`,
    sourceFindingId: finding.finding_id,
    kind: 'create-claim',
    locus,
    writeBlocks: [],
    claim,
    metadata,
  }
}

/**
 * Build a `lift-raw` proposal: a draft curated page at
 * `knowledge/<slug(sourceId)>.md` whose front matter records the raw
 * source id and the originating finding.
 */
function liftRawProposal(
  finding: AnalystFinding,
  sourceId: string,
  metadata: KnowledgeProposal['metadata'],
): KnowledgeProposal {
  const path = `knowledge/${ensureSlug(sourceId)}.md`
  const body = [
    ...frontMatter([
      ['title', sourceId],
      ['source', sourceId],
      ['status', 'draft'],
      ['lifted_from_finding', finding.finding_id],
    ]),
    '',
    '## Why this page exists',
    '',
    finding.claim,
    '',
    ...optionalSection('## Rationale', finding.rationale),
    ...optionalSection('## Recommended action', finding.recommended_action),
  ].join('\n')
  return {
    id: `prop-${finding.finding_id}`,
    sourceFindingId: finding.finding_id,
    kind: 'lift-raw',
    locus: sourceId,
    writeBlocks: [{ path, content: body }],
    metadata,
  }
}

/**
 * Build a `mark-stale` proposal: a sidecar file
 * `knowledge/<slug(pageSlug)>.stale.md` with `status: superseded` front
 * matter and the finding's claim / rationale / action as the body.
 */
function markStaleProposal(
  finding: AnalystFinding,
  pageSlug: string,
  metadata: KnowledgeProposal['metadata'],
): KnowledgeProposal {
  const path = `knowledge/${ensureSlug(pageSlug)}.stale.md`
  const body = [
    ...frontMatter([
      ['title', `${pageSlug} (marked stale)`],
      ['status', 'superseded'],
      ['superseded_by_finding', finding.finding_id],
      ['confidence', finding.confidence],
    ]),
    '',
    '## Why marked stale',
    '',
    finding.claim,
    '',
    ...optionalSection('## Evidence', finding.rationale),
    ...optionalSection('## Action', finding.recommended_action),
  ].join('\n')
  return {
    id: `prop-${finding.finding_id}`,
    sourceFindingId: finding.finding_id,
    kind: 'mark-stale',
    locus: pageSlug,
    writeBlocks: [{ path, content: body }],
    metadata,
  }
}

/** Outcome of translating a whole batch of findings. */
export interface ProposeFromFindingsResult {
  /** Proposals for findings that parsed, in input order. */
  proposals: KnowledgeProposal[]
  /** Number of findings that were not knowledge-base concerns (`null` results). */
  skipped: number
  /** Parse errors, one per malformed knowledge subject, in input order. */
  errors: KnowledgeProposalParseError[]
}

/**
 * Plural convenience: filter + map across an entire findings batch
 * with one call. `KnowledgeProposalParseError`s are collected into
 * `errors[]` so the loop can decide per error whether to abort or
 * continue; any other exception is re-thrown unchanged.
 */
export function proposeFromFindings(
  findings: ReadonlyArray<AnalystFinding>,
): ProposeFromFindingsResult {
  const proposals: KnowledgeProposal[] = []
  const errors: KnowledgeProposalParseError[] = []
  let skipped = 0
  for (const finding of findings) {
    try {
      const proposal = proposeFromFinding(finding)
      if (proposal) proposals.push(proposal)
      else skipped += 1
    } catch (err) {
      if (err instanceof KnowledgeProposalParseError) errors.push(err)
      else throw err
    }
  }
  return { proposals, skipped, errors }
}

/**
 * Turn arbitrary text into a filesystem-safe slug: lower-case, every run
 * of characters outside `[a-z0-9-]` becomes one `-`, at most 200
 * characters, no leading or trailing `-`. Falls back to `'untitled'`
 * when nothing usable remains.
 */
function ensureSlug(s: string): string {
  const slug = s
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 200)
    // Trim AFTER truncating so a cut that lands on a dash cannot leave a
    // trailing `-` in the file name.
    .replace(/-+$/, '')
  return slug || 'untitled'
}

/** YAML words that would be parsed as booleans / null if left unquoted. */
const YAML_RESERVED_WORDS = new Set(['true', 'false', 'null', 'yes', 'no', 'on', 'off', 'y', 'n'])

/**
 * Values that are safe as YAML plain scalars: start with a letter, then
 * only word characters, spaces, `.`, `(`, `)`, `/`, `-`, and no trailing
 * space. Everything else gets quoted.
 */
const YAML_PLAIN_SAFE = /^[A-Za-z](?:[\w .()\/-]*[\w.()\/-])?$/

/**
 * Render a string as a single-line YAML scalar. Safe values are emitted
 * verbatim (so existing front matter is unchanged); anything containing
 * `:`, `#`, quotes, newlines, etc. is emitted as a JSON string, which is
 * also a valid YAML double-quoted scalar. This stops analyst-provided
 * text from breaking or injecting keys into the front matter.
 */
function yamlScalar(value: string): string {
  const plainSafe = YAML_PLAIN_SAFE.test(value) && !YAML_RESERVED_WORDS.has(value.toLowerCase())
  return plainSafe ? value : JSON.stringify(value)
}

/** Render `key: value` pairs as the lines of a `---`-delimited front-matter block. */
function frontMatter(fields: ReadonlyArray<readonly [string, string | number]>): string[] {
  return [
    '---',
    ...fields.map(
      ([key, value]) => `${key}: ${typeof value === 'number' ? value : yamlScalar(value)}`,
    ),
    '---',
  ]
}

/** Lines for a headed markdown section, or nothing when `text` is empty / absent. */
function optionalSection(heading: string, text: string | undefined): string[] {
  return text ? [heading, '', text, ''] : []
}

/**
 * Render the markdown for a wiki proposal.
 *
 * With a `heading`, the result is a `## <heading>` section (no front
 * matter) ending in a provenance line; without one it is a full draft
 * page with front matter and a `# <Title>` derived from the slug.
 */
function renderWikiBody(finding: AnalystFinding, slug: string, heading: string | null): string {
  if (heading) {
    return [
      `## ${heading}`,
      '',
      finding.claim,
      '',
      ...optionalSection('### Rationale', finding.rationale),
      ...optionalSection('### Action', finding.recommended_action),
      `_Drafted from finding ${finding.finding_id} (confidence ${finding.confidence.toFixed(2)})._`,
    ].join('\n')
  }

  const title = humanize(slug)
  return [
    ...frontMatter([
      ['title', title],
      ['status', 'draft'],
      ['drafted_from_finding', finding.finding_id],
      ['confidence', finding.confidence],
    ]),
    '',
    `# ${title}`,
    '',
    finding.claim,
    '',
    ...optionalSection('## Rationale', finding.rationale),
    ...optionalSection('## Recommended action', finding.recommended_action),
  ].join('\n')
}

/**
 * Turn a dash-separated slug into a title: each non-empty segment gets
 * its first character upper-cased and segments are joined with spaces.
 * Returns the slug unchanged when it has no non-empty segments.
 */
function humanize(slug: string): string {
  const words = slug
    .split('-')
    .filter(Boolean)
    .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
  return words.join(' ') || slug
}