diff --git a/src/lib/agents/wiki-librarian/prompt.ts b/src/lib/agents/wiki-librarian/prompt.ts index f89f8c9..5106bfc 100644 --- a/src/lib/agents/wiki-librarian/prompt.ts +++ b/src/lib/agents/wiki-librarian/prompt.ts @@ -23,6 +23,15 @@ * runs avoids "consolidating" two off-topic articles into one * tidier-but-still-off-topic article. * + * Workflow step 3 (fix fabricated user names) is the second known + * recovery surface for a per-conversation agent failure mode. A + * conversation that mentions a friend named "Elliot" can produce a + * user-article that calls the user "Elliot" instead of their actual + * configured name. The renderUserProfileBlock helper now carries + * HARD anti-fabrication wording so this failure mode should be rare + * going forward, but the librarian still runs the corrective pass + * so any historical occurrences get cleaned up on the next 12h cycle. + * * Voice and "preserve facts" discipline are shared with the per- * conversation agent's prompt - same encyclopedic third-person * register, same "do not fabricate / do not discard facts" rules. @@ -38,23 +47,55 @@ export interface WikiLibrarianUserProfile { location: string | null; } +/** + * Same strict wording the per-conversation agent uses - HARD rules + * around the configured name, with explicit anti-fabrication language. + * The librarian inherits the same risk: a name from conversation + * context can leak into an article that's actually about the user. + * See `../wiki/prompt.ts:renderUserProfileBlock` for the matching + * rationale block on the per-conversation side. + */ function renderUserProfileBlock( profile: WikiLibrarianUserProfile | null ): string { if (!profile) return ''; - const lines: string[] = []; - if (profile.name && profile.name.trim().length > 0) { - lines.push(`The user's name is ${profile.name.trim()}.`); + const name = + profile.name && profile.name.trim().length > 0 + ? 
profile.name.trim() + : null; + const location = + profile.location && profile.location.trim().length > 0 + ? profile.location.trim() + : null; + if (!name && !location) return ''; + const lines: string[] = ['**About the user:**', '']; + if (name) { + lines.push(`The user's name is **${name}**.`); + lines.push( + `When an article refers to the user themselves, the user's ` + + `name is **${name}** and ONLY ${name}. NEVER substitute ` + + `another name for the user, even if other names appear in ` + + `the article or in conversation history - those names belong ` + + `to other people the user knows. If you find an article that ` + + `appears to be about the user but uses a name OTHER than ` + + `${name} (a per-conversation agent hallucination is the ` + + `usual cause), wiki_update it to replace the wrong name with ` + + `${name} or a natural pronoun.` + ); + } else { + lines.push( + "The user has not supplied a name in Settings. When an article " + + "refers to the user themselves, the right rendering is a " + + "natural pronoun (\"they\") or the phrase \"the user\". " + + "If you find an article that appears to be about the user " + + "but uses an invented name, wiki_update to replace the " + + "name with a pronoun." + ); } - if (profile.location && profile.location.trim().length > 0) { - lines.push(`Their location is ${profile.location.trim()}.`); + if (location) { + lines.push(`Their location is ${location}.`); } - if (lines.length === 0) return ''; - lines.push( - "Use the user's name when an article refers to them as the subject, " + - "rather than the generic phrase \"the user\"." 
- ); - return ['**About the user:**', '', ...lines].join('\n'); + return lines.join('\n'); } export function buildWikiLibrarianPrompt(opts: { @@ -158,13 +199,29 @@ export function buildWikiLibrarianPrompt(opts: { ' wiki_update the article that is the better home (longer,', ' broader, or more accurate) to absorb the unique facts from', ' the duplicate, then wiki_delete the duplicate.', - '3. **Check for stale facts.** When an excerpt makes a specific', + '3. **Fix fabricated names for the user.** If the "About the', + ' user" block above has a name, scan for articles that appear', + ' to be about the user but use a DIFFERENT name (a common', + ' per-conversation hallucination is grabbing a friend\'s name', + ' from conversation context and applying it to the user).', + ' Read the full body via wiki_search to confirm the article', + ' is in fact about the user, then wiki_update to replace the', + ' wrong name with the configured one (or a natural pronoun).', + ' Use memory_search and conversation_search to disambiguate -', + ' if a name like "Elliot" appears in memories as someone the', + ' user knows, the article that calls the user "Elliot" is', + ' the wrong one to fix that way; writing an article ABOUT', + ' Elliot is out of your scope (you cannot wiki_create), but', + ' you CAN wiki_update the misnamed article to use the right', + ' name for the user and let the per-conversation agent land', + ' the separate Elliot article on its own next cycle.', + '4. **Check for stale facts.** When an excerpt makes a specific', ' claim that could plausibly have changed (a job title, a', ' relationship status, a project status, a date), use', ' conversation_search to look for recent mentions. If you find', ' a clear contradiction, wiki_update the article. If you find', ' nothing or only ambiguous evidence, leave it alone.', - '4. **Tighten subject boundaries.** When two articles cover', + '5.
**Tighten subject boundaries.** When two articles cover', ' adjacent topics that confusingly bleed into each other (a', ' "Maya" article and a "household" article that both cover', ' the same person), decide which article is the right home', diff --git a/src/lib/agents/wiki/prompt.ts b/src/lib/agents/wiki/prompt.ts index e0a94e5..e7f089c 100644 --- a/src/lib/agents/wiki/prompt.ts +++ b/src/lib/agents/wiki/prompt.ts @@ -26,6 +26,21 @@ * block tells the model their name + location so articles * about the user themselves can use the actual name rather * than the generic phrase. + * - **No fabricated names for the user.** Production traffic + * surfaced the model inventing names for the user when the + * conversation happened to mention someone else by name (e.g. + * a brainstorm where the user mentioned a friend named "Elliot" + * produced articles that called the user "Elliot" instead of + * the configured "Jeff"). The renderUserProfileBlock helper now + * uses HARD anti-fabrication wording ("the user's name is + * **Jeff** and ONLY Jeff", "NEVER invent another name for the + * user, even if other names appear in the conversation") rather + * than the original soft "prefer their name" wording. The + * unknown-name path (no name in Settings) is split out so we + * don't tell the model to "use their name" when it has none. + * The body's "Do not fabricate" section also gains an explicit + * "do not fabricate names" line that points back to the profile + * block as the single source of truth. * - **User-centric scope.** Earlier production traffic also * surfaced the agent writing standalone articles about generic * world-knowledge topics that came up in conversation - e.g. @@ -83,25 +98,60 @@ export interface WikiUserProfile { * Settings form pays zero tokens for the section. Matches the * journal's `buildUserProfileNote` shape so the voice stays consistent * across surfaces. + * + * The wording around the name is intentionally strict. 
Production + * traffic showed the model fabricating names for the user (e.g. an + * article was written about "Elliot" when the configured name was + * "Jeff", because the conversation mentioned a friend named Elliot + * and the model conflated the user with someone else in context). + * The block now uses HARD rules ("ONLY this name", "NEVER invent + * another name") rather than the original soft "prefer their name". + * The unknown-name path (location set, name not) is split out so we + * don't tell the model to "use their name" when no name was supplied. */ export function renderUserProfileBlock( profile: WikiUserProfile | null ): string { if (!profile) return ''; - const lines: string[] = []; - if (profile.name && profile.name.trim().length > 0) { - lines.push(`The user's name is ${profile.name.trim()}.`); + const name = + profile.name && profile.name.trim().length > 0 + ? profile.name.trim() + : null; + const location = + profile.location && profile.location.trim().length > 0 + ? profile.location.trim() + : null; + if (!name && !location) return ''; + const lines: string[] = ['**About the user:**', '']; + if (name) { + lines.push(`The user's name is **${name}**.`); + lines.push( + `When an article refers to the user themselves, the user's ` + + `name is **${name}** and ONLY ${name}. NEVER invent another ` + + `name for the user, even if other names appear in the ` + + `conversation - those other names belong to other people the ` + + `user knows. If the conversation mentions a friend named ` + + `Maya, an article about the user does not call the user ` + + `Maya; it calls the user ${name}. If you are uncertain ` + + `whether the article subject IS the user, default to using ` + + `the literal name from context (Maya, Elliot, etc.) for that ` + + `subject and reserve "${name}" for explicit references to ` + + `the user. 
A natural pronoun ("they") is also fine where the ` + + `prose flows better than repeating the name.` + ); + } else { + lines.push( + "The user has not supplied a name in Settings. When an article " + + "refers to the user themselves, use a natural pronoun " + + "(\"they\") or the phrase \"the user\". NEVER invent a name " + + "for the user, even if other names appear in the conversation " + + "- those names belong to other people the user knows." + ); } - if (profile.location && profile.location.trim().length > 0) { - lines.push(`Their location is ${profile.location.trim()}.`); + if (location) { + lines.push(`Their location is ${location}.`); } - if (lines.length === 0) return ''; - lines.push( - "When an article refers to the user themselves, prefer their name " + - "(or a natural pronoun if their name is a single first name) over " + - "the generic phrase \"the user\"." - ); - return ['**About the user:**', '', ...lines].join('\n'); + return lines.join('\n'); } export function buildWikiAutonomousPrompt( @@ -246,6 +296,14 @@ const WIKI_AUTONOMOUS_BODY_LINES = [ "or in memories you read via memory_search. Don't import outside", 'knowledge.', '', + '**Do not fabricate names** - especially names for the user. The', + '"About the user" block above (when present) is the single source', + 'of truth for what to call the user. Other names that appear in the', + 'conversation belong to other people the user knows; never assign', + 'them to the user. If you cannot tell who the article subject is,', + 'use the literal name as it appears in the conversation rather than', + 'inventing one.', + '', '**Be conservative.** Fewer high-signal articles beat many noisy', 'ones. The bar for updating is "the conversation added durable', 'information about that subject", not "the conversation mentioned',