From 59ea76df21f09bbf0c74835b9b848b3e446b607e Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 13 May 2026 18:03:06 -0700 Subject: [PATCH 1/3] =?UTF-8?q?Claude=20agent:=20Phase=2013=20=E2=80=94=20?= =?UTF-8?q?session=20restoration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement IAgent.getSessionMessages for the Claude provider so the workbench can reload an existing Claude session's full transcript across agent-host restarts. Unblocks self-hosting. - claudeAgentSdkService.ts: add getSessionMessages binding - claudeReplayMapper.ts (NEW): SessionMessage[] -> readonly Turn[] per CONTEXT M7. Pure function; no by-products, no persistence. Splits SDK shape detection (parseSessionMessage adapter) from the stateful reducer (ReplayBuilder). - claudeAgent.ts: replace getSessionMessages stub with subagent URI dispatch + provisional check + SDK fetch + replay, all wrapped with the listSessions-style warn-log-and-return-[] resilience. - claudeReplayMapper.test.ts: 10 fixtures covering M7 grouping rules (text/tool_result/system, tail-Turn state, subagent markers, CLI-echo synthetic-message drop). - claudeAgent.test.ts: 4 Phase 13 integration tests (happy path, subagent URI, provisional session, SDK throw resilience). - CONTEXT.md: relax M7 notification gate to admit all priorities pending real-world data on priority: 'low' content. - phase13-plan.md (NEW) + roadmap.md: capture decisions, drift, council-review fixes, and mark Phase 13 done. 
--- .../platform/agentHost/node/claude/CONTEXT.md | 2 +- .../agentHost/node/claude/claudeAgent.ts | 42 +- .../node/claude/claudeAgentSdkService.ts | 17 +- .../node/claude/claudeReplayMapper.ts | 502 ++++++++++++++++++ .../agentHost/node/claude/phase13-plan.md | 233 ++++++++ .../platform/agentHost/node/claude/roadmap.md | 250 ++++++--- .../test/node/claudeAgent.integrationTest.ts | 6 +- .../agentHost/test/node/claudeAgent.test.ts | 107 +++- .../test/node/claudeReplayMapper.test.ts | 270 ++++++++++ 9 files changed, 1345 insertions(+), 84 deletions(-) create mode 100644 src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts create mode 100644 src/vs/platform/agentHost/node/claude/phase13-plan.md create mode 100644 src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts diff --git a/src/vs/platform/agentHost/node/claude/CONTEXT.md b/src/vs/platform/agentHost/node/claude/CONTEXT.md index 8d8ce88e5e90e..b438ad0c5f03b 100644 --- a/src/vs/platform/agentHost/node/claude/CONTEXT.md +++ b/src/vs/platform/agentHost/node/claude/CONTEXT.md @@ -814,7 +814,7 @@ system messages as `SystemNotificationResponsePart`: | `SDKSystemMessage` subtype | Render? | Rationale | |---|---|---| | `compact_boundary` | Yes | "Conversation compacted" — context-loss event | -| `notification` (priority ≥ medium) | Yes | Loop-side text notifications | +| `notification` (all priorities) | Yes | Loop-side text notifications. Earlier draft gated on `priority ≥ medium`; relaxed to all priorities pending real-world data on what `priority: 'low'` notifications actually contain. Revisit and re-introduce the gate if `low` notifications turn out to be noise (transcript-only TODO). 
| | `api_retry`, `plugin_install`, `auth_status`, `status` | No | Live UI signals; not transcript content | | `hook_started`, `hook_progress`, `hook_response` | No | Decorate the associated `ToolCall`, don't stand alone | | anything else | Drop by default | Conservative; opt in subtypes as needs emerge | diff --git a/src/vs/platform/agentHost/node/claude/claudeAgent.ts b/src/vs/platform/agentHost/node/claude/claudeAgent.ts index 2fded80e68f03..fa40f03667726 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgent.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgent.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import type { CCAModel } from '@vscode/copilot-api'; -import type { Options, PermissionMode, SDKSessionInfo, SDKUserMessage } from '@anthropic-ai/claude-agent-sdk'; +import type { Options, PermissionMode, SDKSessionInfo, SDKUserMessage, SessionMessage } from '@anthropic-ai/claude-agent-sdk'; import { SequencerByKey } from '../../../../base/common/async.js'; import { CancellationError } from '../../../../base/common/errors.js'; import { Emitter } from '../../../../base/common/event.js'; @@ -23,8 +23,9 @@ import { SessionConfigKey } from '../../common/sessionConfigKeys.js'; import { AgentProvider, AgentSession, AgentSignal, GITHUB_COPILOT_PROTECTED_RESOURCE, IAgent, IAgentCreateSessionConfig, IAgentCreateSessionResult, IAgentDescriptor, IAgentMaterializeSessionEvent, IAgentModelInfo, IAgentResolveSessionConfigParams, IAgentSessionConfigCompletionsParams, IAgentSessionMetadata, IAgentSessionProjectInfo } from '../../common/agentService.js'; import type { ResolveSessionConfigResult, SessionConfigCompletionsResult } from '../../common/state/protocol/commands.js'; import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js'; +import { mapSessionMessagesToTurns } from './claudeReplayMapper.js'; import { PolicyState, ProtectedResourceMetadata, type 
ModelSelection, type ToolDefinition } from '../../common/state/protocol/state.js'; -import { CustomizationRef, SessionInputResponseKind, type MessageAttachment, type PendingMessage, type SessionInputAnswer, type ToolCallResult, type Turn } from '../../common/state/sessionState.js'; +import { CustomizationRef, isSubagentSession, SessionInputResponseKind, type MessageAttachment, type PendingMessage, type SessionInputAnswer, type ToolCallResult, type Turn } from '../../common/state/sessionState.js'; import { IAgentConfigurationService } from '../agentConfigurationService.js'; import { IAgentHostGitService } from '../agentHostGitService.js'; import { projectFromCopilotContext } from '../copilot/copilotGitProject.js'; @@ -598,17 +599,34 @@ export class ClaudeAgent extends Disposable implements IAgent { } /** - * Full transcript reconstruction from the SDK event log lands in - * Phase 13; the bare method shape is required by {@link IAgent}. + * Phase 13 — reconstruct the full turn history from the SDK's on-disk + * JSONL transcript. Out-of-process: no live `Query` required. Subagent + * URIs (`/subagent/`) throw `TODO: Phase 12` until + * Phase 12 wires `getSubagentMessages`. Provisional sessions return `[]`. + * Resilient: any failure (transcript fetch, mapping, backfill) warn-logs + * and returns `[]` rather than propagating — mirrors `listSessions`. */ - getSessionMessages(_session: URI): Promise { - // Phase 5 has nothing to reconstruct: there is no SDK Query - // running yet and no event log on disk has been read. The agent - // service surfaces in-memory provisional turns until Phase 13 - // implements transcript reconstruction from the SDK event log. - // A fresh array per call avoids leaking mutations across - // subscribers. 
- return Promise.resolve([]); + async getSessionMessages(session: URI): Promise { + if (isSubagentSession(session)) { + throw new Error('TODO: Phase 12: subagent transcript fetch not yet implemented'); + } + const sessionId = AgentSession.id(session); + if (this._provisionalSessions.has(sessionId)) { + return []; + } + let transcript: readonly SessionMessage[]; + try { + transcript = await this._sdkService.getSessionMessages(sessionId, { includeSystemMessages: true }); + } catch (err) { + this._logService.warn(`[Claude] getSessionMessages SDK fetch failed for ${sessionId}`, err); + return []; + } + try { + return mapSessionMessagesToTurns(transcript, session, this._logService); + } catch (err) { + this._logService.warn(`[Claude] replay mapper threw for ${sessionId}`, err); + return []; + } } async listSessions(): Promise { diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index bc410cfa68afa..3c2507c72fe1a 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -3,7 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { ListSessionsOptions, Options, SDKSessionInfo, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { GetSessionMessagesOptions, ListSessionsOptions, Options, SDKSessionInfo, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; import * as fs from 'fs'; import { pathToFileURL } from 'url'; import { join, resolve } from '../../../../base/common/path.js'; @@ -59,6 +59,15 @@ export interface IClaudeAgentSdkService { * can atomically dispatch the deferred `sessionAdded` notification. 
*/ startup(params: { options: Options; initializeTimeoutMs?: number }): Promise; + + /** + * Reads a session's full transcript from disk via the SDK. Out-of-process: + * no live `Query` is required — the SDK parses the JSONL file directly. + * Phase 13 calls this from {@link import('./claudeAgent.js').ClaudeAgent.getSessionMessages} + * with `{ includeSystemMessages: true }` so `compact_boundary` and other + * allowlisted system subtypes survive into the replay mapper. + */ + getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; } /** @@ -73,6 +82,7 @@ export interface IClaudeSdkBindings { listSessions(options?: ListSessionsOptions): Promise; getSessionInfo(sessionId: string): Promise; startup(params: { options: Options; initializeTimeoutMs?: number }): Promise; + getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; } /** @@ -122,6 +132,11 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return sdk.startup(params); } + async getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise { + const sdk = await this._getSdk(); + return sdk.getSessionMessages(sessionId, options); + } + private async _getSdk(): Promise { if (this._sdkModule) { return this._sdkModule; diff --git a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts new file mode 100644 index 0000000000000..5026d910aaaa5 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts @@ -0,0 +1,502 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import type { SessionMessage } from '@anthropic-ai/claude-agent-sdk'; +import type { URI } from '../../../../base/common/uri.js'; +import type { ILogService } from '../../../log/common/log.js'; +import { + ResponsePartKind, + ToolCallCancellationReason, + ToolCallConfirmationReason, + ToolCallStatus, + ToolResultContentType, + TurnState, + type ResponsePart, + type ToolCallCancelledState, + type ToolCallCompletedState, + type ToolCallResponsePart, + type ToolResultContent, + type Turn, +} from '../../common/state/protocol/state.js'; +import { buildSubagentSessionUri } from '../../common/state/sessionState.js'; +import { getClaudeToolDisplayName } from './claudeToolDisplay.js'; + +/** + * Phase 13 — replay mapper. Reduces a flat `SessionMessage[]` (the SDK's + * on-disk JSONL transcript) into the protocol's `Turn[]` shape per + * [CONTEXT.md M7](./CONTEXT.md). Pure function; no I/O, no DI. + * + * Distinct from the live mapper (`mapSDKMessageToAgentSignals`) because: + * - input shape differs (`SessionMessage` envelope vs `SDKMessage` union), + * - output shape differs (`Turn[]` vs `AgentSignal[]`), + * - replay has no `'result'` envelope (SDK doesn't persist it) and no + * `'stream_event'` lifecycle (terminal states only). + * + * Shared invariant with the live mapper: the `Map` + * attribution rule from M7 — `tool_result` legitimately lands in a later + * `'user'` envelope and must resolve back to the announcing `tool_use`'s + * turn. This mapper builds an equivalent local map during its single pass. 
+ */ +export function mapSessionMessagesToTurns( + messages: readonly SessionMessage[], + session: URI, + logService: ILogService, +): readonly Turn[] { + const builder = new ReplayBuilder(session, logService); + for (const msg of messages) { + const parsed = parseSessionMessage(msg); + if (parsed === undefined) { + continue; + } + builder.consume(parsed); + } + return builder.finish(); +} + +// #region Parsed message union — narrow-at-the-seam adapter + +interface UserTextBlock { readonly type: 'text'; readonly text: string } +interface UserToolResultBlock { readonly type: 'tool_result'; readonly tool_use_id: string; readonly content: unknown; readonly is_error: boolean } +interface AssistantBlock { readonly type: string; readonly text?: string; readonly thinking?: string; readonly id?: string; readonly name?: string; readonly input?: unknown } + +/** + * Discriminated union of replay-relevant message shapes. Everything that + * the mapper actually cares about is one of these; everything else (hooks, + * CLI-echo entries, unallowed system subtypes, malformed envelopes) returns + * `undefined` from {@link parseSessionMessage}. + * + * The split keeps SDK shape detection (this seam) separate from the + * stateful reduction (the {@link ReplayBuilder}) — see CONTEXT M7. 
+ */ +type ParsedSessionMessage = + | { readonly kind: 'user-text'; readonly uuid: string; readonly text: string } + | { readonly kind: 'user-tool-results'; readonly uuid: string; readonly results: readonly UserToolResultBlock[] } + | { readonly kind: 'assistant'; readonly uuid: string; readonly blocks: readonly AssistantBlock[] } + | { readonly kind: 'system-notification'; readonly uuid: string; readonly subtype: string; readonly text: string }; + +function parseSessionMessage(msg: SessionMessage): ParsedSessionMessage | undefined { + switch (msg.type) { + case 'user': return parseUserMessage(msg); + case 'assistant': return parseAssistantMessage(msg); + case 'system': return parseSystemMessage(msg); + default: return undefined; + } +} + +function parseUserMessage(msg: SessionMessage): ParsedSessionMessage | undefined { + const content = readUserContent(msg.message); + if (content === undefined) { + return undefined; + } + if (isCliEchoContent(content)) { + return undefined; + } + if (typeof content === 'string') { + return { kind: 'user-text', uuid: msg.uuid, text: content }; + } + const textBlocks = content.filter((b): b is UserTextBlock => b.type === 'text'); + if (textBlocks.length === 0) { + const results = content.filter((b): b is UserToolResultBlock => b.type === 'tool_result'); + return results.length > 0 ? { kind: 'user-tool-results', uuid: msg.uuid, results } : undefined; + } + // Mixed or text-only: text wins — matches prior behavior where tool_results + // in a text-bearing envelope are dropped (they should already have been delivered). 
+ return { kind: 'user-text', uuid: msg.uuid, text: textBlocks.map(b => b.text).join('\n') }; +} + +function parseAssistantMessage(msg: SessionMessage): ParsedSessionMessage | undefined { + const blocks = readAssistantBlocks(msg.message); + if (blocks === undefined || blocks.length === 0) { + return undefined; + } + return { kind: 'assistant', uuid: msg.uuid, blocks }; +} + +function parseSystemMessage(msg: SessionMessage): ParsedSessionMessage | undefined { + const subtype = readSystemSubtype(msg.message); + if (subtype === undefined || !ALLOWED_SYSTEM_SUBTYPES.has(subtype)) { + return undefined; + } + const text = readSystemText(msg.message) ?? `[${subtype}]`; + return { kind: 'system-notification', uuid: msg.uuid, subtype, text }; +} + +// #endregion + +// #region Builder + +/** + * Subagent-spawning tool names recognised by both `Task` (built-in, + * see [`sdk.d.ts:95`](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts)) + * and `Agent` (custom subagents, + * see [`sdk.d.ts:36`](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts)). + * The production extension matches both at `claudeMessageDispatch.ts:194`. + */ +const SUBAGENT_TOOL_NAMES: ReadonlySet = new Set(['Task', 'Agent']); + +/** + * Allowlist of `system` subtypes that survive replay as + * {@link ResponsePartKind.SystemNotification} parts on the active turn. + * Mirrors CONTEXT M7's table — anything not in this set is dropped. + */ +const ALLOWED_SYSTEM_SUBTYPES: ReadonlySet = new Set([ + 'compact_boundary', + 'notification', +]); + +/** + * CLI-echo markers the Claude Code CLI writes into the transcript for + * replay fidelity. They are `type: 'user'` envelopes whose `message.content` + * is a raw string starting with one of these tags — `` / + * `` (slash-command echoes like `/model claude-opus-4.7`), + * `` / `` (echo of the local + * handler's output, e.g. "Set model to claude-opus-4.7"), and + * `` (the "messages below were generated while…" + * preamble). 
The entries don't carry `isSynthetic` / `isMeta` reliably + * (the `/model` echo lacks both, verified empirically), so the only reliable + * discriminator is the content shape itself. Drop on replay so the workbench + * doesn't render them as user turns. + */ +const CLI_ECHO_MARKER_PATTERN = /^<(command-name|command-message|command-args|local-command-stdout|local-command-stderr|local-command-caveat)>/; + +interface InProgressTurn { + readonly id: string; + readonly userText: string; + readonly responseParts: ResponsePart[]; + /** + * `tool_use_id`s announced by THIS turn. Drained when the matching + * `tool_result` lands (which may arrive in this turn's user-side + * `tool_result` block or a later turn's). At turn close, non-empty → + * tail Turn marked `Cancelled`. + */ + readonly pendingToolUseIds: Set; + /** + * Stash of completed `ToolCallResponsePart`s waiting on their result + * content. `tool_use` opens with a placeholder; the matching + * `tool_result` fills it in. Keyed by `tool_use_id`. + */ + readonly toolCallParts: Map; +} + +class ReplayBuilder { + private readonly _turns: Turn[] = []; + private _active: InProgressTurn | undefined; + /** Cross-turn: tool_use_id → turnId of the announcing turn. */ + private readonly _toolUseToTurnId = new Map(); + + constructor(private readonly _session: URI, private readonly _logService: ILogService) { } + + consume(msg: ParsedSessionMessage): void { + switch (msg.kind) { + case 'user-text': + this._closeActive(); + this._active = { + id: msg.uuid, + userText: msg.text, + responseParts: [], + pendingToolUseIds: new Set(), + toolCallParts: new Map(), + }; + return; + case 'user-tool-results': + for (const block of msg.results) { + this._attachToolResult(block); + } + return; + case 'assistant': + this._consumeAssistant(msg); + return; + case 'system-notification': + if (this._active === undefined) { + // System notification before any user message — drop. Without an active turn there's nowhere to attach. 
+ return; + } + this._active.responseParts.push({ + kind: ResponsePartKind.SystemNotification, + content: msg.text, + }); + return; + } + } + + finish(): readonly Turn[] { + this._closeActive(); + return this._turns; + } + + private _consumeAssistant(msg: ParsedSessionMessage & { kind: 'assistant' }): void { + if (this._active === undefined) { + // Assistant message without a preceding user message — defensive: synthesize an empty user turn keyed on the assistant's parent uuid would be wrong; just drop with a warn. + this._logService.warn(`[claudeReplayMapper] assistant envelope ${msg.uuid} arrived before any user message; dropping`); + return; + } + let textPartCounter = 0; + let reasoningPartCounter = 0; + for (const block of msg.blocks) { + if (block.type === 'text' && typeof block.text === 'string') { + this._active.responseParts.push({ + kind: ResponsePartKind.Markdown, + id: `${this._active.id}#${msg.uuid}#text-${textPartCounter++}`, + content: block.text, + }); + } else if (block.type === 'thinking' && typeof block.thinking === 'string') { + this._active.responseParts.push({ + kind: ResponsePartKind.Reasoning, + id: `${this._active.id}#${msg.uuid}#thinking-${reasoningPartCounter++}`, + content: block.thinking, + }); + } else if (block.type === 'tool_use' && typeof block.id === 'string' && typeof block.name === 'string') { + this._openToolUse(block.id, block.name, block.input); + } + // Other block types (server_tool_use, etc.) are dropped silently per M7. + } + } + + private _openToolUse(toolUseId: string, toolName: string, input: unknown): void { + if (this._active === undefined) { + return; + } + const isSubagent = SUBAGENT_TOOL_NAMES.has(toolName); + const displayName = getClaudeToolDisplayName(toolName); + // Build a placeholder Cancelled state by default; replaced with Completed when the tool_result lands. 
+ const placeholder: ToolCallCancelledState = { + status: ToolCallStatus.Cancelled, + toolCallId: toolUseId, + toolName, + displayName, + invocationMessage: displayName, + toolInput: typeof input === 'string' ? input : input !== undefined ? safeStringify(input) : undefined, + reason: ToolCallCancellationReason.Skipped, + ...(isSubagent ? { _meta: { toolKind: 'subagent' as const } } : {}), + }; + const part: ToolCallResponsePart = { + kind: ResponsePartKind.ToolCall, + toolCall: placeholder, + }; + this._active.responseParts.push(part); + this._active.toolCallParts.set(toolUseId, part); + this._active.pendingToolUseIds.add(toolUseId); + this._toolUseToTurnId.set(toolUseId, this._active.id); + } + + private _attachToolResult(block: UserToolResultBlock): void { + const announcingTurnId = this._toolUseToTurnId.get(block.tool_use_id); + if (announcingTurnId === undefined) { + this._logService.warn(`[claudeReplayMapper] tool_result for unknown tool_use_id ${block.tool_use_id}`); + return; + } + // Find the part — it lives on the announcing turn (which may be `_active` or one already pushed to `_turns`). + const part = this._findToolCallPart(announcingTurnId, block.tool_use_id); + if (part === undefined) { + return; + } + const isError = block.is_error; + const previousState = part.toolCall; + const isSubagent = previousState._meta?.toolKind === 'subagent'; + const content: ToolResultContent[] = extractToolResultContent(block.content) ?? []; + if (isSubagent) { + content.push({ + type: ToolResultContentType.Subagent, + resource: buildSubagentSessionUri(this._session.toString(), previousState.toolCallId), + title: previousState.displayName, + }); + } + const completed: ToolCallCompletedState = { + status: ToolCallStatus.Completed, + toolCallId: previousState.toolCallId, + toolName: previousState.toolName, + displayName: previousState.displayName, + invocationMessage: previousState.invocationMessage ?? 
previousState.displayName, + toolInput: previousState.status === ToolCallStatus.Streaming ? undefined : previousState.toolInput, + confirmed: ToolCallConfirmationReason.NotNeeded, + success: !isError, + pastTenseMessage: `${previousState.displayName} finished`, + content: content.length > 0 ? content : undefined, + ...(previousState._meta ? { _meta: previousState._meta } : {}), + }; + part.toolCall = completed; + // Drain pending tracker on the announcing turn — but only if that + // turn is still in progress. Committed turns have their state + // locked at close time per Fixture 6b ("orphan in turn N does + // NOT cancel turn N+1"); a late-arriving tool_result for a + // committed turn doesn't re-promote it. + if (this._active?.id === announcingTurnId) { + this._active.pendingToolUseIds.delete(block.tool_use_id); + } + } + + private _findToolCallPart(turnId: string, toolUseId: string): ToolCallResponsePart | undefined { + if (this._active && this._active.id === turnId) { + return this._active.toolCallParts.get(toolUseId); + } + // Already-closed turn: search committed Turns. Linear scan is fine — replay is one-shot per session and turns are O(tens-hundreds). + for (let i = this._turns.length - 1; i >= 0; i--) { + if (this._turns[i].id !== turnId) { + continue; + } + for (const part of this._turns[i].responseParts) { + if (part.kind === ResponsePartKind.ToolCall && part.toolCall.toolCallId === toolUseId) { + return part; + } + } + return undefined; + } + return undefined; + } + + private _closeActive(): void { + if (this._active === undefined) { + return; + } + const a = this._active; + const state = a.pendingToolUseIds.size === 0 ? 
TurnState.Complete : TurnState.Cancelled; + const turn: Turn = { + id: a.id, + userMessage: { text: a.userText }, + responseParts: a.responseParts, + usage: undefined, + state, + }; + this._turns.push(turn); + this._active = undefined; + } +} + +// #endregion + +// #region Helpers — narrow-at-the-seam shape readers + +/** + * Returns string content (legacy form) or an array of recognised user + * blocks (text + tool_result). Anything else returns `undefined` and the + * caller drops the message — matches the production extension's parser + * semantics per CONTEXT M7 glossary. + */ +function readUserContent(raw: unknown): string | ReadonlyArray | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const content = (raw as { content?: unknown }).content; + if (typeof content === 'string') { + return content.length > 0 ? content : undefined; + } + if (!Array.isArray(content) || content.length === 0) { + return undefined; + } + const out: (UserTextBlock | UserToolResultBlock)[] = []; + for (const block of content) { + if (block === null || typeof block !== 'object') { + continue; + } + const b = block as { type?: unknown; text?: unknown; tool_use_id?: unknown; content?: unknown; is_error?: unknown }; + if (b.type === 'text' && typeof b.text === 'string') { + out.push({ type: 'text', text: b.text }); + } else if (b.type === 'tool_result' && typeof b.tool_use_id === 'string') { + out.push({ type: 'tool_result', tool_use_id: b.tool_use_id, content: b.content, is_error: b.is_error === true }); + } + } + return out.length > 0 ? 
out : undefined; +} + +function readAssistantBlocks(raw: unknown): readonly AssistantBlock[] | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const content = (raw as { content?: unknown }).content; + if (!Array.isArray(content)) { + return undefined; + } + const out: AssistantBlock[] = []; + for (const block of content) { + if (block === null || typeof block !== 'object') { + continue; + } + const b = block as { type?: unknown; text?: unknown; thinking?: unknown; id?: unknown; name?: unknown; input?: unknown }; + if (typeof b.type !== 'string') { + continue; + } + out.push({ + type: b.type, + text: typeof b.text === 'string' ? b.text : undefined, + thinking: typeof b.thinking === 'string' ? b.thinking : undefined, + id: typeof b.id === 'string' ? b.id : undefined, + name: typeof b.name === 'string' ? b.name : undefined, + input: b.input, + }); + } + return out; +} + +function readSystemSubtype(raw: unknown): string | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const subtype = (raw as { subtype?: unknown }).subtype; + return typeof subtype === 'string' ? subtype : undefined; +} + +function readSystemText(raw: unknown): string | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const r = raw as { text?: unknown; message?: unknown }; + if (typeof r.text === 'string') { + return r.text; + } + if (typeof r.message === 'string') { + return r.message; + } + return undefined; +} + +/** + * Mirror of the live mapper's helper — kept inline so the two mappers + * don't yet need a shared module. If a third consumer appears, factor + * to `claudeToolResultContent.ts`. + */ +function extractToolResultContent(content: unknown): { type: ToolResultContentType.Text; text: string }[] | undefined { + if (typeof content === 'string') { + return content.length > 0 ? 
[{ type: ToolResultContentType.Text, text: content }] : undefined; + } + if (!Array.isArray(content)) { + return undefined; + } + const out: { type: ToolResultContentType.Text; text: string }[] = []; + for (const block of content) { + if (block === null || typeof block !== 'object') { + continue; + } + const b = block as { type?: unknown; text?: unknown }; + if (b.type === 'text' && typeof b.text === 'string') { + out.push({ type: ToolResultContentType.Text, text: b.text }); + } + } + return out.length > 0 ? out : undefined; +} + +function safeStringify(v: unknown): string | undefined { + try { + return JSON.stringify(v); + } catch { + return undefined; + } +} + +/** + * True when the message content is a CLI slash-command echo (e.g. + * `/model...`) that the subprocess writes + * to the transcript for restore fidelity but is not a user-authored prompt. + * Checks the first text fragment only; mixed messages where the first + * content block is a real prompt are NOT filtered. + */ +function isCliEchoContent(content: string | ReadonlyArray): boolean { + if (typeof content === 'string') { + return CLI_ECHO_MARKER_PATTERN.test(content); + } + const firstText = content.find((b): b is UserTextBlock => b.type === 'text'); + return firstText !== undefined && CLI_ECHO_MARKER_PATTERN.test(firstText.text); +} + +// #endregion diff --git a/src/vs/platform/agentHost/node/claude/phase13-plan.md b/src/vs/platform/agentHost/node/claude/phase13-plan.md new file mode 100644 index 0000000000000..3d041197b2c51 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/phase13-plan.md @@ -0,0 +1,233 @@ +# Phase 13 — Session Restoration + +> Generated by super-planner. Source: [roadmap.md](./roadmap.md) (phase 13). +> Last updated: 2026-05-13 after 3-model council synthesis (GPT-5.5, Claude Opus 4.6, GPT-5.3-Codex). + +**Status:** ✅ done — unit + integration tests green (101 tests across mapper + agent suites), `compile-check-ts-native` clean, council-reviewed. 
Live E2E (restart-and-restore via the [launch skill](../../../../../../.agents/skills/launch/SKILL.md)) deferred — the read path is straightforward and fixtures cover the M7 grouping rules; can run anytime self-hosting surfaces a real issue. + +## Goal + +Implement `IAgent.getSessionMessages(session)` for the Claude provider so the workbench can reload an existing Claude session's full transcript across agent-host restarts. Unblocks self-hosting. + +Fork support (Phase 6.5) is explicitly *not* a Phase 13 concern. The earlier plan proposed returning a `protocolTurnId → lastSdkAssistantMessageUuid` map as a by-product for fork to consume — dropped during impl. Fork is rare; if/when Phase 6.5 lands, it walks the JSONL transcript itself rather than paying ongoing complexity for an O(1) lookup that's exercised on a cold path. +## Scope + +**In scope** +- New SDK binding: `getSessionMessages(sessionId, { dir?, includeSystemMessages: true })`. +- New replay mapper `claudeReplayMapper.ts`: flat `SessionMessage[]` → `Turn[]` per [CONTEXT M7](./CONTEXT.md) grouping rules. +- `ClaudeAgent.getSessionMessages` end-to-end: parse URI, fetch transcript, map to turns, resilient error handling. +- Tail-Turn state heuristic: `Complete` if no orphan `tool_use` remains, otherwise `Cancelled`. +- Subagent **markers** in the parent transcript: any `Task` / `Agent` `tool_use` flattens to a terminal `ToolCall` with `_meta.toolKind = 'subagent'` plus an inlined `ToolResultContentType.Subagent` content marker. +- Subagent **URIs** (`<session-uri>/subagent/<tool-call-id>`): rejected with `TODO: Phase 12` (no SDK call). +- Unit tests: 10 mapper fixtures + integration tests for the agent surface. + +**Out of scope** +- **Any `(turnId, assistantEnvelopeUuid)` mapping** — neither persisted nor returned as a by-product. Originally planned as live ingest + replay backfill so Phase 6.5 fork would be an O(1) lookup; reverted entirely during impl. 
Fork is the only consumer, fork is rare, and walking JSONL on demand at fork time is cheaper than threading a map through the mapper return type and the `IAgent.getSessionMessages` shape forever. If a future consumer needs the mapping, it can either (a) reconstruct it by walking the SDK's `SessionMessage[]` itself, or (b) reintroduce a per-turn DB row (~30 lines on `ClaudeSessionMetadataStore`). Phase 13 owns neither path. +- Subagent transcript fetch (`getSubagentMessages`) — owned by [Phase 12](./roadmap.md). +- `IAgent.truncateSession` — explicitly NOT implemented; clients fork instead. Truncate is fork-by-another-name. +- Fork plumbing on top of the mapper — owned by [Phase 6.5](./roadmap.md). Phase 6.5 reads `SessionMessage.uuid` itself when it lands; one JSONL read per fork. Phase 13 leaves no hooks for it. +- `IClaudeSessionTranscriptStore` seam (originally deferred from Phase 5). Dropped per Q4 — a one-method passthrough wrapper around `IClaudeAgentSdkService.getSessionMessages` adds no behavior. Introduce when a second consumer exists. +- Any new `TurnState` variant. + +## Prerequisites + +- Phase 4 (provider scaffold) ✅, Phase 5 (lifecycle + SDK service) ✅, Phase 6 (live message pipeline + `ClaudeMapperState`) ✅, Phase 9 (abort/steering polish) ✅. +- SDK is loaded via `AgentHostClaudeSdkPathEnvVar` (current dev mechanism) and matches whatever version is on disk; long-term distribution is Phase 15's marketplace-extension story. Phase 13 binds against the SDK shape currently exposed by [`claudeAgentSdkService.ts`](./claudeAgentSdkService.ts) and does not assume a specific package version. + +## Approach + +**Replay produces `Turn[]` directly.** Council unanimous: do not synthesize `AgentSignal[]` and run them through the protocol reducer. Replay has no live `result` metadata, no `stream_event` lifecycle, and `IAgent.getSessionMessages` already returns `readonly Turn[]` ([`agentService.ts:454`](../../common/agentService.ts#L454)). 
Copilot's reference does the same ([`copilot/mapSessionEvents.ts`](../copilot/mapSessionEvents.ts) → `getMessages` at [`copilotAgentSession.ts:776`](../copilot/copilotAgentSession.ts#L776)). + +**Shared invariant, not shared code.** The two drivers (live `mapSDKMessageToAgentSignals` and the new replay `mapSessionMessagesToTurns`) operate on different SDK input shapes (`SDKMessage` union vs. `SessionMessage` union) and different output shapes (`AgentSignal[]` vs. `Turn[]`). What they share is the `Map` attribution invariant from [CONTEXT M7](./CONTEXT.md). Lower-level helpers (tool-display lookup via [`claudeToolDisplay.ts`](./claudeToolDisplay.ts), tool-result content extraction, subagent marker construction) are extracted and reused. + +**No turn-mapping by-product.** The mapper returns `readonly Turn[]` and nothing else. The earlier plan threaded a `turnIdToLastAssistantUuid` map through the return type for Phase 6.5 fork; reverted entirely. Rationale: fork is rare, the only consumer is hypothetical Phase 6.5, and paying a wider return type plus the discipline to keep that map correct forever isn't worth optimizing a cold path. Phase 6.5, if/when it lands, reconstructs the mapping itself from the same `SessionMessage[]` the mapper consumes. + +> **Critical id-correctness note (for future fork work).** The SDK distinguishes the **envelope uuid** (`SessionMessage.uuid` / `SDKAssistantMessage.uuid` — a UUID v4 the SDK assigns and persists in JSONL) from the **Anthropic message id** (`event.message.id` — a `msg_…` string from Anthropic's API). `forkSession({ upToMessageId })` accepts the **envelope uuid only** ([`sdk.d.ts:3561-3567`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L3561), [`sdk.d.ts:610-614`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L610)). Phase 13 doesn't surface either id; this note exists for Phase 6.5's benefit so it picks the right id class when it lands. + +## Steps + +1. 
✓ **Add `getSessionMessages` to the SDK service.** Extend `IClaudeAgentSdkService` and `IClaudeSdkBindings` with the new method. + - Files: [claudeAgentSdkService.ts](./claudeAgentSdkService.ts) + - Depends on: none + - Done when: `IClaudeAgentSdkService.getSessionMessages(id, { includeSystemMessages: true })` forwards to SDK and returns `SessionMessage[]`. New unit test stubs the binding via the existing `_loadSdk` override. + +2. ✓ **Build replay mapper `mapSessionMessagesToTurns`.** Single-pass over `SessionMessage[]` per [CONTEXT M7](./CONTEXT.md) grouping rules: + - `('user', text)` → start `Turn` with `id = sessionMessage.uuid`. + - `('user', tool_result)` → attach to open `ToolCall`. NOT a turn boundary. + - `('user', empty / hook-injected / shouldQuery: false)` → drop. + - `('assistant', blocks…)` → push `Markdown` / `Reasoning` / `ToolCall` (terminal `Completed` only) parts. + - `('system', compact_boundary | allowlisted subtype)` → push `SystemNotificationResponsePart` on the active turn. NOT a turn boundary. + - `('system', other)` → drop. + - Track two structures: a cross-turn `toolUseToTurnId: Map` (the M7 attribution invariant — `tool_result` legitimately lands in a *later* user message), and a **per-turn** `Set` of `tool_use_id`s announced by that turn. Tool-use additions go into both; `tool_result` removes from the per-turn set of the *announcing* turn (looked up via the cross-turn map). + - Per-turn state, evaluated at the next turn boundary (or replay end): the announcing turn's per-turn set empty → `TurnState.Complete`; non-empty → `TurnState.Cancelled` (protocol has no `Incomplete`). Each turn's state is decided by its own per-turn set only — an orphan in turn N must NOT mark turn N+1 cancelled. + - Per-Turn `usage = undefined` (live-only metadata, M8 asymmetry). 
+ - Subagent markers: tool name `'Task'` or `'Agent'` → `_meta: { toolKind: 'subagent' }` + `ToolResultContentType.Subagent` marker block (mirrors [`copilot/mapSessionEvents.ts:617-639`](../copilot/mapSessionEvents.ts#L617)). + - Returns `readonly Turn[]`. + - Files: NEW `src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts` + - Depends on: none (logically independent) + - Done when: all 10 fixture-driven unit tests pass (see Verification). + +3. ✓ **Replace `ClaudeAgent.getSessionMessages` stub.** Replace the 7-line stub at [`claudeAgent.ts:604`](./claudeAgent.ts#L604). + - Subagent URI dispatch (URI matches `/subagent/` per [`parseSubagentSessionUri`](../../common/state/sessionState.ts#L226)) → throw `Error('TODO: Phase 12: subagent transcript fetch not yet implemented')`. No SDK call. + - Provisional sessions → return `[]`. + - Otherwise: fetch transcript via `this._sdkService.getSessionMessages(AgentSession.id(session), { includeSystemMessages: true })` → run replay mapper → return `Turn[]`. + - Wrap the whole flow in try/catch matching the `listSessions` resilience pattern at [`claudeAgent.ts:636`](./claudeAgent.ts#L636): warn-log + return `[]` on transcript-fetch / mapping failure. + - Files: [claudeAgent.ts](./claudeAgent.ts). + - Depends on: steps 1, 2 + - Done when: integration test with stubbed SDK reads canned `SessionMessage[]` and returns the mapped `Turn[]`. + +5. ~~**Live `(turnId, lastAssistantEnvelopeUuid)` ingest.**~~ **Removed during impl.** No persistence at all. Fork is rare and is owned by Phase 6.5; if it ever lands, it reads `SessionMessage.uuid` directly off the SDK transcript rather than relying on a Phase 13 by-product. See "Out of scope" above. + +6. ✓ **Unit tests.** See Verification. 
+ - Files: [src/vs/platform/agentHost/test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts), NEW `src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts` + - Depends on: steps 1–3 + - Done when: all fixtures green, `compile-check-ts-native` clean, `valid-layers-check` clean. + +## Files to Modify or Create + +| Path | Change | Notes | +|------|--------|-------| +| [claudeAgentSdkService.ts](./claudeAgentSdkService.ts) | modify | Add `getSessionMessages` to interface + bindings + impl | +| `claudeReplayMapper.ts` | create | `mapSessionMessagesToTurns()` — flat `SessionMessage[]` → `readonly Turn[]` per [CONTEXT M7](./CONTEXT.md) | +| [claudeAgent.ts](./claudeAgent.ts) | modify | Replace `getSessionMessages` stub at line 604; call `_sdkService.getSessionMessages` directly | +| [claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) | modify | Extend fake SDK with `getSessionMessages`; add subagent-URI throw test, error-path test, happy-path test | +| `claudeReplayMapper.test.ts` | create | 10 fixture-driven unit tests | + +## Decisions + +- **Q1 — Mapper shape.** Replay produces `Turn[]` directly; it does NOT round-trip through `AgentSignal[]` + reducer. `IAgent.getSessionMessages` returns `readonly Turn[]` so the indirection would buy nothing, and replay lacks the live `result` metadata the reducer expects. Mirrors Copilot's reference at [`copilot/mapSessionEvents.ts:223`](../copilot/mapSessionEvents.ts#L223). Shared invariant is the `Map` attribution + the M7 grouping rules — shared as a doc + extracted helpers, not a shared mapper module. +- **Q2 — Persisted mapping location.** ~~Per-turn rows via `setTurnEventId`/`getTurnEventId`~~ **Reverted during impl** — not persisted *and* not returned as a by-product. Phase 6.5 fork, if/when it lands, reconstructs the mapping itself from `SessionMessage[]`. Rationale: fork is the only consumer of this mapping and it's a cold path; not worth widening the mapper's return type forever. 
+- **Q3 — Subagent tool names.** Recognize both `'Task'` and `'Agent'`. SDK-confirmed: built-in subagents are invoked "via the Task tool" ([`sdk.d.ts:95`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L95)) and custom subagents "via the Agent tool" ([`sdk.d.ts:36`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L36)). The production extension's dispatcher matches both at [`claudeMessageDispatch.ts:194`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/claudeMessageDispatch.ts#L194). Marker shape: `_meta: { toolKind: 'subagent' }` plus a `ToolResultContentType.Subagent` content block, matching Copilot's `makeCompletedToolCallPart`. +- **Q4 — Tail-Turn state.** `TurnState.Complete` if `pendingToolUseIds.size === 0`, else `TurnState.Cancelled`. Protocol has no `Incomplete` ([`state.ts:777-781`](../../common/state/protocol/state.ts#L777)); `Cancelled` is the closest existing semantic and matches Copilot's unfinished-turn handling at [`copilot/mapSessionEvents.ts:455`](../copilot/mapSessionEvents.ts#L455). +- **Q5 — Live ingest hook.** ~~`ClaudeSdkPipeline._processMessages` `'result'` branch~~ **Removed during impl** along with Q2 — no live ingest, no hook. +- **Q6 — Backfill timing.** ~~Inline during `getSessionMessages`, idempotent.~~ **Removed during impl** — no backfill at all. +- **Q7 — Transcript store seam.** **Dropped.** A one-method passthrough wrapper adds no behavior; introduce when a second consumer (e.g. the alpha `sessionStore` hybrid impl) actually exists. Direct `_sdkService.getSessionMessages` call at the one consumer site. +- **Q8 — `includeSystemMessages: true`.** Confirmed in SDK 0.2.112 (`GetSessionMessagesOptions` includes the field). Surfaces `compact_boundary` and notification subtypes; replay applies the M7 allowlist (only `compact_boundary` and a small set of explicit subtypes become `SystemNotificationResponsePart`; everything else drops). 
+- **Q9 — Error handling.** Catch + warn-log + return `[]`. Mirrors `listSessions` at [`claudeAgent.ts:636`](./claudeAgent.ts#L636) and the AgentService resilience pattern. One corrupt session must not break the whole restoration flow. +- **Q10 — Test surface.** Unit tests with stubbed `IClaudeAgentSdkService.getSessionMessages` returning canned `SessionMessage[]` fixtures. Integration tests for the agent surface (happy path, subagent URI throw, provisional session → `[]`, SDK throw → `[]`). + +## Risks + +- **Live + replay drift on the `tool_use_id → turnId` invariant.** Both mappers must agree. Mitigation: cross-driver fixture tests — run a tool-use/tool-result fixture through the live mapper (asserting the right `SessionToolCallStart` / `SessionToolCallComplete` actions) and through the replay mapper (asserting the matching `Turn`/`ToolCall` shape) and assert they produce the same logical attribution. +- **`SessionMessage.message` is typed `unknown` in the SDK.** Validate at the seam with narrow type guards (`type === 'user'` + `Array.isArray(content)`, etc.). Drop unrecognizable messages silently — matches the production extension's parser semantics per [CONTEXT M7](./CONTEXT.md) glossary. +- **Wrong-id-class returned to fork.** Phase 13 does not return any id mapping; the risk lives entirely in Phase 6.5. The id-correctness note above documents the trap (`SessionMessage.uuid` is the envelope uuid `forkSession({ upToMessageId })` accepts; `message.id` is the Anthropic `msg_…` id, which fork does *not* accept) so Phase 6.5 picks the right id class when it consumes `SessionMessage` itself. +- **Subagent allowlist drift.** New SDK versions might introduce additional subagent-spawning tools. Mitigation: the replay mapper hard-codes the recognized tool names in `SUBAGENT_TOOL_NAMES` (`'Task'`, `'Agent'`) — it does not derive them from `claudeToolDisplay.ts`, so new subagent-spawning tools must be added to that constant explicitly. +- **System-message noise.** `includeSystemMessages: true` adds many subtypes. 
Mitigation: explicit allowlist from [CONTEXT M7](./CONTEXT.md) (only `compact_boundary` and a few notification subtypes become response parts; everything else drops). Test fixture for a non-allowlisted subtype to assert it's dropped. +- **SDK version mismatch.** ~~See Open Questions Q1.~~ Resolved during grilling: SDK is loaded from `AgentHostClaudeSdkPathEnvVar` and Phase 13 binds against whatever shape `claudeAgentSdkService.ts` currently exposes; long-term version distribution is Phase 15's marketplace-extension problem (see [roadmap.md §Phase 15](./roadmap.md)). + +## Verification + +### Unit / Integration + +- **Replay mapper fixtures** (`claudeReplayMapper.test.ts`): + 1. Single-text turn → one `Turn`, `id === userMessage.uuid`, `usage === undefined`, `state === Complete`. + 2. Tool-use + tool-result → one `Turn` containing one `ToolCall` with `state === Completed`; no extra turn for the synthetic-user `tool_result`. + 3. Multi-turn (text→assistant→text→assistant) → two `Turn`s in order. + 4. `compact_boundary` mid-turn → `SystemNotificationResponsePart` on the active turn, no boundary. + 5. `Task` tool_use + tool-result → `_meta.toolKind === 'subagent'` and `ToolResultContentType.Subagent` marker present in `content[]`. + 6. Tail-Turn with orphan `tool_use` (no matching `tool_result`) → `state === Cancelled`. + 7. Non-allowlisted system subtype → dropped. +- **`ClaudeAgent.getSessionMessages` integration** ([claudeAgent.test.ts](../../test/node/claudeAgent.test.ts)): + - Stubbed SDK returns canned `SessionMessage[]` → assert the resulting `Turn[]` matches snapshot. + - Subagent URI throws `TODO: Phase 12` and makes no SDK call. + - Provisional session returns `[]`. + - SDK throw → returns `[]` and logs a warning. +- **Hygiene** (per [.github/copilot-instructions.md](../../../../../../.github/copilot-instructions.md)): `compile-check-ts-native` clean, `valid-layers-check` clean, `npm run eslint` clean, `npm run gulp hygiene` clean. 
+ +Run via VS Code's `runTests` tool against the test files, or fall back to `scripts/test.sh --grep "Phase 13|getSessionMessages|claudeReplayMapper"`. + +### E2E + +Workspace skills available for E2E: +- **Launch skill**: [`launch`](../../../../../../.agents/skills/launch/SKILL.md) — Playwright-driven Code OSS automation. +- **Log skill**: [`code-oss-logs`](../../../../../../.github/skills/code-oss-logs/SKILL.md) — read agent host / renderer logs from dev builds. + +**Scenario — restart-and-restore:** +1. Use `launch` to start Code OSS Agents (`./scripts/code.sh --agents`), open the Agents window, create a new Claude session, send a prompt that triggers at least one tool call (e.g., "list files in the current directory" → `Bash` or `Read`). +2. Wait for the turn to settle. Take a screenshot of the rendered turn. +3. Close the Agents window (do not quit the agent host process initially). +4. Re-open the same session via the sessions sidebar. +5. Confirm the full turn (user message + assistant text + tool call card) renders identically. Screenshot for comparison. +6. Use `code-oss-logs` to read `agenthost.log` for the run; grep for `[Claude]` warnings during `getSessionMessages` — none expected on the happy path. +7. Quit and relaunch the agent host (`Run and Compile Agents - OSS`). Re-open the same session. Confirm restoration still works (no in-memory `_sessions` entry). +8. Append a new turn to the restored session and confirm it lands cleanly (validates that a restored session can still take new messages). + +**Not used:** `chat-perf` — Phase 13 adds no rendering-hot paths or response-streaming changes; chat-perf belongs to Phase 14 hardening. + +### Manual + +- **Restore a legacy session.** Manually create a Claude session with the *current* (pre-Phase-13) build, then switch to the Phase-13 build and re-open it. Confirm `getSessionMessages` returns the full transcript. 
+ +## Open Questions + +_None remaining._ All grilling-phase questions resolved — see Decisions table. + +## References + +- Roadmap: [./roadmap.md](./roadmap.md) (Phase 13) +- Architecture context: [./CONTEXT.md](./CONTEXT.md) (M7 transcript reconstruction, M8 live-vs-replay asymmetry, M9 lifecycle, glossary `Turn.id` + `Tool-call attribution map`) +- Copilot reference: [../copilot/copilotAgent.ts](../copilot/copilotAgent.ts) `getSessionMessages` (line 1056), [../copilot/copilotAgentSession.ts](../copilot/copilotAgentSession.ts) `getMessages` (line 776), [../copilot/mapSessionEvents.ts](../copilot/mapSessionEvents.ts) +- Production-extension reference (verify, do not port directly): [`extensions/copilot/src/extension/chatSessions/claude/node/sessionParser/sdkSessionAdapter.ts`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/sessionParser/sdkSessionAdapter.ts), [`claudeCodeSdkService.ts`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeSdkService.ts) `getSessionMessages` wrapper +- Related plans: [phase5-plan.md](./phase5-plan.md) (`IClaudeSessionTranscriptStore` deferral note at line 367), [phase6-plan.md](./phase6-plan.md), [phase9-plan.md](./phase9-plan.md), [phase6.1-plan.md](./phase6.1-plan.md) +- E2E skills: [launch](../../../../../../.agents/skills/launch/SKILL.md), [code-oss-logs](../../../../../../.github/skills/code-oss-logs/SKILL.md) +- Council models consulted: GPT-5.5, Claude Opus 4.6, GPT-5.3-Codex (3 independent plans synthesized into the Decisions table above) + +## Implementation Notes + +### Files actually changed + +Production: +- [`claudeAgentSdkService.ts`](./claudeAgentSdkService.ts) — added `getSessionMessages` to interface + bindings + impl. +- [`claudeReplayMapper.ts`](./claudeReplayMapper.ts) — NEW. `mapSessionMessagesToTurns(messages, session, logService): readonly Turn[]` per CONTEXT M7. Pure function; no by-products, no persistence. 
+- [`claudeAgent.ts`](./claudeAgent.ts) — replaced `getSessionMessages` stub with subagent-URI dispatch + provisional check + transcript fetch + replay (no persistence). + +Tests: +- [`claudeReplayMapper.test.ts`](../../test/node/claudeReplayMapper.test.ts) — NEW. 10 fixtures covering all 7 plan-required cases plus orphan-isolation, subagent-`Agent`-name, and CLI-echo synthetic-message drop. +- [`claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts) — added Phase 13 suite with 4 integration tests (happy path, subagent URI throws, provisional session → `[]`, SDK throw → `[]`). +- [`claudeAgent.integrationTest.ts`](../../test/node/claudeAgent.integrationTest.ts) — added a `getSessionMessages` no-op stub on `ProxyRoundTripSdkService` to satisfy the updated interface. + +### Drift from plan + +- **Persistence layer dropped after impl.** Plan's steps 5 + 6 (live ingest + backfill) were initially implemented and then removed when Tyler questioned whether fork-as-O(1) was worth the complexity tax on every live turn. Net effect: no changes to `claudeSdkPipeline.ts`, `claudeMapSessionEvents.ts`, `claudeSdkMessageRouter.ts`, or `claudeSessionMetadataStore.ts`. Phase 6.5 will read `SessionMessage.uuid` itself if/when it lands. +- **Turn-mapping by-product dropped after refactor #3.** Plan originally said the mapper returns `{ turns, turnIdToLastAssistantUuid }` so fork could consume the map without persistence. Reverted during architectural cleanup — same rationale as the persistence drop, applied one layer up: fork is rare, the mapping is reconstructible from `SessionMessage[]`, and a wider return type taxes every caller forever to optimize a cold path. Plan/Scope/Approach/Steps/Files-table/Decisions sections updated to reflect `readonly Turn[]` as the sole return. +- **Mapper signature** — plan said `mapSessionMessagesToTurns(messages, session)` returns turns. Real signature is `(messages, session, logService)` — the logger is needed for orphan-tool warns. Cosmetic difference. 
+- **Refactor #2 added a `ParsedSessionMessage` adapter** — not in the original plan. Splits SDK shape detection from the stateful reducer in `ReplayBuilder`. Pure internal refactor; public API unchanged. +- **`_startTurn` / `_consumeSystemNotification` inlined** — inlined into `ReplayBuilder.consume()`'s switch arms per the `avoid-private-methods` audit. Builder now has 5 privates (down from 7) covering the state-machine transitions only. +- **CONTEXT M7 notification gate relaxed post-impl** — the spec originally said `notification (priority ≥ medium)`; the mapper allowlists all priorities. CONTEXT M7 updated to match shipped behavior with a TODO to re-introduce the gate once real-world data shows whether `priority: 'low'` notifications are noise. +- **`claudeAgent.integrationTest.ts`** — not in plan's file list, but the integration-test fake `ProxyRoundTripSdkService` had to grow a `getSessionMessages` stub when the interface widened. Trivial. + +### Council review (post-impl) + +Three-model council review (GPT-5.5, Claude Opus 4.6, GPT-5.3-Codex) caught documentation drift between the plan and the shipped code, plus one missing test. Resolved: + +- **C1** (3/3 consensus) — `turnIdToLastAssistantUuid` map documented in ~15 places but never shipped. Captured the drop across plan + roadmap. +- **C2** (3/3 consensus) — Verification section still referenced backfill / live-ingest / DB rows from the removed persistence layer. Cleaned up. +- **C3** (2/3) — stale `incorrectly implements` diagnostic on `ProxyRoundTripSdkService`. Native typecheck was already clean; tightened the stub signature for IDE consistency. +- **S1** (Opus) — missing "provisional session → `[]`" integration test. Added. +- **S2** (GPT-5.5) — "`claudeToolDisplay.ts` is the single source of truth for subagent tool names" claim was inaccurate (the mapper hard-codes `SUBAGENT_TOOL_NAMES`). Risks section corrected. +- **S3** (Codex) — CONTEXT M7 priority gate not enforced. 
Resolved by relaxing M7 to match shipped behavior (see Drift above). +- **S4** (Opus) — Impl Notes claimed a "msg-id-shape negative" fixture that was deleted during refactor #3. Fixed. + +Full synthesis saved to session memory as `review.md` for the record. + +### Tests written + +- 10 mapper unit tests in [`claudeReplayMapper.test.ts`](../../test/node/claudeReplayMapper.test.ts) — all green. +- 4 agent integration tests in [`claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts) `'ClaudeAgent (Phase 13 — getSessionMessages)'` suite — all green. +- All adjacent suites (`claudeSdkPipeline.test.ts`, `claudeSessionMetadataStore.test.ts`, full `claudeAgent.test.ts`) green: 118 tests pass; zero regressions. +- `compile-check-ts-native` clean. + +### `avoid-private-methods` audit + +Run against changed production files in `claudeReplayMapper.ts` after refactors #1–#3. **Findings:** +- `ReplayBuilder` has 5 private methods after the inlining round (`_consumeAssistant`, `_openToolUse`, `_attachToolResult`, `_findToolCallPart`, `_closeActive`). All operate on the same private state (`_active`, `_turns`, `_toolUseToTurnId`) and form a single state machine. Class-level signal: NONE. No extractions recommended; the parser was already extracted as `parseSessionMessage` in refactor #2. +- Pre-refactor `_consume{User,Assistant,System}` cluster was the only flagged item; refactor #2 dissolved it by moving SDK shape detection into the `parseSessionMessage` adapter. + +### Deferred refactors + +- Shared `extractToolResultContent` helper — currently duplicated across [`claudeMapSessionEvents.ts`](./claudeMapSessionEvents.ts) and [`claudeReplayMapper.ts`](./claudeReplayMapper.ts). Plan said "extract to shared"; in practice the two copies are 12 lines each and diverge in handling. Defer extraction until a third consumer or the divergence becomes a maintenance burden. 
+- Notification priority gate — [CONTEXT M7](./CONTEXT.md) was relaxed post-Phase-13 to admit `notification` at all priorities (the mapper's current behavior). Re-introduce a `priority ≥ medium` gate once we have real-world data on what `priority: 'low'` notifications actually carry; if they turn out to be noise, drop them on replay. + +### E2E status + +Unit + integration suites all green (101 tests); `compile-check-ts-native` clean. Live restart-and-restore E2E **deferred** — the launch + log skills are documented in the Verification section if a real bug ever motivates running them; for now the fixture coverage of M7 grouping rules is enough to trust the read path. diff --git a/src/vs/platform/agentHost/node/claude/roadmap.md b/src/vs/platform/agentHost/node/claude/roadmap.md index 8d1623f85a2a9..07dc22f68144a 100644 --- a/src/vs/platform/agentHost/node/claude/roadmap.md +++ b/src/vs/platform/agentHost/node/claude/roadmap.md @@ -90,6 +90,46 @@ Each phase ships behind the previous one; each is small enough to land as one PR and ends at a verifiable boundary (tests, a CLI smoke, or an end-to-end manual run). +### Execution order (non-numeric) + +Phase numbers are stable identifiers — code comments, plan files +(`phaseN-plan.md`), and `TODO: Phase N` throws all reference them — so we +do **not** renumber. The actual landing order diverges from numeric order +to unblock self-hosting sooner: + +**1 → 1.5 → 2 → 3 → 4 → 5 → 6 → 9 → 13 → 7 → 8 → 10 → 11 → 12 → 6.5 → 14 → 15** + +Phase 13 (session restoration) is pulled forward immediately after Phase 9 +because it unlocks two high-leverage capabilities: + +- **Restoring old chats** — clients can reload an existing transcript and + continue it. Today `getSessionMessages` is a stub, so any Claude session + is effectively single-process-lifetime. +- **Self-hosting** — the team can dogfood Claude sessions across agent-host + restarts without losing turn history. 
+ +Phase 13's dependencies are already met: the SDK exposes +`getSessionMessages` out-of-process (no live `Query` required, CONTEXT M7), +the Phase 5 SDK service is in place (the `IClaudeSessionTranscriptStore` seam was deferred and is not required), and Phase 6's +live mapper exists, so the replay path can reuse its lower-level helpers. + +**Deferrals to land Phase 13 early:** +- **Subagent transcripts** — Phase 13's mapper handles parent-session + transcripts only. Subagent URI dispatch (`/subagent/` + → `getSubagentMessages`) ships when Phase 12 lands. Parent transcripts + show subagent `tool_use`/`tool_result` block pairs as completed + `ToolCall`s with `_meta.toolKind = 'subagent'`; the workbench renders + them as opaque markers until Phase 12 wires the second SDK call. +- **Tool-call replay fidelity** — without Phase 7, live tool calls don't + emit signals, but replayed `tool_use`/`tool_result` pairs still flatten + to terminal `ToolCall` states per CONTEXT M7. Replay works; live tool + UX still waits for Phase 7. +- **Fork (Phase 6.5)** — Phase 6.5, if/when it lands, reconstructs the + `turnId → SessionMessage.uuid` mapping itself by walking the SDK + transcript. Phase 13 deliberately surfaces no by-product map: fork is + rare, the mapping is reconstructible, and a wider mapper return type + would tax every caller forever to optimize a cold path. + ### Phase 1 — `ICopilotApiService` ✅ **DONE** Foundational gateway to the Copilot CAPI: token mint + cache + invalidation, @@ -612,37 +652,43 @@ solution backed by Phase 13's mapper. **Chosen approach (when this phase lands):** -- **Persist `protocolTurnId → lastSdkMessageUuid`** on result-message ingest - inside the same handler Phase 13 builds for transcript reconstruction. - That mapper sees every SDK message in order and observes `type:'result'` - (the only contract-level turn-end signal); it is the right place to - capture the mapping without inference. -- Store the mapping in the session-data DB alongside the existing - per-session metadata. 
Fork becomes O(1) DB lookup, no JSONL walk. +- **Walk JSONL on demand.** Phase 6.5 calls + `sdk.getSessionMessages(srcId, { includeSystemMessages: true })` + itself, scans the returned `SessionMessage[]` for the assistant + envelope `uuid` corresponding to the desired `turnId` (Phase 13's + mapper already documents that `Turn.id === SessionMessage.uuid` for + the user-text envelope that starts each turn; the matching assistant + envelope is the last `type: 'assistant'` message before the next + user-text turn). One JSONL read per fork. Originally planned as a + persisted O(1) DB lookup with live ingest on every turn, then as a + by-product map returned from Phase 13's mapper — both reverted + because fork is the only consumer and is rare. Neither the per-turn + write tax nor the wider mapper return type was worth a speedup + nobody asked for. - `createSession({ fork })` then calls - `sdk.forkSession(srcId, { upToMessageId: })` and routes - the new session id through `Options.resume` so the SDK loads the forked - transcript. + `sdk.forkSession(srcId, { upToMessageId: })` and + routes the new session id through `Options.resume` so the SDK loads + the forked transcript. - Persist the customization-directory metadata via `setMetadata` on the forked session (mirrors [`copilotAgent.ts`](../copilot/copilotAgent.ts)). -- Pre-existing on-disk sessions need a one-time backfill (best-effort, the - reverted heuristic is acceptable here since it's a one-shot operation - on archived data). +- **If a second consumer for the mapping ever appears** (e.g. + server-side in-place message edit, telemetry uuid joins), add + `backfillTurnMapping` on `ClaudeSessionMetadataStore` and populate it + from a one-shot mapper pass at `getSessionMessages` time (~30 lines). **Dependencies:** -- **Hard:** Phase 13 (transcript reconstruction + result-message mapper). 
- Cannot land until the mapper has a stable hook for - `turnId → lastSdkMessageUuid` capture on every result-message ingest, - including replay during session restoration so pre-existing on-disk - sessions get the mapping backfilled. -- **Soft:** Phase 13's `IClaudeSessionTranscriptStore` (the Phase-5 seam) is - the natural place to surface the mapping to `ClaudeAgent`. +- **Hard:** Phase 13 (transcript reconstruction). Phase 6.5 reuses the + same `IClaudeAgentSdkService.getSessionMessages` binding Phase 13 + added, and consumes `SessionMessage.uuid` directly. +- **Soft:** Phase 13's `IClaudeSessionTranscriptStore` (the Phase-5 seam) + remains unimplemented; revisit only if a caching layer becomes worth + the abstraction cost. **Architectural model:** Copilot's `getNextTurnEventId(turnId)`. Phase 6.5 -is the Claude-side polyfill of that primitive — persisted by us because the -Claude SDK doesn't provide it. +is the Claude-side equivalent — except Claude derives the lookup on demand +from the JSONL transcript, rather than persisting it. **Materialisation note.** Unlike non-fork `createSession` (Phase 5/6's provisional path), `forkSession` writes the forked SDK transcript file @@ -956,13 +1002,56 @@ verify `getSessionMessages` returns the subagent transcript. Exit criteria: subagent sessions are first-class for clients. -### Phase 13 — Session restoration (no in-place truncate) +### Phase 13 — Session restoration (no in-place truncate) ✅ **DONE** + +> **Execution order:** lands immediately after Phase 9 to unblock chat +> restoration and self-hosting. See "Execution order (non-numeric)" above. - **`getSessionMessages(session)`** reconstructs the full turn history from the SDK's transcript via `IClaudeSessionTranscriptStore` (Phase 5 seam). - Maps `SessionMessage[]` (Anthropic events) → agent host `Turn[]`. The - mapper is the same logic used by the live event stream — factor it out in - Phase 6 and reuse here. Includes subagent transcripts (Phase 12). 
+ Out-of-process: calls SDK `getSessionMessages(sessionId, { dir, + includeSystemMessages: true })` — no live `Query` required (CONTEXT M7). + Maps `SessionMessage[]` (Anthropic events) → agent host `Turn[]` per the + CONTEXT M7 grouping rules: + - `('user', text)` → start new `Turn` with `Turn.id = sessionMessage.uuid`. + - `('user', tool_result)` → attach to the open `ToolCall`, do NOT start + a new `Turn`. + - `('user', empty / hook-injected / shouldQuery: false)` → drop. + - `('assistant', ...blocks)` → push `Markdown` / `Thinking` / + `ToolCall` (terminal `Completed` / `Cancelled` only — no live + lifecycle states) parts onto the active `Turn`. + - `('system', compact_boundary | allowlisted subtype)` → push + `SystemNotificationResponsePart` on the active `Turn`; `compact_boundary` + is **not** a Turn boundary (CONTEXT M7). + - Tail-Turn `state`: `'completed'` if no orphan `tool_use` blocks remain; + otherwise mark incomplete (heuristic — see CONTEXT "Open mapping + questions"). + - Per-Turn `usage` is `undefined` on replay (live-only metadata; CONTEXT + M8 asymmetry). +- **Mapper factor-out from Phase 6.** Phase 6 ships a live mapper for the + event stream; Phase 13 lifts that into a shared module so the same code + drives both live and replay. Critically, **both drivers must hydrate the + same `Map`** (CONTEXT glossary) so `tool_result` + events delivered after a session restore resolve back to the announcing + `tool_use`'s `turnId`. The mapper is the single seam. +- **Subagent markers without subagent transcripts.** Parent-transcript + `Agent` / `Task` `tool_use` + `tool_result` pairs flatten to a terminal + `ToolCall` with `_meta.toolKind = 'subagent'` and the result content + inlined per CONTEXT M7. Until Phase 12 lands the + `/subagent/` URI dispatch and the + `getSubagentMessages` second SDK call, opening a subagent marker in the + workbench is a no-op (the host throws `TODO: Phase 12` if the URI shape + matches). 
+- **`turnId → lastSdkMessageUuid` is reconstructed on demand, not exposed.** + Phase 13's `mapSessionMessagesToTurns` returns `readonly Turn[]` and + nothing else. Phase 6.5 fork (the only consumer of a turn→uuid + mapping) walks `SessionMessage[]` itself when it lands. Originally + planned as live ingest + replay backfill into per-session DB rows, + then as a `{ turns, turnIdToLastAssistantUuid }` mapper return-value + by-product; both reverted because fork is rare and neither the + per-turn write tax nor the wider mapper return type was worth it. + If a second consumer ever appears, `backfillTurnMapping` is a + ~30-line add on `ClaudeSessionMetadataStore`. - **Do NOT implement `IAgent.truncateSession`**. The SDK's `forkSession` always produces a *new* session ID, which conflicts with the protocol's expectation that `truncateSession` mutates the existing session URI in @@ -975,22 +1064,18 @@ Exit criteria: subagent sessions are first-class for clients. - The workbench should follow the new URI, just like for any other fork. - Adding in-place truncate later would require a URI→sessionId mapping layer; we'd revisit when there's user demand. -- **`turnId → lastSdkMessageUuid` ingest** lands here as the prerequisite - for Phase 6.5. The result-message mapper persists a - `(turnId, lastAssistantMessageUuid)` pair in session metadata on every - completed turn; replay during session restoration backfills pre-existing - on-disk sessions. This is the contract-level primitive that Phase 6.5 - consumes for fork — see "Phase 6.5 — Fork (deferred)" above. Tests: persist a session, restart the agent host, reload the session, verify turns are intact and a new turn appends correctly. Verify -`turnId → lastSdkMessageUuid` rows are persisted for each completed turn -on live ingest, and that session restoration replays the mapper to -backfill them. +replayed `tool_use` / `tool_result` pairs flatten to terminal +`ToolCall` states with content inlined. 
Verify a subagent marker +appears with `_meta.toolKind = 'subagent'` but its URI is not yet +dispatchable. -Exit criteria: agent-host restart is invisible; the turn-mapping ingest -is validated; truncate is documented as fork-by-another-name. Fork -end-to-end ships in Phase 6.5 (deferred to land alongside this phase). +Exit criteria: agent-host restart is invisible for parent transcripts; +self-hosting across restarts works; truncate is documented as +fork-by-another-name. Subagent transcript fetch ships in Phase 12; +fork end-to-end ships in Phase 6.5. ### Phase 14 — Hardening + telemetry @@ -1007,36 +1092,65 @@ end-to-end ships in Phase 6.5 (deferred to land alongside this phase). Exit criteria: ready to enable for external preview. -### Phase 15 — SDK upgrade (> 0.2.112) - -The initial implementation pins `@anthropic-ai/claude-agent-sdk` at -**`0.2.112`** — the same version the Copilot extension currently ships -(`extensions/copilot/package.json`). Versions above 0.2.112 introduce a -**native binary dependency** (prebuilt platform-specific addons), which -requires additional build infrastructure and cross-platform packaging work -beyond the scope of the initial rollout. - -This phase upgrades to a version > 0.2.112 once that infrastructure is -in place. - -**Checklist:** -- Identify the minimum version that provides the desired new SDK capabilities - (check changelog / GitHub releases for `@anthropic-ai/claude-agent-sdk`). -- Audit the native dependency: determine the addon's platform matrix, verify - the agent-host build pipeline can package and code-sign it for all - supported targets (win32-x64, darwin-x64, darwin-arm64, linux-x64). -- Validate the upgraded SDK against the full Phase 6–13 integration test - matrix (`Query.*` API surface, `enableFileCheckpointing`, - `Query.rewindFiles`, `Query.interrupt`). 
-- Update `agentHost/package.json` (or the shared platform `package.json`) - to the new version and update any API callsites that changed between - 0.2.112 and the target version. -- Run the full Phase 6–13 integration test suite against the new SDK version. -- Coordinate with the Copilot extension team to keep both consumers in sync - (or document the divergence intentionally). - -Exit criteria: agent host runs on the upgraded SDK with no regressions; -native dependency is packaged in all production builds. +### Phase 15 — SDK distribution via marketplace extension + +**Status as of 2026-05-13:** the agent host already runs against the latest +`@anthropic-ai/claude-agent-sdk` rather than `0.2.112`, but the SDK is loaded +from a path the user supplies (`chat.agentHost.claudeAgent.path` setting → +`AgentHostClaudeSdkPathEnvVar`, see [`claudeAgentSdkService.ts:148`](./claudeAgentSdkService.ts#L148)). +That mechanism unblocked development but is **not shippable**: it requires +every user to install the SDK locally and configure a path. + +**Direction:** distribute the SDK as a versioned VS Code extension so users +get it through the normal install flow. + +1. **Agent Host gains marketplace-install capability.** Today the agent + host is a closed utility process; it cannot fetch or install + extensions. Add the IPC + extension-management surface needed for the + agent host to install / update / load extensions from the VS Code + marketplace (or a registry it trusts). +2. **Publish a Claude SDK packaging extension to the marketplace.** A + thin extension whose only job is to ship a vetted version of + `@anthropic-ai/claude-agent-sdk` (and any native deps) and expose its + load path to the agent host. Versioned on the marketplace so SDK + upgrades become extension updates, not VS Code releases. +3. **Agent host loads the SDK from the installed extension** instead of + from `AgentHostClaudeSdkPathEnvVar`. The env-var path stays as a dev + override. 
The setting `chat.agentHost.claudeAgent.path` is repurposed + (or removed) for end users. + +**Why this shape:** +- SDK upgrades ship out-of-band from VS Code (no need to bundle a + specific SDK version into every VS Code release). +- The native-dependency packaging burden moves to the extension's + publishing pipeline, which is already a solved problem for VS Code + extensions across `win32-x64`, `darwin-x64`, `darwin-arm64`, + `linux-x64`. +- Multiple SDK-packaging extensions could coexist (e.g. an `@stable` + extension and a `@preview` extension), letting the user opt into + newer SDKs without a VS Code update. +- Other agent SDKs (future Anthropic / OpenAI / etc. providers) follow + the same model. + +**Open design points** (to be detailed in a phase plan when scheduled): +- IPC surface for agent-host-driven extension install (mirror or + delegate to the workbench's extension service?). +- Discovery contract: how does the agent host know which installed + extension provides the Claude SDK? (e.g. an extension `contributes` + field, a well-known activation event, a manifest-declared capability). +- Trust model: is the marketplace publisher the source of truth, or + does the agent host pin a specific publisher / extension id? +- Dev override: keep `AgentHostClaudeSdkPathEnvVar` as the + non-marketplace fallback for SDK development. + +This phase replaces the previous "upgrade the bundled SDK to a newer +0.2.x" plan, which assumed the SDK would always be a normal `npm` +dependency. That assumption no longer holds now that the SDK ships +native binaries. + +Exit criteria: a fresh VS Code install can use the Claude agent without +manually installing the SDK or setting any path. SDK upgrades arrive as +marketplace extension updates. 
--- diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts index b8ffc91c2b945..c8c9ffbe8d3ea 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts @@ -34,7 +34,7 @@ */ import type Anthropic from '@anthropic-ai/sdk'; -import type { Options, PermissionResult, Query, SDKMessage, SDKResultSuccess, SDKSessionInfo, SDKSystemMessage, SDKUserMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { GetSessionMessagesOptions, Options, PermissionResult, Query, SDKMessage, SDKResultSuccess, SDKSessionInfo, SDKSystemMessage, SDKUserMessage, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; import type { CCAModel } from '@vscode/copilot-api'; import assert from 'assert'; import type * as http from 'http'; @@ -328,6 +328,10 @@ class ProxyRoundTripSdkService implements IClaudeAgentSdkService { return undefined; } + async getSessionMessages(_sessionId: string, _options?: GetSessionMessagesOptions): Promise { + return []; + } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.capturedStartupOptions.push(params.options); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts index 18eb593f255ed..97864b9b5ce21 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import type Anthropic from '@anthropic-ai/sdk'; -import type { Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { GetSessionMessagesOptions, Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, 
SDKUserMessage, SessionMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; import type { CCAModel } from '@vscode/copilot-api'; import assert from 'assert'; @@ -174,6 +174,25 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { return this.sessionList.find(s => s.sessionId === sessionId); } + /** + * Phase 13: programmable transcript fetch. Tests stage canned + * `SessionMessage[]` per session id; absence resolves to `[]` to match + * the SDK's own "session not found" semantics. `getSessionMessagesRejection` + * lets tests simulate SDK throw paths (corrupt JSONL, dynamic-import fault). + */ + sessionMessagesById = new Map(); + getSessionMessagesCalls: { sessionId: string; options: GetSessionMessagesOptions | undefined }[] = []; + getSessionMessagesRejection: Error | undefined; + + async getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise { + this.getSessionMessagesCalls.push({ sessionId, options }); + if (this.getSessionMessagesRejection) { + const err = this.getSessionMessagesRejection; + throw err; + } + return this.sessionMessagesById.get(sessionId) ?? 
[]; + } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.startupCallCount++; this.capturedStartupOptions.push(params.options); @@ -2483,6 +2502,7 @@ suite('ClaudeAgent', () => { listSessions: async () => [{ sessionId: 's', summary: 's', lastModified: 1 }], getSessionInfo: async () => undefined, startup: async () => { throw new Error('TestableClaudeAgentSdkService: startup not modeled'); }, + getSessionMessages: async () => [], }; const result1 = await svc.listSessions(); const importInvocationsAfterFirstSuccess = importInvocations; @@ -3724,6 +3744,91 @@ suite('ClaudeAgent (Phase 9 — runtime mutation surface)', () => { // #endregion +// #region Phase 13 — Session restoration + +suite('ClaudeAgent (Phase 13 — getSessionMessages)', () => { + + const disposables = ensureNoDisposablesAreLeakedInTestSuite(); + + function makeUserSessionMessage(uuid: string, text: string): SessionMessage { + return { + type: 'user', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { role: 'user', content: [{ type: 'text', text }] }, + }; + } + + function makeAssistantSessionMessage(uuid: string, text: string): SessionMessage { + return { + type: 'assistant', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { id: `msg_${uuid}`, role: 'assistant', content: [{ type: 'text', text }] }, + }; + } + + test('getSessionMessages returns mapped Turn[] from SDK transcript', async () => { + const { agent, sdk } = createTestContext(disposables); + const sessionId = 'phase13-1'; + sdk.sessionMessagesById.set(sessionId, [ + makeUserSessionMessage('u1', 'hi'), + makeAssistantSessionMessage('a1', 'hello'), + ]); + + const turns = await agent.getSessionMessages(AgentSession.uri(agent.id, sessionId)); + + assert.strictEqual(turns.length, 1); + assert.strictEqual(turns[0].id, 'u1'); + assert.strictEqual(turns[0].userMessage.text, 'hi'); + assert.strictEqual(sdk.getSessionMessagesCalls.length, 1); + 
assert.deepStrictEqual(sdk.getSessionMessagesCalls[0], { + sessionId, + options: { includeSystemMessages: true }, + }); + }); + + test('getSessionMessages on subagent URI throws TODO: Phase 12 with no SDK call', async () => { + const { agent, sdk } = createTestContext(disposables); + const parentUri = AgentSession.uri(agent.id, 'parent'); + const subagentUri = URI.parse(`${parentUri.toString()}/subagent/tool-call-1`); + + await assert.rejects( + () => agent.getSessionMessages(subagentUri), + /TODO: Phase 12/, + ); + assert.strictEqual(sdk.getSessionMessagesCalls.length, 0, 'subagent URI must not hit SDK'); + }); + + test('getSessionMessages on provisional session returns [] with no SDK call', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/workspace') }); + + const turns = await agent.getSessionMessages(created.session); + + assert.deepStrictEqual(turns, []); + assert.strictEqual(sdk.getSessionMessagesCalls.length, 0, 'provisional session must not hit SDK'); + }); + + test('getSessionMessages returns [] on SDK fetch failure (warn-logged)', async () => { + const log = new CapturingLogService(); + const { agent, sdk } = createTestContext(disposables, { logService: log }); + sdk.getSessionMessagesRejection = new Error('simulated SDK failure'); + + const turns = await agent.getSessionMessages(AgentSession.uri(agent.id, 'fail-id')); + + assert.deepStrictEqual(turns, []); + assert.ok(log.warns.some(w => w.includes('getSessionMessages SDK fetch failed')), + `expected warn-log; got: ${log.warns.join(' | ')}`); + }); +}); + +// #endregion + + diff --git a/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts new file mode 100644 index 0000000000000..d906cbfae7dbb --- /dev/null +++ 
b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts @@ -0,0 +1,270 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { SessionMessage } from '@anthropic-ai/claude-agent-sdk'; +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js'; +import { URI } from '../../../../base/common/uri.js'; +import { NullLogService } from '../../../log/common/log.js'; +import { ResponsePartKind, ToolCallStatus, ToolResultContentType, TurnState } from '../../common/state/protocol/state.js'; +import { mapSessionMessagesToTurns } from '../../node/claude/claudeReplayMapper.js'; + +suite('claudeReplayMapper', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + const logService = new NullLogService(); + const session = URI.parse('claude:/sess-1'); + + function makeUser(uuid: string, text: string): SessionMessage { + return { + type: 'user', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { role: 'user', content: [{ type: 'text', text }] }, + }; + } + + function makeAssistantText(uuid: string, text: string): SessionMessage { + return { + type: 'assistant', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { id: `msg_${uuid}`, role: 'assistant', content: [{ type: 'text', text }] }, + }; + } + + function makeAssistantToolUse(uuid: string, toolUseId: string, name: string, input: unknown = {}): SessionMessage { + return { + type: 'assistant', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { + id: `msg_${uuid}`, + role: 'assistant', + content: [{ type: 'tool_use', id: toolUseId, name, input }], + }, + }; + } + + function 
makeUserToolResult(uuid: string, toolUseId: string, text: string, isError = false): SessionMessage { + return { + type: 'user', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: toolUseId, content: text, ...(isError ? { is_error: true } : {}) }], + }, + }; + } + + function makeSystem(uuid: string, subtype: string, text?: string): SessionMessage { + return { + type: 'system', + uuid, + session_id: 'sess-1', + parent_tool_use_id: null, + message: { subtype, ...(text !== undefined ? { text } : {}) }, + }; + } + + test('Fixture 1: single text turn', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'hello'), + makeAssistantText('a1', 'world'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 1); + assert.strictEqual(turns[0].id, 'u1', 'Turn.id MUST equal user SessionMessage.uuid'); + assert.strictEqual(turns[0].userMessage.text, 'hello'); + assert.strictEqual(turns[0].usage, undefined, 'replay never has usage'); + assert.strictEqual(turns[0].state, TurnState.Complete); + assert.strictEqual(turns[0].responseParts.length, 1); + const part = turns[0].responseParts[0]; + assert.strictEqual(part.kind, ResponsePartKind.Markdown); + if (part.kind === ResponsePartKind.Markdown) { + assert.strictEqual(part.content, 'world'); + } + }); + + test('Fixture 2: tool_use + tool_result is one Turn with one Completed ToolCall', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'list files'), + makeAssistantToolUse('a1', 'tu1', 'Bash', { command: 'ls' }), + makeUserToolResult('synthetic1', 'tu1', 'file1.txt\nfile2.txt'), + makeAssistantText('a2', 'two files'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 1, 'tool_result MUST NOT start a new turn'); + assert.strictEqual(turns[0].state, TurnState.Complete); + const toolCallParts = 
turns[0].responseParts.filter(p => p.kind === ResponsePartKind.ToolCall); + assert.strictEqual(toolCallParts.length, 1); + const toolCall = toolCallParts[0]; + assert.strictEqual(toolCall.kind, ResponsePartKind.ToolCall); + if (toolCall.kind === ResponsePartKind.ToolCall) { + assert.strictEqual(toolCall.toolCall.status, ToolCallStatus.Completed); + assert.strictEqual(toolCall.toolCall.toolName, 'Bash'); + if (toolCall.toolCall.status === ToolCallStatus.Completed) { + assert.strictEqual(toolCall.toolCall.success, true); + assert.deepStrictEqual(toolCall.toolCall.content, [{ type: ToolResultContentType.Text, text: 'file1.txt\nfile2.txt' }]); + } + } + }); + + test('Fixture 3: multi-turn produces ordered Turns', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'first'), + makeAssistantText('a1', 'reply 1'), + makeUser('u2', 'second'), + makeAssistantText('a2', 'reply 2'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 2); + assert.strictEqual(turns[0].id, 'u1'); + assert.strictEqual(turns[1].id, 'u2'); + }); + + test('Fixture 4: compact_boundary attaches as SystemNotification on the active turn', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'first'), + makeAssistantText('a1', 'reply 1'), + makeSystem('s1', 'compact_boundary', 'context compacted'), + makeAssistantText('a2', 'reply 2'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 1, 'compact_boundary is NOT a turn boundary'); + const sysParts = turns[0].responseParts.filter(p => p.kind === ResponsePartKind.SystemNotification); + assert.strictEqual(sysParts.length, 1); + }); + + test('Fixture 5: Task / Agent tool_use produces subagent marker', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'spawn subagent'), + makeAssistantToolUse('a1', 'tu1', 'Task', { description: 'do thing' }), + makeUserToolResult('synthetic1', 'tu1', 
'subagent done'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + const toolCallPart = turns[0].responseParts.find(p => p.kind === ResponsePartKind.ToolCall); + assert.ok(toolCallPart, 'expected a ToolCall part'); + if (toolCallPart && toolCallPart.kind === ResponsePartKind.ToolCall) { + assert.strictEqual(toolCallPart.toolCall._meta?.toolKind, 'subagent'); + if (toolCallPart.toolCall.status === ToolCallStatus.Completed) { + const hasSubagentMarker = toolCallPart.toolCall.content?.some(c => c.type === ToolResultContentType.Subagent); + assert.strictEqual(hasSubagentMarker, true, 'subagent marker block must be present'); + } else { + assert.fail(`expected Completed status, got ${toolCallPart.toolCall.status}`); + } + } + }); + + test('Fixture 5b: Agent tool name also recognised as subagent', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'spawn subagent'), + makeAssistantToolUse('a1', 'tu1', 'Agent', { description: 'do thing' }), + makeUserToolResult('synthetic1', 'tu1', 'done'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + const toolCallPart = turns[0].responseParts.find(p => p.kind === ResponsePartKind.ToolCall); + assert.ok(toolCallPart && toolCallPart.kind === ResponsePartKind.ToolCall); + if (toolCallPart.kind === ResponsePartKind.ToolCall) { + assert.strictEqual(toolCallPart.toolCall._meta?.toolKind, 'subagent'); + } + }); + + test('Fixture 6: tail Turn with orphan tool_use is Cancelled', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'do work'), + makeAssistantToolUse('a1', 'tu-orphan', 'Bash', { command: 'sleep 100' }), + // no matching tool_result — model crashed mid-turn + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 1); + assert.strictEqual(turns[0].state, TurnState.Cancelled); + }); + + test('Fixture 6b: orphan in turn N does NOT cancel turn N+1', () => { + const messages: 
SessionMessage[] = [ + makeUser('u1', 'first'), + makeAssistantToolUse('a1', 'tu-orphan', 'Bash', {}), + // no tool_result for tu-orphan + makeUser('u2', 'second'), + makeAssistantText('a2', 'clean reply'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 2); + assert.strictEqual(turns[0].state, TurnState.Cancelled, 'turn 1 has orphan'); + assert.strictEqual(turns[1].state, TurnState.Complete, 'turn 2 has no orphan'); + }); + + test('Fixture 7: non-allowlisted system subtypes are dropped', () => { + const messages: SessionMessage[] = [ + makeUser('u1', 'go'), + makeAssistantText('a1', 'reply'), + makeSystem('s1', 'api_retry', 'retrying'), + makeSystem('s2', 'hook_started', 'hook x'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + const sysParts = turns[0].responseParts.filter(p => p.kind === ResponsePartKind.SystemNotification); + assert.strictEqual(sysParts.length, 0); + }); + + test('Fixture 9: CLI slash-command echo and local-command-stdout entries are dropped', () => { + // On-disk shape verified empirically (claude-history skill): + // the `/model` echo lacks `isSynthetic` / `isMeta`, content is a + // raw string starting with `<command-name>`. Same for the + // `<local-command-stdout>` paired entry. 
+ const messages: SessionMessage[] = [ + makeUser('u1', 'what model are you'), + makeAssistantText('a1', 'sonnet'), + { + type: 'user', + uuid: 'echo-1', + session_id: 'sess-1', + parent_tool_use_id: null, + message: { role: 'user', content: '/model\n model\n claude-opus-4.7' }, + }, + { + type: 'user', + uuid: 'echo-2', + session_id: 'sess-1', + parent_tool_use_id: null, + message: { role: 'user', content: 'Set model to claude-opus-4.7' }, + }, + makeUser('u2', 'how about now'), + makeAssistantText('a2', 'opus'), + ]; + + const turns = mapSessionMessagesToTurns(messages, session, logService); + + assert.strictEqual(turns.length, 2, 'CLI-echo user envelopes must NOT start new turns'); + assert.strictEqual(turns[0].id, 'u1'); + assert.strictEqual(turns[0].userMessage.text, 'what model are you'); + assert.strictEqual(turns[1].id, 'u2'); + assert.strictEqual(turns[1].userMessage.text, 'how about now'); + }); +}); From f77f3f0cd4783fa004e6c8b3a0b71751b4314e74 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 13 May 2026 18:18:02 -0700 Subject: [PATCH 2/3] Read system subtype/content from envelope, not message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot review caught that on-disk JSONL system entries put `subtype` (and `content` for compact_boundary, `text` for notification) at the top level of the envelope alongside `type`, NOT nested inside `message`. The SDK's `SessionMessage` type only declares `{ type, uuid, session_id, message, parent_tool_use_id }` so the extra envelope fields aren't typed — but the production session parser fixtures (extensions/copilot/.../claudeSessionParser.spec.ts) confirm the on-disk shape. Net effect of the bug: real `compact_boundary` and `notification` entries were silently dropped even with `includeSystemMessages: true`, defeating the whole point of asking for them. 
- claudeReplayMapper.ts: parseSystemMessage now reads from the envelope via a single narrow cast; new readSystemEnvelopeText prefers `text` (notification) then `content` (compact_boundary). - claudeReplayMapper.test.ts: makeSystem helper now builds envelope-shaped fixtures; Fixture 4 asserts the actual text surfaces. --- package-lock.json | 80 +++++++++---------- package.json | 2 +- .../node/claude/claudeReplayMapper.ts | 33 ++++---- .../test/node/claudeReplayMapper.test.ts | 12 ++- 4 files changed, 65 insertions(+), 62 deletions(-) diff --git a/package-lock.json b/package-lock.json index ade048ca7b33b..51a5b9150347b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -72,7 +72,7 @@ "yazl": "^2.4.3" }, "devDependencies": { - "@anthropic-ai/claude-agent-sdk": "0.2.128", + "@anthropic-ai/claude-agent-sdk": "0.2.141", "@playwright/cli": "^0.1.9", "@playwright/test": "^1.56.1", "@stylistic/eslint-plugin-ts": "^2.8.0", @@ -179,36 +179,36 @@ } }, "node_modules/@anthropic-ai/claude-agent-sdk": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.128.tgz", - "integrity": "sha512-KI7H9bocPahGDrrQGME5Eh5a4RTqGrN1fQ69uLs6Ik4icXBZXouCx4Ecum450jMVy58myeh9ahYYLlpDAbQXPA==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.141.tgz", + "integrity": "sha512-AIBacMWGcZIUcXlUoObqjwJ6pmJI3BayAqPAFXuvSq3DHJXdiuZVs7l/zTB5l3nRhRv5cqSrI2XbiDeHgZWizw==", "dev": true, "license": "SEE LICENSE IN README.md", "dependencies": { - "@anthropic-ai/sdk": "^0.81.0", + "@anthropic-ai/sdk": "^0.93.0", "@modelcontextprotocol/sdk": "^1.29.0" }, "engines": { "node": ">=18.0.0" }, "optionalDependencies": { - "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.128", - "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.128", - "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.128", - "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.128", - 
"@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.128", - "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.128", - "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.128", - "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.128" + "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.141", + "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.141", + "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.141", + "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.141", + "@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.141", + "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.141", + "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.141", + "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.141" }, "peerDependencies": { "zod": "^4.0.0" } }, "node_modules/@anthropic-ai/claude-agent-sdk-darwin-arm64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-arm64/-/claude-agent-sdk-darwin-arm64-0.2.128.tgz", - "integrity": "sha512-RAzmB1ls+GWA/YiyfZLWdFYmj3md5emk7mCEeiKSKl2UN4i+tDWy2m/hjIvMFIzBqJJeGmZZSMnf3S0sL/GbhQ==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-arm64/-/claude-agent-sdk-darwin-arm64-0.2.141.tgz", + "integrity": "sha512-9HZ0ot6+FwOfQ1aeMqQLH4IJGMm/DcP08SysDxscVjBm6l2JjqleHohxi3zid0DurfGweqT+4x9GScJffwg55g==", "cpu": [ "arm64" ], @@ -220,9 +220,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-darwin-x64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-x64/-/claude-agent-sdk-darwin-x64-0.2.128.tgz", - "integrity": "sha512-dDPJHxUhL2sgIB8Q2AnBi4xsApImeW0zf1nbL7gBNSc9RWhGoGQAbPm0KaQ7/03jdom30z1VT5VMhQ5KeEYOIw==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-x64/-/claude-agent-sdk-darwin-x64-0.2.141.tgz", + "integrity": "sha512-4iAdarJaQ+2R58s6QJswZCzUdz2WQmL5lYG7Y+FLzWbRSROFfcH0QYpmOqSaPXd2KRQhIJwEacqecDZd/Q1XKQ==", "cpu": 
[ "x64" ], @@ -234,9 +234,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-linux-arm64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64/-/claude-agent-sdk-linux-arm64-0.2.128.tgz", - "integrity": "sha512-+GbB33eJSlZUWs84nsibY2nyAFQT96WYLGCteVn62Vv6ZK90NrZsm7lwurjw7oYNnvpzXorhZ2/XpQnWvOK6aQ==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64/-/claude-agent-sdk-linux-arm64-0.2.141.tgz", + "integrity": "sha512-Jdf0ZEwJzOP8sE6rPqdJN+SxMb0/L8sxJg4twCv/7S+Qzk0hJtls+wxSi+0Tjh6EEMaNxJqEGc7S3fx99Wi99Q==", "cpu": [ "arm64" ], @@ -248,9 +248,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-linux-arm64-musl": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64-musl/-/claude-agent-sdk-linux-arm64-musl-0.2.128.tgz", - "integrity": "sha512-ZCZEg42St0SCMMZFCvEtkF1LBFMYBxJRXzRno+12vOYYhC6R0l8jPjlgA2ZkN2Lb+TCEOO3fjeWJdZLL/NDM4w==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64-musl/-/claude-agent-sdk-linux-arm64-musl-0.2.141.tgz", + "integrity": "sha512-6H1AJ/AVaWNnV22kubUPkOTRzZFH0+qP9k7WlhriHMN9gtgZcVAsITMddDeGjQsQJMCAdhXFd6sgi7TM1LdeOQ==", "cpu": [ "arm64" ], @@ -262,9 +262,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-linux-x64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64/-/claude-agent-sdk-linux-x64-0.2.128.tgz", - "integrity": "sha512-aBBXD6OLN/lq9S1p+BNjuEml0lYIoHunFdzFl49B0fsxEAnz1RfJDrpSNpIUAaL5FMZIaFvLqXtbFRy41N2fxg==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64/-/claude-agent-sdk-linux-x64-0.2.141.tgz", + "integrity": "sha512-DVjp72f3HmrRYpbneWZZWIqkUht5kTZXS7wXGFiwzLz6eNYEgjjh+GcsnhIi8UOwZUtNiKUrjZnoP38ovFqV8A==", "cpu": [ "x64" ], @@ -276,9 +276,9 @@ ] }, 
"node_modules/@anthropic-ai/claude-agent-sdk-linux-x64-musl": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64-musl/-/claude-agent-sdk-linux-x64-musl-0.2.128.tgz", - "integrity": "sha512-sUSJEtvEt2iiMvgUuBGmBJjLhwHxDKOxVBSsXZaY46KAv3ZwLtLuc5xv2XFHId1B5+nMh7b7mr+HAiBmbMUODA==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64-musl/-/claude-agent-sdk-linux-x64-musl-0.2.141.tgz", + "integrity": "sha512-fTI1YuM4cxOa4nSgsyMAdB5ELizkWp+w5Ispo4JnnYtcczMAL4D9GBNjWPW0sUzKvjsJOUVim68SmWLWhUOpXQ==", "cpu": [ "x64" ], @@ -290,9 +290,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-win32-arm64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-arm64/-/claude-agent-sdk-win32-arm64-0.2.128.tgz", - "integrity": "sha512-9Ao2J5KgfkfKxUZK3dbQEGonPYcbUyn7Cn7ykZuP91FN/5ux3Tz90YRJW6UtZdWHoDkmFF0FS8P/jiZuyWPLfw==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-arm64/-/claude-agent-sdk-win32-arm64-0.2.141.tgz", + "integrity": "sha512-Wm10J6kfbufbPGFELokiJ/7Y5Oqug4Uag3HXFsV8g7TWCpaItx/oqVaJoiGptuAtXQB7xGLQVTuk082wER+Y5w==", "cpu": [ "arm64" ], @@ -304,9 +304,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-win32-x64": { - "version": "0.2.128", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-x64/-/claude-agent-sdk-win32-x64-0.2.128.tgz", - "integrity": "sha512-7oxPkgjw1vPZbx6+Qwt9mGouqfpRz5jDcuQ37koayzMdTVzmgCsKAqqbJSpOQfkFGv6gTjcrLWBlk3oapZfBYA==", + "version": "0.2.141", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-x64/-/claude-agent-sdk-win32-x64-0.2.141.tgz", + "integrity": "sha512-IXuP29YJuWbR5Q6xOHrjFVGG54V2s1FC61UVNwEN5fpxL09MwPnbwtQL6fqgzt/U1MP7vWAwpXZriYAklkH/mg==", "cpu": [ "x64" ], @@ -318,9 +318,9 @@ ] }, 
"node_modules/@anthropic-ai/claude-agent-sdk/node_modules/@anthropic-ai/sdk": { - "version": "0.81.0", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.81.0.tgz", - "integrity": "sha512-D4K5PvEV6wPiRtVlVsJHIUhHAmOZ6IT/I9rKlTf84gR7GyyAurPJK7z9BOf/AZqC5d1DhYQGJNKRmV+q8dGhgw==", + "version": "0.93.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.93.0.tgz", + "integrity": "sha512-q9vaSZQVFx6B/gPxetGYfLXSJD5v0sOmh0OpZDq7yCrTSA+Rscvrtyol7JJTW40wEpQB4U1B4JXzxQitbQ3CAA==", "dev": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index f8ab33c6156a7..e227e71f21385 100644 --- a/package.json +++ b/package.json @@ -149,7 +149,7 @@ "yazl": "^2.4.3" }, "devDependencies": { - "@anthropic-ai/claude-agent-sdk": "0.2.128", + "@anthropic-ai/claude-agent-sdk": "0.2.141", "@playwright/cli": "^0.1.9", "@playwright/test": "^1.56.1", "@stylistic/eslint-plugin-ts": "^2.8.0", diff --git a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts index 5026d910aaaa5..178eca97113b7 100644 --- a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts +++ b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts @@ -115,11 +115,18 @@ function parseAssistantMessage(msg: SessionMessage): ParsedSessionMessage | unde } function parseSystemMessage(msg: SessionMessage): ParsedSessionMessage | undefined { - const subtype = readSystemSubtype(msg.message); + // On-disk JSONL system entries carry `subtype` and the human-readable + // payload (`content` for `compact_boundary`, `text` for `notification`) + // at the envelope level alongside `type` — not nested under `message`. + // `SessionMessage.message` is typed `unknown` and the envelope's actual + // fields beyond `{ type, uuid, session_id, message, parent_tool_use_id }` + // aren't in the SDK type, so cast through `unknown` to read them. 
+ const envelope = msg as unknown as { subtype?: unknown; text?: unknown; content?: unknown }; + const subtype = typeof envelope.subtype === 'string' ? envelope.subtype : undefined; if (subtype === undefined || !ALLOWED_SYSTEM_SUBTYPES.has(subtype)) { return undefined; } - const text = readSystemText(msg.message) ?? `[${subtype}]`; + const text = readSystemEnvelopeText(envelope) ?? `[${subtype}]`; return { kind: 'system-notification', uuid: msg.uuid, subtype, text }; } @@ -429,24 +436,12 @@ function readAssistantBlocks(raw: unknown): readonly AssistantBlock[] | undefine return out; } -function readSystemSubtype(raw: unknown): string | undefined { - if (raw === null || typeof raw !== 'object') { - return undefined; - } - const subtype = (raw as { subtype?: unknown }).subtype; - return typeof subtype === 'string' ? subtype : undefined; -} - -function readSystemText(raw: unknown): string | undefined { - if (raw === null || typeof raw !== 'object') { - return undefined; - } - const r = raw as { text?: unknown; message?: unknown }; - if (typeof r.text === 'string') { - return r.text; +function readSystemEnvelopeText(envelope: { text?: unknown; content?: unknown }): string | undefined { + if (typeof envelope.text === 'string') { + return envelope.text; } - if (typeof r.message === 'string') { - return r.message; + if (typeof envelope.content === 'string') { + return envelope.content; } return undefined; } diff --git a/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts index d906cbfae7dbb..f34a92b87021c 100644 --- a/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts @@ -66,13 +66,20 @@ suite('claudeReplayMapper', () => { } function makeSystem(uuid: string, subtype: string, text?: string): SessionMessage { + // On-disk JSONL system entries put `subtype` and the human-readable + // payload at the envelope level 
alongside `type`, NOT inside `message`. + // `compact_boundary` uses `content`; `notification` uses `text`. + // Mirror both shapes here so fixtures match what the SDK writes. + const envelopeText = subtype === 'compact_boundary' ? { content: text } : { text }; return { type: 'system', uuid, session_id: 'sess-1', parent_tool_use_id: null, - message: { subtype, ...(text !== undefined ? { text } : {}) }, - }; + message: undefined, + ...(subtype !== undefined ? { subtype } : {}), + ...(text !== undefined ? envelopeText : {}), + } as unknown as SessionMessage; } test('Fixture 1: single text turn', () => { @@ -150,6 +157,7 @@ suite('claudeReplayMapper', () => { assert.strictEqual(turns.length, 1, 'compact_boundary is NOT a turn boundary'); const sysParts = turns[0].responseParts.filter(p => p.kind === ResponsePartKind.SystemNotification); assert.strictEqual(sysParts.length, 1); + assert.strictEqual(sysParts[0].content, 'context compacted', 'compact_boundary `content` field surfaces as the SystemNotification text'); }); test('Fixture 5: Task / Agent tool_use produces subagent marker', () => { From 05ebbc4808bba0bbecee649c99be2f3410c12dad Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 13 May 2026 18:19:39 -0700 Subject: [PATCH 3/3] Revert "Read system subtype/content from envelope, not message" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts f77f3f0cd47. Listening to the SDK contract over the production parser's raw-JSONL fixtures: the SDK's `SessionMessage` type declares `{ type, uuid, session_id, message: unknown, parent_tool_use_id }` for ALL three discriminants. For user/assistant we already read the discriminant-specific payload (role, content, blocks) from `message.*`; doing the same for system (`message.subtype`, `message.text`) is the consistent pattern. 
The production session parser fixtures parse raw on-disk JSONL — a lower layer than `getSessionMessages()`, which normalizes those entries into the documented `SessionMessage` shape with payload inside `message`. The original implementation was correct. --- package-lock.json | 80 +++++++++---------- package.json | 2 +- .../node/claude/claudeReplayMapper.ts | 33 ++++---- .../test/node/claudeReplayMapper.test.ts | 12 +-- 4 files changed, 62 insertions(+), 65 deletions(-) diff --git a/package-lock.json b/package-lock.json index 51a5b9150347b..ade048ca7b33b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -72,7 +72,7 @@ "yazl": "^2.4.3" }, "devDependencies": { - "@anthropic-ai/claude-agent-sdk": "0.2.141", + "@anthropic-ai/claude-agent-sdk": "0.2.128", "@playwright/cli": "^0.1.9", "@playwright/test": "^1.56.1", "@stylistic/eslint-plugin-ts": "^2.8.0", @@ -179,36 +179,36 @@ } }, "node_modules/@anthropic-ai/claude-agent-sdk": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.141.tgz", - "integrity": "sha512-AIBacMWGcZIUcXlUoObqjwJ6pmJI3BayAqPAFXuvSq3DHJXdiuZVs7l/zTB5l3nRhRv5cqSrI2XbiDeHgZWizw==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.128.tgz", + "integrity": "sha512-KI7H9bocPahGDrrQGME5Eh5a4RTqGrN1fQ69uLs6Ik4icXBZXouCx4Ecum450jMVy58myeh9ahYYLlpDAbQXPA==", "dev": true, "license": "SEE LICENSE IN README.md", "dependencies": { - "@anthropic-ai/sdk": "^0.93.0", + "@anthropic-ai/sdk": "^0.81.0", "@modelcontextprotocol/sdk": "^1.29.0" }, "engines": { "node": ">=18.0.0" }, "optionalDependencies": { - "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.141", - "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.141", - "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.141", - "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.141", - "@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.141", - 
"@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.141", - "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.141", - "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.141" + "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.128", + "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.128", + "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.128", + "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.128", + "@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.128", + "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.128", + "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.128", + "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.128" }, "peerDependencies": { "zod": "^4.0.0" } }, "node_modules/@anthropic-ai/claude-agent-sdk-darwin-arm64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-arm64/-/claude-agent-sdk-darwin-arm64-0.2.141.tgz", - "integrity": "sha512-9HZ0ot6+FwOfQ1aeMqQLH4IJGMm/DcP08SysDxscVjBm6l2JjqleHohxi3zid0DurfGweqT+4x9GScJffwg55g==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-arm64/-/claude-agent-sdk-darwin-arm64-0.2.128.tgz", + "integrity": "sha512-RAzmB1ls+GWA/YiyfZLWdFYmj3md5emk7mCEeiKSKl2UN4i+tDWy2m/hjIvMFIzBqJJeGmZZSMnf3S0sL/GbhQ==", "cpu": [ "arm64" ], @@ -220,9 +220,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-darwin-x64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-x64/-/claude-agent-sdk-darwin-x64-0.2.141.tgz", - "integrity": "sha512-4iAdarJaQ+2R58s6QJswZCzUdz2WQmL5lYG7Y+FLzWbRSROFfcH0QYpmOqSaPXd2KRQhIJwEacqecDZd/Q1XKQ==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-darwin-x64/-/claude-agent-sdk-darwin-x64-0.2.128.tgz", + "integrity": "sha512-dDPJHxUhL2sgIB8Q2AnBi4xsApImeW0zf1nbL7gBNSc9RWhGoGQAbPm0KaQ7/03jdom30z1VT5VMhQ5KeEYOIw==", "cpu": [ "x64" ], @@ -234,9 +234,9 @@ ] }, 
"node_modules/@anthropic-ai/claude-agent-sdk-linux-arm64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64/-/claude-agent-sdk-linux-arm64-0.2.141.tgz", - "integrity": "sha512-Jdf0ZEwJzOP8sE6rPqdJN+SxMb0/L8sxJg4twCv/7S+Qzk0hJtls+wxSi+0Tjh6EEMaNxJqEGc7S3fx99Wi99Q==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64/-/claude-agent-sdk-linux-arm64-0.2.128.tgz", + "integrity": "sha512-+GbB33eJSlZUWs84nsibY2nyAFQT96WYLGCteVn62Vv6ZK90NrZsm7lwurjw7oYNnvpzXorhZ2/XpQnWvOK6aQ==", "cpu": [ "arm64" ], @@ -248,9 +248,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-linux-arm64-musl": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64-musl/-/claude-agent-sdk-linux-arm64-musl-0.2.141.tgz", - "integrity": "sha512-6H1AJ/AVaWNnV22kubUPkOTRzZFH0+qP9k7WlhriHMN9gtgZcVAsITMddDeGjQsQJMCAdhXFd6sgi7TM1LdeOQ==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-arm64-musl/-/claude-agent-sdk-linux-arm64-musl-0.2.128.tgz", + "integrity": "sha512-ZCZEg42St0SCMMZFCvEtkF1LBFMYBxJRXzRno+12vOYYhC6R0l8jPjlgA2ZkN2Lb+TCEOO3fjeWJdZLL/NDM4w==", "cpu": [ "arm64" ], @@ -262,9 +262,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-linux-x64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64/-/claude-agent-sdk-linux-x64-0.2.141.tgz", - "integrity": "sha512-DVjp72f3HmrRYpbneWZZWIqkUht5kTZXS7wXGFiwzLz6eNYEgjjh+GcsnhIi8UOwZUtNiKUrjZnoP38ovFqV8A==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64/-/claude-agent-sdk-linux-x64-0.2.128.tgz", + "integrity": "sha512-aBBXD6OLN/lq9S1p+BNjuEml0lYIoHunFdzFl49B0fsxEAnz1RfJDrpSNpIUAaL5FMZIaFvLqXtbFRy41N2fxg==", "cpu": [ "x64" ], @@ -276,9 +276,9 @@ ] }, 
"node_modules/@anthropic-ai/claude-agent-sdk-linux-x64-musl": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64-musl/-/claude-agent-sdk-linux-x64-musl-0.2.141.tgz", - "integrity": "sha512-fTI1YuM4cxOa4nSgsyMAdB5ELizkWp+w5Ispo4JnnYtcczMAL4D9GBNjWPW0sUzKvjsJOUVim68SmWLWhUOpXQ==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-linux-x64-musl/-/claude-agent-sdk-linux-x64-musl-0.2.128.tgz", + "integrity": "sha512-sUSJEtvEt2iiMvgUuBGmBJjLhwHxDKOxVBSsXZaY46KAv3ZwLtLuc5xv2XFHId1B5+nMh7b7mr+HAiBmbMUODA==", "cpu": [ "x64" ], @@ -290,9 +290,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-win32-arm64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-arm64/-/claude-agent-sdk-win32-arm64-0.2.141.tgz", - "integrity": "sha512-Wm10J6kfbufbPGFELokiJ/7Y5Oqug4Uag3HXFsV8g7TWCpaItx/oqVaJoiGptuAtXQB7xGLQVTuk082wER+Y5w==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-arm64/-/claude-agent-sdk-win32-arm64-0.2.128.tgz", + "integrity": "sha512-9Ao2J5KgfkfKxUZK3dbQEGonPYcbUyn7Cn7ykZuP91FN/5ux3Tz90YRJW6UtZdWHoDkmFF0FS8P/jiZuyWPLfw==", "cpu": [ "arm64" ], @@ -304,9 +304,9 @@ ] }, "node_modules/@anthropic-ai/claude-agent-sdk-win32-x64": { - "version": "0.2.141", - "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-x64/-/claude-agent-sdk-win32-x64-0.2.141.tgz", - "integrity": "sha512-IXuP29YJuWbR5Q6xOHrjFVGG54V2s1FC61UVNwEN5fpxL09MwPnbwtQL6fqgzt/U1MP7vWAwpXZriYAklkH/mg==", + "version": "0.2.128", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk-win32-x64/-/claude-agent-sdk-win32-x64-0.2.128.tgz", + "integrity": "sha512-7oxPkgjw1vPZbx6+Qwt9mGouqfpRz5jDcuQ37koayzMdTVzmgCsKAqqbJSpOQfkFGv6gTjcrLWBlk3oapZfBYA==", "cpu": [ "x64" ], @@ -318,9 +318,9 @@ ] }, 
"node_modules/@anthropic-ai/claude-agent-sdk/node_modules/@anthropic-ai/sdk": { - "version": "0.93.0", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.93.0.tgz", - "integrity": "sha512-q9vaSZQVFx6B/gPxetGYfLXSJD5v0sOmh0OpZDq7yCrTSA+Rscvrtyol7JJTW40wEpQB4U1B4JXzxQitbQ3CAA==", + "version": "0.81.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.81.0.tgz", + "integrity": "sha512-D4K5PvEV6wPiRtVlVsJHIUhHAmOZ6IT/I9rKlTf84gR7GyyAurPJK7z9BOf/AZqC5d1DhYQGJNKRmV+q8dGhgw==", "dev": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index e227e71f21385..f8ab33c6156a7 100644 --- a/package.json +++ b/package.json @@ -149,7 +149,7 @@ "yazl": "^2.4.3" }, "devDependencies": { - "@anthropic-ai/claude-agent-sdk": "0.2.141", + "@anthropic-ai/claude-agent-sdk": "0.2.128", "@playwright/cli": "^0.1.9", "@playwright/test": "^1.56.1", "@stylistic/eslint-plugin-ts": "^2.8.0", diff --git a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts index 178eca97113b7..5026d910aaaa5 100644 --- a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts +++ b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts @@ -115,18 +115,11 @@ function parseAssistantMessage(msg: SessionMessage): ParsedSessionMessage | unde } function parseSystemMessage(msg: SessionMessage): ParsedSessionMessage | undefined { - // On-disk JSONL system entries carry `subtype` and the human-readable - // payload (`content` for `compact_boundary`, `text` for `notification`) - // at the envelope level alongside `type` — not nested under `message`. - // `SessionMessage.message` is typed `unknown` and the envelope's actual - // fields beyond `{ type, uuid, session_id, message, parent_tool_use_id }` - // aren't in the SDK type, so cast through `unknown` to read them. 
- const envelope = msg as unknown as { subtype?: unknown; text?: unknown; content?: unknown }; - const subtype = typeof envelope.subtype === 'string' ? envelope.subtype : undefined; + const subtype = readSystemSubtype(msg.message); if (subtype === undefined || !ALLOWED_SYSTEM_SUBTYPES.has(subtype)) { return undefined; } - const text = readSystemEnvelopeText(envelope) ?? `[${subtype}]`; + const text = readSystemText(msg.message) ?? `[${subtype}]`; return { kind: 'system-notification', uuid: msg.uuid, subtype, text }; } @@ -436,12 +429,24 @@ function readAssistantBlocks(raw: unknown): readonly AssistantBlock[] | undefine return out; } -function readSystemEnvelopeText(envelope: { text?: unknown; content?: unknown }): string | undefined { - if (typeof envelope.text === 'string') { - return envelope.text; +function readSystemSubtype(raw: unknown): string | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const subtype = (raw as { subtype?: unknown }).subtype; + return typeof subtype === 'string' ? 
subtype : undefined; +} + +function readSystemText(raw: unknown): string | undefined { + if (raw === null || typeof raw !== 'object') { + return undefined; + } + const r = raw as { text?: unknown; message?: unknown }; + if (typeof r.text === 'string') { + return r.text; } - if (typeof envelope.content === 'string') { - return envelope.content; + if (typeof r.message === 'string') { + return r.message; } return undefined; } diff --git a/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts index f34a92b87021c..d906cbfae7dbb 100644 --- a/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeReplayMapper.test.ts @@ -66,20 +66,13 @@ suite('claudeReplayMapper', () => { } function makeSystem(uuid: string, subtype: string, text?: string): SessionMessage { - // On-disk JSONL system entries put `subtype` and the human-readable - // payload at the envelope level alongside `type`, NOT inside `message`. - // `compact_boundary` uses `content`; `notification` uses `text`. - // Mirror both shapes here so fixtures match what the SDK writes. - const envelopeText = subtype === 'compact_boundary' ? { content: text } : { text }; return { type: 'system', uuid, session_id: 'sess-1', parent_tool_use_id: null, - message: undefined, - ...(subtype !== undefined ? { subtype } : {}), - ...(text !== undefined ? envelopeText : {}), - } as unknown as SessionMessage; + message: { subtype, ...(text !== undefined ? 
{ text } : {}) }, + }; } test('Fixture 1: single text turn', () => { @@ -157,7 +150,6 @@ suite('claudeReplayMapper', () => { assert.strictEqual(turns.length, 1, 'compact_boundary is NOT a turn boundary'); const sysParts = turns[0].responseParts.filter(p => p.kind === ResponsePartKind.SystemNotification); assert.strictEqual(sysParts.length, 1); - assert.strictEqual(sysParts[0].content, 'context compacted', 'compact_boundary `content` field surfaces as the SystemNotification text'); }); test('Fixture 5: Task / Agent tool_use produces subagent marker', () => {