Skip to content

Commit cf0557e

Browse files
kapaleshreyasclaude
andcommitted
feat: structured logging across harness + engines + substrates (Wedge 1.9)
Every lifecycle step of every harness and every substrate now emits one structured log line to stderr, configurable by env: COMPUTERAGENT_LOG=debug|info|warn|error|silent (default: info) COMPUTERAGENT_LOG_FORMAT=pretty|json (default: auto by TTY) What you see at info: substrate.local.boot, harness.boot, harness.ready, session.create, session.start, engine.start, session.end, engine.turn.end, substrate.local.dispose What you additionally see at debug: http.request (every REST hit), engine.tool_use, engine.tool_result, engine.assistant_text, engine.usage, fs.read/write/edit/delete, etc. Architecture (per Wedge 1.9 in the plan): - Tiny ~80 LOC zero-dep `Logger` in @computeragent/protocol. Stable interface; pino can replace the implementation later. - Additive `logger?: Logger` on `EngineContext`, `ServerDeps`, and `CreateHarnessServerOptions`. `nopLogger` is the default. - Engines (claude-agent-sdk, gitagent, deepagents): one log call per tool_use, tool_result, assistant_text, usage_snapshot; plus start + turn.end + error. - Harness server: per-route http.request logs; create-session, run-session, permission_request, permission_decision, cancel, http.error. - Substrates (local, e2b, vzvm): boot, harness.ready, dispose, plus per-runtime steps (upload, install, vm.clone, port_forward). - SDK `new ComputerAgent({ debug: true })`: forces COMPUTERAGENT_LOG=debug in the spawned harness env AND emits a one-line client-side summary per HarnessEvent consumed. - LocalSubstrate (and E2B/VZVM) defaults onLog to relay child stderr to parent stderr verbatim, so harness-side structured logs surface in the parent terminal without extra wiring. Bundle size: +20KB across all three substrate bundles (1.0M of which is the engine code + 80 LOC logger). Acceptable. Known limitation: on sub-second `agent.dispose()` flows, the very last 1-2 engine + session.end lines may be truncated by the stderr flush race when the harness child is SIGTERM'd. A 250ms drain in killProcess covers most cases; not all. Doesn't affect real-length agent runs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 9d9b249 commit cf0557e

33 files changed

Lines changed: 2472 additions & 729 deletions

packages/engine-claude-agent-sdk/src/engine.ts

Lines changed: 72 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type {
77
EngineEvent,
88
UserMessage,
99
} from "@computeragent/protocol";
10+
import { nopLogger } from "@computeragent/protocol";
1011
import { buildCanUseTool } from "./permission-bridge.js";
1112
import { deriveEngineUuid } from "./derive-uuid.js";
1213

@@ -39,6 +40,15 @@ export class ClaudeAgentEngine implements EngineDriver<ClaudeAgentOptions> {
3940
async *startSession(
4041
ctx: EngineContext<ClaudeAgentOptions>,
4142
): AsyncIterable<EngineEvent> {
43+
const log = ctx.logger ?? nopLogger;
44+
const turnStartedAt = Date.now();
45+
const model = (ctx.options as { model?: string }).model;
46+
log.info("engine.start", {
47+
engine: "claude-agent-sdk",
48+
sessionId: ctx.sessionId,
49+
model,
50+
workdir: ctx.workdir,
51+
});
4252
const prompt = adaptUserMessages(ctx.userMessageQueue, ctx.sessionId);
4353
const abortController = signalToController(ctx.abortSignal);
4454

@@ -93,19 +103,70 @@ export class ClaudeAgentEngine implements EngineDriver<ClaudeAgentOptions> {
93103
...storeOpts,
94104
};
95105

96-
for await (const message of query({ prompt, options })) {
97-
if (ctx.abortSignal.aborted) break;
106+
try {
107+
for await (const message of query({ prompt, options })) {
108+
if (ctx.abortSignal.aborted) break;
98109

99-
// Surface token/cost telemetry BEFORE the message itself. SDKResultMessage
100-
// is the turn terminator — once the SDK consumer sees it, my issue #2
101-
// fix synthesizes a `ca_session_ended` and stops reading. So usage has
102-
// to land on the wire BEFORE the result, otherwise it's dropped. The
103-
// consumer (SDK aggregator) uses costSemantic="cumulative" to take the
104-
// max and sums the per-turn tokens. Never compute cost client-side — see #5.
105-
const snapshot = toUsageSnapshot(message);
106-
if (snapshot) yield snapshot;
110+
logSdkMessage(log, ctx.sessionId, message);
107111

108-
yield { kind: "sdk_message", payload: message };
112+
// Surface token/cost telemetry BEFORE the message itself. SDKResultMessage
113+
// is the turn terminator — once the SDK consumer sees it, my issue #2
114+
// fix synthesizes a `ca_session_ended` and stops reading. So usage has
115+
// to land on the wire BEFORE the result, otherwise it's dropped. The
116+
// consumer (SDK aggregator) uses costSemantic="cumulative" to take the
117+
// max and sums the per-turn tokens. Never compute cost client-side — see #5.
118+
const snapshot = toUsageSnapshot(message);
119+
if (snapshot) {
120+
log.debug("engine.usage", {
121+
sessionId: ctx.sessionId,
122+
inputTokens: snapshot.kind === "ca_usage_snapshot" ? snapshot.inputTokens : undefined,
123+
outputTokens: snapshot.kind === "ca_usage_snapshot" ? snapshot.outputTokens : undefined,
124+
costUsd: snapshot.kind === "ca_usage_snapshot" ? snapshot.costUsd : undefined,
125+
});
126+
yield snapshot;
127+
}
128+
129+
yield { kind: "sdk_message", payload: message };
130+
}
131+
log.info("engine.turn.end", {
132+
engine: "claude-agent-sdk",
133+
sessionId: ctx.sessionId,
134+
durationMs: Date.now() - turnStartedAt,
135+
});
136+
} catch (err) {
137+
log.error("engine.error", {
138+
engine: "claude-agent-sdk",
139+
sessionId: ctx.sessionId,
140+
error: err instanceof Error ? err.message : String(err),
141+
});
142+
throw err;
143+
}
144+
}
145+
}
146+
147+
/**
148+
* Extract tool_use, tool_result, and assistant_text events from each
149+
* SDKMessage as it flows through. Pure log emission — no transformation.
150+
*/
151+
function logSdkMessage(log: { debug: (e: string, f?: Record<string, unknown>) => void }, sessionId: string, message: unknown): void {
152+
const m = message as {
153+
type?: string;
154+
message?: { content?: Array<{ type: string; name?: string; id?: string; text?: string; tool_use_id?: string; is_error?: boolean; content?: unknown }> };
155+
};
156+
if (m?.type === "assistant" && Array.isArray(m.message?.content)) {
157+
for (const block of m.message.content) {
158+
if (block.type === "tool_use") {
159+
log.debug("engine.tool_use", { sessionId, name: block.name, callId: block.id });
160+
} else if (block.type === "text" && typeof block.text === "string") {
161+
log.debug("engine.assistant_text", { sessionId, textLen: block.text.length });
162+
}
163+
}
164+
} else if (m?.type === "user" && Array.isArray(m.message?.content)) {
165+
for (const block of m.message.content) {
166+
if (block.type === "tool_result") {
167+
const bytes = typeof block.content === "string" ? block.content.length : JSON.stringify(block.content ?? "").length;
168+
log.debug("engine.tool_result", { sessionId, callId: block.tool_use_id, isError: block.is_error, bytes });
169+
}
109170
}
110171
}
111172
}

packages/engine-deepagents/src/engine.ts

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ import type {
33
EngineContext,
44
EngineDriver,
55
EngineEvent,
6+
Logger,
67
UserMessage,
78
} from "@computeragent/protocol";
9+
import { nopLogger } from "@computeragent/protocol";
810

911
const CAPABILITIES: EngineCapabilities = {
1012
streamingInput: true,
@@ -52,6 +54,7 @@ export class DeepAgentsEngine implements EngineDriver<DeepAgentsOptions> {
5254
async *startSession(
5355
ctx: EngineContext<DeepAgentsOptions>,
5456
): AsyncIterable<EngineEvent> {
57+
const log = ctx.logger ?? nopLogger;
5558
// Lazy import — deepagents pulls in a heavy LangChain dep tree (~150MB).
5659
// Loading it inside startSession means importers of @computeragent/engine-deepagents
5760
// only pay the cost when the engine actually runs.
@@ -97,12 +100,26 @@ export class DeepAgentsEngine implements EngineDriver<DeepAgentsOptions> {
97100
...(ctx.options.systemPrompt ? { systemPrompt: ctx.options.systemPrompt } : {}),
98101
});
99102

103+
log.info("engine.start", {
104+
engine: "deepagents",
105+
sessionId: ctx.sessionId,
106+
model: stripProviderPrefix(modelName),
107+
workdir: ctx.workdir,
108+
backend: "local-shell",
109+
});
110+
100111
// One outer iteration = one user message = one fresh stream call.
101112
// The checkpointer + threadId carry conversation state across turns.
102113
for await (const userMsg of ctx.userMessageQueue) {
103114
if (ctx.abortSignal.aborted) break;
104115

105116
const userText = flattenContent(userMsg.content);
117+
const turnStartedAt = Date.now();
118+
log.info("engine.turn.start", {
119+
engine: "deepagents",
120+
sessionId: ctx.sessionId,
121+
userTextLen: userText.length,
122+
});
106123
const input = { messages: [{ role: "user" as const, content: userText }] };
107124
const config = {
108125
configurable: { thread_id: threadId },
@@ -111,32 +128,52 @@ export class DeepAgentsEngine implements EngineDriver<DeepAgentsOptions> {
111128

112129
let lastUsage: { input_tokens?: number; output_tokens?: number } | undefined;
113130
let finalText = "";
131+
let priorMsgCount = 0;
132+
133+
try {
134+
// Stream LangGraph events in "values" mode — emits the full state on each
135+
// step. We forward every chunk so downstream tools can inspect the state.
136+
const stream = await agent.stream(input, { ...config, streamMode: "values" });
137+
for await (const chunk of stream as AsyncIterable<unknown>) {
138+
if (ctx.abortSignal.aborted) break;
139+
yield { kind: "sdk_message", payload: chunk };
114140

115-
// Stream LangGraph events in "values" mode — emits the full state on each
116-
// step. We forward every chunk so downstream tools can inspect the state.
117-
const stream = await agent.stream(input, { ...config, streamMode: "values" });
118-
for await (const chunk of stream as AsyncIterable<unknown>) {
119-
if (ctx.abortSignal.aborted) break;
120-
yield { kind: "sdk_message", payload: chunk };
121-
122-
// Inspect the latest message for usage metadata + assistant text.
123-
const messages = extractMessagesFromChunk(chunk);
124-
const last = messages[messages.length - 1];
125-
if (last) {
126-
if (typeof last.content === "string" && last.content.trim()) {
127-
finalText = last.content;
128-
} else if (Array.isArray(last.content)) {
129-
const textParts = last.content
130-
.filter((b): b is { type: "text"; text: string } => (b as { type?: string }).type === "text")
131-
.map((b) => b.text);
132-
if (textParts.length > 0) finalText = textParts.join("");
141+
// Inspect the latest message for usage metadata + assistant text.
142+
const messages = extractMessagesFromChunk(chunk);
143+
// Log only NEW messages per chunk so each tool_use/tool_result lands once.
144+
const newMsgs = messages.slice(priorMsgCount);
145+
priorMsgCount = messages.length;
146+
for (const m of newMsgs) logDeepAgentMessage(log, ctx.sessionId, m);
147+
148+
const last = messages[messages.length - 1];
149+
if (last) {
150+
if (typeof last.content === "string" && last.content.trim()) {
151+
finalText = last.content;
152+
} else if (Array.isArray(last.content)) {
153+
const textParts = last.content
154+
.filter((b): b is { type: "text"; text: string } => (b as { type?: string }).type === "text")
155+
.map((b) => b.text);
156+
if (textParts.length > 0) finalText = textParts.join("");
157+
}
158+
if (last.usage_metadata) lastUsage = last.usage_metadata;
133159
}
134-
if (last.usage_metadata) lastUsage = last.usage_metadata;
135160
}
161+
} catch (err) {
162+
log.error("engine.error", {
163+
engine: "deepagents",
164+
sessionId: ctx.sessionId,
165+
error: err instanceof Error ? err.message : String(err),
166+
});
167+
throw err;
136168
}
137169

138170
// Emit usage snapshot BEFORE the result terminator (per issue #2 ordering).
139171
if (lastUsage && (lastUsage.input_tokens !== undefined || lastUsage.output_tokens !== undefined)) {
172+
log.debug("engine.usage", {
173+
sessionId: ctx.sessionId,
174+
inputTokens: lastUsage.input_tokens,
175+
outputTokens: lastUsage.output_tokens,
176+
});
140177
yield {
141178
kind: "ca_usage_snapshot",
142179
...(lastUsage.input_tokens !== undefined ? { inputTokens: lastUsage.input_tokens } : {}),
@@ -158,6 +195,46 @@ export class DeepAgentsEngine implements EngineDriver<DeepAgentsOptions> {
158195
...(lastUsage ? { usage: lastUsage } : {}),
159196
},
160197
};
198+
log.info("engine.turn.end", {
199+
engine: "deepagents",
200+
sessionId: ctx.sessionId,
201+
durationMs: Date.now() - turnStartedAt,
202+
finalTextLen: finalText.length,
203+
});
204+
}
205+
}
206+
}
207+
208+
/**
209+
* Pure log emission for a single LangChain BaseMessage (already destructured
210+
* from a `messages[]` array). LangChain messages expose `_getType()` for role
211+
* and `tool_calls` (when assistant chose to call tools).
212+
*/
213+
function logDeepAgentMessage(
214+
log: Logger,
215+
sessionId: string,
216+
msg: { content?: unknown; _getType?: () => string; tool_calls?: Array<{ name: string; id?: string }>; name?: string; tool_call_id?: string },
217+
): void {
218+
const role = typeof msg._getType === "function" ? msg._getType() : "?";
219+
if (msg.tool_calls && msg.tool_calls.length > 0) {
220+
for (const tc of msg.tool_calls) {
221+
log.debug("engine.tool_use", { sessionId, name: tc.name, callId: tc.id });
222+
}
223+
return;
224+
}
225+
if (role === "tool") {
226+
const bytes = typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content ?? "").length;
227+
log.debug("engine.tool_result", { sessionId, name: msg.name, callId: msg.tool_call_id, bytes });
228+
return;
229+
}
230+
if (role === "ai" || role === "assistant") {
231+
if (typeof msg.content === "string" && msg.content.trim()) {
232+
log.debug("engine.assistant_text", { sessionId, textLen: msg.content.length });
233+
} else if (Array.isArray(msg.content)) {
234+
const textLen = msg.content
235+
.filter((b): b is { type: string; text: string } => (b as { type?: string }).type === "text")
236+
.reduce((n, b) => n + (b.text?.length ?? 0), 0);
237+
if (textLen > 0) log.debug("engine.assistant_text", { sessionId, textLen });
161238
}
162239
}
163240
}

0 commit comments

Comments
 (0)