From dce5ea8f43117c785810ecab08af361e95145408 Mon Sep 17 00:00:00 2001 From: Dominik Kundel Date: Thu, 13 Nov 2025 09:38:09 -0800 Subject: [PATCH 1/3] Add shell and apply patch tool --- .agent/PLANS.md | 146 ++++ .changeset/lucky-apes-kiss.md | 9 + AGENTS.md | 6 + docs/src/content/docs/guides/tools.mdx | 2 + docs/src/content/docs/ja/guides/tools.mdx | 16 +- docs/src/content/docs/ko/guides/tools.mdx | 16 +- docs/src/content/docs/zh/guides/tools.mdx | 16 +- .../human-in-the-loop-stream.ts | 5 + examples/agent-patterns/human-in-the-loop.ts | 2 +- examples/ai-sdk-v1/ai-sdk-v1.ts | 4 + examples/docs/human-in-the-loop/index.ts | 2 +- examples/docs/mcp/hostedHITL.ts | 4 +- examples/docs/streaming/streamedHITL.ts | 2 +- examples/mcp/hosted-mcp-human-in-the-loop.ts | 4 +- examples/mcp/hosted-mcp-on-approval.ts | 4 +- examples/memory/file-hitl.ts | 4 +- examples/memory/memory-hitl.ts | 4 +- examples/memory/oai-hitl.ts | 4 +- examples/realtime-next/src/app/page.tsx | 2 +- .../realtime-next/src/app/websocket/page.tsx | 2 +- examples/realtime-twilio-sip/package.json | 2 +- examples/tools/README.md | 14 +- examples/tools/applyPatch.ts | 174 +++++ examples/tools/package.json | 6 +- examples/tools/shell.ts | 130 ++++ package.json | 2 + packages/agents-core/package.json | 4 +- packages/agents-core/src/editor.ts | 42 + .../src/extensions/handoffFilters.ts | 4 + packages/agents-core/src/index.ts | 11 + packages/agents-core/src/items.ts | 31 +- packages/agents-core/src/model.ts | 20 +- packages/agents-core/src/runContext.ts | 6 +- packages/agents-core/src/runImplementation.ts | 410 +++++++++- packages/agents-core/src/runState.ts | 64 +- packages/agents-core/src/shell.ts | 43 ++ packages/agents-core/src/tool.ts | 117 +++ packages/agents-core/src/types/aliases.ts | 12 + packages/agents-core/src/types/protocol.ts | 111 +++ packages/agents-core/src/utils/applyDiff.ts | 358 +++++++++ packages/agents-core/src/utils/index.ts | 1 + packages/agents-core/src/utils/serialize.ts | 12 + 
.../test/extensions/handoffFilters.test.ts | 35 +- packages/agents-core/test/items.test.ts | 1 + .../test/runImplementation.test.ts | 408 ++++++++++ packages/agents-core/test/runState.test.ts | 70 +- packages/agents-core/test/stubs.ts | 63 ++ packages/agents-core/test/tool.test.ts | 63 +- .../agents-core/test/utils/applyDiff.test.ts | 715 ++++++++++++++++++ .../agents-core/test/utils/serialize.test.ts | 22 + packages/agents-extensions/src/aiSdk.ts | 19 +- packages/agents-extensions/test/aiSdk.test.ts | 24 + .../src/openaiChatCompletionsConverter.ts | 6 +- .../agents-openai/src/openaiResponsesModel.ts | 228 +++++- .../test/openaiResponsesModel.helpers.test.ts | 167 +++- .../agents-realtime/src/realtimeSession.ts | 16 +- packages/agents-realtime/src/utils.ts | 9 + packages/agents/src/index.ts | 12 + pnpm-lock.yaml | 22 +- scripts/embedMeta.ts | 4 +- 60 files changed, 3607 insertions(+), 105 deletions(-) create mode 100644 .agent/PLANS.md create mode 100644 .changeset/lucky-apes-kiss.md create mode 100644 examples/tools/applyPatch.ts create mode 100644 examples/tools/shell.ts create mode 100644 packages/agents-core/src/editor.ts create mode 100644 packages/agents-core/src/shell.ts create mode 100644 packages/agents-core/src/utils/applyDiff.ts create mode 100644 packages/agents-core/test/utils/applyDiff.test.ts diff --git a/.agent/PLANS.md b/.agent/PLANS.md new file mode 100644 index 00000000..48dd390d --- /dev/null +++ b/.agent/PLANS.md @@ -0,0 +1,146 @@ +# Codex Execution Plans (ExecPlans): + +This document describes the requirements for an execution plan ("ExecPlan"), a design document that a coding agent can follow to deliver a working feature or system change. Treat the reader as a complete beginner to this repository: they have only the current working tree and the single ExecPlan file you provide. There is no memory of prior plans and no external context. 
+ +## How to use ExecPlans and PLANS.md + +When authoring an executable specification (ExecPlan), follow PLANS.md _to the letter_. If it is not in your context, refresh your memory by reading the entire PLANS.md file. Be thorough in reading (and re-reading) source material to produce an accurate specification. When creating a spec, start from the skeleton and flesh it out as you do your research. + +When implementing an executable specification (ExecPlan), do not prompt the user for "next steps"; simply proceed to the next milestone. Keep all sections up to date, add or split entries in the list at every stopping point to affirmatively state the progress made and next steps. Resolve ambiguities autonomously, and commit frequently. + +When discussing an executable specification (ExecPlan), record decisions in a log in the spec for posterity; it should be unambiguously clear why any change to the specification was made. ExecPlans are living documents, and it should always be possible to restart from _only_ the ExecPlan and no other work. + +When researching a design with challenging requirements or significant unknowns, use milestones to implement proofs of concept, "toy implementations", etc., that allow validating whether the user's proposal is feasible. Read the source code of libraries by finding or acquiring them, research deeply, and include prototypes to guide a fuller implementation. + +## Requirements + +NON-NEGOTIABLE REQUIREMENTS: + +- Every ExecPlan must be fully self-contained. Self-contained means that in its current form it contains all knowledge and instructions needed for a novice to succeed. +- Every ExecPlan is a living document. Contributors are required to revise it as progress is made, as discoveries occur, and as design decisions are finalized. Each revision must remain fully self-contained. +- Every ExecPlan must enable a complete novice to implement the feature end-to-end without prior knowledge of this repo. 
+- Every ExecPlan must produce a demonstrably working behavior, not merely code changes to "meet a definition". +- Every ExecPlan must define every term of art in plain language or not use it. + +Purpose and intent come first. Begin by explaining, in a few sentences, why the work matters from a user's perspective: what someone can do after this change that they could not do before, and how to see it working. Then guide the reader through the exact steps to achieve that outcome, including what to edit, what to run, and what they should observe. + +The agent executing your plan can list files, read files, search, run the project, and run tests. It does not know any prior context and cannot infer what you meant from earlier milestones. Repeat any assumption you rely on. Do not point to external blogs or docs; if knowledge is required, embed it in the plan itself in your own words. If an ExecPlan builds upon a prior ExecPlan and that file is checked in, incorporate it by reference. If it is not, you must include all relevant context from that plan. + +## Formatting + +Format and envelope are simple and strict. Each ExecPlan must be one single fenced code block labeled as `md` that begins and ends with triple backticks. Do not nest additional triple-backtick code fences inside; when you need to show commands, transcripts, diffs, or code, present them as indented blocks within that single fence. Use indentation for clarity rather than code fences inside an ExecPlan to avoid prematurely closing the ExecPlan's code fence. Use two newlines after every heading, use # and ## and so on, and correct syntax for ordered and unordered lists. + +When writing an ExecPlan to a Markdown (.md) file where the content of the file _is only_ the single ExecPlan, you should omit the triple backticks. + +Write in plain prose. Prefer sentences over lists. Avoid checklists, tables, and long enumerations unless brevity would obscure meaning. 
Checklists are permitted only in the `Progress` section, where they are mandatory. Narrative sections must remain prose-first. + +## Guidelines + +Self-containment and plain language are paramount. If you introduce a phrase that is not ordinary English ("daemon", "middleware", "RPC gateway", "filter graph"), define it immediately and remind the reader how it manifests in this repository (for example, by naming the files or commands where it appears). Do not say "as defined previously" or "according to the architecture doc." Include the needed explanation here, even if you repeat yourself. + +Avoid common failure modes. Do not rely on undefined jargon. Do not describe "the letter of a feature" so narrowly that the resulting code compiles but does nothing meaningful. Do not outsource key decisions to the reader. When ambiguity exists, resolve it in the plan itself and explain why you chose that path. Err on the side of over-explaining user-visible effects and under-specifying incidental implementation details. + +Anchor the plan with observable outcomes. State what the user can do after implementation, the commands to run, and the outputs they should see. Acceptance should be phrased as behavior a human can verify ("after starting the server, navigating to [http://localhost:8080/health](http://localhost:8080/health) returns HTTP 200 with body OK") rather than internal attributes ("added a HealthCheck struct"). If a change is internal, explain how its impact can still be demonstrated (for example, by running tests that fail before and pass after, and by showing a scenario that uses the new behavior). + +Specify repository context explicitly. Name files with full repository-relative paths, name functions and modules precisely, and describe where new files should be created. If touching multiple areas, include a short orientation paragraph that explains how those parts fit together so a novice can navigate confidently. 
When running commands, show the working directory and exact command line. When outcomes depend on environment, state the assumptions and provide alternatives when reasonable. + +Be idempotent and safe. Write the steps so they can be run multiple times without causing damage or drift. If a step can fail halfway, include how to retry or adapt. If a migration or destructive operation is necessary, spell out backups or safe fallbacks. Prefer additive, testable changes that can be validated as you go. + +Validation is not optional. Include instructions to run tests, to start the system if applicable, and to observe it doing something useful. Describe comprehensive testing for any new features or capabilities. Include expected outputs and error messages so a novice can tell success from failure. Where possible, show how to prove that the change is effective beyond compilation (for example, through a small end-to-end scenario, a CLI invocation, or an HTTP request/response transcript). State the exact test commands appropriate to the project’s toolchain and how to interpret their results. + +Capture evidence. When your steps produce terminal output, short diffs, or logs, include them inside the single fenced block as indented examples. Keep them concise and focused on what proves success. If you need to include a patch, prefer file-scoped diffs or small excerpts that a reader can recreate by following your instructions rather than pasting large blobs. + +## Milestones + +Milestones are narrative, not bureaucracy. If you break the work into milestones, introduce each with a brief paragraph that describes the scope, what will exist at the end of the milestone that did not exist before, the commands to run, and the acceptance you expect to observe. Keep it readable as a story: goal, work, result, proof. Progress and milestones are distinct: milestones tell the story, progress tracks granular work. Both must exist. 
Never abbreviate a milestone merely for the sake of brevity; do not leave out details that could be crucial to a future implementation. + +Each milestone must be independently verifiable and incrementally implement the overall goal of the execution plan. + +## Living plans and design decisions + +- ExecPlans are living documents. As you make key design decisions, update the plan to record both the decision and the thinking behind it. Record all decisions in the `Decision Log` section. +- ExecPlans must contain and maintain a `Progress` section, a `Surprises & Discoveries` section, a `Decision Log`, and an `Outcomes & Retrospective` section. These are not optional. +- When you discover optimizer behavior, performance tradeoffs, unexpected bugs, or inverse/unapply semantics that shaped your approach, capture those observations in the `Surprises & Discoveries` section with short evidence snippets (test output is ideal). +- If you change course mid-implementation, document why in the `Decision Log` and reflect the implications in `Progress`. Plans are guides for the next contributor as much as checklists for you. +- At completion of a major task or the full plan, write an `Outcomes & Retrospective` entry summarizing what was achieved, what remains, and lessons learned. + +## Prototyping milestones and parallel implementations + +It is acceptable—and often encouraged—to include explicit prototyping milestones when they de-risk a larger change. Examples: adding a low-level operator to a dependency to validate feasibility, or exploring two composition orders while measuring optimizer effects. Keep prototypes additive and testable. Clearly label the scope as “prototyping”; describe how to run and observe results; and state the criteria for promoting or discarding the prototype. + +Prefer additive code changes followed by subtractions that keep tests passing. 
Parallel implementations (e.g., keeping an adapter alongside an older path during migration) are fine when they reduce risk or enable tests to continue passing during a large migration. Describe how to validate both paths and how to retire one safely with tests. When working with multiple new libraries or feature areas, consider creating spikes that evaluate the feasibility of these features _independently_ of one another, proving that the external library performs as expected and implements the features we need in isolation. + +## Skeleton of a Good ExecPlan + +```md +# <Short, action-oriented description> + +This ExecPlan is a living document. The sections `Progress`, `Surprises & Discoveries`, `Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work proceeds. + +If the PLANS.md file is checked into the repo, reference the path to that file here from the repository root and note that this document must be maintained in accordance with PLANS.md. + +## Purpose / Big Picture + +Explain in a few sentences what someone gains after this change and how they can see it working. State the user-visible behavior you will enable. + +## Progress + +Use a list with checkboxes to summarize granular steps. Every stopping point must be documented here, even if it requires splitting a partially completed task into two (“done” vs. “remaining”). This section must always reflect the actual current state of the work. + +- [x] (2025-10-01 13:00Z) Example completed step. +- [ ] Example incomplete step. +- [ ] Example partially completed step (completed: X; remaining: Y). + +Use timestamps to measure rates of progress. + +## Surprises & Discoveries + +Document unexpected behaviors, bugs, optimizations, or insights discovered during implementation. Provide concise evidence. 
+ +- Observation: … + Evidence: … + +## Decision Log + +Record every decision made while working on the plan in the format: + +- Decision: … + Rationale: … + Date/Author: … + +## Outcomes & Retrospective + +Summarize outcomes, gaps, and lessons learned at major milestones or at completion. Compare the result against the original purpose. + +## Context and Orientation + +Describe the current state relevant to this task as if the reader knows nothing. Name the key files and modules by full path. Define any non-obvious term you will use. Do not refer to prior plans. + +## Plan of Work + +Describe, in prose, the sequence of edits and additions. For each edit, name the file and location (function, module) and what to insert or change. Keep it concrete and minimal. + +## Concrete Steps + +State the exact commands to run and where to run them (working directory). When a command generates output, show a short expected transcript so the reader can compare. This section must be updated as work proceeds. + +## Validation and Acceptance + +Describe how to start or exercise the system and what to observe. Phrase acceptance as behavior, with specific inputs and outputs. If tests are involved, say "run <project's test command> and expect <N> passed; the new test <name> fails before the change and passes after". + +## Idempotence and Recovery + +If steps can be repeated safely, say so. If a step is risky, provide a safe retry or rollback path. Keep the environment clean after completion. + +## Artifacts and Notes + +Include the most important transcripts, diffs, or snippets as indented examples. Keep them concise and focused on what proves success. + +## Interfaces and Dependencies + +Be prescriptive. Name the libraries, modules, and services to use and why. Specify the types, traits/interfaces, and function signatures that must exist at the end of the milestone. 
+ +If you follow the guidance above, a single, stateless agent -- or a human novice -- can read your ExecPlan from top to bottom and produce a working, observable result. That is the bar: SELF-CONTAINED, SELF-SUFFICIENT, NOVICE-GUIDING, OUTCOME-FOCUSED. + +When you revise a plan, you must ensure your changes are comprehensively reflected across all sections, including the living document sections, and you must write a note at the bottom of the plan describing the change and the reason why. ExecPlans must describe not just the what but the why for almost everything. +``` diff --git a/.changeset/lucky-apes-kiss.md b/.changeset/lucky-apes-kiss.md new file mode 100644 index 00000000..ef84de19 --- /dev/null +++ b/.changeset/lucky-apes-kiss.md @@ -0,0 +1,9 @@ +--- +'@openai/agents-extensions': patch +'@openai/agents-realtime': patch +'@openai/agents-openai': patch +'@openai/agents-core': patch +'@openai/agents': patch +--- + +introduce new shell and apply_patch tools diff --git a/AGENTS.md b/AGENTS.md index ca170676..e7afcf3a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,6 +4,10 @@ This guide helps new contributors get started with the OpenAI Agents JS monorepo **Location:** `AGENTS.md` at the repository root. +## ExecPlans + +When writing complex features or significant refactors, use an ExecPlan (as described in .agent/PLANS.md) from design to implementation. + ## Table of Contents 1. [Overview](#overview) @@ -123,6 +127,8 @@ See [this README](integration-tests/README.md) for details. 
pnpm examples:basic pnpm examples:agents-as-tools pnpm examples:deterministic + pnpm examples:tools-shell + pnpm examples:tools-apply-patch # See root package.json "examples:*" scripts for full list ``` - Metadata embedding (prebuild): diff --git a/docs/src/content/docs/guides/tools.mdx b/docs/src/content/docs/guides/tools.mdx index 66ed71f8..5231050b 100644 --- a/docs/src/content/docs/guides/tools.mdx +++ b/docs/src/content/docs/guides/tools.mdx @@ -29,6 +29,8 @@ When you use the `OpenAIResponsesModel` you can add the following built‑in too | Web search | `'web_search'` | Internet search. | | File / retrieval search | `'file_search'` | Query vector stores hosted on OpenAI. | | Computer use | `'computer'` | Automate GUI interactions. | +| Shell | `'shell'` | Run shell commands on the host. | +| Apply patch | `'apply_patch'` | Apply V4A diffs to local files. | | Code Interpreter | `'code_interpreter'` | Run code in a sandboxed environment. | | Image generation | `'image_generation'` | Generate images based on text. 
| diff --git a/docs/src/content/docs/ja/guides/tools.mdx b/docs/src/content/docs/ja/guides/tools.mdx index 96413424..c6686f97 100644 --- a/docs/src/content/docs/ja/guides/tools.mdx +++ b/docs/src/content/docs/ja/guides/tools.mdx @@ -23,13 +23,15 @@ import mcpLocalServer from '../../../../../../examples/docs/tools/mcpLocalServer `OpenAIResponsesModel` を使う場合、以下の組み込みツールを追加できます: -| ツール | Type string | 目的 | -| ---------------- | -------------------- | ------------------------------------------------- | -| Web 検索 | `'web_search'` | インターネット検索 | -| ファイル / 検索 | `'file_search'` | OpenAI 上でホストされる ベクトルストア へのクエリ | -| コンピュータ操作 | `'computer'` | GUI の操作を自動化 | -| Code Interpreter | `'code_interpreter'` | サンドボックス環境でコードを実行 | -| 画像生成 | `'image_generation'` | テキストに基づいて画像を生成 | +| ツール | 型文字列 | 目的 | +| ----------------------- | -------------------- | ----------------------------------------------- | +| Web search | `'web_search'` | インターネット検索 | +| File / retrieval search | `'file_search'` | OpenAI 上でホストされる ベクトルストア のクエリ | +| Computer use | `'computer'` | GUI 操作を自動化 | +| Shell | `'shell'` | ホスト上でシェルコマンドを実行 | +| Apply patch | `'apply_patch'` | ホスト上のファイルに V4A 差分を適用 | +| Code Interpreter | `'code_interpreter'` | サンドボックス環境でコードを実行 | +| Image generation | `'image_generation'` | テキストに基づいて画像を生成 | diff --git a/docs/src/content/docs/ko/guides/tools.mdx b/docs/src/content/docs/ko/guides/tools.mdx index 5fcd4e1c..c001b839 100644 --- a/docs/src/content/docs/ko/guides/tools.mdx +++ b/docs/src/content/docs/ko/guides/tools.mdx @@ -23,13 +23,15 @@ import mcpLocalServer from '../../../../../../examples/docs/tools/mcpLocalServer `OpenAIResponsesModel`을 사용할 때 다음 내장 도구를 추가할 수 있습니다: -| 도구 | 타입 문자열 | 목적 | -| ---------------- | -------------------- | ---------------------------------- | -| 웹 검색 | `'web_search'` | 인터넷 검색 | -| 파일 / 검색 | `'file_search'` | OpenAI에 호스팅된 벡터 스토어 쿼리 | -| 컴퓨터 사용 | `'computer'` | GUI 상호작용 자동화 | -| Code Interpreter | `'code_interpreter'` | 샌드박스 환경에서 코드 실행 | -| 이미지 생성 | `'image_generation'` | 
텍스트 기반 이미지 생성 | +| 도구 | Type 문자열 | 목적 | +| ---------------- | -------------------- | ------------------------------------ | +| 웹 검색 | `'web_search'` | 인터넷 검색 | +| 파일 / 검색 | `'file_search'` | OpenAI가 호스팅하는 벡터 스토어 조회 | +| 컴퓨터 사용 | `'computer'` | GUI 상호작용 자동화 | +| Shell | `'shell'` | 호스트에서 셸 명령 실행 | +| Apply patch | `'apply_patch'` | 호스트 파일에 V4A 패치 적용 | +| Code Interpreter | `'code_interpreter'` | 샌드박스 환경에서 코드 실행 | +| 이미지 생성 | `'image_generation'` | 텍스트 기반 이미지 생성 | diff --git a/docs/src/content/docs/zh/guides/tools.mdx b/docs/src/content/docs/zh/guides/tools.mdx index 50f8271e..caacceaa 100644 --- a/docs/src/content/docs/zh/guides/tools.mdx +++ b/docs/src/content/docs/zh/guides/tools.mdx @@ -23,13 +23,15 @@ import mcpLocalServer from '../../../../../../examples/docs/tools/mcpLocalServer 当你使用 `OpenAIResponsesModel` 时,可以添加以下内置工具: -| Tool | Type string | Purpose | -| ----------------------- | -------------------- | ------------------------------------- | -| Web search | `'web_search'` | Internet search. | -| File / retrieval search | `'file_search'` | Query vector stores hosted on OpenAI. | -| Computer use | `'computer'` | Automate GUI interactions. | -| Code Interpreter | `'code_interpreter'` | Run code in a sandboxed environment. | -| Image generation | `'image_generation'` | Generate images based on text. 
| +| 工具 | 类型字符串 | 目的 | +| ---------------- | -------------------- | ---------------------------- | +| Web 搜索 | `'web_search'` | 互联网搜索。 | +| 文件/检索搜索 | `'file_search'` | 查询 OpenAI 托管的向量存储。 | +| 计算机操作 | `'computer'` | 自动化 GUI 交互。 | +| Shell | `'shell'` | 在主机上运行 Shell 命令。 | +| Apply patch | `'apply_patch'` | 对本地文件应用 V4A 补丁。 | +| Code Interpreter | `'code_interpreter'` | 在沙盒环境中运行代码。 | +| 图像生成 | `'image_generation'` | 基于文本生成图像。 | diff --git a/examples/agent-patterns/human-in-the-loop-stream.ts b/examples/agent-patterns/human-in-the-loop-stream.ts index b8904947..6aceb220 100644 --- a/examples/agent-patterns/human-in-the-loop-stream.ts +++ b/examples/agent-patterns/human-in-the-loop-stream.ts @@ -74,6 +74,11 @@ async function main() { ); const state = stream.state; for (const interruption of stream.interruptions) { + if (interruption.rawItem.type !== 'function_call') { + throw new Error( + 'Invalid interruption type: ' + interruption.rawItem.type, + ); + } const ok = await confirm( `Agent ${interruption.agent.name} would like to use the tool ${interruption.rawItem.name} with "${interruption.rawItem.arguments}". Do you approve?`, ); diff --git a/examples/agent-patterns/human-in-the-loop.ts b/examples/agent-patterns/human-in-the-loop.ts index 84cc77d0..67a149b8 100644 --- a/examples/agent-patterns/human-in-the-loop.ts +++ b/examples/agent-patterns/human-in-the-loop.ts @@ -86,7 +86,7 @@ async function main() { for (const interruption of result.interruptions) { const confirmed = await confirm( - `Agent ${interruption.agent.name} would like to use the tool ${interruption.rawItem.name} with "${interruption.rawItem.arguments}". Do you approve?`, + `Agent ${interruption.agent.name} would like to use the tool ${interruption.name} with "${interruption.arguments || 'no arguments'}". 
Do you approve?`, ); if (confirmed) { diff --git a/examples/ai-sdk-v1/ai-sdk-v1.ts b/examples/ai-sdk-v1/ai-sdk-v1.ts index fa19068f..38f14bbe 100644 --- a/examples/ai-sdk-v1/ai-sdk-v1.ts +++ b/examples/ai-sdk-v1/ai-sdk-v1.ts @@ -309,6 +309,10 @@ export function toolToLanguageV1Tool( }; } + if (tool.type === 'shell' || tool.type === 'apply_patch') { + throw new UserError(`Unsupported tool type: ${tool.type}`); + } + const exhaustiveCheck: never = tool; throw new Error(`Unsupported tool type: ${exhaustiveCheck}`); } diff --git a/examples/docs/human-in-the-loop/index.ts b/examples/docs/human-in-the-loop/index.ts index d5440cc3..a79e0c5e 100644 --- a/examples/docs/human-in-the-loop/index.ts +++ b/examples/docs/human-in-the-loop/index.ts @@ -65,7 +65,7 @@ async function main() { for (const interruption of result.interruptions) { const confirmed = await confirm( - `Agent ${interruption.agent.name} would like to use the tool ${interruption.rawItem.name} with "${interruption.rawItem.arguments}". Do you approve?`, + `Agent ${interruption.agent.name} would like to use the tool ${interruption.name} with "${interruption.arguments}". Do you approve?`, ); if (confirmed) { diff --git a/examples/docs/mcp/hostedHITL.ts b/examples/docs/mcp/hostedHITL.ts index 3ce7b7ef..ebada6fd 100644 --- a/examples/docs/mcp/hostedHITL.ts +++ b/examples/docs/mcp/hostedHITL.ts @@ -42,8 +42,8 @@ import * as readline from 'node:readline/promises'; async function confirm(item: RunToolApprovalItem): Promise { const rl = readline.createInterface({ input: stdin, output: stdout }); - const name = item.rawItem.name; - const params = item.rawItem.providerData?.arguments; + const name = item.name; + const params = item.arguments; const answer = await rl.question( `Approve running tool (mcp: ${name}, params: ${params})? 
(y/n) `, ); diff --git a/examples/docs/streaming/streamedHITL.ts b/examples/docs/streaming/streamedHITL.ts index 154a78fa..385acdaf 100644 --- a/examples/docs/streaming/streamedHITL.ts +++ b/examples/docs/streaming/streamedHITL.ts @@ -21,7 +21,7 @@ while (stream.interruptions?.length) { const state = stream.state; for (const interruption of stream.interruptions) { const approved = confirm( - `Agent ${interruption.agent.name} would like to use the tool ${interruption.rawItem.name} with "${interruption.rawItem.arguments}". Do you approve?`, + `Agent ${interruption.agent.name} would like to use the tool ${interruption.name} with "${interruption.arguments}". Do you approve?`, ); if (approved) { state.approve(interruption); diff --git a/examples/mcp/hosted-mcp-human-in-the-loop.ts b/examples/mcp/hosted-mcp-human-in-the-loop.ts index db1b548e..2a98334e 100644 --- a/examples/mcp/hosted-mcp-human-in-the-loop.ts +++ b/examples/mcp/hosted-mcp-human-in-the-loop.ts @@ -4,8 +4,8 @@ import { stdin, stdout } from 'node:process'; async function confirm(item: RunToolApprovalItem): Promise { const rl = readline.createInterface({ input: stdin, output: stdout }); - const name = item.rawItem.name; - const params = JSON.parse(item.rawItem.providerData?.arguments || '{}'); + const name = item.name; + const params = JSON.parse(item.arguments ?? '{}'); const answer = await rl.question( `Approve running tool (mcp: ${name}, params: ${JSON.stringify(params)})? 
(y/n) `, ); diff --git a/examples/mcp/hosted-mcp-on-approval.ts b/examples/mcp/hosted-mcp-on-approval.ts index 5b74f1c4..ae66e0e8 100644 --- a/examples/mcp/hosted-mcp-on-approval.ts +++ b/examples/mcp/hosted-mcp-on-approval.ts @@ -4,8 +4,8 @@ import { Agent, run, hostedMcpTool, RunToolApprovalItem } from '@openai/agents'; async function promptApproval(item: RunToolApprovalItem): Promise { const rl = readline.createInterface({ input: stdin, output: stdout }); - const name = item.rawItem.name; - const params = JSON.parse(item.rawItem.providerData?.arguments || '{}'); + const name = item.name; + const params = JSON.parse(item.arguments ?? '{}'); const answer = await rl.question( `Approve running tool (mcp: ${name}, params: ${JSON.stringify(params)})? (y/n) `, ); diff --git a/examples/memory/file-hitl.ts b/examples/memory/file-hitl.ts index 66f0a667..55a87e55 100644 --- a/examples/memory/file-hitl.ts +++ b/examples/memory/file-hitl.ts @@ -32,7 +32,7 @@ const instructions = 'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. 
Keep responses under three sentences.'; function formatToolArguments(interruption: RunToolApprovalItem): string { - const args = interruption.rawItem.arguments; + const args = interruption.arguments; if (!args) { return ''; } @@ -67,7 +67,7 @@ async function resolveInterruptions>( const args = formatToolArguments(interruption); const approved = await promptYesNo( rl, - `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`, + `Agent ${interruption.agent.name} wants to call ${interruption.name} with ${args || 'no arguments'}`, ); if (approved) { result.state.approve(interruption); diff --git a/examples/memory/memory-hitl.ts b/examples/memory/memory-hitl.ts index 58e4f55a..e44efc1b 100644 --- a/examples/memory/memory-hitl.ts +++ b/examples/memory/memory-hitl.ts @@ -32,7 +32,7 @@ const instructions = 'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. 
Keep responses under three sentences.'; function formatToolArguments(interruption: RunToolApprovalItem): string { - const args = interruption.rawItem.arguments; + const args = interruption.arguments; if (!args) { return ''; } @@ -67,7 +67,7 @@ async function resolveInterruptions>( const args = formatToolArguments(interruption); const approved = await promptYesNo( rl, - `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`, + `Agent ${interruption.agent.name} wants to call ${interruption.name} with ${args || 'no arguments'}`, ); if (approved) { result.state.approve(interruption); diff --git a/examples/memory/oai-hitl.ts b/examples/memory/oai-hitl.ts index 39cbc38b..556e07cf 100644 --- a/examples/memory/oai-hitl.ts +++ b/examples/memory/oai-hitl.ts @@ -32,7 +32,7 @@ const instructions = 'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. 
Keep responses under three sentences.'; function formatToolArguments(interruption: RunToolApprovalItem): string { - const args = interruption.rawItem.arguments; + const args = interruption.arguments; if (!args) { return ''; } @@ -67,7 +67,7 @@ async function resolveInterruptions>( const args = formatToolArguments(interruption); const approved = await promptYesNo( rl, - `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`, + `Agent ${interruption.agent.name} wants to call ${interruption.name} with ${args || 'no arguments'}`, ); if (approved) { result.state.approve(interruption); diff --git a/examples/realtime-next/src/app/page.tsx b/examples/realtime-next/src/app/page.tsx index c410358b..0d16e3a2 100644 --- a/examples/realtime-next/src/app/page.tsx +++ b/examples/realtime-next/src/app/page.tsx @@ -143,7 +143,7 @@ export default function Home() { (_context, _agent, approvalRequest) => { // You'll be prompted when making the tool call that requires approval in web browser. const approved = confirm( - `Approve tool call to ${approvalRequest.approvalItem.rawItem.name} with parameters:\n ${JSON.stringify(approvalRequest.approvalItem.rawItem.arguments, null, 2)}?`, + `Approve tool call to ${approvalRequest.approvalItem.name} with parameters:\n ${approvalRequest.approvalItem.arguments ?? '{}'}?`, ); if (approved) { session.current?.approve(approvalRequest.approvalItem); diff --git a/examples/realtime-next/src/app/websocket/page.tsx b/examples/realtime-next/src/app/websocket/page.tsx index 7773969c..f62bc464 100644 --- a/examples/realtime-next/src/app/websocket/page.tsx +++ b/examples/realtime-next/src/app/websocket/page.tsx @@ -164,7 +164,7 @@ export default function Home() { (_context, _agent, approvalRequest) => { // You'll be prompted when making the tool call that requires approval in web browser. 
const approved = confirm( - `Approve tool call to ${approvalRequest.approvalItem.rawItem.name} with parameters:\n ${JSON.stringify(approvalRequest.approvalItem.rawItem.arguments, null, 2)}?`, + `Approve tool call to ${approvalRequest.approvalItem.name} with parameters:\n ${approvalRequest.approvalItem.arguments ?? '{}'}?`, ); if (approved) { session.current?.approve(approvalRequest.approvalItem); diff --git a/examples/realtime-twilio-sip/package.json b/examples/realtime-twilio-sip/package.json index 64593e8a..dadd6d07 100644 --- a/examples/realtime-twilio-sip/package.json +++ b/examples/realtime-twilio-sip/package.json @@ -8,7 +8,7 @@ "dotenv": "^16.5.0", "fastify": "^5.3.3", "fastify-raw-body": "^5.0.0", - "openai": "^6.7.0" + "openai": "^6" }, "scripts": { "build-check": "tsc --noEmit", diff --git a/examples/tools/README.md b/examples/tools/README.md index 6d6956ef..29fdff25 100644 --- a/examples/tools/README.md +++ b/examples/tools/README.md @@ -32,4 +32,16 @@ These examples demonstrate the hosted tools provided by the Agents SDK. ```bash pnpm examples:tools-image-generation - ``` \ No newline at end of file + ``` + +- `shell.ts` – Demonstrates the `shellTool` helper for running terminal commands. + + ```bash + pnpm examples:tools-shell + ``` + +- `applyPatch.ts` – Uses the `applyPatchTool` helper to apply file edits generated by the model. 
+ + ```bash + pnpm examples:tools-apply-patch + ``` diff --git a/examples/tools/applyPatch.ts b/examples/tools/applyPatch.ts new file mode 100644 index 00000000..db12ac46 --- /dev/null +++ b/examples/tools/applyPatch.ts @@ -0,0 +1,174 @@ +import os from 'node:os'; +import path from 'node:path'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { + Agent, + run, + withTrace, + applyPatchTool, + Editor, + ApplyPatchOperation, + ApplyPatchResult, +} from '@openai/agents'; +import { applyDiff } from '@openai/agents'; +import chalk from 'chalk'; + +function printDiff(diff: string) { + const lines = diff.split('\n'); + for (const line of lines) { + if (line.startsWith('+')) { + console.log(chalk.green(line)); + } else if (line.startsWith('-')) { + console.log(chalk.red(line)); + } else { + console.log(chalk.dim(line)); + } + } +} + +class WorkspaceEditor implements Editor { + constructor(private readonly root: string) {} + + async createFile( + operation: Extract, + ): Promise { + const targetPath = await this.resolve(operation.path); + await mkdir(path.dirname(targetPath), { recursive: true }); + const content = applyDiff('', operation.diff, 'create'); + await writeFile(targetPath, content, 'utf8'); + return { status: 'completed', output: `Created ${operation.path}` }; + } + + async updateFile( + operation: Extract, + ): Promise { + const targetPath = await this.resolve(operation.path); + const original = await readFile(targetPath, 'utf8').catch((error: any) => { + if (error?.code === 'ENOENT') { + throw new Error(`Cannot update missing file: ${operation.path}`); + } + throw error; + }); + const patched = applyDiff(original, operation.diff); + await writeFile(targetPath, patched, 'utf8'); + return { status: 'completed', output: `Updated ${operation.path}` }; + } + + async deleteFile( + operation: Extract, + ): Promise { + const targetPath = await this.resolve(operation.path); + await rm(targetPath, { force: true }); + return { status: 
'completed', output: `Deleted ${operation.path}` }; + } + + private async resolve(relativePath: string): Promise { + const resolved = path.resolve(this.root, relativePath); + if (!resolved.startsWith(this.root)) { + throw new Error(`Operation outside workspace: ${relativePath}`); + } + return resolved; + } +} + +async function promptApplyPatchApproval( + operation: ApplyPatchOperation, +): Promise { + if (process.env.APPLY_PATCH_AUTO_APPROVE === '1') { + return true; + } + + const { createInterface } = await import('node:readline/promises'); + const rl = createInterface({ + input: process.stdin, + output: process.stdout, + }); + + try { + console.log(chalk.bold.bgYellow.black(' Apply patch approval required: ')); + console.log(`${chalk.bold(operation.type)}: ${operation.path}`); + if ('diff' in operation && typeof operation.diff === 'string') { + printDiff(operation.diff); + } + const answer = await rl.question(`Proceed? [y/N] `); + const approved = answer.trim().toLowerCase(); + return approved === 'y' || approved === 'yes'; + } finally { + rl.close(); + } +} + +async function seedWorkspace(root: string): Promise { + await mkdir(root, { recursive: true }); +} + +async function main() { + const workspaceRoot = await mkdtemp( + path.join(os.tmpdir(), 'apply-patch-example-'), + ); + console.log(chalk.dim(`Temporary workspace: ${chalk.cyan(workspaceRoot)}`)); + await seedWorkspace(workspaceRoot); + const editor = new WorkspaceEditor(workspaceRoot); + + const agent = new Agent({ + name: 'Patch Assistant', + model: 'gpt-5.1', + instructions: `You can edit files inside ${workspaceRoot} using the apply_patch tool.`, + tools: [ + applyPatchTool({ + editor, + // could also be a function for you to determine if approval is needed + needsApproval: true, + onApproval: async (_ctx, approvalItem) => { + const op = + approvalItem.rawItem.type === 'apply_patch_call' + ? approvalItem.rawItem.operation + : undefined; + const approve = op ? 
await promptApplyPatchApproval(op) : false; + return { approve }; + }, + }), + ], + }); + + try { + console.log(chalk.dim('Asking agent to create tasks.md …\n')); + await withTrace('apply-patch-example', async () => { + const result = await run( + agent, + 'Create tasks.md with a shopping checklist of 5 entries.', + ); + console.log(`${chalk.bold('Agent:')} ${chalk.cyan(result.finalOutput)}`); + const updatedNotes = await readFile( + path.join(workspaceRoot, 'tasks.md'), + 'utf8', + ); + console.log(`\n\n${chalk.dim('tasks.md after creation:')}`); + console.log(updatedNotes); + console.log( + `\n\n${chalk.dim('Asking agent to check off the last two items …')}\n`, + ); + const result2 = await run( + agent, + `\n===== tasks.md\n${updatedNotes}\n\n\nCheck off the last two items from the file.`, + ); + console.log(`${chalk.bold('Agent:')} ${chalk.cyan(result2.finalOutput)}`); + }); + + console.log(`\n\n${chalk.dim('Final tasks.md:')}`); + const finalNotes = await readFile( + path.join(workspaceRoot, 'tasks.md'), + 'utf8', + ); + console.log(finalNotes); + } catch (err) { + console.error(err); + } finally { + await rm(workspaceRoot, { recursive: true, force: true }); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/examples/tools/package.json b/examples/tools/package.json index 43053e4d..d8fcde2a 100644 --- a/examples/tools/package.json +++ b/examples/tools/package.json @@ -3,6 +3,8 @@ "name": "tools", "dependencies": { "@openai/agents": "workspace:*", + "@openai/agents-core": "workspace:*", + "chalk": "^5.6.2", "playwright": "^1.55.1" }, "scripts": { @@ -12,6 +14,8 @@ "start:web-search": "tsx web-search.ts", "start:web-search-filters": "tsx web-search-filters.ts", "start:code-interpreter": "tsx code-interpreter.ts", - "start:image-generation": "tsx image-generation.ts" + "start:image-generation": "tsx image-generation.ts", + "start:shell": "tsx shell.ts", + "start:apply-patch": "tsx applyPatch.ts" } } diff --git 
a/examples/tools/shell.ts b/examples/tools/shell.ts new file mode 100644 index 00000000..2e5ba6cd --- /dev/null +++ b/examples/tools/shell.ts @@ -0,0 +1,130 @@ +import { exec } from 'node:child_process'; +import { promisify } from 'node:util'; +import process from 'node:process'; +import { + Agent, + run, + withTrace, + Shell, + ShellAction, + ShellResult, + ShellOutputResult, + shellTool, +} from '@openai/agents'; +import chalk from 'chalk'; + +const execAsync = promisify(exec); + +class LocalShell implements Shell { + constructor(private readonly cwd: string = process.cwd()) {} + + async run(action: ShellAction): Promise { + const output: ShellResult['output'] = []; + + for (const command of action.commands) { + let stdout = ''; + let stderr = ''; + let exitCode: number | null = 0; + let outcome: ShellOutputResult['outcome'] = { + type: 'exit', + exitCode: 0, + }; + try { + const { stdout: localStdout, stderr: localStderr } = await execAsync( + command, + { + cwd: this.cwd, + timeout: action.timeoutMs, + maxBuffer: action.maxOutputLength, + }, + ); + stdout = localStdout; + stderr = localStderr; + } catch (error: any) { + exitCode = typeof error?.code === 'number' ? error.code : null; + stdout = error?.stdout ?? ''; + stderr = error?.stderr ?? ''; + outcome = + error?.killed || error?.signal === 'SIGTERM' + ? 
{ type: 'timeout' } + : { type: 'exit', exitCode }; + } + output.push({ + command, + stdout, + stderr, + outcome, + }); + if (outcome.type === 'timeout') { + break; + } + } + + return { + output, + providerData: { + working_directory: this.cwd, + }, + }; + } +} + +async function promptShellApproval(commands: string[]): Promise { + if (process.env.SHELL_AUTO_APPROVE === '1') { + return true; + } + + console.log( + chalk.bold.bgYellow.black(' Shell command approval required: \n'), + ); + commands.forEach((cmd) => console.log(chalk.dim(` > ${cmd}`))); + const { createInterface } = await import('node:readline/promises'); + const rl = createInterface({ + input: process.stdin, + output: process.stdout, + }); + try { + const answer = await rl.question('\nProceed? [y/N] '); + const approved = answer.trim().toLowerCase(); + return approved === 'y' || approved === 'yes'; + } finally { + rl.close(); + } +} + +async function main() { + const shell = new LocalShell(); + + const agent = new Agent({ + name: 'Shell Assistant', + model: 'gpt-5.1', + instructions: + 'You can execute shell commands to inspect the repository. Keep responses concise and include command output when helpful.', + tools: [ + shellTool({ + shell, + // could also be a function for you to determine if approval is needed + needsApproval: true, + onApproval: async (_ctx, approvalItem) => { + const commands = + approvalItem.rawItem.type === 'shell_call' + ? 
approvalItem.rawItem.action.commands + : []; + const approve = await promptShellApproval(commands); + return { approve }; + }, + }), + ], + }); + + await withTrace('shell-tool-example', async () => { + const result = await run(agent, 'Show the Node.js version.'); + + console.log(`${chalk.bold('Agent:')} ${chalk.cyan(result.finalOutput)}`); + }); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/package.json b/package.json index 3d1fc563..98bcdb60 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,8 @@ "examples:tools-computer-use": "pnpm -F tools start:computer-use", "examples:tools-file-search": "pnpm -F tools start:file-search", "examples:tools-web-search": "pnpm -F tools start:web-search", + "examples:tools-shell": "pnpm -F tools start:shell", + "examples:tools-apply-patch": "pnpm -F tools start:apply-patch", "examples:tool-filter": "tsx examples/mcp/tool-filter-example.ts", "ci:publish": "pnpm publish -r --no-git-checks", "bump-version": "changeset version && pnpm -F @openai/* prebuild", diff --git a/packages/agents-core/package.json b/packages/agents-core/package.json index a3f61230..c027d019 100644 --- a/packages/agents-core/package.json +++ b/packages/agents-core/package.json @@ -70,8 +70,8 @@ "@modelcontextprotocol/sdk": "^1.17.2" }, "dependencies": { - "openai": "^6", - "debug": "^4.4.0" + "debug": "^4.4.0", + "openai": "^6" }, "peerDependencies": { "zod": "^3.25.40 || ^4.0" diff --git a/packages/agents-core/src/editor.ts b/packages/agents-core/src/editor.ts new file mode 100644 index 00000000..2d2b1bf1 --- /dev/null +++ b/packages/agents-core/src/editor.ts @@ -0,0 +1,42 @@ +import type { ApplyPatchOperation } from './types/protocol'; +export type { ApplyPatchOperation } from './types/protocol'; + +/** + * Result returned by an Editor operation. + */ +export type ApplyPatchResult = { + /** + * Whether the operation completed successfully. Defaults to `completed`. 
+ */ + status?: 'completed' | 'failed'; + /** + * Optional textual output to forward to the model. + */ + output?: string; +}; + +/** + * Host interface responsible for applying diffs on disk. + */ +export interface Editor { + /** + * Creates a new file from a V4A diff. + */ + createFile( + operation: Extract, + ): Promise; + + /** + * Updates an existing file based on a V4A diff. + */ + updateFile( + operation: Extract, + ): Promise; + + /** + * Deletes an existing file. + */ + deleteFile( + operation: Extract, + ): Promise; +} diff --git a/packages/agents-core/src/extensions/handoffFilters.ts b/packages/agents-core/src/extensions/handoffFilters.ts index 236efea2..04f88f6a 100644 --- a/packages/agents-core/src/extensions/handoffFilters.ts +++ b/packages/agents-core/src/extensions/handoffFilters.ts @@ -13,6 +13,10 @@ const TOOL_TYPES = new Set([ 'function_call_result', 'computer_call', 'computer_call_result', + 'shell_call', + 'shell_call_output', + 'apply_patch_call', + 'apply_patch_call_output', 'hosted_tool_call', ]); diff --git a/packages/agents-core/src/index.ts b/packages/agents-core/src/index.ts index 72e71ffa..7a1dc10f 100644 --- a/packages/agents-core/src/index.ts +++ b/packages/agents-core/src/index.ts @@ -14,6 +14,8 @@ export { ToolUseBehaviorFlags, } from './agent'; export { Computer } from './computer'; +export { ShellAction, ShellResult, ShellOutputResult, Shell } from './shell'; +export { ApplyPatchOperation, ApplyPatchResult, Editor } from './editor'; export { AgentsError, GuardrailExecutionError, @@ -68,6 +70,7 @@ export { } from './items'; export { AgentHooks } from './lifecycle'; export { getLogger } from './logger'; +export { applyDiff } from './utils/applyDiff'; export { getAllMcpTools, invalidateServerToolsCache, @@ -123,6 +126,10 @@ export { HostedTool, ComputerTool, computerTool, + ShellTool, + shellTool, + ApplyPatchTool, + applyPatchTool, HostedMCPTool, hostedMcpTool, FunctionTool, @@ -149,6 +156,10 @@ export type { HostedToolCallItem, 
ComputerCallResultItem, ComputerUseCallItem, + ShellCallItem, + ShellCallResultItem, + ApplyPatchCallItem, + ApplyPatchCallResultItem, FunctionCallItem, FunctionCallResultItem, JsonSchemaDefinition, diff --git a/packages/agents-core/src/items.ts b/packages/agents-core/src/items.ts index 4955994a..fa77d0ee 100644 --- a/packages/agents-core/src/items.ts +++ b/packages/agents-core/src/items.ts @@ -66,7 +66,9 @@ export class RunToolCallOutputItem extends RunItemBase { constructor( public rawItem: | protocol.FunctionCallResultItem - | protocol.ComputerCallResultItem, + | protocol.ComputerCallResultItem + | protocol.ShellCallResultItem + | protocol.ApplyPatchCallResultItem, public agent: Agent, public output: string | unknown, ) { @@ -142,16 +144,41 @@ export class RunToolApprovalItem extends RunItemBase { public readonly type = 'tool_approval_item' as const; constructor( - public rawItem: protocol.FunctionCallItem | protocol.HostedToolCallItem, + public rawItem: + | protocol.FunctionCallItem + | protocol.HostedToolCallItem + | protocol.ShellCallItem + | protocol.ApplyPatchCallItem, public agent: Agent, + /** + * Explicit tool name to use for approval tracking when not present on the raw item. + */ + public toolName?: string, ) { super(); + this.toolName = toolName ?? (rawItem as any).name; + } + + /** + * Returns the tool name if available on the raw item or provided explicitly. + * Kept for backwards compatibility with code that previously relied on `rawItem.name`. + */ + get name(): string | undefined { + return this.toolName ?? (this.rawItem as any).name; + } + + /** + * Returns the arguments if the raw item has an arguments property otherwise this will be undefined. + */ + get arguments(): string | undefined { + return 'arguments' in this.rawItem ? 
this.rawItem.arguments : undefined; } toJSON() { return { ...super.toJSON(), agent: this.agent.toJSON(), + toolName: this.toolName, }; } } diff --git a/packages/agents-core/src/model.ts b/packages/agents-core/src/model.ts index 55bc46b1..97ddb6f3 100644 --- a/packages/agents-core/src/model.ts +++ b/packages/agents-core/src/model.ts @@ -1,6 +1,12 @@ import { Usage } from './usage'; import { StreamEvent } from './types/protocol'; -import { HostedTool, ComputerTool, FunctionTool } from './tool'; +import { + HostedTool, + ComputerTool, + FunctionTool, + ShellTool, + ApplyPatchTool, +} from './tool'; import { Handoff } from './handoff'; import { AgentInputItem, @@ -169,6 +175,16 @@ export type SerializedComputerTool = { dimensions: ComputerTool['computer']['dimensions']; }; +export type SerializedShellTool = { + type: ShellTool['type']; + name: ShellTool['name']; +}; + +export type SerializedApplyPatchTool = { + type: ApplyPatchTool['type']; + name: ApplyPatchTool['name']; +}; + export type SerializedHostedTool = { type: HostedTool['type']; name: HostedTool['name']; @@ -178,6 +194,8 @@ export type SerializedHostedTool = { export type SerializedTool = | SerializedFunctionTool | SerializedComputerTool + | SerializedShellTool + | SerializedApplyPatchTool | SerializedHostedTool; export type SerializedHandoff = { diff --git a/packages/agents-core/src/runContext.ts b/packages/agents-core/src/runContext.ts index cf5982f8..166ff5b5 100644 --- a/packages/agents-core/src/runContext.ts +++ b/packages/agents-core/src/runContext.ts @@ -103,7 +103,8 @@ export class RunContext { approvalItem: RunToolApprovalItem, { alwaysApprove = false }: { alwaysApprove?: boolean } = {}, ) { - const toolName = approvalItem.rawItem.name; + const toolName = + approvalItem.toolName ?? 
(approvalItem.rawItem as any).name; if (alwaysApprove) { this.#approvals.set(toolName, { approved: true, @@ -136,7 +137,8 @@ export class RunContext { approvalItem: RunToolApprovalItem, { alwaysReject = false }: { alwaysReject?: boolean } = {}, ) { - const toolName = approvalItem.rawItem.name; + const toolName = + approvalItem.toolName ?? (approvalItem.rawItem as any).name; if (alwaysReject) { this.#approvals.set(toolName, { approved: false, diff --git a/packages/agents-core/src/runImplementation.ts b/packages/agents-core/src/runImplementation.ts index 1667be19..2e24ef66 100644 --- a/packages/agents-core/src/runImplementation.ts +++ b/packages/agents-core/src/runImplementation.ts @@ -31,7 +31,10 @@ import { Tool, FunctionToolResult, HostedMCPTool, + ShellTool, + ApplyPatchTool, } from './tool'; +import type { ShellResult } from './shell'; import { AgentInputItem, UnknownContext } from './types'; import { Runner } from './run'; import { RunContext } from './runContext'; @@ -52,6 +55,7 @@ import { RunResult, StreamedRunResult } from './result'; import { z } from 'zod'; import * as protocol from './types/protocol'; import { Computer } from './computer'; +import type { ApplyPatchResult } from './editor'; import { RunState } from './runState'; import { isZodObject } from './utils'; import * as ProviderData from './types/providerData'; @@ -75,6 +79,18 @@ type ToolRunComputer = { computer: ComputerTool; }; +// Captures a shell invocation emitted by the model. +type ToolRunShell = { + toolCall: protocol.ShellCallItem; + shell: ShellTool; +}; + +// Captures an apply_patch operation emitted by the model. +type ToolRunApplyPatch = { + toolCall: protocol.ApplyPatchCallItem; + applyPatch: ApplyPatchTool; +}; + // Tracks hosted MCP approval requests awaiting either automatic or user-driven authorization. 
type ToolRunMCPApprovalRequest = { requestItem: RunToolApprovalItem; @@ -88,6 +104,8 @@ export type ProcessedResponse = { handoffs: ToolRunHandoff[]; functions: ToolRunFunction[]; computerActions: ToolRunComputer[]; + shellActions: ToolRunShell[]; + applyPatchActions: ToolRunApplyPatch[]; mcpApprovalRequests: ToolRunMCPApprovalRequest[]; toolsUsed: string[]; hasToolsOrApprovalsToRun(): boolean; @@ -170,6 +188,8 @@ export function processModelResponse( const runHandoffs: ToolRunHandoff[] = []; const runFunctions: ToolRunFunction[] = []; const runComputerActions: ToolRunComputer[] = []; + const runShellActions: ToolRunShell[] = []; + const runApplyPatchActions: ToolRunApplyPatch[] = []; const runMCPApprovalRequests: ToolRunMCPApprovalRequest[] = []; const toolsUsed: string[] = []; const handoffMap = new Map(handoffs.map((h) => [h.toolName, h])); @@ -178,6 +198,10 @@ export function processModelResponse( tools.filter((t) => t.type === 'function').map((t) => [t.name, t]), ); const computerTool = tools.find((t) => t.type === 'computer'); + const shellTool = tools.find((t): t is ShellTool => t.type === 'shell'); + const applyPatchTool = tools.find( + (t): t is ApplyPatchTool => t.type === 'apply_patch', + ); const mcpToolMap = new Map( tools .filter((t) => t.type === 'hosted_tool' && t.providerData?.type === 'mcp') @@ -257,6 +281,43 @@ export function processModelResponse( toolCall: output, computer: computerTool, }); + } else if (output.type === 'shell_call') { + items.push(new RunToolCallItem(output, agent)); + toolsUsed.push('shell'); + if (!shellTool) { + addErrorToCurrentSpan({ + message: 'Model produced shell action without a shell tool.', + data: { + agent_name: agent.name, + }, + }); + throw new ModelBehaviorError( + 'Model produced shell action without a shell tool.', + ); + } + runShellActions.push({ + toolCall: output, + shell: shellTool, + }); + } else if (output.type === 'apply_patch_call') { + items.push(new RunToolCallItem(output, agent)); + 
toolsUsed.push('apply_patch'); + if (!applyPatchTool) { + addErrorToCurrentSpan({ + message: + 'Model produced apply_patch action without an apply_patch tool.', + data: { + agent_name: agent.name, + }, + }); + throw new ModelBehaviorError( + 'Model produced apply_patch action without an apply_patch tool.', + ); + } + runApplyPatchActions.push({ + toolCall: output, + applyPatch: applyPatchTool, + }); } if (output.type !== 'function_call') { @@ -300,6 +361,8 @@ export function processModelResponse( handoffs: runHandoffs, functions: runFunctions, computerActions: runComputerActions, + shellActions: runShellActions, + applyPatchActions: runApplyPatchActions, mcpApprovalRequests: runMCPApprovalRequests, toolsUsed: toolsUsed, hasToolsOrApprovalsToRun(): boolean { @@ -307,7 +370,9 @@ export function processModelResponse( runHandoffs.length > 0 || runFunctions.length > 0 || runMCPApprovalRequests.length > 0 || - runComputerActions.length > 0 + runComputerActions.length > 0 || + runShellActions.length > 0 || + runApplyPatchActions.length > 0 ); }, }; @@ -520,10 +585,14 @@ export async function resolveInterruptedTurn( // Keep track of approvals we still need to surface next turn so HITL flows can resume cleanly. for (const run of mcpApprovalRuns) { // the approval_request_id "mcpr_123..." - const approvalRequestId = run.requestItem.rawItem.id!; + const rawItem = run.requestItem.rawItem; + if (rawItem.type !== 'hosted_tool_call') { + continue; + } + const approvalRequestId = rawItem.id!; const approved = state._context.isToolApproved({ // Since this item name must be the same with the one sent from Responses API server - toolName: run.requestItem.rawItem.name, + toolName: rawItem.name, callId: approvalRequestId, }); if (typeof approved !== 'undefined') { @@ -637,20 +706,33 @@ export async function resolveTurnAfterModelResponse( } // Run function tools and computer actions in parallel; neither depends on the other's side effects. 
- const [functionResults, computerResults] = await Promise.all([ - executeFunctionToolCalls( - agent, - processedResponse.functions as ToolRunFunction[], - runner, - state, - ), - executeComputerActions( - agent, - processedResponse.computerActions, - runner, - state._context, - ), - ]); + const [functionResults, computerResults, shellResults, applyPatchResults] = + await Promise.all([ + executeFunctionToolCalls( + agent, + processedResponse.functions as ToolRunFunction[], + runner, + state, + ), + executeComputerActions( + agent, + processedResponse.computerActions, + runner, + state._context, + ), + executeShellActions( + agent, + processedResponse.shellActions, + runner, + state._context, + ), + executeApplyPatchOperations( + agent, + processedResponse.applyPatchActions, + runner, + state._context, + ), + ]); for (const result of functionResults) { appendIfNew(result.runItem); @@ -658,6 +740,12 @@ export async function resolveTurnAfterModelResponse( for (const item of computerResults) { appendIfNew(item); } + for (const item of shellResults) { + appendIfNew(item); + } + for (const item of applyPatchResults) { + appendIfNew(item); + } // run hosted MCP approval requests if (processedResponse.mcpApprovalRequests.length > 0) { @@ -746,6 +834,8 @@ export async function resolveTurnAfterModelResponse( const hadToolCallsOrActions = (processedResponse.functions?.length ?? 0) > 0 || (processedResponse.computerActions?.length ?? 0) > 0 || + (processedResponse.shellActions?.length ?? 0) > 0 || + (processedResponse.applyPatchActions?.length ?? 0) > 0 || (processedResponse.mcpApprovalRequests?.length ?? 0) > 0 || (processedResponse.handoffs?.length ?? 
0) > 0; if (hadToolCallsOrActions) { @@ -1313,6 +1403,292 @@ async function _runComputerActionAndScreenshot( throw new Error('Computer does not implement screenshot()'); } +function toErrorMessage(error: unknown): string { + if (error instanceof Error) { + return error.message || error.toString(); + } + try { + return JSON.stringify(error); + } catch { + return String(error); + } +} + +export async function executeShellActions( + agent: Agent, + actions: ToolRunShell[], + runner: Runner, + runContext: RunContext, + customLogger: Logger | undefined = undefined, +): Promise { + const _logger = customLogger ?? logger; + const results: RunItem[] = []; + + for (const action of actions) { + const shellTool = action.shell; + const toolCall = action.toolCall; + const approvalItem = new RunToolApprovalItem( + toolCall, + agent, + shellTool.name, + ); + const requiresApproval = await shellTool.needsApproval( + runContext, + toolCall.action, + toolCall.callId, + ); + + if (requiresApproval) { + if (shellTool.onApproval) { + const decision = await shellTool.onApproval(runContext, approvalItem); + if (decision.approve === true) { + runContext.approveTool(approvalItem); + } else if (decision.approve === false) { + runContext.rejectTool(approvalItem); + } + } + + const approval = runContext.isToolApproved({ + toolName: shellTool.name, + callId: toolCall.callId, + }); + + if (approval === false) { + const response = 'Tool execution was not approved.'; + const rejectionOutput: protocol.ShellCallOutputContent = { + stdout: '', + stderr: response, + outcome: { type: 'exit', exitCode: null }, + }; + results.push( + new RunToolCallOutputItem( + { + type: 'shell_call_output', + callId: toolCall.callId, + output: [rejectionOutput], + }, + agent, + response, + ), + ); + continue; + } + + if (approval !== true) { + results.push(approvalItem); + continue; + } + } + + runner.emit('agent_tool_start', runContext, agent, shellTool, { + toolCall, + }); + if (typeof agent.emit === 'function') { + 
agent.emit('agent_tool_start', runContext, shellTool, { toolCall }); + } + + let shellOutputs: ShellResult['output'] | undefined; + const providerMeta: Record = {}; + let maxOutputLength: number | undefined; + + try { + const shellResult = await shellTool.shell.run(toolCall.action); + shellOutputs = shellResult.output ?? []; + + if (shellResult.providerData) { + Object.assign(providerMeta, shellResult.providerData); + } + + if (typeof shellResult.maxOutputLength === 'number') { + maxOutputLength = shellResult.maxOutputLength; + } + } catch (err) { + const errorText = toErrorMessage(err); + shellOutputs = [ + { + stdout: '', + stderr: errorText, + outcome: { type: 'exit', exitCode: null }, + }, + ]; + _logger.error('Failed to execute shell action:', err); + } + + shellOutputs = shellOutputs ?? []; + + runner.emit( + 'agent_tool_end', + runContext, + agent, + shellTool, + JSON.stringify(shellOutputs), + { + toolCall, + }, + ); + if (typeof agent.emit === 'function') { + agent.emit( + 'agent_tool_end', + runContext, + shellTool, + JSON.stringify(shellOutputs), + { + toolCall, + }, + ); + } + + const rawItem: protocol.ShellCallResultItem = { + type: 'shell_call_output', + callId: toolCall.callId, + output: shellOutputs ?? [], + }; + + if (typeof maxOutputLength === 'number') { + rawItem.maxOutputLength = maxOutputLength; + } + + if (Object.keys(providerMeta).length > 0) { + rawItem.providerData = providerMeta; + } + + results.push(new RunToolCallOutputItem(rawItem, agent, rawItem.output)); + } + + return results; +} + +export async function executeApplyPatchOperations( + agent: Agent, + actions: ToolRunApplyPatch[], + runner: Runner, + runContext: RunContext, + customLogger: Logger | undefined = undefined, +): Promise { + const _logger = customLogger ?? 
logger; + const results: RunItem[] = []; + + for (const action of actions) { + const applyPatchTool = action.applyPatch; + const toolCall = action.toolCall; + const approvalItem = new RunToolApprovalItem( + toolCall, + agent, + applyPatchTool.name, + ); + const requiresApproval = await applyPatchTool.needsApproval( + runContext, + toolCall.operation, + toolCall.callId, + ); + + if (requiresApproval) { + if (applyPatchTool.onApproval) { + const decision = await applyPatchTool.onApproval( + runContext, + approvalItem, + ); + if (decision.approve === true) { + runContext.approveTool(approvalItem); + } else if (decision.approve === false) { + runContext.rejectTool(approvalItem); + } + } + + const approval = runContext.isToolApproved({ + toolName: applyPatchTool.name, + callId: toolCall.callId, + }); + + if (approval === false) { + const response = 'Tool execution was not approved.'; + results.push( + new RunToolCallOutputItem( + { + type: 'apply_patch_call_output', + callId: toolCall.callId, + status: 'failed', + output: response, + }, + agent, + response, + ), + ); + continue; + } + + if (approval !== true) { + results.push(approvalItem); + continue; + } + } + + runner.emit('agent_tool_start', runContext, agent, applyPatchTool, { + toolCall, + }); + if (typeof agent.emit === 'function') { + agent.emit('agent_tool_start', runContext, applyPatchTool, { + toolCall, + }); + } + + let status: 'completed' | 'failed' = 'completed'; + let output = ''; + + try { + let result: ApplyPatchResult | void; + switch (toolCall.operation.type) { + case 'create_file': + result = await applyPatchTool.editor.createFile(toolCall.operation); + break; + case 'update_file': + result = await applyPatchTool.editor.updateFile(toolCall.operation); + break; + case 'delete_file': + result = await applyPatchTool.editor.deleteFile(toolCall.operation); + break; + default: + throw new Error('Unsupported apply_patch operation'); + } + + if (result && typeof result.status === 'string') { + status = 
result.status; + } + + if (result && typeof result.output === 'string') { + output = result.output; + } + } catch (err) { + status = 'failed'; + output = toErrorMessage(err); + _logger.error('Failed to execute apply_patch operation:', err); + } + + runner.emit('agent_tool_end', runContext, agent, applyPatchTool, output, { + toolCall, + }); + if (typeof agent.emit === 'function') { + agent.emit('agent_tool_end', runContext, applyPatchTool, output, { + toolCall, + }); + } + + const rawItem: protocol.ApplyPatchCallResultItem = { + type: 'apply_patch_call_output', + callId: toolCall.callId, + status, + }; + + if (output) { + rawItem.output = output; + } + + results.push(new RunToolCallOutputItem(rawItem, agent, output)); + } + + return results; +} + /** * @internal * Executes any computer-use actions emitted by the model and returns the resulting items so the diff --git a/packages/agents-core/src/runState.ts b/packages/agents-core/src/runState.ts index 9c95da5c..aee534c3 100644 --- a/packages/agents-core/src/runState.ts +++ b/packages/agents-core/src/runState.ts @@ -32,7 +32,7 @@ import * as protocol from './types/protocol'; import { AgentInputItem, UnknownContext } from './types'; import type { InputGuardrailResult, OutputGuardrailResult } from './guardrail'; import { safeExecute } from './utils/safeExecute'; -import { HostedMCPTool } from './tool'; +import { HostedMCPTool, ShellTool, ApplyPatchTool } from './tool'; /** * The schema version of the serialized run state. 
This is used to ensure that the serialized @@ -121,8 +121,11 @@ const itemSchema = z.discriminatedUnion('type', [ }), z.object({ type: z.literal('tool_approval_item'), - rawItem: protocol.FunctionCallItem.or(protocol.HostedToolCallItem), + rawItem: protocol.FunctionCallItem.or(protocol.HostedToolCallItem) + .or(protocol.ShellCallItem) + .or(protocol.ApplyPatchCallItem), agent: serializedAgentSchema, + toolName: z.string().optional(), }), ]); @@ -155,6 +158,22 @@ const serializedProcessedResponseSchema = z.object({ computer: z.any(), }), ), + shellActions: z + .array( + z.object({ + toolCall: z.any(), + shell: z.any(), + }), + ) + .optional(), + applyPatchActions: z + .array( + z.object({ + toolCall: z.any(), + applyPatch: z.any(), + }), + ) + .optional(), mcpApprovalRequests: z .array( z.object({ @@ -724,6 +743,7 @@ export function deserializeItem( return new RunToolApprovalItem( serializedItem.rawItem, agentMap.get(serializedItem.agent.name) as Agent, + serializedItem.toolName, ); } } @@ -750,6 +770,16 @@ async function deserializeProcessedResponse( .filter((tool) => tool.type === 'computer') .map((tool) => [tool.name, tool]), ); + const shellTools = new Map( + allTools + .filter((tool): tool is ShellTool => tool.type === 'shell') + .map((tool) => [tool.name, tool]), + ); + const applyPatchTools = new Map( + allTools + .filter((tool): tool is ApplyPatchTool => tool.type === 'apply_patch') + .map((tool) => [tool.name, tool]), + ); const handoffs = new Map( currentAgent.handoffs.map((entry) => { if (entry instanceof Agent) { @@ -800,6 +830,32 @@ async function deserializeProcessedResponse( }; }, ), + shellActions: (serializedProcessedResponse.shellActions ?? 
[]).map( + (shellAction) => { + const toolName = shellAction.shell.name; + if (!shellTools.has(toolName)) { + throw new UserError(`Shell tool ${toolName} not found`); + } + + return { + toolCall: shellAction.toolCall, + shell: shellTools.get(toolName)!, + }; + }, + ), + applyPatchActions: ( + serializedProcessedResponse.applyPatchActions ?? [] + ).map((applyPatchAction) => { + const toolName = applyPatchAction.applyPatch.name; + if (!applyPatchTools.has(toolName)) { + throw new UserError(`Apply patch tool ${toolName} not found`); + } + + return { + toolCall: applyPatchAction.toolCall, + applyPatch: applyPatchTools.get(toolName)!, + }; + }), mcpApprovalRequests: ( serializedProcessedResponse.mcpApprovalRequests ?? [] ).map((approvalRequest) => ({ @@ -819,7 +875,9 @@ async function deserializeProcessedResponse( result.handoffs.length > 0 || result.functions.length > 0 || result.mcpApprovalRequests.length > 0 || - result.computerActions.length > 0 + result.computerActions.length > 0 || + result.shellActions.length > 0 || + result.applyPatchActions.length > 0 ); }, }; diff --git a/packages/agents-core/src/shell.ts b/packages/agents-core/src/shell.ts new file mode 100644 index 00000000..ad4b91a6 --- /dev/null +++ b/packages/agents-core/src/shell.ts @@ -0,0 +1,43 @@ +import type { + ShellAction as ProtocolShellAction, + ShellCallOutputContent, +} from './types/protocol'; + +/** + * Describes the work to perform when executing a shell tool call. + * Re-export protocol type to keep a single source of truth. + */ +export type ShellAction = ProtocolShellAction; + +/** + * Result returned by a shell tool implementation. + */ +/** + * Output for a single executed command. + */ +export type ShellOutputResult = ShellCallOutputContent; + +export type ShellResult = { + /** + * One entry per executed command (or logical chunk) in order. + */ + output: ShellOutputResult[]; + /** + * If you applied truncation yourself, set the limit you enforced for telemetry. 
+ */ + maxOutputLength?: number; + /** + * Optional provider-specific metadata merged into the tool call output. + */ + providerData?: Record; +}; + +/** + * Executes shell commands on behalf of the agent. + */ +export interface Shell { + /** + * Runs the given action and returns the resulting output. + */ + run(action: ShellAction): Promise; +} diff --git a/packages/agents-core/src/tool.ts b/packages/agents-core/src/tool.ts index fc24af7c..248ae94e 100644 --- a/packages/agents-core/src/tool.ts +++ b/packages/agents-core/src/tool.ts @@ -1,5 +1,7 @@ import type { Agent } from './agent'; import type { Computer } from './computer'; +import type { Shell, ShellAction } from './shell'; +import type { Editor, ApplyPatchOperation } from './editor'; import { JsonObjectSchema, JsonObjectSchemaNonStrict, @@ -43,6 +45,28 @@ export type ToolApprovalFunction = ( callId?: string, ) => Promise; +export type ShellApprovalFunction = ( + runContext: RunContext, + action: ShellAction, + callId?: string, +) => Promise; + +export type ShellOnApprovalFunction = ( + runContext: RunContext, + approvalItem: RunToolApprovalItem, +) => Promise<{ approve: boolean; reason?: string }>; + +export type ApplyPatchApprovalFunction = ( + runContext: RunContext, + operation: ApplyPatchOperation, + callId?: string, +) => Promise; + +export type ApplyPatchOnApprovalFunction = ( + runContext: RunContext, + approvalItem: RunToolApprovalItem, +) => Promise<{ approve: boolean; reason?: string }>; + export type ToolEnabledFunction = ( runContext: RunContext, agent: Agent, @@ -142,6 +166,97 @@ export function computerTool( }; } +export type ShellTool = { + type: 'shell'; + /** + * Public name exposed to the model. Defaults to `shell`. + */ + name: string; + /** + * The shell implementation to execute commands. + */ + shell: Shell; + /** + * Predicate determining whether this shell action requires approval. 
+ */ + needsApproval: ShellApprovalFunction; + /** + * Optional handler to auto-approve or reject when approval is required. + * If provided, it will be invoked immediately when an approval is needed. + */ + onApproval?: ShellOnApprovalFunction; +}; + +export function shellTool( + options: Partial> & { + shell: Shell; + needsApproval?: boolean | ShellApprovalFunction; + onApproval?: ShellOnApprovalFunction; + }, +): ShellTool { + const needsApproval: ShellApprovalFunction = + typeof options.needsApproval === 'function' + ? options.needsApproval + : async () => + typeof options.needsApproval === 'boolean' + ? options.needsApproval + : false; + + return { + type: 'shell', + name: options.name ?? 'shell', + shell: options.shell, + needsApproval, + onApproval: options.onApproval, + }; +} + +export type ApplyPatchTool = { + type: 'apply_patch'; + /** + * Public name exposed to the model. Defaults to `apply_patch`. + */ + name: string; + /** + * Diff applier invoked when the tool is called. + */ + editor: Editor; + /** + * Predicate determining whether this apply_patch operation requires approval. + */ + needsApproval: ApplyPatchApprovalFunction; + /** + * Optional handler to auto-approve or reject when approval is required. + */ + onApproval?: ApplyPatchOnApprovalFunction; +}; + +export function applyPatchTool( + options: Partial< + Omit + > & { + editor: Editor; + needsApproval?: boolean | ApplyPatchApprovalFunction; + onApproval?: ApplyPatchOnApprovalFunction; + }, +): ApplyPatchTool { + const needsApproval: ApplyPatchApprovalFunction = + typeof options.needsApproval === 'function' + ? options.needsApproval + : async () => + typeof options.needsApproval === 'boolean' + ? options.needsApproval + : false; + + return { + type: 'apply_patch', + name: options.name ?? 
'apply_patch', + editor: options.editor, + needsApproval, + onApproval: options.onApproval, + }; +} + export type HostedMCPApprovalFunction = ( context: RunContext, data: RunToolApprovalItem, @@ -314,6 +429,8 @@ export type HostedTool = { export type Tool = | FunctionTool | ComputerTool + | ShellTool + | ApplyPatchTool | HostedTool; /** diff --git a/packages/agents-core/src/types/aliases.ts b/packages/agents-core/src/types/aliases.ts index 51ad4692..828495d3 100644 --- a/packages/agents-core/src/types/aliases.ts +++ b/packages/agents-core/src/types/aliases.ts @@ -5,8 +5,12 @@ import { HostedToolCallItem, FunctionCallItem, ComputerUseCallItem, + ShellCallItem, FunctionCallResultItem, ComputerCallResultItem, + ShellCallResultItem, + ApplyPatchCallItem, + ApplyPatchCallResultItem, ReasoningItem, UnknownItem, } from './protocol'; @@ -31,8 +35,12 @@ export type AgentOutputItem = | HostedToolCallItem | FunctionCallItem | ComputerUseCallItem + | ShellCallItem + | ApplyPatchCallItem | FunctionCallResultItem | ComputerCallResultItem + | ShellCallResultItem + | ApplyPatchCallResultItem | ReasoningItem | UnknownItem; @@ -46,7 +54,11 @@ export type AgentInputItem = | HostedToolCallItem | FunctionCallItem | ComputerUseCallItem + | ShellCallItem + | ApplyPatchCallItem | FunctionCallResultItem | ComputerCallResultItem + | ShellCallResultItem + | ApplyPatchCallResultItem | ReasoningItem | UnknownItem; diff --git a/packages/agents-core/src/types/protocol.ts b/packages/agents-core/src/types/protocol.ts index 676f1ebc..e7131749 100644 --- a/packages/agents-core/src/types/protocol.ts +++ b/packages/agents-core/src/types/protocol.ts @@ -553,8 +553,111 @@ export const ComputerCallResultItem = ItemBase.extend({ export type ComputerCallResultItem = z.infer; +export const ShellAction = z.object({ + commands: z.array(z.string()), + timeoutMs: z.number().int().min(0).optional(), + maxOutputLength: z.number().int().min(0).optional(), +}); + +export type ShellAction = z.infer; + +export const 
ShellCallItem = ItemBase.extend({ + type: z.literal('shell_call'), + callId: z.string(), + status: z.enum(['in_progress', 'completed', 'incomplete']).optional(), + action: ShellAction, +}); + +export type ShellCallItem = z.infer; + +export const ShellCallOutcome = z.discriminatedUnion('type', [ + z.object({ type: z.literal('timeout') }), + z.object({ + type: z.literal('exit'), + exitCode: z.number().int().nullable(), + }), +]); + +export type ShellCallOutcome = z.infer; + +export const ShellCallOutputContent = z + .object({ + stdout: z.string(), + stderr: z.string(), + outcome: ShellCallOutcome, + }) + .passthrough(); + +export type ShellCallOutputContent = z.infer; + +export const ShellCallResultItem = ItemBase.extend({ + type: z.literal('shell_call_output'), + callId: z.string(), + maxOutputLength: z.number().optional(), + output: z.array(ShellCallOutputContent), +}); + +export type ShellCallResultItem = z.infer; + +export const ApplyPatchOperationCreateFile = z.object({ + type: z.literal('create_file'), + path: z.string(), + diff: z.string(), +}); + +export type ApplyPatchOperationCreateFile = z.infer< + typeof ApplyPatchOperationCreateFile +>; + +export const ApplyPatchOperationUpdateFile = z.object({ + type: z.literal('update_file'), + path: z.string(), + diff: z.string(), +}); + +export type ApplyPatchOperationUpdateFile = z.infer< + typeof ApplyPatchOperationUpdateFile +>; + +export const ApplyPatchOperationDeleteFile = z.object({ + type: z.literal('delete_file'), + path: z.string(), +}); + +export type ApplyPatchOperationDeleteFile = z.infer< + typeof ApplyPatchOperationDeleteFile +>; + +export const ApplyPatchOperation = z.discriminatedUnion('type', [ + ApplyPatchOperationCreateFile, + ApplyPatchOperationUpdateFile, + ApplyPatchOperationDeleteFile, +]); + +export type ApplyPatchOperation = z.infer; + +export const ApplyPatchCallItem = ItemBase.extend({ + type: z.literal('apply_patch_call'), + callId: z.string(), + status: z.enum(['in_progress', 
'completed']), + operation: ApplyPatchOperation, +}); + +export type ApplyPatchCallItem = z.infer; + +export const ApplyPatchCallResultItem = ItemBase.extend({ + type: z.literal('apply_patch_call_output'), + callId: z.string(), + status: z.enum(['completed', 'failed']), + output: z.string().optional(), +}); + +export type ApplyPatchCallResultItem = z.infer; + export const ToolCallItem = z.discriminatedUnion('type', [ ComputerUseCallItem, + ShellCallItem, + ApplyPatchCallItem, FunctionCallItem, HostedToolCallItem, ]); @@ -606,7 +709,11 @@ export const OutputModelItem = z.discriminatedUnion('type', [ HostedToolCallItem, FunctionCallItem, ComputerUseCallItem, + ShellCallItem, + ApplyPatchCallItem, FunctionCallResultItem, + ShellCallResultItem, + ApplyPatchCallResultItem, ReasoningItem, UnknownItem, ]); @@ -620,8 +727,12 @@ export const ModelItem = z.union([ HostedToolCallItem, FunctionCallItem, ComputerUseCallItem, + ShellCallItem, + ApplyPatchCallItem, FunctionCallResultItem, ComputerCallResultItem, + ShellCallResultItem, + ApplyPatchCallResultItem, ReasoningItem, UnknownItem, ]); diff --git a/packages/agents-core/src/utils/applyDiff.ts b/packages/agents-core/src/utils/applyDiff.ts new file mode 100644 index 00000000..8865b03c --- /dev/null +++ b/packages/agents-core/src/utils/applyDiff.ts @@ -0,0 +1,358 @@ +/** + * Applies a headerless V4A diff to the provided file content. + * - mode "default": patch an existing file using V4A sections ("@@" + +/-/space lines). + * - mode "create": create-file syntax that requires every line to start with "+". + * + * The function preserves trailing newlines from the original file and throws when + * the diff cannot be applied cleanly. 
/**
 * Applies a headerless V4A diff to `input` and returns the resulting text.
 *
 * @param input - Current file content; not consulted when `mode` is "create".
 * @param diff - Diff body. Both LF and CRLF line endings are accepted.
 * @param mode - "default" patches existing content using "@@" sections built
 *   from "+", "-" and space-prefixed lines; "create" expects every line to
 *   start with "+".
 * @returns The patched content, or the new file content in "create" mode.
 * @throws Error when a diff line is malformed or a section's context cannot be
 *   located in `input`.
 */
export function applyDiff(
  input: string,
  diff: string,
  mode: 'default' | 'create' = 'default',
): string {
  const diffLines = normalizeDiffLines(diff);

  if (mode !== 'create') {
    const parsed = parseUpdateDiff(diffLines, input);
    return applyChunks(input, parsed.chunks);
  }

  return parseCreateDiff(diffLines);
}

/**
 * One contiguous edit: starting at line `origIndex` of the original input,
 * `delLines` are the removed ("-") lines and `insLines` the added ("+") lines.
 */
type Chunk = { origIndex: number; delLines: string[]; insLines: string[] };

/**
 * Cursor over the diff lines. `fuzz` accumulates a penalty every time a match
 * had to be made less strictly than an exact comparison.
 */
type ParserState = { lines: string[]; index: number; fuzz: number };

const END_PATCH = '*** End Patch';
const END_FILE = '*** End of File';

// Prefixes that end the body of a create-file diff.
const SECTION_TERMINATORS = [
  END_PATCH,
  '*** Update File:',
  '*** Delete File:',
  '*** Add File:',
];

// Prefixes that end an update diff: the terminators above plus the EOF marker.
const END_SECTION_MARKERS = [...SECTION_TERMINATORS, END_FILE];

/**
 * Builds a parser positioned at the first diff line. A sentinel END_PATCH line
 * is appended so the parsing loops always meet a terminator.
 */
function newParserState(lines: string[]): ParserState {
  return { lines: [...lines, END_PATCH], index: 0, fuzz: 0 };
}

/**
 * Splits the diff into lines, strips carriage returns, and drops the single
 * empty entry produced by a trailing newline.
 */
function normalizeDiffLines(diff: string): string[] {
  const normalized = diff
    .split(/\r?\n/)
    .map((line) => line.replace(/\r$/, ''));

  // `split` always yields at least one element, so the last index is valid.
  if (normalized[normalized.length - 1] === '') {
    normalized.pop();
  }
  return normalized;
}

/**
 * Returns true when the parser has run out of lines or the current line starts
 * with one of `prefixes`.
 */
function isDone(state: ParserState, prefixes: string[]): boolean {
  if (state.index >= state.lines.length) {
    return true;
  }
  const current = state.lines[state.index];
  return prefixes.some((marker) => current?.startsWith(marker));
}

/**
 * Consumes the current line when it starts with `prefix` and returns the text
 * after the prefix. Otherwise leaves the parser untouched and returns ''.
 */
function readStr(state: ParserState, prefix: string): string {
  const current = state.lines[state.index];
  if (typeof current !== 'string' || !current.startsWith(prefix)) {
    return '';
  }
  state.index += 1;
  return current.slice(prefix.length);
}

/**
 * Parses create-file syntax: every line up to the first terminator must start
 * with "+". Returns the added lines joined with "\n".
 */
function parseCreateDiff(lines: string[]): string {
  const parser = newParserState(lines);
  const added: string[] = [];

  for (; !isDone(parser, SECTION_TERMINATORS); parser.index += 1) {
    const line = parser.lines[parser.index];
    if (!line.startsWith('+')) {
      throw new Error(`Invalid Add File Line: ${line}`);
    }
    added.push(line.slice(1));
  }

  return added.join('\n');
}

/**
 * Parses update syntax into chunks whose `origIndex` is expressed in lines of
 * `input`, and reports the total fuzz that was needed to place them.
 */
function parseUpdateDiff(
  lines: string[],
  input: string,
): { chunks: Chunk[]; fuzz: number } {
  const parser = newParserState(lines);
  const inputLines = input.split('\n');
  const chunks: Chunk[] = [];
  let cursor = 0;

  while (!isDone(parser, END_SECTION_MARKERS)) {
    // A section opens with "@@ <anchor>" or with a bare "@@" line.
    const anchor = readStr(parser, '@@ ');
    let hasBareAnchor = false;
    if (!anchor && parser.lines[parser.index] === '@@') {
      hasBareAnchor = true;
      parser.index += 1;
    }

    // Only the very first section may omit its "@@" marker.
    if (!anchor && !hasBareAnchor && cursor !== 0) {
      throw new Error(`Invalid Line:\n${parser.lines[parser.index]}`);
    }

    if (anchor.trim()) {
      cursor = advanceCursorToAnchor(anchor, inputLines, cursor, parser);
    }

    const section = readSection(parser.lines, parser.index);
    const match = findContext(
      inputLines,
      section.nextContext,
      cursor,
      section.eof,
    );

    if (match.newIndex === -1) {
      const nextContextText = section.nextContext.join('\n');
      if (section.eof) {
        throw new Error(`Invalid EOF Context ${cursor}:\n${nextContextText}`);
      }
      throw new Error(`Invalid Context ${cursor}:\n${nextContextText}`);
    }

    parser.fuzz += match.fuzz;
    // Section chunks are relative to the section's context; rebase onto input.
    for (const sectionChunk of section.sectionChunks) {
      chunks.push({
        ...sectionChunk,
        origIndex: sectionChunk.origIndex + match.newIndex,
      });
    }

    cursor = match.newIndex + section.nextContext.length;
    parser.index = section.endIndex;
  }

  return { chunks, fuzz: parser.fuzz };
}

/**
 * Moves the cursor to just after the first line at or beyond `cursor` that
 * equals `anchor`; failing that, the first line that equals it once both sides
 * are trimmed (which adds 1 to `parser.fuzz`). A pass is skipped when a
 * matching line already exists before `cursor`. Returns `cursor` unchanged
 * when nothing matches.
 */
function advanceCursorToAnchor(
  anchor: string,
  inputLines: string[],
  cursor: number,
  parser: ParserState,
): number {
  // Index of the first match at or after the cursor, or -1 when the anchor was
  // already seen earlier or does not occur at all.
  const locate = (matches: (line: string) => boolean): number => {
    if (inputLines.slice(0, cursor).some((line) => matches(line))) {
      return -1;
    }
    for (let i = cursor; i < inputLines.length; i += 1) {
      if (matches(inputLines[i])) {
        return i;
      }
    }
    return -1;
  };

  const exactAt = locate((line) => line === anchor);
  if (exactAt !== -1) {
    return exactAt + 1;
  }

  const trimmedAnchor = anchor.trim();
  const looseAt = locate((line) => line.trim() === trimmedAnchor);
  if (looseAt !== -1) {
    parser.fuzz += 1;
    return looseAt + 1;
  }

  return cursor;
}
+function readSection( + lines: string[], + startIndex: number, +): { + nextContext: string[]; + sectionChunks: Chunk[]; + endIndex: number; + eof: boolean; +} { + const context: string[] = []; + let delLines: string[] = []; + let insLines: string[] = []; + const sectionChunks: Chunk[] = []; + let mode: 'keep' | 'add' | 'delete' = 'keep'; + let index = startIndex; + const origIndex = index; + + while (index < lines.length) { + const raw = lines[index]; + if ( + raw.startsWith('@@') || + raw.startsWith(END_PATCH) || + raw.startsWith('*** Update File:') || + raw.startsWith('*** Delete File:') || + raw.startsWith('*** Add File:') || + raw.startsWith(END_FILE) + ) { + break; + } + if (raw === '***') break; + if (raw.startsWith('***')) { + throw new Error(`Invalid Line: ${raw}`); + } + + index += 1; + const lastMode: 'keep' | 'add' | 'delete' = mode; + let line = raw; + if (line === '') line = ' '; + + if (line[0] === '+') { + mode = 'add'; + } else if (line[0] === '-') { + mode = 'delete'; + } else if (line[0] === ' ') { + mode = 'keep'; + } else { + throw new Error(`Invalid Line: ${line}`); + } + + line = line.slice(1); + + const switchingToContext = mode === 'keep' && lastMode !== mode; + if (switchingToContext && (insLines.length || delLines.length)) { + sectionChunks.push({ + origIndex: context.length - delLines.length, + delLines, + insLines, + }); + delLines = []; + insLines = []; + } + + if (mode === 'delete') { + delLines.push(line); + context.push(line); + } else if (mode === 'add') { + insLines.push(line); + } else { + context.push(line); + } + } + + if (insLines.length || delLines.length) { + sectionChunks.push({ + origIndex: context.length - delLines.length, + delLines, + insLines, + }); + delLines = []; + insLines = []; + } + + if (index < lines.length && lines[index] === END_FILE) { + index += 1; + return { nextContext: context, sectionChunks, endIndex: index, eof: true }; + } + + if (index === origIndex) { + throw new Error(`Nothing in this section - 
index=${index} ${lines[index]}`); + } + + return { nextContext: context, sectionChunks, endIndex: index, eof: false }; +} + +function findContext( + lines: string[], + context: string[], + start: number, + eof: boolean, +): { newIndex: number; fuzz: number } { + if (eof) { + const endStart = Math.max(0, lines.length - context.length); + const endMatch = findContextCore(lines, context, endStart); + if (endMatch.newIndex !== -1) return endMatch; + const fallback = findContextCore(lines, context, start); + return { newIndex: fallback.newIndex, fuzz: fallback.fuzz + 10000 }; + } + return findContextCore(lines, context, start); +} + +function findContextCore( + lines: string[], + context: string[], + start: number, +): { newIndex: number; fuzz: number } { + if (!context.length) { + return { newIndex: start, fuzz: 0 }; + } + + for (let i = start; i < lines.length; i += 1) { + if (equalsSlice(lines, context, i, (s) => s)) + return { newIndex: i, fuzz: 0 }; + } + for (let i = start; i < lines.length; i += 1) { + if (equalsSlice(lines, context, i, (s) => s.trimEnd())) + return { newIndex: i, fuzz: 1 }; + } + for (let i = start; i < lines.length; i += 1) { + if (equalsSlice(lines, context, i, (s) => s.trim())) + return { newIndex: i, fuzz: 100 }; + } + + return { newIndex: -1, fuzz: 0 }; +} + +function equalsSlice( + source: string[], + target: string[], + start: number, + mapFn: (value: string) => string, +): boolean { + if (start + target.length > source.length) return false; + for (let i = 0; i < target.length; i += 1) { + if (mapFn(source[start + i]) !== mapFn(target[i])) return false; + } + return true; +} + +function applyChunks(input: string, chunks: Chunk[]): string { + const origLines = input.split('\n'); + const destLines: string[] = []; + let origIndex = 0; + + for (const chunk of chunks) { + if (chunk.origIndex > origLines.length) { + throw new Error( + `applyDiff: chunk.origIndex ${chunk.origIndex} > input length ${origLines.length}`, + ); + } + if (origIndex > 
chunk.origIndex) { + throw new Error( + `applyDiff: overlapping chunk at ${chunk.origIndex} (cursor ${origIndex})`, + ); + } + + destLines.push(...origLines.slice(origIndex, chunk.origIndex)); + origIndex = chunk.origIndex; + + if (chunk.insLines.length) { + destLines.push(...chunk.insLines); + } + + origIndex += chunk.delLines.length; + } + + destLines.push(...origLines.slice(origIndex)); + const result = destLines.join('\n'); + return result; +} diff --git a/packages/agents-core/src/utils/index.ts b/packages/agents-core/src/utils/index.ts index 78979943..21282e96 100644 --- a/packages/agents-core/src/utils/index.ts +++ b/packages/agents-core/src/utils/index.ts @@ -2,3 +2,4 @@ export { isZodObject } from './typeGuards'; export { toSmartString } from './smartString'; export { EventEmitterDelegate } from '../lifecycle'; export { encodeUint8ArrayToBase64 } from './base64'; +export { applyDiff } from './applyDiff'; diff --git a/packages/agents-core/src/utils/serialize.ts b/packages/agents-core/src/utils/serialize.ts index a5f507f9..7ad5ccff 100644 --- a/packages/agents-core/src/utils/serialize.ts +++ b/packages/agents-core/src/utils/serialize.ts @@ -22,6 +22,18 @@ export function serializeTool(tool: Tool): SerializedTool { dimensions: tool.computer.dimensions, }; } + if (tool.type === 'shell') { + return { + type: 'shell', + name: tool.name, + }; + } + if (tool.type === 'apply_patch') { + return { + type: 'apply_patch', + name: tool.name, + }; + } return { type: 'hosted_tool', name: tool.name, diff --git a/packages/agents-core/test/extensions/handoffFilters.test.ts b/packages/agents-core/test/extensions/handoffFilters.test.ts index 1ac29476..28fd61af 100644 --- a/packages/agents-core/test/extensions/handoffFilters.test.ts +++ b/packages/agents-core/test/extensions/handoffFilters.test.ts @@ -44,6 +44,35 @@ const hostedToolCall: protocol.HostedToolCallItem = { output: 'results', }; +const shellCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 
'shell-call', + status: 'completed', + action: { commands: ['echo hi'] }, +}; + +const shellCallResult: protocol.ShellCallResultItem = { + type: 'shell_call_output', + callId: 'shell-call', + output: [ + { stdout: 'hi', stderr: '', outcome: { type: 'exit', exitCode: 0 } }, + ], +}; + +const applyPatchCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'patch-call', + status: 'completed', + operation: { type: 'delete_file', path: 'tmp.txt' }, +}; + +const applyPatchCallResult: protocol.ApplyPatchCallResultItem = { + type: 'apply_patch_call_output', + callId: 'patch-call', + status: 'completed', + output: 'done', +}; + describe('removeAllTools', () => { test('should be available', () => { const result = removeAllTools({ @@ -100,6 +129,10 @@ describe('removeAllTools', () => { computerCall, computerCallResult, hostedToolCall, + shellCall, + shellCallResult, + applyPatchCall, + applyPatchCallResult, ]; const result = removeAllTools({ @@ -109,7 +142,7 @@ describe('removeAllTools', () => { }); expect(result.inputHistory).toStrictEqual([userMessage]); - expect(history).toHaveLength(6); + expect(history).toHaveLength(10); expect((result.inputHistory as AgentInputItem[])[0]).toBe(userMessage); }); }); diff --git a/packages/agents-core/test/items.test.ts b/packages/agents-core/test/items.test.ts index 2bc82c55..2aa89e67 100644 --- a/packages/agents-core/test/items.test.ts +++ b/packages/agents-core/test/items.test.ts @@ -183,6 +183,7 @@ describe('items toJSON()', () => { type: 'tool_approval_item', rawItem: item.rawItem, agent: item.agent.toJSON(), + toolName: 'test', }); }); }); diff --git a/packages/agents-core/test/runImplementation.test.ts b/packages/agents-core/test/runImplementation.test.ts index 9205cd38..9bb70c61 100644 --- a/packages/agents-core/test/runImplementation.test.ts +++ b/packages/agents-core/test/runImplementation.test.ts @@ -28,6 +28,8 @@ import { prepareInputItemsWithSession, executeFunctionToolCalls, executeComputerActions, + 
executeShellActions, + executeApplyPatchOperations, executeHandoffCalls, resolveTurnAfterModelResponse, streamStepItemsToRunResult, @@ -40,6 +42,8 @@ import { FunctionToolResult, tool, computerTool, + applyPatchTool, + shellTool, hostedMcpTool, } from '../src/tool'; import { handoff } from '../src/handoff'; @@ -55,6 +59,8 @@ import { TEST_MODEL_RESPONSE_WITH_FUNCTION, TEST_TOOL, FakeModelProvider, + FakeShell, + FakeEditor, fakeModelMessage, } from './stubs'; import * as protocol from '../src/types/protocol'; @@ -98,6 +104,94 @@ describe('processModelResponse', () => { ); expect(result.hasToolsOrApprovalsToRun()).toBe(true); }); + + it('queues shell actions when shell tool registered', () => { + const shellCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + const modelResponse: ModelResponse = { + output: [shellCall], + usage: new Usage(), + }; + + const shell = shellTool({ shell: new FakeShell() }); + const result = processModelResponse(modelResponse, TEST_AGENT, [shell], []); + + expect(result.shellActions).toHaveLength(1); + expect(result.shellActions[0]?.toolCall).toEqual(shellCall); + expect(result.shellActions[0]?.shell).toBe(shell); + expect(result.toolsUsed).toEqual(['shell']); + }); + + it('throws when shell action emitted without shell tool', () => { + const shellCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + const modelResponse: ModelResponse = { + output: [shellCall], + usage: new Usage(), + }; + + expect(() => + processModelResponse(modelResponse, TEST_AGENT, [TEST_TOOL], []), + ).toThrow(ModelBehaviorError); + }); + + it('queues apply_patch actions when editor tool registered', () => { + const applyPatchCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'completed', + operation: { + type: 'update_file', + path: 
'README.md', + diff: 'diff --git', + }, + }; + const modelResponse: ModelResponse = { + output: [applyPatchCall], + usage: new Usage(), + }; + + const editor = applyPatchTool({ editor: new FakeEditor() }); + const result = processModelResponse( + modelResponse, + TEST_AGENT, + [editor], + [], + ); + + expect(result.applyPatchActions).toHaveLength(1); + expect(result.applyPatchActions[0]?.toolCall).toEqual(applyPatchCall); + expect(result.applyPatchActions[0]?.applyPatch).toBe(editor); + expect(result.toolsUsed).toEqual(['apply_patch']); + }); + + it('throws when apply_patch action emitted without editor tool', () => { + const applyPatchCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'completed', + operation: { + type: 'delete_file', + path: 'temp.txt', + }, + }; + const modelResponse: ModelResponse = { + output: [applyPatchCall], + usage: new Usage(), + }; + + expect(() => + processModelResponse(modelResponse, TEST_AGENT, [TEST_TOOL], []), + ).toThrow(ModelBehaviorError); + }); }); describe('getTracing', () => { @@ -252,6 +346,8 @@ describe('saveToSession', () => { }, ], computerActions: [], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [], toolsUsed: [], hasToolsOrApprovalsToRun() { @@ -410,6 +506,8 @@ describe('saveToSession', () => { }, ], computerActions: [], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [], toolsUsed: [approvalCall.name, autoCall.name], hasToolsOrApprovalsToRun() { @@ -1172,6 +1270,310 @@ describe('executeComputerActions', () => { }); }); +describe('executeShellActions', () => { + it('runs shell commands and truncates output when maxOutputLength provided', async () => { + const shell = new FakeShell(); + shell.result = { + output: [ + { + stdout: '0123456789', + stderr: 'stderr-info', + outcome: { type: 'exit', exitCode: 0 }, + }, + ], + }; + const shellToolDef = shellTool({ shell }); + const agent = new Agent({ name: 'ShellAgent' }); + const runContext 
= new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'], maxOutputLength: 5 }, + }; + + const results = await executeShellActions( + agent, + [{ toolCall, shell: shellToolDef } as any], + runner, + runContext, + ); + + expect(results).toHaveLength(1); + const rawItem = results[0].rawItem as protocol.ShellCallResultItem; + expect(rawItem.output).toEqual(shell.result.output); + expect(rawItem.providerData).toBeUndefined(); + expect(rawItem.maxOutputLength).toBeUndefined(); + expect(shell.calls).toHaveLength(1); + }); + + it('returns failed status when shell throws', async () => { + const shell = new FakeShell(); + shell.error = new Error('boom'); + const shellToolDef = shellTool({ shell }); + const agent = new Agent({ name: 'ShellAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + + const results = await executeShellActions( + agent, + [{ toolCall, shell: shellToolDef } as any], + runner, + runContext, + ); + + const rawItem = results[0].rawItem as protocol.ShellCallResultItem; + expect(Array.isArray(rawItem.output)).toBe(true); + expect(rawItem.output[0]).toMatchObject({ + stdout: '', + stderr: 'boom', + outcome: { type: 'exit', exitCode: null }, + }); + }); + + it('returns approval item when needsApproval is true and not yet approved', async () => { + const shell = new FakeShell(); + const shellToolDef = shellTool({ shell, needsApproval: async () => true }); + const agent = new Agent({ name: 'ShellAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + 
status: 'completed', + action: { commands: ['echo hi'] }, + }; + + const results = await executeShellActions( + agent, + [{ toolCall, shell: shellToolDef } as any], + runner, + runContext, + ); + + expect(results).toHaveLength(1); + expect(results[0].type).toBe('tool_approval_item'); + expect(shell.calls).toHaveLength(0); + }); + + it('honors onApproval for shell tools', async () => { + const shell = new FakeShell(); + const onApproval = vi.fn(async () => ({ approve: true })); + const shellToolDef = shellTool({ + shell, + needsApproval: async () => true, + onApproval, + }); + const agent = new Agent({ name: 'ShellAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + + const results = await executeShellActions( + agent, + [{ toolCall, shell: shellToolDef } as any], + runner, + runContext, + ); + + expect(onApproval).toHaveBeenCalled(); + // Should proceed to execution + expect(shell.calls).toHaveLength(1); + expect(results[0].rawItem.type).toBe('shell_call_output'); + }); + + it('returns failed output when approval explicitly rejected', async () => { + const shell = new FakeShell(); + const shellToolDef = shellTool({ shell, needsApproval: async () => true }); + const agent = new Agent({ name: 'ShellAgent' }); + const runContext = new RunContext(); + runContext._rebuildApprovals({ + shell: { approved: [], rejected: ['call_shell'] }, + }); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ShellCallItem = { + type: 'shell_call', + callId: 'call_shell', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + + const results = await executeShellActions( + agent, + [{ toolCall, shell: shellToolDef } as any], + runner, + runContext, + ); + + const rawItem = results[0].rawItem as protocol.ShellCallResultItem; + 
expect(Array.isArray(rawItem.output)).toBe(true); + expect(rawItem.output[0]).toMatchObject({ + stdout: '', + stderr: 'Tool execution was not approved.', + outcome: { type: 'exit', exitCode: null }, + }); + expect(shell.calls).toHaveLength(0); + }); +}); + +describe('executeApplyPatchOperations', () => { + it('returns completed status when editor succeeds', async () => { + const editor = new FakeEditor(); + editor.result = { status: 'completed', output: 'done' }; + const applyPatch = applyPatchTool({ editor }); + const agent = new Agent({ name: 'EditorAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + + const results = await executeApplyPatchOperations( + agent, + [{ toolCall, applyPatch } as any], + runner, + runContext, + ); + + const rawItem = results[0].rawItem as protocol.ApplyPatchCallResultItem; + expect(rawItem.status).toBe('completed'); + expect(rawItem.output).toBe('done'); + expect(editor.operations).toHaveLength(1); + expect(editor.operations[0]).toEqual(toolCall.operation); + }); + + it('returns failed status when editor throws', async () => { + const editor = new FakeEditor(); + editor.errors.delete_file = new Error('cannot delete'); + const applyPatch = applyPatchTool({ editor }); + const agent = new Agent({ name: 'EditorAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + + const results = await executeApplyPatchOperations( + agent, + [{ toolCall, applyPatch } as any], + runner, + runContext, + ); + + const rawItem = results[0].rawItem as protocol.ApplyPatchCallResultItem; 
+ expect(rawItem.status).toBe('failed'); + expect(typeof rawItem.output).toBe('string'); + }); + + it('returns approval item when needsApproval is true and not yet approved', async () => { + const editor = new FakeEditor(); + const applyPatch = applyPatchTool({ + editor, + needsApproval: async () => true, + }); + const agent = new Agent({ name: 'EditorAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + + const results = await executeApplyPatchOperations( + agent, + [{ toolCall, applyPatch } as any], + runner, + runContext, + ); + + expect(results).toHaveLength(1); + expect(results[0].type).toBe('tool_approval_item'); + expect(editor.operations).toHaveLength(0); + }); + + it('honors onApproval for apply_patch tools', async () => { + const editor = new FakeEditor(); + const onApproval = vi.fn(async () => ({ approve: true })); + const applyPatch = applyPatchTool({ + editor, + needsApproval: async () => true, + onApproval, + }); + const agent = new Agent({ name: 'EditorAgent' }); + const runContext = new RunContext(); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + + const results = await executeApplyPatchOperations( + agent, + [{ toolCall, applyPatch } as any], + runner, + runContext, + ); + + expect(onApproval).toHaveBeenCalled(); + expect(editor.operations).toHaveLength(1); + expect(results[0].rawItem.type).toBe('apply_patch_call_output'); + }); + + it('returns failed output when approval explicitly rejected', async () => { + const editor = new FakeEditor(); + const applyPatch = applyPatchTool({ + editor, + needsApproval: async () => 
true, + }); + const agent = new Agent({ name: 'EditorAgent' }); + const runContext = new RunContext(); + runContext._rebuildApprovals({ + apply_patch: { approved: [], rejected: ['call_patch'] }, + }); + const runner = new Runner({ tracingDisabled: true }); + const toolCall: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + callId: 'call_patch', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + + const results = await executeApplyPatchOperations( + agent, + [{ toolCall, applyPatch } as any], + runner, + runContext, + ); + + const rawItem = results[0].rawItem as protocol.ApplyPatchCallResultItem; + expect(rawItem.status).toBe('failed'); + expect(rawItem.output).toBe('Tool execution was not approved.'); + expect(editor.operations).toHaveLength(0); + }); +}); + // -------------------------------------------------------------------------- // Additional tests based on comprehensive test plan // -------------------------------------------------------------------------- @@ -2182,6 +2584,8 @@ describe('resolveTurnAfterModelResponse', () => { handoffs: [], functions: [], computerActions: [], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [ { requestItem: approvalItem, @@ -2269,6 +2673,8 @@ describe('resolveInterruptedTurn', () => { handoffs: [], functions: [], computerActions: [], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [], toolsUsed: [], hasToolsOrApprovalsToRun() { @@ -2328,6 +2734,8 @@ describe('resolveInterruptedTurn', () => { handoffs: [], functions: [], computerActions: [{ toolCall: computerCall, computer }], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [], toolsUsed: ['computer_use'], hasToolsOrApprovalsToRun() { diff --git a/packages/agents-core/test/runState.test.ts b/packages/agents-core/test/runState.test.ts index c6c67765..f68276e8 100644 --- a/packages/agents-core/test/runState.test.ts +++ b/packages/agents-core/test/runState.test.ts @@ -12,9 +12,14 
@@ import { RunToolApprovalItem as ToolApprovalItem, RunMessageOutputItem, } from '../src/items'; -import { computerTool } from '../src/tool'; +import { applyPatchTool, computerTool, shellTool } from '../src/tool'; import * as protocol from '../src/types/protocol'; -import { TEST_MODEL_MESSAGE, FakeComputer } from './stubs'; +import { + TEST_MODEL_MESSAGE, + FakeComputer, + FakeShell, + FakeEditor, +} from './stubs'; describe('RunState', () => { it('initializes with default values', () => { @@ -281,6 +286,8 @@ describe('deserialize helpers', () => { functions: [], handoffs: [], computerActions: [{ toolCall: call, computer: tool }], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [], toolsUsed: [], hasToolsOrApprovalsToRun: () => true, @@ -292,6 +299,60 @@ describe('deserialize helpers', () => { ); }); + it('deserializeProcessedResponse restores shell actions', async () => { + const shell = shellTool({ shell: new FakeShell() }); + const agent = new Agent({ name: 'Shell', tools: [shell] }); + const state = new RunState(new RunContext(), '', agent, 1); + const call: protocol.ShellCallItem = { + type: 'shell_call', + callId: 's1', + status: 'completed', + action: { commands: ['echo hi'] }, + }; + state._lastProcessedResponse = { + newItems: [], + functions: [], + handoffs: [], + computerActions: [], + shellActions: [{ toolCall: call, shell }], + applyPatchActions: [], + mcpApprovalRequests: [], + toolsUsed: [], + hasToolsOrApprovalsToRun: () => true, + }; + + const restored = await RunState.fromString(agent, state.toString()); + expect(restored._lastProcessedResponse?.shellActions[0]?.shell).toBe(shell); + }); + + it('deserializeProcessedResponse restores apply_patch actions', async () => { + const editorTool = applyPatchTool({ editor: new FakeEditor() }); + const agent = new Agent({ name: 'Editor', tools: [editorTool] }); + const state = new RunState(new RunContext(), '', agent, 1); + const call: protocol.ApplyPatchCallItem = { + type: 
'apply_patch_call', + callId: 'ap1', + status: 'completed', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }; + state._lastProcessedResponse = { + newItems: [], + functions: [], + handoffs: [], + computerActions: [], + shellActions: [], + applyPatchActions: [{ toolCall: call, applyPatch: editorTool }], + mcpApprovalRequests: [], + toolsUsed: [], + hasToolsOrApprovalsToRun: () => true, + }; + + const restored = await RunState.fromString(agent, state.toString()); + expect( + restored._lastProcessedResponse?.applyPatchActions[0]?.applyPatch, + ).toBe(editorTool); + }); + it('deserializeProcessedResponse restores currentStep', async () => { const tool = computerTool({ computer: new FakeComputer() }); const agent = new Agent({ name: 'Comp', tools: [tool] }); @@ -307,6 +368,8 @@ describe('deserialize helpers', () => { functions: [], handoffs: [], computerActions: [{ toolCall: call, computer: tool }], + shellActions: [], + applyPatchActions: [], mcpApprovalRequests: [ { requestItem: { @@ -325,6 +388,9 @@ describe('deserialize helpers', () => { }, type: 'tool_approval_item', agent: new Agent({ name: 'foo ' }), + name: 'fetch_generic_url_content', + arguments: + '{"url":"https://raw.githubusercontent.com/openai/codex/main/README.md"}', toJSON: function (): any { throw new Error('Function not implemented.'); }, diff --git a/packages/agents-core/test/stubs.ts b/packages/agents-core/test/stubs.ts index 865c8803..2acf9c3a 100644 --- a/packages/agents-core/test/stubs.ts +++ b/packages/agents-core/test/stubs.ts @@ -9,6 +9,12 @@ import { import { tool } from '../src/tool'; import type { Computer } from '../src/computer'; import type { Environment } from '../src/computer'; +import type { Shell, ShellAction, ShellResult } from '../src/shell'; +import type { + Editor, + ApplyPatchOperation, + ApplyPatchResult, +} from '../src/editor'; import * as protocol from '../src/types/protocol'; import { Usage } from '../src/usage'; import { Span, Trace, TracingExporter } from 
'../src'; @@ -141,3 +147,60 @@ export class FakeTracingExporter implements TracingExporter { return Promise.resolve(); } } + +export class FakeShell implements Shell { + public readonly calls: ShellAction[] = []; + public result: ShellResult = { + output: [ + { + stdout: 'shell result', + stderr: '', + outcome: { type: 'exit', exitCode: 0 }, + }, + ], + }; + public error: Error | null = null; + + async run(action: ShellAction): Promise { + this.calls.push(action); + if (this.error) { + throw this.error; + } + return this.result; + } +} + +export class FakeEditor implements Editor { + public readonly operations: ApplyPatchOperation[] = []; + public result: ApplyPatchResult | void = { status: 'completed' }; + public errors: Partial> = {}; + + async createFile( + operation: Extract, + ): Promise { + return this.handle(operation); + } + + async updateFile( + operation: Extract, + ): Promise { + return this.handle(operation); + } + + async deleteFile( + operation: Extract, + ): Promise { + return this.handle(operation); + } + + private async handle( + operation: ApplyPatchOperation, + ): Promise { + this.operations.push(operation); + const error = this.errors[operation.type]; + if (error) { + throw error; + } + return this.result; + } +} diff --git a/packages/agents-core/test/tool.test.ts b/packages/agents-core/test/tool.test.ts index a3be626e..55f6a926 100644 --- a/packages/agents-core/test/tool.test.ts +++ b/packages/agents-core/test/tool.test.ts @@ -1,9 +1,16 @@ -import { describe, it, expect } from 'vitest'; -import { computerTool, hostedMcpTool, tool } from '../src/tool'; +import { describe, it, expect, vi } from 'vitest'; +import { + applyPatchTool, + computerTool, + hostedMcpTool, + shellTool, + tool, +} from '../src/tool'; import { z } from 'zod'; import { Computer } from '../src'; import { Agent } from '../src/agent'; import { RunContext } from '../src/runContext'; +import { FakeEditor, FakeShell } from './stubs'; interface Bar { bar: string; @@ -34,6 +41,58 @@ 
describe('Tool', () => { expect(t.type).toBe('computer'); expect(t.name).toBe('computer_use_preview'); }); + + it('shellTool assigns default name', () => { + const shell = new FakeShell(); + const t = shellTool({ shell }); + expect(t.type).toBe('shell'); + expect(t.name).toBe('shell'); + expect(t.shell).toBe(shell); + }); + + it('shellTool needsApproval boolean becomes function', async () => { + const shell = new FakeShell(); + const t = shellTool({ shell, needsApproval: true }); + const approved = await t.needsApproval( + new RunContext(), + { commands: [] }, + 'id', + ); + expect(approved).toBe(true); + }); + + it('shellTool onApproval is passed through', async () => { + const shell = new FakeShell(); + const onApproval = vi.fn(async () => ({ approve: true })); + const t = shellTool({ shell, onApproval }); + expect(t.onApproval).toBe(onApproval); + }); + + it('applyPatchTool assigns default name', () => { + const editor = new FakeEditor(); + const t = applyPatchTool({ editor }); + expect(t.type).toBe('apply_patch'); + expect(t.name).toBe('apply_patch'); + expect(t.editor).toBe(editor); + }); + + it('applyPatchTool needsApproval boolean becomes function', async () => { + const editor = new FakeEditor(); + const t = applyPatchTool({ editor, needsApproval: true }); + const approved = await t.needsApproval( + new RunContext(), + { type: 'delete_file', path: 'tmp' }, + 'id', + ); + expect(approved).toBe(true); + }); + + it('applyPatchTool onApproval is passed through', async () => { + const editor = new FakeEditor(); + const onApproval = vi.fn(async () => ({ approve: true })); + const t = applyPatchTool({ editor, onApproval }); + expect(t.onApproval).toBe(onApproval); + }); }); describe('create a tool using hostedMcpTool utility', () => { diff --git a/packages/agents-core/test/utils/applyDiff.test.ts b/packages/agents-core/test/utils/applyDiff.test.ts new file mode 100644 index 00000000..5f1fbf69 --- /dev/null +++ b/packages/agents-core/test/utils/applyDiff.test.ts @@ 
-0,0 +1,715 @@ +import { describe, it, expect } from 'vitest'; +import { applyDiff } from '../../src/utils/applyDiff'; + +describe('applyDiff', () => { + it('applies added lines to empty input via V4A floating hunk', () => { + const diff = ['@@', '+hello', '+world'].join('\n'); + const result = applyDiff('', diff); + expect(result).toBe('hello\nworld\n'); + }); + + it('applies plus-prefixed content for create mode', () => { + const diff = ['+hello', '+world', '+'].join('\n'); + const result = applyDiff('', diff, 'create'); + expect(result).toBe('hello\nworld\n'); + }); + + it('rejects create diff without + prefixes', () => { + const diff = ['line1', 'line2'].join('\n'); + expect(() => applyDiff('', diff, 'create')).toThrow(); + }); + + it('applies floating hunk without marker or line numbers', () => { + const input = ['- Milk', '- Bread', '- Eggs', '- Apples', '- Coffee'].join( + '\n', + ); + const diff = [ + '@@', + ' - Milk', + ' - Bread', + ' - Eggs', + '-- Apples', + '-- Coffee', + '+- [x] Apples', + '+- [x] Coffee', + ].join('\n'); + const result = applyDiff(input, diff); + expect(result).toBe( + ['- Milk', '- Bread', '- Eggs', '- [x] Apples', '- [x] Coffee'].join( + '\n', + ), + ); + }); + + it('applies V4A replacements with context', () => { + const input = ['line1', 'line2', 'line3'].join('\n') + '\n'; + const diff = ['@@ line1', '-line2', '+updated', ' line3'].join('\n'); + const result = applyDiff(input, diff); + expect(result).toBe(['line1', 'updated', 'line3'].join('\n') + '\n'); + }); + + it('applies V4A deletions', () => { + const input = ['keep', 'remove me', 'stay'].join('\n') + '\n'; + const diff = ['@@ keep', '-remove me', ' stay'].join('\n'); + const result = applyDiff(input, diff); + expect(result).toBe(['keep', 'stay'].join('\n') + '\n'); + }); + + it('applies V4A context marker diffs (class method rename)', () => { + const input = + [ + 'class Foo:', + ' def baz(self):', + ' return f"foo {randint()}"', + '', + 'def main():', + ' foo = Foo()', 
+ ' print(foo.baz())', + ].join('\n') + '\n'; + const diff = [ + '@@ class Foo:', + '- def baz(self):', + '+ def rand(self):', + ' return f"foo {randint()}"', + '@@ def main():', + ' foo = Foo()', + '- print(foo.baz())', + '+ print(foo.rand())', + ].join('\n'); + const result = applyDiff(input, diff); + expect(result).toBe( + [ + 'class Foo:', + ' def rand(self):', + ' return f"foo {randint()}"', + '', + 'def main():', + ' foo = Foo()', + ' print(foo.rand())', + ].join('\n') + '\n', + ); + }); + + it('treats line-number markers as context anchors', () => { + const input = 'one\ntwo\n'; + const diff = ['@@ -1,2 +1,2 @@', ' one', '-two', '+2'].join('\n'); + const result = applyDiff(input, diff); + expect(result).toBe('one\n2\n'); + }); + + it('throws on context mismatch', () => { + const input = 'one\ntwo\n'; + const diff = ['@@ -1,2 +1,2 @@', ' x', '-two', '+2'].join('\n'); + expect(() => applyDiff(input, diff)).toThrow(); + }); + + // + // Example 1 – Simple text replacement + // + it('Example 1: README.md basic replacement', () => { + const input = ['Hello, world!', 'This is my project.'].join('\n'); + + const diff = ['-Hello, world!', '+Hello, V4A diff format!'].join('\n'); + + const expected = ['Hello, V4A diff format!', 'This is my project.'].join( + '\n', + ); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 2 – Replace a whole function (no extra context) + // + it('Example 2: greet.py function replacement', () => { + const input = [ + 'def greet(name):', + ' return "Hello " + name', + '', + 'if __name__ == "__main__":', + ' print(greet("Alice"))', + ].join('\n'); + + const diff = [ + '-def greet(name):', + '- return "Hello " + name', + '+def greet(name: str) -> str:', + '+ return f"Hello, {name}!"', + ].join('\n'); + + const expected = [ + 'def greet(name: str) -> str:', + ' return f"Hello, {name}!"', + '', + 'if __name__ == "__main__":', + ' print(greet("Alice"))', + ].join('\n'); + + expect(applyDiff(input, diff, 
'default')).toBe(expected); + }); + + // + // Example 3 – Toggle a config value with context lines + // + it('Example 3: config.yml toggle debug flag', () => { + const input = ['env: dev', 'debug: false', 'log_level: info'].join('\n'); + + const diff = [ + ' env: dev', + '-debug: false', + '+debug: true', + ' log_level: info', + ].join('\n'); + + const expected = ['env: dev', 'debug: true', 'log_level: info'].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 4 – Insert a new import (pure insertion) + // + it('Example 4: app.py insert import sys', () => { + const input = [ + 'import os', + '', + 'def main():', + ' print("Running app")', + '', + 'if __name__ == "__main__":', + ' main()', + ].join('\n'); + + const diff = [ + ' import os', + '+import sys', + '', + ' def main():', + ' print("Running app")', + '', + ' if __name__ == "__main__":', + ' main()', + ].join('\n'); + + const expected = [ + 'import os', + 'import sys', + '', + 'def main():', + ' print("Running app")', + '', + 'if __name__ == "__main__":', + ' main()', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 5 – Remove a logging line + // + it('Example 5: service.py remove debug logging', () => { + const input = [ + 'def handle_request(req):', + ' print("DEBUG: got request", req)', + ' return {"status": "ok"}', + ].join('\n'); + + const diff = [ + ' def handle_request(req):', + '- print("DEBUG: got request", req)', + ' return {"status": "ok"}', + ].join('\n'); + + const expected = [ + 'def handle_request(req):', + ' return {"status": "ok"}', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 6 – Use @@ for function context + // + it('Example 6: math_utils.py update add() with @@ context', () => { + const input = [ + 'def add(a, b):', + ' return a + b', + '', + 'def mul(a, b):', + ' return a * b', + ].join('\n'); + + const diff = [ + '@@', + '-def 
add(a, b):', + '- return a + b', + '+def add(a: int, b: int) -> int:', + '+ """Add two integers."""', + '+ return a + b', + ].join('\n'); + + const expected = [ + 'def add(a: int, b: int) -> int:', + ' """Add two integers."""', + ' return a + b', + '', + 'def mul(a, b):', + ' return a * b', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 7 – Nested @@ (class + method) + // + it('Example 7: repository.py update get_user method', () => { + const input = [ + 'class UserRepository:', + ' def get_user(self, user_id):', + ' raise NotImplementedError', + '', + ' def save_user(self, user):', + ' raise NotImplementedError', + ].join('\n'); + + const diff = [ + '@@ class UserRepository:', + ' def get_user(self, user_id):', + '- raise NotImplementedError', + '+ """Fetch a user by ID or return None."""', + '+ return self._db.get(user_id)', + ].join('\n'); + + const expected = [ + 'class UserRepository:', + ' def get_user(self, user_id):', + ' """Fetch a user by ID or return None."""', + ' return self._db.get(user_id)', + '', + ' def save_user(self, user):', + ' raise NotImplementedError', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 8 – Single hunk, simple constant change + // + it('Example 8: settings.py bump timeout', () => { + const input = [ + 'API_URL = "https://api.example.com"', + 'TIMEOUT_SECONDS = 5', + 'RETRIES = 1', + ].join('\n'); + + const diff = [ + ' API_URL = "https://api.example.com"', + '-TIMEOUT_SECONDS = 5', + '+TIMEOUT_SECONDS = 10', + ' RETRIES = 1', + ].join('\n'); + + const expected = [ + 'API_URL = "https://api.example.com"', + 'TIMEOUT_SECONDS = 10', + 'RETRIES = 1', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 9 – Add a new text file + // + it('Example 9: docs/intro.txt create file', () => { + const input = ''; + + const diff = [ + '+Welcome to the project!', + '+This 
documentation will guide you through setup.', + ].join('\n'); + + const expected = [ + 'Welcome to the project!', + 'This documentation will guide you through setup.', + ].join('\n'); + + expect(applyDiff(input, diff, 'create')).toBe(expected); + }); + + // + // Example 10 – Add a new Python module + // + it('Example 10: utils/strings.py create module', () => { + const input = ''; + + const diff = [ + '+def slugify(text: str) -> str:', + '+ return text.lower().replace(" ", "-")', + '+', + '+__all__ = ["slugify"]', + ].join('\n'); + + const expected = [ + 'def slugify(text: str) -> str:', + ' return text.lower().replace(" ", "-")', + '', + '__all__ = ["slugify"]', + ].join('\n'); + + expect(applyDiff(input, diff, 'create')).toBe(expected); + }); + + // + // Example 11 – Add app.py and (conceptually) update main.py + // + it('Example 11: app.py create + main.py update', () => { + // main.py before + const mainInput = [ + 'from app import run', + '', + 'if __name__ == "__main__":', + ' run()', + ].join('\n'); + + // app.py before (does not exist) + const appInput = ''; + + // Diff body for app.py (from *** Add File: app.py ...) + const appDiff = [ + '+def run():', + '+ print("Hello from app.run()")', + '+', + ].join('\n'); + + // Diff body for main.py (from *** Update File: main.py ...) 
+ const mainDiff = [ + '-from app import run', + '+from app import run', + ' ', + ' if __name__ == "__main__":', + ' run()', + ].join('\n'); + + const appExpected = [ + 'def run():', + ' print("Hello from app.run()")', + '', + ].join('\n'); + + const mainExpected = [ + 'from app import run', + '', + 'if __name__ == "__main__":', + ' run()', + ].join('\n'); + + expect(applyDiff(appInput, appDiff, 'create')).toBe(appExpected); + expect(applyDiff(mainInput, mainDiff, 'default')).toBe(mainExpected); + }); + + // + // Example 12 – Add a file with a blank line + // + it('Example 12: LICENSE create file with blank line', () => { + const input = ''; + + const diff = ['+MIT License', '+', '+Copyright (c) 2025'].join('\n'); + + const expected = ['MIT License', '', 'Copyright (c) 2025'].join('\n'); + + expect(applyDiff(input, diff, 'create')).toBe(expected); + }); + + // + // Example 13 – Delete a file + // NOTE: In V4A, the delete is signaled by *** Delete File, with NO diff body. + // Here we assert that applyDiff is a no-op when diff is empty; the caller + // is responsible for actually removing the file. + // + it('Example 13: temp/debug.log delete is orchestrated outside applyDiff', () => { + const input = ['DEBUG something...', 'more debug...'].join('\n'); + + const diff = ''; // nothing between *** Delete File and *** End Patch + + const expected = input; + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 14 – Rename (move) a file with *** Move to: + // NOTE: The path change is handled outside applyDiff; the diff body only + // contains context, so applyDiff should be a no-op on the content. + // + it('Example 14: old_name.txt moved to docs/new_name.txt (content unchanged)', () => { + const input = 'Legacy content'; + + // Body between *** Move to: ... 
and *** End Patch + const diff = [' Legacy content'].join('\n'); + + const expected = 'Legacy content'; + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 15 – Multi-file patch (two updates) + // + it('Example 15: api/client.py & api/version.py updates', () => { + const clientInput = 'BASE_URL = "https://old.example.com"'; + const versionInput = 'VERSION = "1.0.0"'; + + const clientDiff = [ + '-BASE_URL = "https://old.example.com"', + '+BASE_URL = "https://api.example.com"', + ].join('\n'); + + const versionDiff = ['-VERSION = "1.0.0"', '+VERSION = "1.1.0"'].join('\n'); + + const clientExpected = 'BASE_URL = "https://api.example.com"'; + const versionExpected = 'VERSION = "1.1.0"'; + + expect(applyDiff(clientInput, clientDiff, 'default')).toBe(clientExpected); + expect(applyDiff(versionInput, versionDiff, 'default')).toBe( + versionExpected, + ); + }); + + // + // Example 16 – Insert new test case in the middle of a file + // + it('Example 16: tests/test_math.py insert test_sub', () => { + const input = [ + 'def test_add():', + ' assert add(1, 2) == 3', + '', + 'def test_mul():', + ' assert mul(2, 3) == 6', + ].join('\n'); + + const diff = [ + ' def test_add():', + ' assert add(1, 2) == 3', + '', + '+def test_sub():', + '+ assert sub(5, 2) == 3', + '+', + ' def test_mul():', + ' assert mul(2, 3) == 6', + ].join('\n'); + + const expected = [ + 'def test_add():', + ' assert add(1, 2) == 3', + '', + 'def test_sub():', + ' assert sub(5, 2) == 3', + '', + 'def test_mul():', + ' assert mul(2, 3) == 6', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 17 – Update last lines near EOF (originally used *** End of File) + // + it('Example 17: footer.txt update last two lines', () => { + const input = ['Line A', 'Line B', 'Line C'].join('\n'); + + // Body between *** Update File and *** End of File + const diff = [ + ' Line A', + '-Line B', + '-Line C', + '+Line B (updated)', + '+Line C 
(updated)', + ].join('\n'); + + const expected = ['Line A', 'Line B (updated)', 'Line C (updated)'].join( + '\n', + ); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 18 – Markdown heading change with context + // + it('Example 18: docs/guide.md update heading and intro', () => { + const input = [ + '# Getting Started', + '', + 'This is the old intro text.', + '', + '## Installation', + '', + 'Steps go here.', + ].join('\n'); + + const diff = [ + '-# Getting Started', + '-', + '-This is the old intro text.', + '+# Quick Start Guide', + '+', + '+This is the updated introduction, with clearer instructions.', + '', + ' ## Installation', + ].join('\n'); + + const expected = [ + '# Quick Start Guide', + '', + 'This is the updated introduction, with clearer instructions.', + '', + '## Installation', + '', + 'Steps go here.', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 19 – JSON field update + // + it('Example 19: config.json enabled -> true', () => { + const input = [ + '{', + ' "name": "demo",', + ' "enabled": false,', + ' "retries": 3', + '}', + ].join('\n'); + + const diff = [ + ' {', + ' "name": "demo",', + '- "enabled": false,', + '+ "enabled": true,', + ' "retries": 3', + ' }', + ].join('\n'); + + const expected = [ + '{', + ' "name": "demo",', + ' "enabled": true,', + ' "retries": 3', + '}', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 20 – Two separate hunks in one file using @@ + // + it('Example 20: web/app.js update add() and greet()', () => { + const input = [ + 'function add(a, b) {', + ' return a + b;', + '}', + '', + 'function greet(name) {', + ' return "Hello " + name;', + '}', + ].join('\n'); + + const diff = [ + '@@', + '-function add(a, b) {', + '- return a + b;', + '-}', + '+function add(a, b) {', + '+ return a + b; // simple add', + '+}', + ' ', + ' function greet(name) {', + '- return "Hello " 
+ name;', + '-}', + '+ return `Hello ${name}!`;', + '+}', + ].join('\n'); + + const expected = [ + 'function add(a, b) {', + ' return a + b; // simple add', + '}', + '', + 'function greet(name) {', + ' return `Hello ${name}!`;', + '}', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 21 – Anchor on function signature but edit body + // + it('Example 21: controller.py insert logging after validate', () => { + const input = [ + 'def handle(req):', + ' validate(req)', + ' return process(req)', + ].join('\n'); + + const diff = [ + '@@ def handle(req):', + ' validate(req)', + '+ log_request(req)', + ' return process(req)', + ].join('\n'); + + const expected = [ + 'def handle(req):', + ' validate(req)', + ' log_request(req)', + ' return process(req)', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); + + // + // Example 22 – Anchor near EOF with indent + // + it('Example 22: greeter.py update main print message', () => { + const input = [ + 'class Greeter:', + ' def hello(self):', + ' return "hi"', + '', + 'def main():', + ' g = Greeter()', + ' print(g.hello())', + ].join('\n'); + + const diff = [ + '@@ def main():', + ' g = Greeter()', + '- print(g.hello())', + '+ print(f"Greeting: {g.hello()}")', + ].join('\n'); + + const expected = [ + 'class Greeter:', + ' def hello(self):', + ' return "hi"', + '', + 'def main():', + ' g = Greeter()', + ' print(f"Greeting: {g.hello()}")', + ].join('\n'); + + expect(applyDiff(input, diff, 'default')).toBe(expected); + }); +}); diff --git a/packages/agents-core/test/utils/serialize.test.ts b/packages/agents-core/test/utils/serialize.test.ts index d3d697da..e31d9d42 100644 --- a/packages/agents-core/test/utils/serialize.test.ts +++ b/packages/agents-core/test/utils/serialize.test.ts @@ -38,6 +38,28 @@ describe('serialize utilities', () => { }); }); + it('serializes shell tools', () => { + const t: any = { + type: 'shell', + name: 'custom-shell', + 
}; + expect(serializeTool(t)).toEqual({ + type: 'shell', + name: 'custom-shell', + }); + }); + + it('serializes apply_patch tools', () => { + const t: any = { + type: 'apply_patch', + name: 'custom-editor', + }; + expect(serializeTool(t)).toEqual({ + type: 'apply_patch', + name: 'custom-editor', + }); + }); + it('serializes hosted tools', () => { const t: any = { type: 'hosted_tool', name: 'bt', providerData: { a: 1 } }; expect(serializeTool(t)).toEqual({ diff --git a/packages/agents-extensions/src/aiSdk.ts b/packages/agents-extensions/src/aiSdk.ts index 55ffec68..9279a7e9 100644 --- a/packages/agents-extensions/src/aiSdk.ts +++ b/packages/agents-extensions/src/aiSdk.ts @@ -205,6 +205,22 @@ export function itemsToLanguageV2Messages( throw new UserError('Computer call results are not supported'); } + if (item.type === 'shell_call') { + throw new UserError('Shell calls are not supported'); + } + + if (item.type === 'shell_call_output') { + throw new UserError('Shell call results are not supported'); + } + + if (item.type === 'apply_patch_call') { + throw new UserError('Apply patch calls are not supported'); + } + + if (item.type === 'apply_patch_call_output') { + throw new UserError('Apply patch call results are not supported'); + } + if ( item.type === 'reasoning' && item.content.length > 0 && @@ -522,8 +538,7 @@ export function toolToLanguageV2Tool( }; } - const exhaustiveCheck: never = tool; - throw new Error(`Unsupported tool type: ${exhaustiveCheck}`); + throw new Error(`Unsupported tool type: ${JSON.stringify(tool)}`); } /** diff --git a/packages/agents-extensions/test/aiSdk.test.ts b/packages/agents-extensions/test/aiSdk.test.ts index c2357714..2d26a4f3 100644 --- a/packages/agents-extensions/test/aiSdk.test.ts +++ b/packages/agents-extensions/test/aiSdk.test.ts @@ -175,6 +175,30 @@ describe('itemsToLanguageV2Messages', () => { ).toThrow(UserError); }); + test('throws on shell tool calls and results', () => { + expect(() => + 
itemsToLanguageV2Messages(stubModel({}), [{ type: 'shell_call' } as any]), + ).toThrow(UserError); + expect(() => + itemsToLanguageV2Messages(stubModel({}), [ + { type: 'shell_call_output' } as any, + ]), + ).toThrow(UserError); + }); + + test('throws on apply_patch tool calls and results', () => { + expect(() => + itemsToLanguageV2Messages(stubModel({}), [ + { type: 'apply_patch_call' } as any, + ]), + ).toThrow(UserError); + expect(() => + itemsToLanguageV2Messages(stubModel({}), [ + { type: 'apply_patch_call_output' } as any, + ]), + ).toThrow(UserError); + }); + test('converts user images, function results and reasoning items', () => { const items: protocol.ModelItem[] = [ { diff --git a/packages/agents-openai/src/openaiChatCompletionsConverter.ts b/packages/agents-openai/src/openaiChatCompletionsConverter.ts index 4ea12a0f..509d1b15 100644 --- a/packages/agents-openai/src/openaiChatCompletionsConverter.ts +++ b/packages/agents-openai/src/openaiChatCompletionsConverter.ts @@ -232,7 +232,11 @@ export function itemsToMessages( } } else if ( item.type === 'computer_call' || - item.type === 'computer_call_result' + item.type === 'computer_call_result' || + item.type === 'shell_call' || + item.type === 'shell_call_output' || + item.type === 'apply_patch_call' || + item.type === 'apply_patch_call_output' ) { throw new UserError( 'Computer use calls are not supported for chat completions. 
Got item: ' + diff --git a/packages/agents-openai/src/openaiResponsesModel.ts b/packages/agents-openai/src/openaiResponsesModel.ts index c01ff871..943f8fc1 100644 --- a/packages/agents-openai/src/openaiResponsesModel.ts +++ b/packages/agents-openai/src/openaiResponsesModel.ts @@ -77,14 +77,24 @@ type ResponseOutputItemWithFunctionResult = function_name?: string; }); +type ResponseShellCallOutput = + OpenAI.Responses.ResponseInputItem.ShellCallOutput; +type ResponseShellCallOutputContent = + OpenAI.Responses.ResponseFunctionShellCallOutputContent; +type ResponseApplyPatchCallOutput = + OpenAI.Responses.ResponseInputItem.ApplyPatchCallOutput; + const HostedToolChoice = z.enum([ 'file_search', 'web_search', 'web_search_preview', - 'computer_use_preview', 'code_interpreter', 'image_generation', 'mcp', + // Specialized local tools + 'computer_use_preview', + 'shell', + 'apply_patch', ]); const DefaultToolChoice = z.enum(['auto', 'required', 'none']); @@ -103,7 +113,7 @@ function getToolChoice( const result = HostedToolChoice.safeParse(toolChoice); if (result.success) { - return { type: result.data }; + return { type: result.data as any }; } return { type: 'function', name: toolChoice }; @@ -590,6 +600,20 @@ function converTool<_TContext = unknown>( }, include: undefined, }; + } else if (tool.type === 'shell') { + return { + tool: { + type: 'shell', + } as OpenAI.Responses.FunctionShellTool, + include: undefined, + }; + } else if (tool.type === 'apply_patch') { + return { + tool: { + type: 'apply_patch', + } as OpenAI.Responses.ApplyPatchTool, + include: undefined, + }; } else if (tool.type === 'hosted_tool') { if (tool.providerData?.type === 'web_search') { return { @@ -913,7 +937,7 @@ function getInputItems( ]; } - return input.map((item) => { + return input.map((item): OpenAI.Responses.ResponseInputItem => { if (isMessageItem(item)) { return getMessageItem(item); } @@ -990,6 +1014,88 @@ function getInputItems( return entry; } + if (item.type === 'shell_call') { + 
const action: OpenAI.Responses.ResponseInputItem.ShellCall['action'] = { + commands: item.action.commands, + timeout_ms: + typeof item.action.timeoutMs === 'number' + ? item.action.timeoutMs + : null, + max_output_length: + typeof item.action.maxOutputLength === 'number' + ? item.action.maxOutputLength + : null, + }; + + const entry: OpenAI.Responses.ResponseInputItem.ShellCall = { + type: 'shell_call', + id: item.id, + call_id: item.callId, + status: item.status ?? 'in_progress', + action, + }; + + return entry; + } + + if (item.type === 'shell_call_output') { + const shellOutputs: protocol.ShellCallOutputContent[] = item.output; + const sanitizedOutputs: ResponseShellCallOutputContent[] = + shellOutputs.map((entry) => { + const outcome = entry?.outcome; + const exitCode = outcome?.type === 'exit' ? outcome.exitCode : null; + return { + stdout: typeof entry?.stdout === 'string' ? entry.stdout : '', + stderr: typeof entry?.stderr === 'string' ? entry.stderr : '', + outcome: + outcome?.type === 'timeout' + ? { type: 'timeout' } + : { type: 'exit', exit_code: exitCode ?? 0 }, + } as ResponseShellCallOutputContent; + }); + + const entry: OpenAI.Responses.ResponseInputItem.ShellCallOutput & { + max_output_length?: number; + } = { + type: 'shell_call_output', + call_id: item.callId, + output: sanitizedOutputs, + id: item.id ?? undefined, + }; + if (typeof item.maxOutputLength === 'number') { + entry.max_output_length = item.maxOutputLength; + } + + return entry; + } + + if (item.type === 'apply_patch_call') { + if (!item.operation) { + throw new UserError('apply_patch_call missing operation'); + } + const entry: OpenAI.Responses.ResponseInputItem.ApplyPatchCall = { + type: 'apply_patch_call', + id: item.id ?? undefined, + call_id: item.callId, + status: item.status ?? 
'in_progress', + operation: item.operation, + }; + + return entry; + } + + if (item.type === 'apply_patch_call_output') { + const entry: OpenAI.Responses.ResponseInputItem.ApplyPatchCallOutput = { + type: 'apply_patch_call_output', + id: item.id ?? undefined, + call_id: item.callId, + status: item.status ?? 'completed', + output: item.output ?? undefined, + }; + + return entry; + } + if (item.type === 'hosted_tool_call') { if ( item.providerData?.type === 'web_search_call' || @@ -1129,7 +1235,7 @@ function getInputItems( return { ...camelOrSnakeToSnakeCase(item.providerData), // place here to prioritize the below fields id: item.id, - } as OpenAI.Responses.ResponseItem; + } as OpenAI.Responses.ResponseInputItem; } const exhaustive = item satisfies never; @@ -1252,6 +1358,120 @@ function convertToOutputItem( providerData, }; return output; + } else if (item.type === 'shell_call') { + const { call_id, status, action, ...providerData } = item; + const shellAction: protocol.ShellAction = { + commands: Array.isArray(action?.commands) ? action.commands : [], + }; + const timeout = action?.timeout_ms; + if (typeof timeout === 'number') { + shellAction.timeoutMs = timeout; + } + const maxOutputLength = action?.max_output_length; + if (typeof maxOutputLength === 'number') { + shellAction.maxOutputLength = maxOutputLength; + } + const output: protocol.ShellCallItem = { + type: 'shell_call', + id: item.id ?? undefined, + callId: call_id, + status: status ?? 'in_progress', + action: shellAction, + providerData, + }; + return output; + } else if (item.type === 'shell_call_output') { + const { + call_id, + output: responseOutput, + max_output_length, + ...providerData + } = item as ResponseShellCallOutput; + let normalizedOutput: protocol.ShellCallOutputContent[] = []; + if (Array.isArray(responseOutput)) { + normalizedOutput = responseOutput.map((entry) => ({ + stdout: typeof entry?.stdout === 'string' ? entry.stdout : '', + stderr: typeof entry?.stderr === 'string' ? 
entry.stderr : '', + outcome: + entry?.outcome?.type === 'timeout' + ? { type: 'timeout' as const } + : { + type: 'exit' as const, + exitCode: + typeof entry?.outcome?.exit_code === 'number' + ? entry.outcome.exit_code + : null, + }, + })); + } + const output: protocol.ShellCallResultItem = { + type: 'shell_call_output', + id: item.id ?? undefined, + callId: call_id, + output: normalizedOutput, + providerData, + }; + if (typeof max_output_length === 'number') { + output.maxOutputLength = max_output_length; + } + return output; + } else if (item.type === 'apply_patch_call') { + const { call_id, status, operation, ...providerData } = item; + if (!operation) { + throw new UserError('apply_patch_call missing operation'); + } + + let normalizedOperation: protocol.ApplyPatchOperation; + switch (operation.type) { + case 'create_file': + normalizedOperation = { + type: 'create_file', + path: operation.path, + diff: operation.diff, + }; + break; + case 'delete_file': + normalizedOperation = { + type: 'delete_file', + path: operation.path, + }; + break; + case 'update_file': + normalizedOperation = { + type: 'update_file', + path: operation.path, + diff: operation.diff, + }; + break; + default: + throw new UserError('Unknown apply_patch operation type'); + } + + const output: protocol.ApplyPatchCallItem = { + type: 'apply_patch_call', + id: item.id ?? undefined, + callId: call_id, + status: status ?? 'in_progress', + operation: normalizedOperation, + providerData, + }; + return output; + } else if (item.type === 'apply_patch_call_output') { + const { + call_id, + status, + output: responseOutput, + ...providerData + } = item as unknown as ResponseApplyPatchCallOutput; + const output: protocol.ApplyPatchCallResultItem = { + type: 'apply_patch_call_output', + id: item.id ?? undefined, + callId: call_id, + status, + output: typeof responseOutput === 'string' ? 
responseOutput : undefined, + providerData, + }; + return output; } else if (item.type === 'mcp_list_tools') { const { ...providerData } = item; const output: protocol.HostedToolCallItem = { diff --git a/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts b/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts index 2f418fda..7655a8b0 100644 --- a/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts +++ b/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts @@ -26,6 +26,8 @@ describe('getToolChoice', () => { expect(getToolChoice('computer_use_preview')).toEqual({ type: 'computer_use_preview', }); + expect(getToolChoice('shell')).toEqual({ type: 'shell' }); + expect(getToolChoice('apply_patch')).toEqual({ type: 'apply_patch' }); }); it('supports arbitrary function names', () => { @@ -71,6 +73,16 @@ describe('converTool', () => { }); }); + it('converts shell tools', () => { + const t = converTool({ type: 'shell', name: 'shell' } as any); + expect(t.tool).toEqual({ type: 'shell' }); + }); + + it('converts apply_patch tools', () => { + const t = converTool({ type: 'apply_patch', name: 'apply_patch' } as any); + expect(t.tool).toEqual({ type: 'apply_patch' }); + }); + it('converts builtin tools', () => { const web = converTool({ type: 'hosted_tool', @@ -184,18 +196,93 @@ describe('getInputItems', () => { callId: 'c2', output: { data: 'img' }, }, + { + type: 'shell_call', + id: 'sh1', + callId: 's1', + status: 'completed', + action: { + commands: ['echo hi'], + timeoutMs: 10, + maxOutputLength: 5, + }, + }, + { + type: 'shell_call_output', + id: 'sh2', + callId: 's1', + output: [ + { + stdout: 'hi', + stderr: '', + outcome: { type: 'exit', exitCode: 0 }, + }, + ], + }, + { + type: 'apply_patch_call', + id: 'ap1', + callId: 'p1', + status: 'completed', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }, + { + type: 'apply_patch_call_output', + id: 'ap2', + callId: 'p1', + status: 'failed', + output: 'conflict', + }, 
{ type: 'reasoning', id: 'r1', content: [{ text: 'why' }] }, ] as any); expect(items[0]).toEqual({ id: 'u1', role: 'user', content: 'hi' }); - expect(items[1]).toMatchObject({ type: 'function_call', name: 'fn' }); - expect(items[2]).toMatchObject({ - type: 'function_call_output', - output: 'ok', + expect(items.some((entry) => entry.type === 'function_call')).toBe(true); + expect(items.some((entry) => entry.type === 'function_call_output')).toBe( + true, + ); + expect(items.some((entry) => entry.type === 'computer_call')).toBe(true); + expect(items.some((entry) => entry.type === 'computer_call_output')).toBe( + true, + ); + const shellCall = items.find((entry) => entry.type === 'shell_call') as any; + expect(shellCall).toMatchObject({ + type: 'shell_call', + call_id: 's1', + action: { commands: ['echo hi'], timeout_ms: 10, max_output_length: 5 }, + }); + const shellCallOutput = items.find( + (entry) => entry.type === 'shell_call_output', + ) as any; + expect(shellCallOutput).toMatchObject({ + type: 'shell_call_output', + id: 'sh2', + call_id: 's1', + output: [ + { + stdout: 'hi', + stderr: '', + outcome: { type: 'exit', exit_code: 0 }, + }, + ], }); - expect(items[3]).toMatchObject({ type: 'computer_call', action: 'open' }); - expect(items[4]).toMatchObject({ type: 'computer_call_output' }); - expect(items[5]).toMatchObject({ type: 'reasoning' }); + const applyPatchCall = items.find( + (entry) => entry.type === 'apply_patch_call', + ) as any; + expect(applyPatchCall).toMatchObject({ + type: 'apply_patch_call', + call_id: 'p1', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }); + const applyPatchCallOutput = items.find( + (entry) => entry.type === 'apply_patch_call_output', + ) as any; + expect(applyPatchCallOutput).toMatchObject({ + type: 'apply_patch_call_output', + call_id: 'p1', + status: 'failed', + }); + expect(items.some((entry) => entry.type === 'reasoning')).toBe(true); }); it('converts structured tool outputs into input items', () => { @@ -578,4 
+665,70 @@ describe('convertToOutputItem', () => { ], }); }); + + it('converts shell and apply_patch tool items', () => { + const out = convertToOutputItem([ + { + type: 'shell_call', + id: 'sh1', + call_id: 's1', + status: 'completed', + action: { commands: ['echo hi'], timeout_ms: 15, max_output_length: 3 }, + } as any, + { + type: 'shell_call_output', + id: 'sh2', + call_id: 's1', + output: [ + { + stdout: 'hi', + stderr: '', + outcome: { type: 'exit', exit_code: 0 }, + }, + ], + } as any, + { + type: 'apply_patch_call', + id: 'ap1', + call_id: 'p1', + status: 'in_progress', + operation: { type: 'delete_file', path: 'tmp.txt' }, + } as any, + { + type: 'apply_patch_call_output', + id: 'ap2', + call_id: 'p1', + status: 'failed', + output: 'conflict', + } as any, + ]); + + expect(out[0]).toMatchObject({ + type: 'shell_call', + callId: 's1', + action: { commands: ['echo hi'], timeoutMs: 15, maxOutputLength: 3 }, + }); + expect(out[1]).toMatchObject({ + type: 'shell_call_output', + callId: 's1', + output: [ + { + stdout: 'hi', + stderr: '', + outcome: { type: 'exit', exitCode: 0 }, + }, + ], + }); + expect(out[2]).toMatchObject({ + type: 'apply_patch_call', + callId: 'p1', + operation: { type: 'delete_file', path: 'tmp.txt' }, + }); + expect(out[3]).toMatchObject({ + type: 'apply_patch_call_output', + callId: 'p1', + status: 'failed', + output: 'conflict', + }); + }); }); diff --git a/packages/agents-realtime/src/realtimeSession.ts b/packages/agents-realtime/src/realtimeSession.ts index 6815b38c..07981574 100644 --- a/packages/agents-realtime/src/realtimeSession.ts +++ b/packages/agents-realtime/src/realtimeSession.ts @@ -1001,8 +1001,10 @@ export class RealtimeSession< options: { alwaysApprove?: boolean } = { alwaysApprove: false }, ) { this.#context.approveTool(approvalItem, options); + const toolName = + approvalItem.toolName ?? 
(approvalItem.rawItem as any).name; const tool = this.#currentAgent.tools.find( - (tool) => tool.name === approvalItem.rawItem.name, + (tool) => tool.name === toolName, ); if ( tool && @@ -1020,9 +1022,7 @@ export class RealtimeSession< approvalItemToRealtimeApprovalItem(approvalItem); this.#transport.sendMcpResponse(mcpApprovalRequest, true); } else { - throw new ModelBehaviorError( - `Tool ${approvalItem.rawItem.name} not found`, - ); + throw new ModelBehaviorError(`Tool ${toolName ?? 'unknown'} not found`); } } @@ -1039,8 +1039,10 @@ export class RealtimeSession< this.#context.rejectTool(approvalItem, options); // we still need to simulate a tool call to the agent to let the agent know + const toolName = + approvalItem.toolName ?? (approvalItem.rawItem as any).name; const tool = this.#currentAgent.tools.find( - (tool) => tool.name === approvalItem.rawItem.name, + (tool) => tool.name === toolName, ); if ( tool && @@ -1058,9 +1060,7 @@ export class RealtimeSession< approvalItemToRealtimeApprovalItem(approvalItem); this.#transport.sendMcpResponse(mcpApprovalRequest, false); } else { - throw new ModelBehaviorError( - `Tool ${approvalItem.rawItem.name} not found`, - ); + throw new ModelBehaviorError(`Tool ${toolName ?? 'unknown'} not found`); } } } diff --git a/packages/agents-realtime/src/utils.ts b/packages/agents-realtime/src/utils.ts index efb0dde1..bbbd82a2 100644 --- a/packages/agents-realtime/src/utils.ts +++ b/packages/agents-realtime/src/utils.ts @@ -294,6 +294,15 @@ export function realtimeApprovalItemToApprovalItem( export function approvalItemToRealtimeApprovalItem( item: RunToolApprovalItem, ): RealtimeMcpCallApprovalRequestItem { + if ( + item.rawItem.type !== 'function_call' && + item.rawItem.type !== 'hosted_tool_call' + ) { + throw new Error( + 'Invalid approval item type for Realtime MCP approval request', + ); + } + const { name, arguments: args, providerData } = item.rawItem; const { itemId, serverLabel, ...rest } = providerData ?? 
{}; if (!itemId || !serverLabel) { diff --git a/packages/agents/src/index.ts b/packages/agents/src/index.ts index b6efef02..7dbb4dfb 100644 --- a/packages/agents/src/index.ts +++ b/packages/agents/src/index.ts @@ -7,4 +7,16 @@ setDefaultOpenAITracingExporter(); export * from '@openai/agents-core'; export * from '@openai/agents-openai'; +export { applyPatchTool, shellTool } from '@openai/agents-core'; +export type { + Shell, + ShellAction, + ShellResult, + ShellOutputResult, + ApplyPatchOperation, + ApplyPatchResult, + Editor, + ShellTool, + ApplyPatchTool, +} from '@openai/agents-core'; export * as realtime from '@openai/agents-realtime'; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 81c58231..c3d2efc8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -420,8 +420,8 @@ importers: specifier: ^5.0.0 version: 5.0.0 openai: - specifier: ^6.7.0 - version: 6.7.0(ws@8.18.3)(zod@3.25.76) + specifier: ^6 + version: 6.8.1(ws@8.18.3)(zod@3.25.76) examples/research-bot: dependencies: @@ -434,6 +434,12 @@ importers: '@openai/agents': specifier: workspace:* version: link:../../packages/agents + '@openai/agents-core': + specifier: workspace:* + version: link:../../packages/agents-core + chalk: + specifier: ^5.6.2 + version: 5.6.2 playwright: specifier: ^1.55.1 version: 1.55.1 @@ -454,7 +460,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.7.0(ws@8.18.3)(zod@3.25.76) + version: 6.8.1(ws@8.18.3)(zod@3.25.76) devDependencies: '@types/debug': specifier: ^4.1.12 @@ -470,7 +476,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.7.0(ws@8.18.3)(zod@3.25.76) + version: 6.8.1(ws@8.18.3)(zod@3.25.76) devDependencies: '@types/debug': specifier: ^4.1.12 @@ -521,7 +527,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.7.0(ws@8.18.3)(zod@3.25.76) + version: 6.8.1(ws@8.18.3)(zod@3.25.76) devDependencies: '@ai-sdk/provider': specifier: ^1.1.3 @@ -4307,8 +4313,8 @@ packages: oniguruma-to-es@4.3.3: resolution: {integrity: 
sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==} - openai@6.7.0: - resolution: {integrity: sha512-mgSQXa3O/UXTbA8qFzoa7aydbXBJR5dbLQXCRapAOtoNT+v69sLdKMZzgiakpqhclRnhPggPAXoniVGn2kMY2A==} + openai@6.8.1: + resolution: {integrity: sha512-ACifslrVgf+maMz9vqwMP4+v9qvx5Yzssydizks8n+YUJ6YwUoxj51sKRQ8HYMfR6wgKLSIlaI108ZwCk+8yig==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -10269,7 +10275,7 @@ snapshots: regex: 6.0.1 regex-recursion: 6.0.2 - openai@6.7.0(ws@8.18.3)(zod@3.25.76): + openai@6.8.1(ws@8.18.3)(zod@3.25.76): optionalDependencies: ws: 8.18.3 zod: 3.25.76 diff --git a/scripts/embedMeta.ts b/scripts/embedMeta.ts index a922b69e..064392e3 100644 --- a/scripts/embedMeta.ts +++ b/scripts/embedMeta.ts @@ -6,7 +6,7 @@ const packageJson = JSON.parse( readFileSync(resolve(cwd(), 'package.json'), 'utf-8'), ); -const dependencies = Object.entries(packageJson.dependencies); +const dependencies = Object.entries(packageJson.dependencies ?? {}); const openaiDependencies = dependencies.filter( ([name]) => name.startsWith('@openai/') || name === 'openai', ); @@ -21,7 +21,7 @@ const versions = { const METADATA = { name: packageJson.name, version: packageJson.version, - versions: versions, + versions, }; const output = ` From 0213f1d4c02aff8440eb190e66af814b4b439a5c Mon Sep 17 00:00:00 2001 From: Dominik Kundel Date: Thu, 13 Nov 2025 10:07:21 -0800 Subject: [PATCH 2/3] ci: remove npm token --- .github/workflows/release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bec33763..a47ae8c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,6 +49,5 @@ jobs: publish: pnpm ci:publish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} NPM_CONFIG_PROVENANCE: true NPM_CONFIG_ACCESS: public From 9ac652144cf077ea68185632624d26aa475a73be Mon Sep 17 00:00:00 2001 From: Dominik Kundel Date: Thu, 13 Nov 2025 
10:21:17 -0800 Subject: [PATCH 3/3] update openai version --- pnpm-lock.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c3d2efc8..12e30939 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -421,7 +421,7 @@ importers: version: 5.0.0 openai: specifier: ^6 - version: 6.8.1(ws@8.18.3)(zod@3.25.76) + version: 6.9.0(ws@8.18.3)(zod@3.25.76) examples/research-bot: dependencies: @@ -460,7 +460,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.8.1(ws@8.18.3)(zod@3.25.76) + version: 6.9.0(ws@8.18.3)(zod@3.25.76) devDependencies: '@types/debug': specifier: ^4.1.12 @@ -476,7 +476,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.8.1(ws@8.18.3)(zod@3.25.76) + version: 6.9.0(ws@8.18.3)(zod@3.25.76) devDependencies: '@types/debug': specifier: ^4.1.12 @@ -527,7 +527,7 @@ importers: version: 4.4.3 openai: specifier: ^6 - version: 6.8.1(ws@8.18.3)(zod@3.25.76) + version: 6.9.0(ws@8.18.3)(zod@3.25.76) devDependencies: '@ai-sdk/provider': specifier: ^1.1.3 @@ -4313,8 +4313,8 @@ packages: oniguruma-to-es@4.3.3: resolution: {integrity: sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==} - openai@6.8.1: - resolution: {integrity: sha512-ACifslrVgf+maMz9vqwMP4+v9qvx5Yzssydizks8n+YUJ6YwUoxj51sKRQ8HYMfR6wgKLSIlaI108ZwCk+8yig==} + openai@6.9.0: + resolution: {integrity: sha512-n2sJRYmM+xfJ0l3OfH8eNnIyv3nQY7L08gZQu3dw6wSdfPtKAk92L83M2NIP5SS8Cl/bsBBG3yKzEOjkx0O+7A==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -10275,7 +10275,7 @@ snapshots: regex: 6.0.1 regex-recursion: 6.0.2 - openai@6.8.1(ws@8.18.3)(zod@3.25.76): + openai@6.9.0(ws@8.18.3)(zod@3.25.76): optionalDependencies: ws: 8.18.3 zod: 3.25.76