diff --git a/docs/tools.md b/docs/tools.md index 0acb2cb4a..13606d3b5 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -173,6 +173,14 @@ for tool in agent.tools: } ``` +### Returning images or files from function tools + +In addition to returning text outputs, you can return one or more images or files as the output of a function tool. To do so, return any of the following (or a list of them): + +- Images: [`ToolOutputImage`][agents.tool.ToolOutputImage] (or the TypedDict version, [`ToolOutputImageDict`][agents.tool.ToolOutputImageDict]) +- Files: [`ToolOutputFileContent`][agents.tool.ToolOutputFileContent] (or the TypedDict version, [`ToolOutputFileContentDict`][agents.tool.ToolOutputFileContentDict]) +- Text: a string or any stringable object, or [`ToolOutputText`][agents.tool.ToolOutputText] (or the TypedDict version, [`ToolOutputTextDict`][agents.tool.ToolOutputTextDict]) + ### Custom function tools Sometimes, you don't want to use a Python function as a tool. You can directly create a [`FunctionTool`][agents.tool.FunctionTool] if you prefer. You'll need to provide: @@ -288,9 +296,9 @@ async def run_my_agent() -> str: In certain cases, you might want to modify the output of the tool-agents before returning it to the central agent. This may be useful if you want to: -- Extract a specific piece of information (e.g., a JSON payload) from the sub-agent's chat history. -- Convert or reformat the agent’s final answer (e.g., transform Markdown into plain text or CSV). -- Validate the output or provide a fallback value when the agent’s response is missing or malformed. +- Extract a specific piece of information (e.g., a JSON payload) from the sub-agent's chat history. +- Convert or reformat the agent’s final answer (e.g., transform Markdown into plain text or CSV). +- Validate the output or provide a fallback value when the agent’s response is missing or malformed. 
You can do this by supplying the `custom_output_extractor` argument to the `as_tool` method: @@ -370,16 +378,16 @@ asyncio.run(main()) The `is_enabled` parameter accepts: -- **Boolean values**: `True` (always enabled) or `False` (always disabled) -- **Callable functions**: Functions that take `(context, agent)` and return a boolean -- **Async functions**: Async functions for complex conditional logic +- **Boolean values**: `True` (always enabled) or `False` (always disabled) +- **Callable functions**: Functions that take `(context, agent)` and return a boolean +- **Async functions**: Async functions for complex conditional logic Disabled tools are completely hidden from the LLM at runtime, making this useful for: -- Feature gating based on user permissions -- Environment-specific tool availability (dev vs prod) -- A/B testing different tool configurations -- Dynamic tool filtering based on runtime state +- Feature gating based on user permissions +- Environment-specific tool availability (dev vs prod) +- A/B testing different tool configurations +- Dynamic tool filtering based on runtime state ## Handling errors in function tools diff --git a/examples/basic/dynamic_system_prompt.py b/examples/basic/dynamic_system_prompt.py index 7cd39ab66..d9a99bd37 100644 --- a/examples/basic/dynamic_system_prompt.py +++ b/examples/basic/dynamic_system_prompt.py @@ -28,6 +28,7 @@ def custom_instructions( instructions=custom_instructions, ) + async def main(): context = CustomContext(style=random.choice(["haiku", "pirate", "robot"])) print(f"Using style: {context.style}\n") diff --git a/examples/basic/image_tool_output.py b/examples/basic/image_tool_output.py new file mode 100644 index 000000000..741f07e3b --- /dev/null +++ b/examples/basic/image_tool_output.py @@ -0,0 +1,43 @@ +import asyncio + +from agents import Agent, Runner, ToolOutputImage, ToolOutputImageDict, function_tool + +return_typed_dict = True + + +@function_tool +def fetch_random_image() -> ToolOutputImage | 
ToolOutputImageDict: + """Fetch a random image.""" + + print("Image tool called") + if return_typed_dict: + return { + "type": "image", + "image_url": "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", + "detail": "auto", + } + + return ToolOutputImage( + image_url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", + detail="auto", + ) + + +async def main(): + agent = Agent( + name="Assistant", + instructions="You are a helpful assistant.", + tools=[fetch_random_image], + ) + + result = await Runner.run( + agent, + input="Fetch an image using the random_image tool, then describe it", + ) + print(result.final_output) + """The image shows the iconic Golden Gate Bridge, a large suspension bridge painted in a + bright reddish-orange color...""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/basic/tools.py b/examples/basic/tools.py index 1c4496603..2052d9427 100644 --- a/examples/basic/tools.py +++ b/examples/basic/tools.py @@ -18,6 +18,7 @@ def get_weather(city: Annotated[str, "The city to get the weather for"]) -> Weat print("[debug] get_weather called") return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind.") + agent = Agent( name="Hello world", instructions="You are a helpful agent.", diff --git a/src/agents/__init__.py b/src/agents/__init__.py index e7ba817ba..b285d6f8c 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -81,6 +81,12 @@ MCPToolApprovalFunctionResult, MCPToolApprovalRequest, Tool, + ToolOutputFileContent, + ToolOutputFileContentDict, + ToolOutputImage, + ToolOutputImageDict, + ToolOutputText, + ToolOutputTextDict, WebSearchTool, default_tool_error_function, function_tool, @@ -273,6 +279,12 @@ def enable_verbose_stdout_logging(): "MCPToolApprovalFunction", "MCPToolApprovalRequest", "MCPToolApprovalFunctionResult", + "ToolOutputText", + "ToolOutputTextDict", + "ToolOutputImage", + "ToolOutputImageDict", + 
"ToolOutputFileContent", + "ToolOutputFileContentDict", "function_tool", "Usage", "add_trace_processor", diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index b410b21b4..1b43635ab 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -823,7 +823,7 @@ async def run_single_tool( output=result, run_item=ToolCallOutputItem( output=result, - raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)), + raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result), agent=agent, ), ) diff --git a/src/agents/extensions/memory/__init__.py b/src/agents/extensions/memory/__init__.py index 2dd496ec1..5d670c4ad 100644 --- a/src/agents/extensions/memory/__init__.py +++ b/src/agents/extensions/memory/__init__.py @@ -58,8 +58,6 @@ def __getattr__(name: str) -> Any: return AdvancedSQLiteSession except ModuleNotFoundError as e: - raise ImportError( - f"Failed to import AdvancedSQLiteSession: {e}" - ) from e + raise ImportError(f"Failed to import AdvancedSQLiteSession: {e}") from e raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/src/agents/items.py b/src/agents/items.py index 651d73127..1a699077a 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -21,6 +21,12 @@ from openai.types.responses.response_code_interpreter_tool_call import ( ResponseCodeInterpreterToolCall, ) +from openai.types.responses.response_function_call_output_item_list_param import ( + ResponseFunctionCallOutputItemListParam, + ResponseFunctionCallOutputItemParam, +) +from openai.types.responses.response_input_file_content_param import ResponseInputFileContentParam +from openai.types.responses.response_input_image_content_param import ResponseInputImageContentParam from openai.types.responses.response_input_item_param import ( ComputerCallOutput, FunctionCallOutput, @@ -36,9 +42,17 @@ ) from openai.types.responses.response_reasoning_item import ResponseReasoningItem from pydantic import BaseModel -from 
typing_extensions import TypeAlias +from typing_extensions import TypeAlias, assert_never from .exceptions import AgentsException, ModelBehaviorError +from .logger import logger +from .tool import ( + ToolOutputFileContent, + ToolOutputImage, + ToolOutputText, + ValidToolOutputPydanticModels, + ValidToolOutputPydanticModelsTypeAdapter, +) from .usage import Usage if TYPE_CHECKING: @@ -298,11 +312,93 @@ def text_message_output(cls, message: MessageOutputItem) -> str: @classmethod def tool_call_output_item( - cls, tool_call: ResponseFunctionToolCall, output: str + cls, tool_call: ResponseFunctionToolCall, output: Any ) -> FunctionCallOutput: - """Creates a tool call output item from a tool call and its output.""" + """Creates a tool call output item from a tool call and its output. + + Accepts either plain values (stringified) or structured outputs using + input_text/input_image/input_file shapes. Structured outputs may be + provided as Pydantic models or dicts, or an iterable of such items. + """ + + converted_output = cls._convert_tool_output(output) + return { "call_id": tool_call.call_id, - "output": output, + "output": converted_output, "type": "function_call_output", } + + @classmethod + def _convert_tool_output(cls, output: Any) -> str | ResponseFunctionCallOutputItemListParam: + """Converts a tool return value into an output acceptable by the Responses API.""" + + # If the output is either a single or list of the known structured output types, convert to + # ResponseFunctionCallOutputItemListParam. Else, just stringify. 
+ if isinstance(output, (list, tuple)): + maybe_converted_output_list = [ + cls._maybe_get_output_as_structured_function_output(item) for item in output + ] + if all(maybe_converted_output_list): + return [ + cls._convert_single_tool_output_pydantic_model(item) + for item in maybe_converted_output_list + if item is not None + ] + else: + return str(output) + else: + maybe_converted_output = cls._maybe_get_output_as_structured_function_output(output) + if maybe_converted_output: + return [cls._convert_single_tool_output_pydantic_model(maybe_converted_output)] + else: + return str(output) + + @classmethod + def _maybe_get_output_as_structured_function_output( + cls, output: Any + ) -> ValidToolOutputPydanticModels | None: + if isinstance(output, (ToolOutputText, ToolOutputImage, ToolOutputFileContent)): + return output + elif isinstance(output, dict): + try: + return ValidToolOutputPydanticModelsTypeAdapter.validate_python(output) + except pydantic.ValidationError: + logger.debug("dict was not a valid tool output pydantic model") + return None + + return None + + @classmethod + def _convert_single_tool_output_pydantic_model( + cls, output: ValidToolOutputPydanticModels + ) -> ResponseFunctionCallOutputItemParam: + if isinstance(output, ToolOutputText): + return {"type": "input_text", "text": output.text} + elif isinstance(output, ToolOutputImage): + # Forward all provided optional fields so the Responses API receives + # the correct identifiers and settings for the image resource. + result: ResponseInputImageContentParam = {"type": "input_image"} + if output.image_url is not None: + result["image_url"] = output.image_url + if output.file_id is not None: + result["file_id"] = output.file_id + if output.detail is not None: + result["detail"] = output.detail + return result + elif isinstance(output, ToolOutputFileContent): + # Forward all provided optional fields so the Responses API receives + # the correct identifiers and metadata for the file resource. 
+ result_file: ResponseInputFileContentParam = {"type": "input_file"} + if output.file_data is not None: + result_file["file_data"] = output.file_data + if output.file_url is not None: + result_file["file_url"] = output.file_url + if output.file_id is not None: + result_file["file_id"] = output.file_id + if output.filename is not None: + result_file["filename"] = output.filename + return result_file + else: + assert_never(output) + raise ValueError(f"Unexpected tool output type: {output}") diff --git a/src/agents/tool.py b/src/agents/tool.py index 2796754d5..66c70c29d 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -15,14 +15,13 @@ from openai.types.responses.tool_param import CodeInterpreter, ImageGeneration, Mcp from openai.types.responses.web_search_tool import Filters as WebSearchToolFilters from openai.types.responses.web_search_tool_param import UserLocation -from pydantic import ValidationError +from pydantic import BaseModel, TypeAdapter, ValidationError from typing_extensions import Concatenate, NotRequired, ParamSpec, TypedDict from . import _debug from .computer import AsyncComputer, Computer from .exceptions import ModelBehaviorError from .function_schema import DocstringStyle, function_schema -from .items import RunItem from .logger import logger from .run_context import RunContextWrapper from .strict_schema import ensure_strict_json_schema @@ -34,6 +33,8 @@ if TYPE_CHECKING: from .agent import Agent, AgentBase + from .items import RunItem + ToolParams = ParamSpec("ToolParams") @@ -48,6 +49,72 @@ ] +class ToolOutputText(BaseModel): + """Represents a tool output that should be sent to the model as text.""" + + type: Literal["text"] = "text" + text: str + + +class ToolOutputTextDict(TypedDict, total=False): + """TypedDict variant for text tool outputs.""" + + type: Literal["text"] + text: str + + +class ToolOutputImage(BaseModel): + """Represents a tool output that should be sent to the model as an image. 
+ + You can provide either an `image_url` (URL or data URL) or a `file_id` for previously uploaded + content. The optional `detail` can control vision detail. + """ + + type: Literal["image"] = "image" + image_url: str | None = None + file_id: str | None = None + detail: Literal["low", "high", "auto"] | None = None + + +class ToolOutputImageDict(TypedDict, total=False): + """TypedDict variant for image tool outputs.""" + + type: Literal["image"] + image_url: NotRequired[str] + file_id: NotRequired[str] + detail: NotRequired[Literal["low", "high", "auto"]] + + +class ToolOutputFileContent(BaseModel): + """Represents a tool output that should be sent to the model as a file. + + Provide one of `file_data` (base64), `file_url`, or `file_id`. You may also + provide an optional `filename` when using `file_data` to hint the file name. + """ + + type: Literal["file"] = "file" + file_data: str | None = None + file_url: str | None = None + file_id: str | None = None + filename: str | None = None + + +class ToolOutputFileContentDict(TypedDict, total=False): + """TypedDict variant for file content tool outputs.""" + + type: Literal["file"] + file_data: NotRequired[str] + file_url: NotRequired[str] + file_id: NotRequired[str] + filename: NotRequired[str] + + +ValidToolOutputPydanticModels = Union[ToolOutputText, ToolOutputImage, ToolOutputFileContent] +ValidToolOutputPydanticModelsTypeAdapter: TypeAdapter[ValidToolOutputPydanticModels] = TypeAdapter( + ValidToolOutputPydanticModels +) + + @dataclass class FunctionToolResult: tool: FunctionTool @@ -81,7 +148,9 @@ class FunctionTool: 1. The tool run context. 2. The arguments from the LLM, as a JSON string. - You must return a string representation of the tool output, or something we can call `str()` on. + You must return one of the structured tool output types (e.g. ToolOutputText, ToolOutputImage, + ToolOutputFileContent) or a string representation of the tool output, or a list of them, + or something we can call `str()` on. 
In case of errors, you can either raise an Exception (which will cause the run to fail) or return a string error message (which will be sent back to the LLM). """ diff --git a/tests/mcp/test_message_handler.py b/tests/mcp/test_message_handler.py index c6f8a2a2a..82ac1e214 100644 --- a/tests/mcp/test_message_handler.py +++ b/tests/mcp/test_message_handler.py @@ -21,11 +21,7 @@ _MCPServerWithClientSession, ) -HandlerMessage = ( - RequestResponder[ServerRequest, ClientResult] - | ServerNotification - | Exception -) +HandlerMessage = RequestResponder[ServerRequest, ClientResult] | ServerNotification | Exception class _StubClientSession: @@ -69,8 +65,9 @@ def __init__(self, handler: MessageHandlerFnT | None): def create_streams(self): @contextlib.asynccontextmanager async def _streams(): - send_stream, recv_stream = ( - anyio.create_memory_object_stream[SessionMessage | Exception](1)) + send_stream, recv_stream = anyio.create_memory_object_stream[ + SessionMessage | Exception + ](1) try: yield recv_stream, send_stream, None finally: diff --git a/tests/test_tool_output_conversion.py b/tests/test_tool_output_conversion.py new file mode 100644 index 000000000..bef4745bd --- /dev/null +++ b/tests/test_tool_output_conversion.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall + +from agents import ItemHelpers, ToolOutputFileContent, ToolOutputImage, ToolOutputText + + +def _make_tool_call() -> ResponseFunctionToolCall: + return ResponseFunctionToolCall( + id="call-1", + arguments="{}", + call_id="call-1", + name="dummy", + type="function_call", + ) + + +def test_tool_call_output_item_text_model() -> None: + call = _make_tool_call() + out = ToolOutputText(text="hello") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + assert isinstance(payload["output"], list) and 
len(payload["output"]) == 1 + item = payload["output"][0] + assert item["type"] == "input_text" + assert item["text"] == "hello" + + +def test_tool_call_output_item_image_model() -> None: + call = _make_tool_call() + out = ToolOutputImage(image_url="data:image/png;base64,AAAA") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + assert isinstance(payload["output"], list) and len(payload["output"]) == 1 + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_image" + assert item["image_url"] == "data:image/png;base64,AAAA" + + +def test_tool_call_output_item_file_model() -> None: + call = _make_tool_call() + out = ToolOutputFileContent(file_data="ZmFrZS1kYXRh", filename="foo.txt") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + assert isinstance(payload["output"], list) and len(payload["output"]) == 1 + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_file" + assert item["file_data"] == "ZmFrZS1kYXRh" + + +def test_tool_call_output_item_mixed_list() -> None: + call = _make_tool_call() + outputs = [ + ToolOutputText(text="a"), + ToolOutputImage(image_url="http://example/img.png"), + ToolOutputFileContent(file_data="ZmlsZS1kYXRh"), + ] + + payload = ItemHelpers.tool_call_output_item(call, outputs) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + items = payload["output"] + assert isinstance(items, list) and len(items) == 3 + + assert items[0]["type"] == "input_text" and items[0]["text"] == "a" + assert items[1]["type"] == "input_image" and items[1]["image_url"] == "http://example/img.png" + assert items[2]["type"] == "input_file" and items[2]["file_data"] == "ZmlsZS1kYXRh" + + +def test_tool_call_output_item_text_dict_variant() -> 
None: + call = _make_tool_call() + # Dict variant using the pydantic model schema (type="text"). + out = {"type": "text", "text": "hey"} + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + assert isinstance(payload["output"], list) and len(payload["output"]) == 1 + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_text" + assert item["text"] == "hey" + + +def test_tool_call_output_item_image_forwards_file_id_and_detail() -> None: + """Ensure image outputs forward provided file_id and detail fields.""" + call = _make_tool_call() + out = ToolOutputImage(file_id="file_123", detail="high") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_image" + assert item["file_id"] == "file_123" + assert item["detail"] == "high" + + +def test_tool_call_output_item_file_forwards_file_id_and_filename() -> None: + """Ensure file outputs forward provided file_id and filename fields.""" + call = _make_tool_call() + out = ToolOutputFileContent(file_id="file_456", filename="report.pdf") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == "function_call_output" + assert payload["call_id"] == call.call_id + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_file" + assert item["file_id"] == "file_456" + assert item["filename"] == "report.pdf" + + +def test_tool_call_output_item_file_forwards_file_url() -> None: + """Ensure file outputs forward provided file_url when present.""" + call = _make_tool_call() + out = ToolOutputFileContent(file_url="https://example.com/report.pdf") + payload = ItemHelpers.tool_call_output_item(call, out) + + assert payload["type"] == 
"function_call_output" + assert payload["call_id"] == call.call_id + item = payload["output"][0] + assert isinstance(item, dict) + assert item["type"] == "input_file" + assert item["file_url"] == "https://example.com/report.pdf" diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py index 94d87b994..2b8548442 100644 --- a/tests/voice/test_workflow.py +++ b/tests/voice/test_workflow.py @@ -144,7 +144,11 @@ async def test_single_agent_workflow(monkeypatch) -> None: "status": "completed", "type": "message", }, - {"call_id": "2", "output": "tool_result", "type": "function_call_output"}, + { + "call_id": "2", + "output": "tool_result", + "type": "function_call_output", + }, { "id": "1", "content": [{"annotations": [], "text": "done", "type": "output_text"}], @@ -180,7 +184,11 @@ async def test_single_agent_workflow(monkeypatch) -> None: "status": "completed", "type": "message", }, - {"call_id": "2", "output": "tool_result", "type": "function_call_output"}, + { + "call_id": "2", + "output": "tool_result", + "type": "function_call_output", + }, { "id": "1", "content": [{"annotations": [], "text": "done", "type": "output_text"}],