From e0fa596a25b1a8276ccfb78790e472d5a6a010c3 Mon Sep 17 00:00:00 2001 From: Mengxin Zhu <843303+zxkane@users.noreply.github.com> Date: Tue, 23 Sep 2025 14:43:13 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20implement=20proper=20time?= =?UTF-8?q?out=20handling=20for=20MCP=20client=20tool=20calls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add timeout support to call_tool_sync and call_tool_async methods - Only apply timeout when read_timeout_seconds is explicitly provided to maintain backward compatibility - Handle TimeoutError exceptions with proper error responses - Add comprehensive tests for timeout scenarios and backward compatibility --- src/strands/tools/mcp/mcp_client.py | 35 ++++- tests/strands/tools/mcp/test_mcp_client.py | 170 +++++++++++++++++++++ 2 files changed, 203 insertions(+), 2 deletions(-) diff --git a/src/strands/tools/mcp/mcp_client.py b/src/strands/tools/mcp/mcp_client.py index 96e80385f..e58244ad4 100644 --- a/src/strands/tools/mcp/mcp_client.py +++ b/src/strands/tools/mcp/mcp_client.py @@ -290,8 +290,23 @@ async def _call_tool_async() -> MCPCallToolResult: ) try: - call_tool_result: MCPCallToolResult = self._invoke_on_background_thread(_call_tool_async()).result() + future = self._invoke_on_background_thread(_call_tool_async()) + + # Only apply timeout if explicitly provided by user to maintain backward compatibility + if read_timeout_seconds is not None: + timeout_seconds = read_timeout_seconds.total_seconds() + call_tool_result = future.result(timeout=timeout_seconds) + else: + # No timeout - preserve original behavior for existing customers + call_tool_result = future.result() + return self._handle_tool_result(tool_use_id, call_tool_result) + except futures.TimeoutError: + timeout_seconds = read_timeout_seconds.total_seconds() if read_timeout_seconds else 0 + logger.exception("tool execution timed out after %s seconds", timeout_seconds) + return self._handle_tool_execution_error( + tool_use_id, Exception(f"Tool call timed out after {timeout_seconds} seconds") + ) except Exception as e: logger.exception("tool execution failed") return self._handle_tool_execution_error(tool_use_id, e) @@ -328,8 +343,24 @@ async def _call_tool_async() -> MCPCallToolResult: try: future = self._invoke_on_background_thread(_call_tool_async()) - call_tool_result: MCPCallToolResult = await asyncio.wrap_future(future) + + # Only apply timeout if explicitly provided by user to maintain backward compatibility + if read_timeout_seconds is not None: + timeout_seconds = read_timeout_seconds.total_seconds() + call_tool_result = await asyncio.wait_for( + asyncio.wrap_future(future), timeout=timeout_seconds + ) + else: + # No timeout - preserve original behavior for existing customers + call_tool_result = await asyncio.wrap_future(future) + return self._handle_tool_result(tool_use_id, call_tool_result) + except asyncio.TimeoutError: + timeout_seconds = read_timeout_seconds.total_seconds() if read_timeout_seconds else 0 + logger.exception("async tool execution timed out after %s seconds", timeout_seconds) + return self._handle_tool_execution_error( + tool_use_id, Exception(f"Tool call timed out after {timeout_seconds} seconds") + ) except Exception as e: logger.exception("tool execution failed") return self._handle_tool_execution_error(tool_use_id, e) diff --git a/tests/strands/tools/mcp/test_mcp_client.py b/tests/strands/tools/mcp/test_mcp_client.py index 67d8fe558..dd854a1aa 100644 --- a/tests/strands/tools/mcp/test_mcp_client.py +++ b/tests/strands/tools/mcp/test_mcp_client.py @@ -541,3 +541,173 @@ def slow_transport(): assert client._background_thread_session is None assert client._background_thread_event_loop is None assert not client._init_future.done() # New future created + + +def test_call_tool_sync_server_jsonrpc_error_response(mock_transport, mock_session): + """Test that call_tool_sync handles JSON-RPC error responses from server without hanging. + + This reproduces the bug where the MCP client hangs when the remote streamable HTTP server + responds with a JSON-RPC error instead of a successful tool result. + """ + import time + from datetime import timedelta + + # Simulate a JSON-RPC error by making the call_tool method hang indefinitely + # This mimics what happens when the MCP SDK receives a JSON-RPC error response + def hanging_call_tool(*args, **kwargs): + # Simulate the hanging behavior by sleeping indefinitely + time.sleep(10) # This will cause a timeout in our test + + mock_session.call_tool = hanging_call_tool + + with MCPClient(mock_transport["transport_callable"]) as client: + # Test with a very short timeout to verify the fix + start_time = time.time() + + # This should not hang indefinitely, should timeout after 2 seconds + result = client.call_tool_sync( + tool_use_id="test-hanging-123", + name="test_tool", + arguments={"param": "value"}, + read_timeout_seconds=timedelta(seconds=2) # Short timeout for test + ) + + elapsed_time = time.time() - start_time + + # The call should complete quickly (within 3 seconds) due to timeout handling + # and return a timeout error result rather than hanging for 10+ seconds + assert elapsed_time < 4, f"Tool call took {elapsed_time} seconds, suggesting it hung" + assert result["status"] == "error" + assert result["toolUseId"] == "test-hanging-123" + assert len(result["content"]) == 1 + assert ("timed out" in result["content"][0]["text"] or + "Tool execution failed" in result["content"][0]["text"]) + + +@pytest.mark.asyncio +async def test_call_tool_async_server_jsonrpc_error_response(mock_transport, mock_session): + """Test that call_tool_async handles JSON-RPC error responses from server without hanging. + + This reproduces the bug where the MCP client hangs when the remote streamable HTTP server + responds with a JSON-RPC error instead of a successful tool result. + """ + import asyncio + import time + from datetime import timedelta + + # Simulate a JSON-RPC error by making the call_tool method hang indefinitely + async def hanging_call_tool(*args, **kwargs): + # Simulate the hanging behavior + await asyncio.sleep(10) # This will cause a timeout in our test + + mock_session.call_tool = hanging_call_tool + + with MCPClient(mock_transport["transport_callable"]) as client: + start_time = time.time() + + # This should not hang indefinitely, should timeout after 2 seconds + result = await client.call_tool_async( + tool_use_id="test-async-hanging-123", + name="test_tool", + arguments={"param": "value"}, + read_timeout_seconds=timedelta(seconds=2) # Short timeout for test + ) + + elapsed_time = time.time() - start_time + + # The call should complete quickly (within 3 seconds) due to timeout handling + # and return a timeout error result rather than hanging for 10+ seconds + assert elapsed_time < 4, f"Async tool call took {elapsed_time} seconds, suggesting it hung" + assert result["status"] == "error" + assert result["toolUseId"] == "test-async-hanging-123" + assert len(result["content"]) == 1 + assert ("timed out" in result["content"][0]["text"] or + "Tool execution failed" in result["content"][0]["text"]) + + +def test_call_tool_sync_no_timeout_preserves_original_behavior(mock_transport, mock_session): + """Test that call_tool_sync does not apply timeout when read_timeout_seconds is None. + + This verifies backward compatibility - when no timeout is specified, the method should + behave as it did originally (no timeout applied) to avoid breaking existing customers. + """ + import asyncio + import time + + # Mock a slow but successful tool call that takes longer than the old default (60s) + # but should succeed since no timeout is applied + async def slow_but_successful_call_tool(name, arguments, read_timeout_seconds=None): + # Simulate a slow operation that would exceed old default timeout + await asyncio.sleep(0.1) # Short delay for test performance + return MCPCallToolResult( + isError=False, + content=[MCPTextContent(type="text", text="Success after delay")] + ) + + mock_session.call_tool = slow_but_successful_call_tool + + with MCPClient(mock_transport["transport_callable"]) as client: + start_time = time.time() + + # Call WITHOUT timeout parameter - should not apply any timeout + result = client.call_tool_sync( + tool_use_id="test-no-timeout-123", + name="test_tool", + arguments={"param": "value"} + # No read_timeout_seconds parameter + ) + + elapsed_time = time.time() - start_time + + # Should succeed without timeout error + assert result["status"] == "success" + assert result["toolUseId"] == "test-no-timeout-123" + assert len(result["content"]) == 1 + assert result["content"][0]["text"] == "Success after delay" + + # Should have taken at least the delay time, proving no premature timeout + assert elapsed_time >= 0.1 + + +@pytest.mark.asyncio +async def test_call_tool_async_no_timeout_preserves_original_behavior(mock_transport, mock_session): + """Test that call_tool_async does not apply timeout when read_timeout_seconds is None. + + This verifies backward compatibility - when no timeout is specified, the method should + behave as it did originally (no timeout applied) to avoid breaking existing customers. + """ + import asyncio + import time + + # Mock a slow but successful async tool call + async def slow_but_successful_call_tool(name, arguments, read_timeout_seconds=None): + # Simulate a slow async operation + await asyncio.sleep(0.1) # Short delay for test performance + return MCPCallToolResult( + isError=False, + content=[MCPTextContent(type="text", text="Async success after delay")] + ) + + mock_session.call_tool = slow_but_successful_call_tool + + with MCPClient(mock_transport["transport_callable"]) as client: + start_time = time.time() + + # Call WITHOUT timeout parameter - should not apply any timeout + result = await client.call_tool_async( + tool_use_id="test-async-no-timeout-123", + name="test_tool", + arguments={"param": "value"} + # No read_timeout_seconds parameter + ) + + elapsed_time = time.time() - start_time + + # Should succeed without timeout error + assert result["status"] == "success" + assert result["toolUseId"] == "test-async-no-timeout-123" + assert len(result["content"]) == 1 + assert result["content"][0]["text"] == "Async success after delay" + + # Should have taken at least the delay time, proving no premature timeout + assert elapsed_time >= 0.1