diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py
index 4a7c81672..80a9a8d22 100644
--- a/src/strands/models/bedrock.py
+++ b/src/strands/models/bedrock.py
@@ -82,6 +82,8 @@ class BedrockConfig(TypedDict, total=False):
         guardrail_redact_input_message: If a Bedrock Input guardrail triggers, replace the input with this message.
         guardrail_redact_output: Flag to redact output if guardrail is triggered. Defaults to False.
         guardrail_redact_output_message: If a Bedrock Output guardrail triggers, replace output with this message.
+        guardrail_last_turn_only: Flag to send only the last turn to guardrails instead of full conversation.
+            Defaults to False.
         max_tokens: Maximum number of tokens to generate in the response
         model_id: The Bedrock model ID (e.g., "us.anthropic.claude-sonnet-4-20250514-v1:0")
         include_tool_result_status: Flag to include status field in tool results.
@@ -105,6 +107,7 @@ class BedrockConfig(TypedDict, total=False):
     guardrail_redact_input_message: Optional[str]
     guardrail_redact_output: Optional[bool]
    guardrail_redact_output_message: Optional[str]
+    guardrail_last_turn_only: Optional[bool]
     max_tokens: Optional[int]
     model_id: str
     include_tool_result_status: Optional[Literal["auto"] | bool]
@@ -206,9 +209,19 @@ def _format_request(
         Returns:
             A Bedrock converse stream request.
         """
+        # Filter messages for guardrails if guardrail_last_turn_only is enabled
+        messages_for_request = messages
+        if (
+            self.config.get("guardrail_last_turn_only", False)
+            and self.config.get("guardrail_id")
+            and self.config.get("guardrail_version")
+        ):
+            messages_for_request = self._get_last_turn_messages(messages)
+
         if not tool_specs:
             has_tool_content = any(
-                any("toolUse" in block or "toolResult" in block for block in msg.get("content", [])) for msg in messages
+                any("toolUse" in block or "toolResult" in block for block in msg.get("content", []))
+                for msg in messages_for_request
             )
             if has_tool_content:
                 tool_specs = [noop_tool.tool_spec]
@@ -224,7 +237,7 @@ def _format_request(
         return {
             "modelId": self.config["model_id"],
-            "messages": self._format_bedrock_messages(messages),
+            "messages": self._format_bedrock_messages(messages_for_request),
             "system": system_blocks,
             **(
                 {
@@ -295,6 +308,41 @@ def _format_request(
             ),
         }
 
+    def _get_last_turn_messages(self, messages: Messages) -> Messages:
+        """Get the last turn messages for guardrail evaluation.
+
+        Returns the latest user message and the assistant's response (if it exists).
+        This reduces the conversation context sent to guardrails when guardrail_last_turn_only is True.
+
+        Args:
+            messages: Full conversation messages.
+
+        Returns:
+            Messages containing only the last turn (user + assistant response if exists).
+        """
+        if not messages:
+            return []
+
+        # Find the last user message
+        last_user_index = -1
+        for i in range(len(messages) - 1, -1, -1):
+            if messages[i]["role"] == "user":
+                last_user_index = i
+                break
+
+        if last_user_index == -1:
+            # No user message found, return empty
+            return []
+
+        # Start with the last user message
+        result_messages: Messages = [messages[last_user_index]]
+
+        # Include the assistant's response if it exists (the message after the user message)
+        if last_user_index < len(messages) - 1 and messages[last_user_index + 1]["role"] == "assistant":
+            result_messages.append(messages[last_user_index + 1])
+
+        return result_messages
+
     def _format_bedrock_messages(self, messages: Messages) -> list[dict[str, Any]]:
         """Format messages for Bedrock API compatibility.
 
diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py
index 2809e8a72..cd9eaa09c 100644
--- a/tests/strands/models/test_bedrock.py
+++ b/tests/strands/models/test_bedrock.py
@@ -2052,6 +2052,81 @@ def test_format_request_filters_output_schema(model, messages, model_id):
     assert tool_spec["inputSchema"] == {"type": "object", "properties": {}}
 
 
+def test_get_last_turn_messages(model):
+    """Test _get_last_turn_messages helper method."""
+    # Test empty messages
+    assert model._get_last_turn_messages([]) == []
+
+    # Test single user message (no assistant response yet)
+    messages = [{"role": "user", "content": [{"text": "Hello"}]}]
+    result = model._get_last_turn_messages(messages)
+    assert len(result) == 1
+    assert result[0]["role"] == "user"
+
+    # Test user-assistant pair (user message with assistant response)
+    messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi"}]},
+        {"role": "user", "content": [{"text": "How are you?"}]},
+        {"role": "assistant", "content": [{"text": "I'm doing well"}]},
+    ]
+    result = model._get_last_turn_messages(messages)
+    assert len(result) == 2
+    assert result[0]["role"] == "user"
+    assert result[0]["content"][0]["text"] == "How are you?"
+    assert result[1]["role"] == "assistant"
+    assert result[1]["content"][0]["text"] == "I'm doing well"
+
+    # Test last user message without assistant response
+    messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi"}]},
+        {"role": "user", "content": [{"text": "How are you?"}]},
+    ]
+    result = model._get_last_turn_messages(messages)
+    assert len(result) == 1
+    assert result[0]["role"] == "user"
+    assert result[0]["content"][0]["text"] == "How are you?"
+
+
+def test_format_request_with_guardrail_last_turn_only(model, model_id):
+    """Test _format_request uses filtered messages when guardrail_last_turn_only=True."""
+    model.update_config(guardrail_id="test-guardrail", guardrail_version="DRAFT", guardrail_last_turn_only=True)
+
+    # Test with last user message only (no assistant response yet)
+    messages = [
+        {"role": "user", "content": [{"text": "First message"}]},
+        {"role": "assistant", "content": [{"text": "First response"}]},
+        {"role": "user", "content": [{"text": "Latest message"}]},
+    ]
+
+    request = model._format_request(messages)
+
+    # Should only include the last user message (no assistant response after it yet)
+    formatted_messages = request["messages"]
+    assert len(formatted_messages) == 1
+    assert formatted_messages[0]["role"] == "user"
+    assert formatted_messages[0]["content"][0]["text"] == "Latest message"
+
+    # Test with last user message + assistant response
+    messages_with_response = [
+        {"role": "user", "content": [{"text": "First message"}]},
+        {"role": "assistant", "content": [{"text": "First response"}]},
+        {"role": "user", "content": [{"text": "How are you?"}]},
+        {"role": "assistant", "content": [{"text": "I'm good"}]},
+    ]
+
+    request2 = model._format_request(messages_with_response)
+
+    # Should include last user + assistant response
+    formatted_messages2 = request2["messages"]
+    assert len(formatted_messages2) == 2
+    assert formatted_messages2[0]["role"] == "user"
+    assert formatted_messages2[0]["content"][0]["text"] == "How are you?"
+    assert formatted_messages2[1]["role"] == "assistant"
+    assert formatted_messages2[1]["content"][0]["text"] == "I'm good"
+
+
 @pytest.mark.asyncio
 async def test_stream_backward_compatibility_system_prompt(bedrock_client, model, messages, alist):
     """Test that system_prompt is converted to system_prompt_content when system_prompt_content is None."""
diff --git a/tests_integ/test_bedrock_guardrails.py b/tests_integ/test_bedrock_guardrails.py
index 37fa6028c..f65587342 100644
--- a/tests_integ/test_bedrock_guardrails.py
+++ b/tests_integ/test_bedrock_guardrails.py
@@ -289,6 +289,93 @@ def list_users() -> str:
     assert tool_result["content"][0]["text"] == INPUT_REDACT_MESSAGE
 
 
+def test_guardrail_last_turn_only(boto_session, bedrock_guardrail):
+    """Test that guardrail_last_turn_only only sends the last turn to guardrails."""
+    bedrock_model = BedrockModel(
+        guardrail_id=bedrock_guardrail,
+        guardrail_version="DRAFT",
+        guardrail_last_turn_only=True,
+        boto_session=boto_session,
+    )
+
+    agent = Agent(model=bedrock_model, system_prompt="You are a helpful assistant.", callback_handler=None)
+
+    # First conversation turn - should not trigger guardrail
+    response1 = agent("Hello, how are you?")
+    assert response1.stop_reason != "guardrail_intervened"
+
+    # Second conversation turn with blocked word - should trigger guardrail
+    # Since guardrail_last_turn_only=True, only this message and the previous assistant response
+    # should be evaluated by the guardrail, not the entire conversation history
+    response2 = agent("CACTUS")
+    assert response2.stop_reason == "guardrail_intervened"
+    assert str(response2).strip() == BLOCKED_INPUT
+
+
+def test_guardrail_last_turn_only_recovery_scenario(boto_session, bedrock_guardrail):
+    """Test guardrail recovery: blocked content followed by normal question.
+
+    This tests the key benefit of guardrail_last_turn_only:
+    1. First turn: blocked content triggers guardrail
+    2. Second turn: normal question should work because only last turn is analyzed
+    """
+    bedrock_model = BedrockModel(
+        guardrail_id=bedrock_guardrail,
+        guardrail_version="DRAFT",
+        guardrail_last_turn_only=True,
+        boto_session=boto_session,
+    )
+
+    agent = Agent(model=bedrock_model, system_prompt="You are a helpful assistant.", callback_handler=None)
+
+    # First turn - should be blocked by guardrail
+    response1 = agent("CACTUS")
+    assert response1.stop_reason == "guardrail_intervened"
+    assert str(response1).strip() == BLOCKED_INPUT
+
+    # Second turn - should work normally with last turn only
+    # This is the key test: normal questions should work after blocked content
+    response2 = agent("What is the weather like today?")
+    assert response2.stop_reason != "guardrail_intervened"
+    assert str(response2).strip() != BLOCKED_INPUT
+
+    # Verify the conversation has both messages
+    assert len(agent.messages) == 4  # 2 user + 2 assistant messages
+
+
+def test_guardrail_last_turn_only_output_intervention(boto_session, bedrock_guardrail):
+    """Test that guardrail_last_turn_only works with OUTPUT guardrails.
+
+    This tests that when the assistant tries to output blocked content,
+    the OUTPUT guardrail intervenes, even with guardrail_last_turn_only=True.
+    Then verifies that subsequent normal responses work correctly.
+    """
+    bedrock_model = BedrockModel(
+        guardrail_id=bedrock_guardrail,
+        guardrail_version="DRAFT",
+        guardrail_last_turn_only=True,
+        guardrail_stream_processing_mode="sync",
+        boto_session=boto_session,
+    )
+
+    agent = Agent(
+        model=bedrock_model,
+        system_prompt="When asked to say the word, say CACTUS. Otherwise respond normally.",
+        callback_handler=None,
+        load_tools_from_directory=False,
+    )
+
+    # First turn - assistant tries to output "CACTUS", should be blocked by OUTPUT guardrail
+    response1 = agent("Say the word.")
+    assert response1.stop_reason == "guardrail_intervened"
+    assert BLOCKED_OUTPUT in str(response1)
+
+    # Second turn - normal question should work fine
+    response2 = agent("What is 2+2?")
+    assert response2.stop_reason != "guardrail_intervened"
+    assert BLOCKED_OUTPUT not in str(response2)
+
+
 def test_guardrail_input_intervention_properly_redacts_in_session(boto_session, bedrock_guardrail, temp_dir):
     bedrock_model = BedrockModel(
         guardrail_id=bedrock_guardrail,