Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class BedrockConfig(TypedDict, total=False):
guardrail_redact_input_message: If a Bedrock Input guardrail triggers, replace the input with this message.
guardrail_redact_output: Flag to redact output if guardrail is triggered. Defaults to False.
guardrail_redact_output_message: If a Bedrock Output guardrail triggers, replace output with this message.
guardrail_last_turn_only: Flag to send only the last turn to guardrails instead of full conversation.
Defaults to False.
max_tokens: Maximum number of tokens to generate in the response
model_id: The Bedrock model ID (e.g., "us.anthropic.claude-sonnet-4-20250514-v1:0")
include_tool_result_status: Flag to include status field in tool results.
Expand All @@ -105,6 +107,7 @@ class BedrockConfig(TypedDict, total=False):
guardrail_redact_input_message: Optional[str]
guardrail_redact_output: Optional[bool]
guardrail_redact_output_message: Optional[str]
guardrail_last_turn_only: Optional[bool]
max_tokens: Optional[int]
model_id: str
include_tool_result_status: Optional[Literal["auto"] | bool]
Expand Down Expand Up @@ -206,9 +209,19 @@ def _format_request(
Returns:
A Bedrock converse stream request.
"""
# Filter messages for guardrails if guardrail_last_turn_only is enabled
messages_for_request = messages
if (
self.config.get("guardrail_last_turn_only", False)
and self.config.get("guardrail_id")
and self.config.get("guardrail_version")
):
messages_for_request = self._get_last_turn_messages(messages)

if not tool_specs:
has_tool_content = any(
any("toolUse" in block or "toolResult" in block for block in msg.get("content", [])) for msg in messages
any("toolUse" in block or "toolResult" in block for block in msg.get("content", []))
for msg in messages_for_request
)
if has_tool_content:
tool_specs = [noop_tool.tool_spec]
Expand All @@ -224,7 +237,7 @@ def _format_request(

return {
"modelId": self.config["model_id"],
"messages": self._format_bedrock_messages(messages),
"messages": self._format_bedrock_messages(messages_for_request),
"system": system_blocks,
**(
{
Expand Down Expand Up @@ -295,6 +308,41 @@ def _format_request(
),
}

def _get_last_turn_messages(self, messages: Messages) -> Messages:
"""Get the last turn messages for guardrail evaluation.

Returns the latest user message and the assistant's response (if it exists).
This reduces the conversation context sent to guardrails when guardrail_last_turn_only is True.

Args:
messages: Full conversation messages.

Returns:
Messages containing only the last turn (user + assistant response if exists).
"""
if not messages:
return []

# Find the last user message
last_user_index = -1
for i in range(len(messages) - 1, -1, -1):
if messages[i]["role"] == "user":
last_user_index = i
break

if last_user_index == -1:
# No user message found, return empty
return []

# Start with the last user message
result_messages: Messages = [messages[last_user_index]]

# Include the assistant's response if it exists (the message after the user message)
if last_user_index < len(messages) - 1 and messages[last_user_index + 1]["role"] == "assistant":
result_messages.append(messages[last_user_index + 1])

return result_messages

def _format_bedrock_messages(self, messages: Messages) -> list[dict[str, Any]]:
"""Format messages for Bedrock API compatibility.

Expand Down
75 changes: 75 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2052,6 +2052,81 @@ def test_format_request_filters_output_schema(model, messages, model_id):
assert tool_spec["inputSchema"] == {"type": "object", "properties": {}}


def test_get_last_turn_messages(model):
    """Test _get_last_turn_messages helper method."""
    # An empty conversation yields nothing to evaluate.
    assert model._get_last_turn_messages([]) == []

    # A lone user message with no assistant reply yet.
    result = model._get_last_turn_messages([{"role": "user", "content": [{"text": "Hello"}]}])
    assert [m["role"] for m in result] == ["user"]

    # The latest user/assistant pair is selected out of a longer conversation.
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}]},
        {"role": "assistant", "content": [{"text": "Hi"}]},
        {"role": "user", "content": [{"text": "How are you?"}]},
        {"role": "assistant", "content": [{"text": "I'm doing well"}]},
    ]
    result = model._get_last_turn_messages(conversation)
    assert [m["role"] for m in result] == ["user", "assistant"]
    assert [m["content"][0]["text"] for m in result] == ["How are you?", "I'm doing well"]

    # A trailing user message without an assistant response comes back alone.
    result = model._get_last_turn_messages(conversation[:3])
    assert [m["role"] for m in result] == ["user"]
    assert result[0]["content"][0]["text"] == "How are you?"


def test_format_request_with_guardrail_last_turn_only(model, model_id):
    """Test _format_request uses filtered messages when guardrail_last_turn_only=True."""
    model.update_config(guardrail_id="test-guardrail", guardrail_version="DRAFT", guardrail_last_turn_only=True)

    # Conversation ending on a user message: only that message is forwarded.
    history = [
        {"role": "user", "content": [{"text": "First message"}]},
        {"role": "assistant", "content": [{"text": "First response"}]},
        {"role": "user", "content": [{"text": "Latest message"}]},
    ]
    formatted = model._format_request(history)["messages"]
    assert [m["role"] for m in formatted] == ["user"]
    assert formatted[0]["content"][0]["text"] == "Latest message"

    # Conversation ending on an assistant reply: the final user/assistant pair is forwarded.
    history_with_reply = [
        {"role": "user", "content": [{"text": "First message"}]},
        {"role": "assistant", "content": [{"text": "First response"}]},
        {"role": "user", "content": [{"text": "How are you?"}]},
        {"role": "assistant", "content": [{"text": "I'm good"}]},
    ]
    formatted = model._format_request(history_with_reply)["messages"]
    assert [m["role"] for m in formatted] == ["user", "assistant"]
    assert [m["content"][0]["text"] for m in formatted] == ["How are you?", "I'm good"]


@pytest.mark.asyncio
async def test_stream_backward_compatibility_system_prompt(bedrock_client, model, messages, alist):
"""Test that system_prompt is converted to system_prompt_content when system_prompt_content is None."""
Expand Down
87 changes: 87 additions & 0 deletions tests_integ/test_bedrock_guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,93 @@ def list_users() -> str:
assert tool_result["content"][0]["text"] == INPUT_REDACT_MESSAGE


def test_guardrail_last_turn_only(boto_session, bedrock_guardrail):
    """Test that guardrail_last_turn_only only sends the last turn to guardrails."""
    model = BedrockModel(
        guardrail_id=bedrock_guardrail,
        guardrail_version="DRAFT",
        guardrail_last_turn_only=True,
        boto_session=boto_session,
    )
    agent = Agent(model=model, system_prompt="You are a helpful assistant.", callback_handler=None)

    # Turn 1: benign content passes the guardrail untouched.
    first = agent("Hello, how are you?")
    assert first.stop_reason != "guardrail_intervened"

    # Turn 2: the blocked word trips the guardrail. With last-turn-only enabled,
    # only this message (and the prior assistant reply) is evaluated, not the
    # whole conversation history.
    second = agent("CACTUS")
    assert second.stop_reason == "guardrail_intervened"
    assert str(second).strip() == BLOCKED_INPUT


def test_guardrail_last_turn_only_recovery_scenario(boto_session, bedrock_guardrail):
    """Test guardrail recovery: blocked content followed by normal question.

    This tests the key benefit of guardrail_last_turn_only:
    1. First turn: blocked content triggers guardrail
    2. Second turn: normal question should work because only last turn is analyzed
    """
    model = BedrockModel(
        guardrail_id=bedrock_guardrail,
        guardrail_version="DRAFT",
        guardrail_last_turn_only=True,
        boto_session=boto_session,
    )
    agent = Agent(model=model, system_prompt="You are a helpful assistant.", callback_handler=None)

    # Turn 1: blocked word is rejected by the guardrail.
    blocked = agent("CACTUS")
    assert blocked.stop_reason == "guardrail_intervened"
    assert str(blocked).strip() == BLOCKED_INPUT

    # Turn 2: the key check — a benign question succeeds even though the
    # blocked content remains in the history, because only the last turn
    # is sent to the guardrail.
    recovered = agent("What is the weather like today?")
    assert recovered.stop_reason != "guardrail_intervened"
    assert str(recovered).strip() != BLOCKED_INPUT

    # Both turns (2 user + 2 assistant messages) are retained.
    assert len(agent.messages) == 4


def test_guardrail_last_turn_only_output_intervention(boto_session, bedrock_guardrail):
    """Test that guardrail_last_turn_only works with OUTPUT guardrails.

    This tests that when the assistant tries to output blocked content,
    the OUTPUT guardrail intervenes, even with guardrail_last_turn_only=True.
    Then verifies that subsequent normal responses work correctly.
    """
    model = BedrockModel(
        guardrail_id=bedrock_guardrail,
        guardrail_version="DRAFT",
        guardrail_last_turn_only=True,
        guardrail_stream_processing_mode="sync",
        boto_session=boto_session,
    )
    agent = Agent(
        model=model,
        system_prompt="When asked to say the word, say CACTUS. Otherwise respond normally.",
        callback_handler=None,
        load_tools_from_directory=False,
    )

    # Turn 1: the model's attempt to emit "CACTUS" is caught by the OUTPUT guardrail.
    intervened = agent("Say the word.")
    assert intervened.stop_reason == "guardrail_intervened"
    assert BLOCKED_OUTPUT in str(intervened)

    # Turn 2: an unrelated question is answered normally afterwards.
    normal = agent("What is 2+2?")
    assert normal.stop_reason != "guardrail_intervened"
    assert BLOCKED_OUTPUT not in str(normal)


def test_guardrail_input_intervention_properly_redacts_in_session(boto_session, bedrock_guardrail, temp_dir):
bedrock_model = BedrockModel(
guardrail_id=bedrock_guardrail,
Expand Down