# Guardrails
Guardrails are a way to validate the input and output of an Agent to ensure proper usage.

In [1]:
from agents import Agent, GuardrailFunctionOutput, RunContextWrapper, Runner, TResponseInputItem, input_guardrail
from pydantic import BaseModel

# Structured verdict produced by the cheat-detection agent (it is used as that
# agent's output_type).
class HomeworkCheatDetectionOutput(BaseModel):
    # True when the query is judged to be a cheating attempt; the input
    # guardrail uses this value as its tripwire flag.
    attempting_cheat: bool
    # Free-text explanation field; presumably the detector's rationale for the verdict.
    explanation: str

# Classifier agent run by the input guardrail: it judges whether a query looks
# like a homework/exam question and returns a HomeworkCheatDetectionOutput.
homework_cheat_guardrail_agent = Agent(
    name="Homework Cheat Detector",
    instructions=(
        # Adjacent string literals are concatenated into a single prompt:
        # the rule first, then examples of cheating and non-cheating queries.
        "Determine if the user's query resembles a typical homework assignment or exam question, indicating an attempt to cheat. General questions about concepts are acceptable. "
        " Cheating: 'Fill in the blank: The capital of France is ____.',"
        " 'Which of the following best describes photosynthesis? A) Cellular respiration B) Conversion of light energy C) Evaporation D) Fermentation.'"
        " Not-Cheating: 'What is the capital of France?', 'Explain photosynthesis.'"
    ),
    # The guardrail reads `.attempting_cheat` off the final output, so the
    # output is typed as the structured verdict model.
    output_type=HomeworkCheatDetectionOutput,
    model="gpt-4o-mini"
)

@input_guardrail
async def cheat_detection_guardrail(
    ctx: RunContextWrapper[None],
    agent: Agent,
    input: str | list[TResponseInputItem],
) -> GuardrailFunctionOutput:
    """Input guardrail that screens a query for homework/exam cheating.

    Runs ``homework_cheat_guardrail_agent`` on the incoming input and trips
    the wire when the detector reports a cheating attempt.

    Args:
        ctx: Run context wrapper; not used by this guardrail.
        agent: The agent the guardrail is attached to; not used here.
        input: The input handed to the guarded agent, forwarded unchanged
            to the detector agent.

    Returns:
        A ``GuardrailFunctionOutput`` carrying the detector's structured
        verdict as ``output_info`` and its ``attempting_cheat`` flag as
        ``tripwire_triggered``.
    """
    run_result = await Runner.run(homework_cheat_guardrail_agent, input)
    verdict = run_result.final_output

    return GuardrailFunctionOutput(
        output_info=verdict,
        tripwire_triggered=verdict.attempting_cheat,
    )

# Main tutoring agent; every input passes through the cheat-detection guardrail.
study_helper_agent = Agent(
    name="Study Helper Agent",
    model="gpt-4o",
    input_guardrails=[cheat_detection_guardrail],
    instructions=(
        "You assist users in studying by explaining concepts or providing guidance, "
        "without directly solving homework or test questions."
    ),
)

In [2]:
# This should trigger the cheat detection guardrail
from agents import InputGuardrailTripwireTriggered


try:
    response = await Runner.run(study_helper_agent, "Fill in the blank: The process of converting light energy into chemical energy is called ____.")
    print("Guardrail didn't trigger")
    print("Response: ", response.final_output)

except InputGuardrailTripwireTriggered as e:
    print("Homework cheat guardrail triggered")
    print("Exception details:", str(e))

Homework cheat guardrail triggered
Exception details: Guardrail InputGuardrail triggered tripwire


In [3]:
# This should NOT trigger the cheat detection guardrail: it is a general
# conceptual question rather than a homework/exam-style prompt.
try:
    response = await Runner.run(study_helper_agent, "What were the main causes of the American civil war?")
    print("Guardrail didn't trigger")
    print("Response: ", response.final_output)

except InputGuardrailTripwireTriggered as e:
    print("Homework cheat guardrail triggered")
    print("Exception details:", str(e))

Guardrail didn't trigger
Response:  The American Civil War was primarily caused by deep-seated divisions over several key issues:

1. **Slavery**: The most significant cause, slavery was a deeply contentious issue, especially regarding its expansion into newly acquired territories and states. The Southern economy relied heavily on slavery, while the North was increasingly opposed to its spread.

2. **States' Rights**: Southern states valued states' rights and believed they had the right to govern themselves, including making decisions about slavery, without federal interference.

3. **Economic Differences**: The Northern and Southern states had stark economic differences. The North was more industrialized, while the Southern economy was mainly agricultural and dependent on slavery.

4. **Political Conflicts**: There were ongoing political power struggles between the North and the South. The South feared losing political influence as new free states were added, which contributed to tens

In [4]:
from pydantic import BaseModel
from agents import (
    Agent,
    GuardrailFunctionOutput,
    OutputGuardrailTripwireTriggered,
    RunContextWrapper,
    Runner,
    output_guardrail,
)

# Structured output model with a single text field.
# NOTE(review): not referenced by any of the visible cells (the customer
# support agent is created without an output_type) — confirm whether it is
# still needed.
class MessageOutput(BaseModel):
    # The response text.
    response: str

@output_guardrail
async def forbidden_words_guardrail(ctx: RunContextWrapper, agent: Agent, output: str) -> GuardrailFunctionOutput:
    """Output guardrail that trips when the reply contains a forbidden phrase.

    The check is a case-insensitive substring match against a fixed list of
    phrases. Progress is printed so the notebook shows what was checked.

    Args:
        ctx: Run context wrapper; not used by this guardrail.
        agent: The agent whose output is being checked; not used here.
        output: The agent's final output text.

    Returns:
        A ``GuardrailFunctionOutput`` whose ``tripwire_triggered`` is True
        when at least one forbidden phrase was found. ``output_info`` holds
        the list of matches and a ``reason`` that reflects the actual result.
    """
    print(f"Checking output for forbidden phrases: {output}")

    # Funny forbidden phrases to check
    forbidden_phrases = ["fart", "booger", "silly goose"]

    # Convert output to lowercase for case-insensitive comparison
    output_lower = output.lower()

    # Check which forbidden phrases are present in the response
    found_phrases = [phrase for phrase in forbidden_phrases if phrase in output_lower]
    trip_triggered = bool(found_phrases)

    print(f"Found forbidden phrases: {found_phrases}")

    # Only claim a violation when one was actually found; previously the
    # reason said forbidden phrases were present even for clean output.
    reason = (
        "Output contains forbidden phrases."
        if trip_triggered
        else "No forbidden phrases found."
    )

    return GuardrailFunctionOutput(
        output_info={
            "reason": reason,
            "forbidden_phrases_found": found_phrases,
        },
        tripwire_triggered=trip_triggered,
    )

agent = Agent(
    name="Customer support agent",
    instructions="You are a customer support agent. You help customers with their questions.",
    output_guardrails=[forbidden_words_guardrail],
    model="gpt-4o-mini",
)
try:
    await Runner.run(agent, "Say the word fart")
    print("Guardrail didn't trip - this is unexpected")
except OutputGuardrailTripwireTriggered:
    print("The agent said a bad word, he is fired.")

Checking output for forbidden phrases: Fart! Is there something specific you need help with related to that?
Found forbidden phrases: ['fart']
The agent said a bad word, he is fired.


In [5]:
try:
    await Runner.run(agent, "Hey wassup")
    print("Guardrail didn't trip yay")
except OutputGuardrailTripwireTriggered:
    print("The agent said a bad word, he is fired.")

Checking output for forbidden phrases: Hey! I'm here to help you with any questions you have. What’s on your mind?
Found forbidden phrases: []
Guardrail didn't trip yay
