From c5b21fc11789ed97155ec5abb1857cf551aaa314 Mon Sep 17 00:00:00 2001
From: Steven C <steven@sandboxr.org>
Date: Thu, 30 Oct 2025 10:01:25 -0400
Subject: [PATCH 1/2] Support prompt usage

---
 docs/benchmarking/nsfw.md | 31 -------------------------------
 src/guardrails/agents.py  | 20 ++++++++++++++++----
 2 files changed, 16 insertions(+), 35 deletions(-)
 delete mode 100644 docs/benchmarking/nsfw.md

diff --git a/docs/benchmarking/nsfw.md b/docs/benchmarking/nsfw.md
deleted file mode 100644
index df331e3..0000000
--- a/docs/benchmarking/nsfw.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# NSFW Text Check Benchmark Results
-
-## Dataset Description
-
-This benchmark evaluates model performance on a balanced set of social media posts:
-
-- Open Source [Toxicity dataset](https://github.com/surge-ai/toxicity/blob/main/toxicity_en.csv)
-- 500 NSFW (true) and 500 non-NSFW (false) samples
-- All samples are sourced from real social media platforms
-
-**Total n = 1,000; positive class prevalence = 500 (50.0%)**
-
-## Results
-
-### ROC Curve
-
-![ROC Curve](./NSFW_roc_curve.png)
-
-### Metrics Table
-
-| Model         | ROC AUC | Prec@R=0.80 | Prec@R=0.90 | Prec@R=0.95 | Recall@FPR=0.01 |
-|--------------|---------|-------------|-------------|-------------|-----------------|
-| gpt-4.1      | 0.989   | 0.976       | 0.962       | 0.962       | 0.717           |
-| gpt-4.1-mini | 0.984   | 0.977       | 0.977       | 0.943       | 0.653           |
-| gpt-4.1-nano | 0.952   | 0.972       | 0.823       | 0.823       | 0.429           |
-| gpt-4o-mini  | 0.965   | 0.977       | 0.955       | 0.945       | 0.842           |
-
-#### Notes
-- ROC AUC: Area under the ROC curve (higher is better)
-- Prec@R: Precision at the specified recall threshold
-- Recall@FPR=0.01: Recall when the false positive rate is 1%
diff --git a/src/guardrails/agents.py b/src/guardrails/agents.py
index 0645081..0dbe077 100644
--- a/src/guardrails/agents.py
+++ b/src/guardrails/agents.py
@@ -492,7 +492,7 @@ def __new__(
         cls,
         config: str | Path | dict[str, Any],
         name: str,
-        instructions: str,
+        instructions: str | Callable[[Any, Any], Any] | None = None,
         raise_guardrail_errors: bool = False,
         block_on_tool_violations: bool = False,
         **agent_kwargs: Any,
@@ -511,7 +511,9 @@ def __new__(
         Args:
             config: Pipeline configuration (file path, dict, or JSON string)
             name: Agent name
-            instructions: Agent instructions
+            instructions: Agent instructions. Can be a string, a callable that dynamically
+                generates instructions, or None (the default). A callable receives the run context
+                and the agent instance and must return a string; sync and async callables both work.
             raise_guardrail_errors: If True, raise exceptions when guardrails fail to execute.
                 If False (default), treat guardrail errors as safe and continue execution.
             block_on_tool_violations: If True, tool guardrail violations raise exceptions (halt execution).
@@ -553,7 +555,11 @@ def __new__(
         input_tool, input_agent = _separate_tool_level_from_agent_level(stage_guardrails.get("input", []))
         output_tool, output_agent = _separate_tool_level_from_agent_level(stage_guardrails.get("output", []))
 
-        # Create agent-level INPUT guardrails
+        # Extract any user-provided guardrails from agent_kwargs
+        user_input_guardrails = agent_kwargs.pop("input_guardrails", [])
+        user_output_guardrails = agent_kwargs.pop("output_guardrails", [])
+
+        # Create agent-level INPUT guardrails from config
         input_guardrails = []
 
         # Add agent-level guardrails from pre_flight and input stages
@@ -573,7 +579,10 @@ def __new__(
                 )
             )
 
-        # Create agent-level OUTPUT guardrails
+        # Merge with user-provided input guardrails (config ones run first, then user ones)
+        input_guardrails.extend(user_input_guardrails)
+
+        # Create agent-level OUTPUT guardrails from config
         output_guardrails = []
         if output_agent:
             output_guardrails = _create_agents_guardrails_from_config(
@@ -583,6 +592,9 @@ def __new__(
                 raise_guardrail_errors=raise_guardrail_errors,
             )
 
+        # Merge with user-provided output guardrails (config ones run first, then user ones)
+        output_guardrails.extend(user_output_guardrails)
+
         # Apply tool-level guardrails
         tools = agent_kwargs.get("tools", [])
 

From b6997794772dc73f73740c8eaf6b0c03a1fb75eb Mon Sep 17 00:00:00 2001
From: Steven C <steven@sandboxr.org>
Date: Thu, 30 Oct 2025 10:11:11 -0400
Subject: [PATCH 2/2] adding tests

---
 tests/unit/test_agents.py | 137 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/tests/unit/test_agents.py b/tests/unit/test_agents.py
index 0aa1c4d..d9202fe 100644
--- a/tests/unit/test_agents.py
+++ b/tests/unit/test_agents.py
@@ -597,3 +597,140 @@ def test_guardrail_agent_without_tools(monkeypatch: pytest.MonkeyPatch) -> None:
     agent_instance = agents.GuardrailAgent(config={}, name="NoTools", instructions="None")
 
     assert getattr(agent_instance, "input_guardrails", []) == []  # noqa: S101
+
+
+def test_guardrail_agent_without_instructions(monkeypatch: pytest.MonkeyPatch) -> None:
+    """GuardrailAgent should be constructible without ``instructions``, leaving them as None."""
+    pipeline = SimpleNamespace(pre_flight=None, input=None, output=None)
+
+    monkeypatch.setattr(runtime_module, "load_pipeline_bundles", lambda config: pipeline, raising=False)
+    monkeypatch.setattr(runtime_module, "instantiate_guardrails", lambda *args, **kwargs: [], raising=False)
+
+    # Omitting ``instructions`` entirely must not raise TypeError for a missing argument
+    agent_instance = agents.GuardrailAgent(config={}, name="NoInstructions")
+
+    assert isinstance(agent_instance, agents_module.Agent)  # noqa: S101
+    assert agent_instance.instructions is None  # noqa: S101
+
+
+def test_guardrail_agent_with_callable_instructions(monkeypatch: pytest.MonkeyPatch) -> None:
+    """GuardrailAgent should accept callable instructions and expose that callable on the agent."""
+    pipeline = SimpleNamespace(pre_flight=None, input=None, output=None)
+
+    monkeypatch.setattr(runtime_module, "load_pipeline_bundles", lambda config: pipeline, raising=False)
+    monkeypatch.setattr(runtime_module, "instantiate_guardrails", lambda *args, **kwargs: [], raising=False)
+
+    def dynamic_instructions(ctx: Any, agent: Any) -> str:
+        return f"You are {agent.name}"
+
+    agent_instance = agents.GuardrailAgent(
+        config={},
+        name="DynamicAgent",
+        instructions=dynamic_instructions,
+    )
+
+    assert isinstance(agent_instance, agents_module.Agent)  # noqa: S101
+    assert callable(agent_instance.instructions)  # noqa: S101
+    assert agent_instance.instructions == dynamic_instructions  # noqa: S101
+
+
+def test_guardrail_agent_merges_user_input_guardrails(monkeypatch: pytest.MonkeyPatch) -> None:
+    """User input guardrails should be appended after the config-derived input guardrails."""
+    agent_guard = _make_guardrail("Config Input Guard")
+
+    class FakePipeline:
+        def __init__(self) -> None:
+            self.pre_flight = None
+            self.input = SimpleNamespace()
+            self.output = None
+
+    pipeline = FakePipeline()
+
+    def fake_load_pipeline_bundles(config: Any) -> FakePipeline:
+        return pipeline
+
+    def fake_instantiate_guardrails(stage: Any, registry: Any | None = None) -> list[Any]:
+        if stage is pipeline.input:
+            return [agent_guard]
+        return []
+
+    from guardrails import runtime as runtime_module
+
+    monkeypatch.setattr(runtime_module, "load_pipeline_bundles", fake_load_pipeline_bundles)
+    monkeypatch.setattr(runtime_module, "instantiate_guardrails", fake_instantiate_guardrails)
+
+    # Create a custom user guardrail
+    custom_guardrail = lambda ctx, agent, input: None  # noqa: E731
+
+    agent_instance = agents.GuardrailAgent(
+        config={},
+        name="MergedAgent",
+        instructions="Test",
+        input_guardrails=[custom_guardrail],
+    )
+
+    # Both guardrails must be present, with the config one first and the user one last
+    assert isinstance(agent_instance, agents_module.Agent)  # noqa: S101
+    assert len(agent_instance.input_guardrails) == 2  # noqa: S101
+    assert agent_instance.input_guardrails[-1] is custom_guardrail  # noqa: S101
+
+
+def test_guardrail_agent_merges_user_output_guardrails(monkeypatch: pytest.MonkeyPatch) -> None:
+    """User output guardrails should be appended after the config-derived output guardrails."""
+    agent_guard = _make_guardrail("Config Output Guard")
+
+    class FakePipeline:
+        def __init__(self) -> None:
+            self.pre_flight = None
+            self.input = None
+            self.output = SimpleNamespace()
+
+    pipeline = FakePipeline()
+
+    def fake_load_pipeline_bundles(config: Any) -> FakePipeline:
+        return pipeline
+
+    def fake_instantiate_guardrails(stage: Any, registry: Any | None = None) -> list[Any]:
+        if stage is pipeline.output:
+            return [agent_guard]
+        return []
+
+    from guardrails import runtime as runtime_module
+
+    monkeypatch.setattr(runtime_module, "load_pipeline_bundles", fake_load_pipeline_bundles)
+    monkeypatch.setattr(runtime_module, "instantiate_guardrails", fake_instantiate_guardrails)
+
+    # Create a custom user guardrail
+    custom_guardrail = lambda ctx, agent, output: None  # noqa: E731
+
+    agent_instance = agents.GuardrailAgent(
+        config={},
+        name="MergedAgent",
+        instructions="Test",
+        output_guardrails=[custom_guardrail],
+    )
+
+    # Both guardrails must be present, with the config one first and the user one last
+    assert isinstance(agent_instance, agents_module.Agent)  # noqa: S101
+    assert len(agent_instance.output_guardrails) == 2  # noqa: S101
+    assert agent_instance.output_guardrails[-1] is custom_guardrail  # noqa: S101
+
+
+def test_guardrail_agent_with_empty_user_guardrails(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Explicitly empty user guardrail lists should leave both agent guardrail lists empty."""
+    pipeline = SimpleNamespace(pre_flight=None, input=None, output=None)
+
+    monkeypatch.setattr(runtime_module, "load_pipeline_bundles", lambda config: pipeline, raising=False)
+    monkeypatch.setattr(runtime_module, "instantiate_guardrails", lambda *args, **kwargs: [], raising=False)
+
+    agent_instance = agents.GuardrailAgent(
+        config={},
+        name="EmptyListAgent",
+        instructions="Test",
+        input_guardrails=[],
+        output_guardrails=[],
+    )
+
+    assert isinstance(agent_instance, agents_module.Agent)  # noqa: S101
+    assert agent_instance.input_guardrails == []  # noqa: S101
+    assert agent_instance.output_guardrails == []  # noqa: S101