docs/benchmarking/NSFW_roc_curve.png (binary file modified)
docs/ref/checks/nsfw.md (10 changes: 6 additions & 4 deletions)

@@ -82,10 +82,12 @@ This benchmark evaluates model performance on a balanced set of social media posts

| Model | ROC AUC | Prec@R=0.80 | Prec@R=0.90 | Prec@R=0.95 | Recall@FPR=0.01 |
|--------------|---------|-------------|-------------|-------------|-----------------|
-| gpt-4.1 | 0.989 | 0.976 | 0.962 | 0.962 | 0.717 |
-| gpt-4.1-mini (default) | 0.984 | 0.977 | 0.977 | 0.943 | 0.653 |
-| gpt-4.1-nano | 0.952 | 0.972 | 0.823 | 0.823 | 0.429 |
-| gpt-4o-mini | 0.965 | 0.977 | 0.955 | 0.945 | 0.842 |
+| gpt-5 | 0.9532 | 0.9195 | 0.9096 | 0.9068 | 0.0339 |
+| gpt-5-mini | 0.9629 | 0.9321 | 0.9168 | 0.9149 | 0.0998 |
+| gpt-5-nano | 0.9600 | 0.9297 | 0.9216 | 0.9175 | 0.1078 |
+| gpt-4.1 | 0.9603 | 0.9312 | 0.9249 | 0.9192 | 0.0439 |
+| gpt-4.1-mini (default) | 0.9520 | 0.9180 | 0.9130 | 0.9049 | 0.0459 |
+| gpt-4.1-nano | 0.9502 | 0.9262 | 0.9094 | 0.9043 | 0.0379 |

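For reference, the threshold metrics in this table are derived from per-example confidence scores: Prec@R=0.80 is the precision at an operating point whose recall reaches at least 0.80, and Recall@FPR=0.01 is the recall achievable while keeping the false-positive rate at or below 1%. The sketch below is illustrative only and not part of this change; `y_true` and `y_score` are placeholder arrays standing in for the benchmark labels and model confidences.

```python
import numpy as np
from sklearn.metrics import precision_recall_curve, roc_auc_score, roc_curve

# Placeholder data: 1 = NSFW, 0 = benign; scores are model confidences.
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
y_score = np.array([0.10, 0.40, 0.35, 0.80, 0.90, 0.20, 0.70, 0.05])

roc_auc = roc_auc_score(y_true, y_score)

# Precision at the best threshold that still reaches recall >= 0.80.
precision, recall, _ = precision_recall_curve(y_true, y_score)
prec_at_r80 = precision[recall >= 0.80].max()

# Recall (TPR) at the best threshold with false-positive rate <= 0.01.
fpr, tpr, _ = roc_curve(y_true, y_score)
recall_at_fpr01 = tpr[fpr <= 0.01].max()

print(f"ROC AUC={roc_auc:.3f}, Prec@R=0.80={prec_at_r80:.3f}, Recall@FPR=0.01={recall_at_fpr01:.3f}")
```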
**Notes:**

mkdocs.yml (3 changes: 2 additions & 1 deletion)

@@ -58,13 +58,14 @@ nav:
- "Streaming vs Blocking": streaming_output.md
- Tripwires: tripwires.md
- Checks:
- Prompt Injection Detection: ref/checks/prompt_injection_detection.md
- Contains PII: ref/checks/pii.md
- Custom Prompt Check: ref/checks/custom_prompt_check.md
- Hallucination Detection: ref/checks/hallucination_detection.md
- Jailbreak Detection: ref/checks/jailbreak.md
- Moderation: ref/checks/moderation.md
- NSFW Text: ref/checks/nsfw.md
- Off Topic Prompts: ref/checks/off_topic_prompts.md
- Prompt Injection Detection: ref/checks/prompt_injection_detection.md
- URL Filter: ref/checks/urls.md
- Evaluation Tool: evals.md
- API Reference:
src/guardrails/checks/text/hallucination_detection.py (52 changes: 31 additions & 21 deletions)

@@ -52,7 +52,13 @@
from guardrails.spec import GuardrailSpecMetadata
from guardrails.types import GuardrailLLMContextProto, GuardrailResult

from .llm_base import LLMConfig, LLMOutput, _invoke_openai_callable
from .llm_base import (
LLMConfig,
LLMErrorOutput,
LLMOutput,
_invoke_openai_callable,
create_error_result,
)

logger = logging.getLogger(__name__)

@@ -232,39 +238,43 @@ async def hallucination_detection(
)

except ValueError as e:
# Log validation errors but return safe default
# Log validation errors and use shared error helper
logger.warning(f"Validation error in hallucination_detection: {e}")
return GuardrailResult(
tripwire_triggered=False,
info={
"guardrail_name": "Hallucination Detection",
"flagged": False,
"confidence": 0.0,
error_output = LLMErrorOutput(
flagged=False,
confidence=0.0,
info={"error_message": f"Validation failed: {str(e)}"},
)
return create_error_result(
guardrail_name="Hallucination Detection",
analysis=error_output,
checked_text=candidate,
additional_info={
"threshold": config.confidence_threshold,
"reasoning": f"Validation failed: {str(e)}",
"hallucination_type": None,
"hallucinated_statements": None,
"verified_statements": None,
"threshold": config.confidence_threshold,
"error": str(e),
"checked_text": candidate, # Hallucination Detection doesn't modify text, pass through unchanged
},
)
except Exception as e:
# Log unexpected errors and return safe default
# Log unexpected errors and use shared error helper
logger.exception("Unexpected error in hallucination_detection")
return GuardrailResult(
tripwire_triggered=False,
info={
"guardrail_name": "Hallucination Detection",
"flagged": False,
"confidence": 0.0,
error_output = LLMErrorOutput(
flagged=False,
confidence=0.0,
info={"error_message": str(e)},
)
return create_error_result(
guardrail_name="Hallucination Detection",
analysis=error_output,
checked_text=candidate,
additional_info={
"threshold": config.confidence_threshold,
"reasoning": f"Analysis failed: {str(e)}",
"hallucination_type": None,
"hallucinated_statements": None,
"verified_statements": None,
"threshold": config.confidence_threshold,
"error": str(e),
"checked_text": candidate, # Hallucination Detection doesn't modify text, pass through unchanged
},
)

src/guardrails/checks/text/llm_base.py (64 changes: 49 additions & 15 deletions)

@@ -60,7 +60,13 @@ class MyLLMOutput(LLMOutput):
logger = logging.getLogger(__name__)


__all__ = ["LLMConfig", "LLMOutput", "LLMErrorOutput", "create_llm_check_fn"]
__all__ = [
"LLMConfig",
"LLMOutput",
"LLMErrorOutput",
"create_llm_check_fn",
"create_error_result",
]


class LLMConfig(BaseModel):
Expand Down Expand Up @@ -115,6 +121,44 @@ class LLMErrorOutput(LLMOutput):
info: dict


def create_error_result(
guardrail_name: str,
analysis: LLMErrorOutput,
checked_text: str,
additional_info: dict[str, Any] | None = None,
) -> GuardrailResult:
"""Create a standardized GuardrailResult from an LLM error output.

Args:
guardrail_name: Name of the guardrail that failed.
analysis: The LLM error output.
checked_text: The text that was being checked.
additional_info: Optional additional fields to include in info dict.

Returns:
GuardrailResult with execution_failed=True.
"""
error_info = getattr(analysis, "info", {})
error_message = error_info.get("error_message", "LLM execution failed")

result_info: dict[str, Any] = {
"guardrail_name": guardrail_name,
"checked_text": checked_text,
"error": error_message,
**analysis.model_dump(),
}

if additional_info:
result_info.update(additional_info)

return GuardrailResult(
tripwire_triggered=False,
execution_failed=True,
original_exception=Exception(error_message),
info=result_info,
)

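# Illustrative usage sketch (not part of this diff): a check that has produced an
# LLMErrorOutput can hand it to create_error_result to get a uniform failure
# result. `exc` and `text` below are placeholder names, not identifiers from this PR.
#
#   error_output = LLMErrorOutput(
#       flagged=False,
#       confidence=0.0,
#       info={"error_message": str(exc)},
#   )
#   result = create_error_result(
#       guardrail_name="My Check",
#       analysis=error_output,
#       checked_text=text,
#   )
#   # result.execution_failed is True and result.tripwire_triggered is False,
#   # so callers can distinguish an execution failure from a clean pass.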

def _build_full_prompt(system_prompt: str) -> str:
"""Assemble a complete LLM prompt with instructions and response schema.

@@ -334,20 +378,10 @@ async def guardrail_func(

# Check if this is an error result
if isinstance(analysis, LLMErrorOutput):
# Extract error information from the LLMErrorOutput
error_info = analysis.info if hasattr(analysis, "info") else {}
error_message = error_info.get("error_message", "LLM execution failed")

return GuardrailResult(
tripwire_triggered=False, # Don't trigger tripwire on execution errors
execution_failed=True,
original_exception=Exception(error_message), # Create exception from error message
info={
"guardrail_name": name,
"checked_text": data,
"error": error_message,
**analysis.model_dump(),
},
return create_error_result(
guardrail_name=name,
analysis=analysis,
checked_text=data,
)

# Compare severity levels