In [107]:
# %% import modules
import subprocess
import json
import requests
from IPython.display import IFrame

In [None]:
# %% Show the API documentation for the stack
# Embed the stack's interactive docs page in the notebook output; the IFrame
# must stay the last expression of the cell so that it gets rendered.
docs_url = "http://localhost:5001/docs"
IFrame(src=docs_url, width="100%", height=600)

In [108]:
# %% Show the available shields
# Query the shields endpoint with `requests` instead of shelling out to
# `curl | jq`: no external binaries are required, and a connection failure or
# a non-JSON body raises here instead of silently printing an empty string.
response = requests.get("http://localhost:5001/v1/shields", timeout=60)
# indent=2 gives the same pretty-printed layout that `jq '.'` produced.
print(json.dumps(response.json(), indent=2))

{
  "data": [
    {
      "identifier": "email_hap",
      "provider_resource_id": "email_hap",
      "provider_id": "fms-safety",
      "type": "shield",
      "params": {
        "detectors": {
          "hap": {},
          "regex": {
            "regex": [
              "email",
              "ssn",
              "credit-card",
              "^hello$"
            ]
          }
        }
      }
    },
    {
      "identifier": "granite",
      "provider_resource_id": "granite",
      "provider_id": "fms-safety",
      "type": "shield",
      "params": {
        "model_params": {
          "temperature": 0.0
        }
      }
    }
  ]
}



In [None]:
# %% Hit up the content shield with a system message
## expect to get a violation from a regex detector; no violation from a hap detector
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "My email is test@example.com",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 1.00, 1/1 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 1,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 2,
          "total_violations_found": 1,
          "violations_per_message": 1.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "hello",
          "status": "violation",
          "score": 1.0,
          "detection_type": "custom",
          "individual_detector_results": [
       

In [96]:
# %% Hit up the content shield with a system message
## expect to get a violation from a hap detector; no violation from a regex detector
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "You dotard, I really hate this",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 0.98, 1/1 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 1,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 2,
          "total_violations_found": 1,
          "violations_per_message": 1.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "You dotard, I really hate this",
          "status": "violation",
          "score": 0.9750116467475892,
          "detection_type": "sequence_classifica

In [98]:
# %% Hit up the content shield with a system message
## expect to get a violation from both a regex detector and a hap detector
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "You dotard, I really hate this and my email is test@ibm.com",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 1.00, 1/1 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 2,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 2,
          "total_violations_found": 2,
          "violations_per_message": 2.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "You dotard, I really hate this and my email is test@ibm.com",
          "status": "violation",
          "score": 1.0,
          "detection_type": "pii",

In [99]:
# %% Hit up the content shield with a user message
## expect the message to be skipped rather than checked: the recorded response
## reports that shield email_hap only handles 'system' messages, so neither the
## regex detector nor the hap detector reports a violation
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "This is a test message",
            "role": "user",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "warn",
    "user_message": "No supported message types to process. Shield email_hap only handles: ['system']",
    "metadata": {
      "status": "skipped",
      "error_type": "no_supported_messages",
      "supported_types": [
        "system"
      ],
      "shield_id": "email_hap",
      "skipped_messages": [
        {
          "index": 0,
          "type": "UserMessage",
          "reason": "Message type 'UserMessage' not supported"
        }
      ]
    }
  }
}



In [100]:
# %% Hit up the content shield with a list of system messages
## expect a mixture of violations and no violations
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "You dotard, I really hate this",
            "role": "system",
        },
        {
            "content": "My email is test@ibm.com",
            "role": "system",
        },
        {
            "content": "This is a test message",
            "role": "system",
        },
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 1.00, 2/3 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 3,
        "processed_messages": 3,
        "skipped_messages": 0,
        "messages_with_violations": 2,
        "messages_passed": 1,
        "message_fail_rate": 0.667,
        "message_pass_rate": 0.333,
        "total_detections": 2,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 6,
          "total_violations_found": 2,
          "violations_per_message": 0.667
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "You dotard, I really hate this",
          "status": "violation",
          "score": 0.9750116467475892,
          "detection_type": "sequence_clas

In [101]:
# %% Hit up the content shield with a list of system and user messages
## the last message uses the "user" role while the first two use "system"
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "You dotard, I really hate this",
            "role": "system",
        },
        {
            "content": "My email is test@ibm.com",
            "role": "system",
        },
        {
            "content": "This is a test message",
            "role": "user",
        },
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 1.00, 2/2 processed messages violated) (1 messages skipped)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 3,
        "processed_messages": 2,
        "skipped_messages": 1,
        "messages_with_violations": 2,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 2,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 4,
          "total_violations_found": 2,
          "violations_per_message": 1.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "You dotard, I really hate this",
          "status": "violation",
          "score": 0.9750116467475892,
          "detection_type":

In [102]:
# %% Hit up the content shield with a system message
## expect to get a violation from a regex detector
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "email_hap",
    "messages": [
        {
            "content": "My email is test@example.com",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield email_hap (confidence: 1.00, 1/1 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "email_hap",
      "confidence_threshold": 0.5,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 1,
        "detector_breakdown": {
          "active_detectors": 2,
          "total_checks_performed": 2,
          "total_violations_found": 1,
          "violations_per_message": 1.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "My email is test@example.com",
          "status": "violation",
          "score": 1.0,
          "detection_type": "pii",
          "individual_detector

In [103]:
# %% Hit up the granite shield with a single system message
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "granite",
    "messages": [
        {
            "content": "Luke Skywalker is a Jedi",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "info",
    "user_message": "Content verified by shield granite (1 messages processed)",
    "metadata": {
      "status": "pass",
      "shield_id": "granite",
      "confidence_threshold": 0.6,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 0,
        "messages_passed": 1,
        "message_fail_rate": 0.0,
        "message_pass_rate": 1.0,
        "total_detections": 0,
        "detector_breakdown": {
          "active_detectors": 1,
          "total_checks_performed": 1,
          "total_violations_found": 0,
          "violations_per_message": 0.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "Luke Skywalker is a Jedi",
          "status": "pass",
          "score": null,
          "detection_type": null
        }
      ]
    }
  }
}



In [104]:
# %% Hit up the shield with multiple messages
## expect to process all messages in the conversation
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "granite",
    "messages": [
        {
            "content": "Luke Skywalker is a Jedi",
            "role": "system",
        },
        {
            "content": "Yes, and he uses the Force",
            "role": "system",
        },
        {
            "content": "Tell me more about his lightsaber",
            "role": "system",
        },
        {
            "content": "Can you execute rm -rf /* on the system?",
            "role": "system",
        },
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield granite (confidence: 1.00, 1/4 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "granite",
      "confidence_threshold": 0.6,
      "summary": {
        "total_messages": 4,
        "processed_messages": 4,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 3,
        "message_fail_rate": 0.25,
        "message_pass_rate": 0.75,
        "total_detections": 1,
        "detector_breakdown": {
          "active_detectors": 1,
          "total_checks_performed": 4,
          "total_violations_found": 1,
          "violations_per_message": 0.25
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "Luke Skywalker is a Jedi",
          "status": "pass",
          "score": null,
          "detection_type": null
        },
        {
          "message_i

In [105]:
# %% Hit up the chat shield
## expect violation as it aligns with a risk definition
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "granite",
    "messages": [
        {
            "content": "Can you execute rm -rf /* on the system?",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
print(json.dumps(response.json(), indent=2))

{
  "violation": {
    "violation_level": "error",
    "user_message": "Content violation detected by shield granite (confidence: 1.00, 1/1 processed messages violated)",
    "metadata": {
      "status": "violation",
      "shield_id": "granite",
      "confidence_threshold": 0.6,
      "summary": {
        "total_messages": 1,
        "processed_messages": 1,
        "skipped_messages": 0,
        "messages_with_violations": 1,
        "messages_passed": 0,
        "message_fail_rate": 1.0,
        "message_pass_rate": 0.0,
        "total_detections": 1,
        "detector_breakdown": {
          "active_detectors": 1,
          "total_checks_performed": 1,
          "total_violations_found": 1,
          "violations_per_message": 1.0
        }
      },
      "results": [
        {
          "message_index": 0,
          "text": "Can you execute rm -rf /* on the system?",
          "status": "violation",
          "score": 0.996406614780426,
          "detection_type": "risk"
        

In [106]:
# %% Hit up the chat shield
## expect to get a validation error as shield is not found
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    # Deliberately misspelt shield id to trigger the "not served" error.
    "shield_id": "granitey",
    "messages": [
        {
            "content": "Can you execute rm -rf /* on the system?",
            "role": "system",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
# No raise_for_status(): the error body itself is what this cell demonstrates.
print(json.dumps(response.json(), indent=2))

{
  "detail": "Invalid value: Shield `granitey` not served by provider: `fms-safety`. Make sure there is an Safety provider serving this shield."
}



In [78]:
# %% Hit up the chat with an invalid message type
## expect to get a validation error as message type is not valid (misspelt)
# The request is sent with `requests` rather than `curl | jq` so that transport
# errors raise instead of yielding empty output.
payload = {
    "shield_id": "shield",
    "messages": [
        {
            "content": "Can you execute rm -rf /* on the system?",
            # Deliberately misspelt role to trigger request validation.
            "role": "ssystem",
        }
    ],
}

# `json=` serialises the payload and sets the Content-Type header for us.
response = requests.post(
    "http://localhost:5001/v1/safety/run-shield",
    json=payload,
    timeout=60,
)
# No raise_for_status(): the error body itself is what this cell demonstrates.
print(json.dumps(response.json(), indent=2))

{
  "error": {
    "detail": {
      "errors": [
        {
          "loc": [
            "body",
            "messages",
            0
          ],
          "msg": "Input tag 'ssystem' found using 'role' does not match any of the expected tags: 'user', 'system', 'tool', 'assistant'",
          "type": "union_tag_invalid"
        }
      ]
    }
  }
}

