## ScenarioJudger

 - Reads a file from S3 containing json compliance scenarios of the format:
```json
{
  "scenarios": [
    {
      "scenario-id": "scenario-id-1",
      "scenario-detail": "A new employee, Sarah Johnson, joins the IT department...",
      "is-compliant": false,
      "non-compliant-reason": "The scenario violates..." 
    },
    {
      "scenario-id": "scenario-id-2", 
      "scenario-detail": "TechCorp implements a comprehensive incident response procedure...",
      "is-compliant": true,
      "non-compliant-reason": "" 
    }
  ]
}
```
 - Evaluates the veracity of each scenario-detail against NIST-based policies retrieved (RAG) from a Bedrock knowledge base, comparing its determination against "is-compliant" in the JSON.
 - When its determination differs, generates json records:
```json
{
  "scenarios": [
    {
      "scenario-id": "scenario-id-1",
      "scenario-detail": "A new employee, Sarah Johnson, joins the IT department...",
      "is-compliant": false,
      "non-compliant-reason": "The scenario violates...",
      "judged-compliant": true,
      "judged-compliant-reason": "Considered the rules AC...  and scenario is not in violation...",
      "llm-judge": "us.anthropic.claude-sonnet-4-20250514-v1:0",
      "judged-dtm": "2025-06-01T14:32:10.123456"
    },
    {
      "scenario-id": "scenario-id-2",
      "scenario-detail": "TechCorp implements a comprehensive incident response procedure...",
      "is-compliant": true,
      "non-compliant-reason": "",
      "judged-compliant": false,
      "judged-compliant-reason": "Scenario violates access control policy...",
      "llm-judge": "us.anthropic.claude-sonnet-4-20250514-v1:0",
      "judged-dtm": "2025-06-01T14:32:41.654321"
    }
  ]
}
```
 - Stores json records back to S3


In [49]:
# Import required libraries
import datetime
import json   # JSON handling
import re     # Extracting referenced policy IDs from scenario text
import time   # For rate limiting between API calls
from typing import List, Dict  # Type hints

import boto3  # AWS SDK for Python

from compliance_calculator import compliance_calculator, CALCULATOR_TOOL

# ============================================================================
# CONFIGURATION SECTION - Update these values
# ============================================================================
# AWS Region
AWS_REGION = 'us-east-1'
# AWS Bedrock Knowledge Base containing NIST policies
KNOWLEDGE_BASE_ID = 'T8EW10IU3Z'

# S3 locations read by the judge
INPUT_BUCKET = '183023889407-us-east-1-compliance-rule-generator'
INPUT_PREFIX = 'scenarios/'  # Folder path in S3 where scenarios are stored
S3_PREFIX_POLICY_MARKDOWN_ALL = 'policies/markdown/all-policies-main/'

# S3 location the judged scenarios are written to
OUTPUT_BUCKET = '183023889407-us-east-1-compliance-rule-generator'
OUTPUT_PREFIX = 'scenarios-judged/'  # Folder path for results

# Inference settings passed to every Converse call
MAX_TOKENS = 4096
TEMPERATURE = 0.7

# Every selectable model is an inference profile under the same account/region,
# so the ARNs are assembled from one shared prefix plus the profile ID.
_INFERENCE_PROFILE_PREFIX = 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/'
_INFERENCE_PROFILE_IDS = {
    'premium': 'global.anthropic.claude-opus-4-5-20251101-v1:0',  # not available
    'good': 'global.anthropic.claude-sonnet-4-5-20250929-v1:0',  # times out
    'balanced': 'us.anthropic.claude-sonnet-4-20250514-v1:0',  # recommended
    'fast_cheap': 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
    'aws_native_premier': 'us.amazon.nova-premier-v1:0',
    'aws_native_pro': 'us.amazon.nova-pro-v1:0',
}

# Available Bedrock model ARNs, keyed by a short quality/cost tier name
MODELS = {
    tier: _INFERENCE_PROFILE_PREFIX + profile_id
    for tier, profile_id in _INFERENCE_PROFILE_IDS.items()
}
MODEL_ARN = MODELS['balanced']  # Default model selection

# Tool definitions handed to the Bedrock Converse API via `toolConfig`.
# Two tools are declared:
#   * judged_scenario_json  - the structured verdict the judge is expected to return
#   * compliance_calculator - unit-aware comparisons the judge may request first

# One verdict entry: a boolean decision plus its textual justification.
_JUDGED_SCENARIO_ITEM_SCHEMA = {
    "type": "object",
    "properties": {
        "judged-compliant": {"type": "boolean"},
        "judged-compliant-reason": {"type": "string"}
    },
    "required": ["judged-compliant", "judged-compliant-reason"]
}

# The verdict tool wraps its entries in a top-level "scenarios" array.
_JUDGED_SCENARIO_TOOL_SPEC = {
    "name": "judged_scenario_json",
    "description": "Return judged compliance scenarios as JSON",
    "inputSchema": {
        "json": {
            "type": "object",
            "properties": {
                "scenarios": {
                    "type": "array",
                    "items": _JUDGED_SCENARIO_ITEM_SCHEMA
                }
            },
            "required": ["scenarios"]
        }
    }
}

# The calculator takes a single free-form comparison expression.
# NOTE(review): CALCULATOR_TOOL imported from compliance_calculator.py presumably
# carries this same toolSpec - confirm before replacing this inline copy with it.
_CALCULATOR_TOOL_SPEC = {
    "name": "compliance_calculator",
    "description": "Calculate and compare values with time, money, data, and percentage units",
    "inputSchema": {
        "json": {
            "type": "object",
            "properties": {
                "expression": {"type": "string", "description": "Expression like '800ms < 1s' or '4m > 3b'"}
            },
            "required": ["expression"]
        }
    }
}

TOOL_CONFIG = {
    "tools": [
        {"toolSpec": _JUDGED_SCENARIO_TOOL_SPEC},
        {"toolSpec": _CALCULATOR_TOOL_SPEC}
    ]
}

# Initialize AWS Bedrock clients.
# Both clients take their region from AWS_REGION so that changing the
# configuration constant actually moves the API calls with it.
bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name=AWS_REGION)  # For knowledge base retrieval
bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)  # For model inference

In [50]:
def load_scenarios_from_s3(input_bucket: str = INPUT_BUCKET, input_prefix: str = INPUT_PREFIX, object_name: str = "scenarios.json") -> List[Dict]:
    """
    Download a scenarios JSON document from S3 and return its scenario list.

    Args:
        input_bucket: Bucket holding the document.
        input_prefix: Key prefix (folder) the object lives under.
        object_name: Object name appended to ``input_prefix`` to form the key.

    Returns:
        The value stored under the document's top-level ``"scenarios"`` key.
    """
    client = boto3.client('s3')
    object_key = input_prefix + object_name
    s3_object = client.get_object(Bucket=input_bucket, Key=object_key)

    # The body is a byte stream; the document is decoded as UTF-8 JSON.
    raw_bytes = s3_object['Body'].read()
    document = json.loads(raw_bytes.decode('utf-8'))
    return document["scenarios"]


In [51]:
def save_scenarios_to_s3(scenarios: List[Dict], output_bucket: str = OUTPUT_BUCKET, output_prefix: str = OUTPUT_PREFIX, object_name: str = "scenarios.json"):
    """
    Upload scenarios to S3 as a JSON document.

    The list is wrapped in an object with a single ``"scenarios"`` key and
    serialized with two-space indentation.

    Args:
        scenarios: Scenario records to store.
        output_bucket: Destination bucket.
        output_prefix: Key prefix (folder) for the object.
        object_name: Object name appended to ``output_prefix`` to form the key.
    """
    client = boto3.client('s3')
    document = {"scenarios": scenarios}
    serialized = json.dumps(document, indent=2)
    destination_key = output_prefix + object_name
    client.put_object(Bucket=output_bucket, Key=destination_key, Body=serialized)

In [52]:
def retrieve_policies_by_id(bucket:str, folder:str, policy_ids: List[str]) -> str:
    """
    Fetch the markdown text of the given policies from S3.

    Each policy is read from the object ``folder + policy_id + ".md"`` and
    rendered as the policy ID, a colon and newline, then the file content.

    Args:
        bucket: Bucket holding the policy markdown files.
        folder: Key prefix the policy files live under.
        policy_ids: IDs of the policies to fetch, in output order.

    Returns:
        The rendered policies joined by blank lines (empty string for no IDs).
    """
    client = boto3.client('s3')

    def _render(policy_id: str) -> str:
        # One S3 read per policy; the body is decoded as UTF-8 text.
        s3_object = client.get_object(Bucket=bucket, Key=folder + policy_id + ".md")
        text = s3_object['Body'].read().decode('utf-8')
        return f"{policy_id}:\n{text}"

    return "\n\n".join(_render(policy_id) for policy_id in policy_ids)
    

In [53]:
def judge_scenarios_old(source_scenarios: List[Dict], model_arn: str, kb_id: str = KNOWLEDGE_BASE_ID) -> List[Dict]:
    """
    Legacy single-shot judge: one Converse call per scenario, no tool loop.

    For each scenario the referenced policy IDs are parsed from the
    ``"Policies referenced: ..."`` line of its ``scenario-detail``, the policy
    markdown is fetched from S3, and the model is asked for a verdict. The
    verdict is read from the ``judged_scenario_json`` tool call in the reply.
    Calculator tool requests are not serviced here; see ``judge_scenarios``
    for the version that handles them.

    Args:
        source_scenarios: Scenario records; each needs a ``scenario-detail``.
        model_arn: Model or inference-profile ARN; the part after the last
            ``/`` is used as the model ID and recorded as ``llm-judge``.
        kb_id: Unused. Policies are read from S3 (``INPUT_BUCKET`` /
            ``S3_PREFIX_POLICY_MARKDOWN_ALL``); kept so existing callers work.

    Returns:
        Copies of the judged scenarios with ``judged-compliant``,
        ``judged-compliant-reason``, ``llm-judge`` and ``judged-dtm`` added.
        Scenarios that reference no policies, or for which the model returned
        no verdict, are reported on stdout and left out.
    """

    # The last path segment of the ARN is what gets passed as modelId
    model_id = model_arn.split('/')[-1]
    policy_pattern = re.compile(r'Policies referenced: (.+)')

    judged_scenarios = []
    for scenario in source_scenarios:
        judged_scenario = scenario.copy()

        # Extract policy IDs from the scenario text and pull the policy documents
        policy_match = policy_pattern.search(scenario["scenario-detail"])
        if not policy_match:
            print(f"No policies referenced in scenario: {scenario['scenario-detail']}")
            continue
        policy_ids = [p.strip() for p in policy_match.group(1).split(',') if p.strip()]
        # Arguments follow retrieve_policies_by_id(bucket, folder, policy_ids)
        retrieved_policies = retrieve_policies_by_id(INPUT_BUCKET, S3_PREFIX_POLICY_MARKDOWN_ALL, policy_ids)

        prompt = f"""
        You are **ComplianceEvaluator**, an expert AI compliance analyst specializing in NIST 800-53 controls and policies. 
        Your mission is to judge organizational policy scenarios against reference policies stored in your knowledge base.
                
        **Your Expertise:**
        - Deep understanding of all NIST 800-53 Rev. 5 control families (AC, AT, AU, CA, CM, CP, IA, IR, MA, MP, PE, PL, PM, PS, PT, RA, SA, SC, SI, SR)
        - Policy-to-control mapping and compliance evaluation
        - Evidence-focused assessment methodology

        **Task:** Judge if the scenario complies with ALL referenced policies from your knowledge base.

        **Avoid judging scenarios based on cost-benefit principles or concentration percentages.
    
        **Note that non-US citizens cannot obtain US security clearances.**
        
        **Response Format:**
        {{
          "judged-compliant": true/false, true if you determined the scenario is compliant with the organizational 
        policies stored in your knowledge base.  false if the scenario is not compliant.
          "judged-compliant-reason": "Empty if compliant. If the scenario is not compliant, explain very briefly why it is not compliant, citing
          exactly the policy ID(s) is violates, followed by the extracted policy text that indicates non-compliance."
        }}

        **Evaluate scenario against this policy data**:
        {retrieved_policies}

        **Here is the actual compliance scenario to judge**:
        {scenario["scenario-detail"]}
        """
        response = bedrock_runtime.converse(
            modelId=model_id,
            messages=[{"role": "user", "content": [{"text": prompt}]}],
            toolConfig=TOOL_CONFIG,
            inferenceConfig={
                "maxTokens": MAX_TOKENS,
                "temperature": TEMPERATURE
            }
        )

        # The reply may open with a text block or a calculator request, so look
        # for the verdict tool call instead of assuming it is the first block.
        verdict = None
        for content_block in response['output']['message']['content']:
            tool_use = content_block.get('toolUse')
            if tool_use and tool_use['name'] == 'judged_scenario_json':
                verdicts = tool_use['input'].get('scenarios') or []
                verdict = verdicts[0] if verdicts else None
                break

        if verdict is None:
            print(f"No judgment returned for scenario: {scenario.get('scenario-id', '<unknown>')}")
            continue

        judged_scenario["judged-compliant"] = verdict['judged-compliant']
        judged_scenario["judged-compliant-reason"] = verdict['judged-compliant-reason']

        # Record the model that actually judged, not the module-level default
        judged_scenario["llm-judge"] = model_id
        judged_scenario["judged-dtm"] = datetime.datetime.now().isoformat()
        judged_scenarios.append(judged_scenario)

    return judged_scenarios

In [54]:
def _request_verdict(model_id: str, prompt: str, max_rounds: int = 10):
    """
    Run the Converse tool loop for one scenario and return the model's verdict.

    Each round sends the conversation so far. ``compliance_calculator``
    requests are evaluated locally and their results fed back to the model;
    the loop ends as soon as the model calls ``judged_scenario_json``.

    Args:
        model_id: Model ID passed to ``converse``.
        prompt: Full judging prompt for the scenario.
        max_rounds: Upper bound on Converse calls, so a model that keeps
            requesting calculations cannot loop forever.

    Returns:
        The first entry of the ``scenarios`` array given to
        ``judged_scenario_json``, or ``None`` when the model stopped without
        calling it, returned an empty array, or used up ``max_rounds``.
    """
    messages = [{"role": "user", "content": [{"text": prompt}]}]

    for _ in range(max_rounds):
        response = bedrock_runtime.converse(
            modelId=model_id,
            messages=messages,
            toolConfig=TOOL_CONFIG,
            inferenceConfig={
                "maxTokens": MAX_TOKENS,
                "temperature": TEMPERATURE
            }
        )

        # Any stop reason other than a tool request means no verdict is coming
        if response['stopReason'] != 'tool_use':
            return None

        assistant_content = response['output']['message']['content']
        tool_results = []
        for content_block in assistant_content:
            if 'toolUse' not in content_block:
                continue
            tool_use = content_block['toolUse']
            tool_name = tool_use['name']

            if tool_name == 'judged_scenario_json':
                # Returning here ends the whole conversation. Continuing would
                # send a follow-up turn with no toolResult for this toolUse.
                verdicts = tool_use['input'].get('scenarios') or []
                return verdicts[0] if verdicts else None

            if tool_name == 'compliance_calculator':
                expression = tool_use['input']['expression']
                result = compliance_calculator(expression)
                print("=" * 60)
                print(f"Compliance calculator expression: {expression}" )
                print(f"Compliance calculator result: {result}" )
                print("=" * 60)
                tool_results.append({
                    "toolResult": {
                        "toolUseId": tool_use['toolUseId'],
                        # A text content block must carry a string
                        "content": [{"text": str(result)}]
                    }
                })

        if not tool_results:
            return None

        # Feed the calculator output back and let the model continue
        messages.append({"role": "assistant", "content": assistant_content})
        messages.append({"role": "user", "content": tool_results})

    return None


def judge_scenarios(source_scenarios: List[Dict], model_arn: str, kb_id: str = KNOWLEDGE_BASE_ID) -> List[Dict]:
    """
    Judge each scenario against its referenced policies and add judgment fields.

    For each scenario the referenced policy IDs are parsed from the
    ``"Policies referenced: ..."`` line of its ``scenario-detail``, the policy
    markdown is fetched from S3, and the model is asked for a verdict. The
    model may call the calculator tool any number of times (up to the round
    limit) before returning its verdict.

    Args:
        source_scenarios: Scenario records; each needs a ``scenario-detail``.
        model_arn: Model or inference-profile ARN; the part after the last
            ``/`` is used as the model ID and recorded as ``llm-judge``.
        kb_id: Unused. Policies are read from S3 (``INPUT_BUCKET`` /
            ``S3_PREFIX_POLICY_MARKDOWN_ALL``); kept so existing callers work.

    Returns:
        Copies of the judged scenarios with ``judged-compliant``,
        ``judged-compliant-reason``, ``llm-judge`` and ``judged-dtm`` added.
        Scenarios that reference no policies, or for which the model returned
        no verdict, are reported on stdout and left out, so every returned
        record carries the judgment fields.
    """

    # The last path segment of the ARN is what gets passed as modelId
    model_id = model_arn.split('/')[-1]
    policy_pattern = re.compile(r'Policies referenced: (.+)')

    judged_scenarios = []
    for scenario in source_scenarios:
        judged_scenario = scenario.copy()

        # Extract policy IDs from the scenario text and pull the policy documents
        policy_match = policy_pattern.search(scenario["scenario-detail"])
        if not policy_match:
            print(f"No policies referenced in scenario: {scenario['scenario-detail']}")
            continue
        policy_ids = [p.strip() for p in policy_match.group(1).split(',') if p.strip()]
        retrieved_policies = retrieve_policies_by_id(INPUT_BUCKET, S3_PREFIX_POLICY_MARKDOWN_ALL, policy_ids)

        prompt = f"""
        You are **ComplianceEvaluator**, an expert AI compliance analyst specializing in NIST 800-53 controls and policies. 
        Your mission is to judge organizational policy scenarios against reference policies stored in your knowledge base.
                
        **Your Expertise:**
        - Deep understanding of all NIST 800-53 Rev. 5 control families (AC, AT, AU, CA, CM, CP, IA, IR, MA, MP, PE, PL, PM, PS, PT, RA, SA, SC, SI, SR)
        - Policy-to-control mapping and compliance evaluation
        - Evidence-focused assessment methodology

        **Task:** Judge if the scenario complies with ALL referenced policies from your knowledge base.

        **Avoid judging scenarios based on cost-benefit principles or concentration percentages.
    
        **Note that non-US citizens cannot obtain US security clearances.**
        
        **Response Format:**
        {{
          "judged-compliant": true/false, true if you determined the scenario is compliant with the organizational 
        policies stored in your knowledge base.  false if the scenario is not compliant.
          "judged-compliant-reason": "Empty if compliant. If the scenario is not compliant, explain very briefly why it is not compliant, citing
          exactly the policy ID(s) is violates, followed by the extracted policy text that indicates non-compliance."
        }}

        **Evaluate scenario against this policy data**:
        {retrieved_policies}

        **Here is the actual compliance scenario to judge**:
        {scenario["scenario-detail"]}
        """

        verdict = _request_verdict(model_id, prompt)
        if verdict is None:
            # Without a verdict the record would lack the judged-* fields that
            # downstream statistics read, so it is reported and left out.
            print(f"No judgment returned for scenario: {scenario.get('scenario-id', '<unknown>')}")
            continue

        judged_scenario["judged-compliant"] = verdict['judged-compliant']
        judged_scenario["judged-compliant-reason"] = verdict['judged-compliant-reason']

        # Record the model that actually judged, not the module-level default
        judged_scenario["llm-judge"] = model_id
        judged_scenario["judged-dtm"] = datetime.datetime.now().isoformat()
        judged_scenarios.append(judged_scenario)

    return judged_scenarios

In [55]:
def save_scenarios_to_file(scenarios: List[Dict], output_path: str):
    """
    Print the scenarios and write them, with summary counts, to a JSON file.

    The file holds the total number of scenarios, how many are labelled
    compliant / non-compliant (``is-compliant``), how many were judged
    compliant / non-compliant (``judged-compliant``), and the scenarios
    themselves. A record that lacks one of those flags is counted in neither
    bucket for that flag rather than aborting the save.

    Args:
        scenarios: Scenario records to report and store.
        output_path: Path of the JSON file to write (overwritten if present).
    """

    def _tally(flag: str, expected: bool) -> int:
        # Count records that carry `flag` with the expected truthiness
        return sum(1 for s in scenarios if flag in s and bool(s[flag]) is expected)

    # Print scenarios to console for immediate review
    print(json.dumps(scenarios, indent=2))

    # Save to file with metadata and statistics
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump({
            'total_scenarios': len(scenarios),
            'compliant_count': _tally('is-compliant', True),
            'non_compliant_count': _tally('is-compliant', False),
            'judged compliant_count': _tally('judged-compliant', True),
            'judged non_compliant_count': _tally('judged-compliant', False),
            'scenarios': scenarios
        }, f, indent=2)

In [56]:
# Object names for the raw scenarios (input) and the judged scenarios (output)
source_scenarios_file = "scenarios.json"
judged_scenarios_file = "judged_scenarios.json"

# 1. Pull the source scenarios from S3
source_scenarios = load_scenarios_from_s3(
    input_bucket=INPUT_BUCKET,
    input_prefix=INPUT_PREFIX,
    object_name=source_scenarios_file,
)

# 2. Judge every scenario with the default model
judged_scenarios = judge_scenarios(source_scenarios, model_arn=MODEL_ARN, kb_id=KNOWLEDGE_BASE_ID)

# 3. Keep a local copy with summary counts, then publish the results to S3
save_scenarios_to_file(judged_scenarios, output_path=f"/home/sagemaker-user/{judged_scenarios_file}")
save_scenarios_to_s3(
    judged_scenarios,
    output_bucket=OUTPUT_BUCKET,
    output_prefix=OUTPUT_PREFIX,
    object_name=judged_scenarios_file,
)



[
  {
    "scenario-id": "scenario-id-1",
    "scenario-detail": "TechGuard Financial Services, a major financial institution with 12,000+ employees processing customer PII across 45 branch locations, implemented a comprehensive privacy and security framework for their new mobile banking platform serving 2.8 million customers. The Chief Privacy Officer established rigorous PII disclosure accounting per PM-21, requiring all customer data disclosures to regulatory agencies, credit bureaus, and third-party processors be recorded within 24 hours in their automated Privacy Management System, with detailed records including disclosure date, specific data elements shared (account balances, transaction history, personal identifiers), business purpose (regulatory compliance, credit verification, fraud prevention), and complete recipient contact information including designated privacy officers at each receiving organization. The system maintains comprehensive audit trails of all record access a