### This notebook generates realistic compliance evaluation scenarios using Amazon Bedrock's Retrieval-Augmented Generation (RAG) capabilities, grounded in the NIST control framework and the organizational policies written against that framework. It generates 1,000 complex, multi-policy scenarios (500 compliant, 500 non-compliant) that simulate real-world compliance situations such as employee onboarding, data access requests, and security incidents.


In [67]:
import boto3
import json
import time
import re
from typing import List, Dict

# Identifier of the Bedrock knowledge base that retrieve_and_generate queries.
# The trailing comment presumably records the knowledge base's name — confirm in the console.
KNOWLEDGE_BASE_ID = 'T8EW10IU3Z'  # 183023889407-us-east-1-compliance-rule-generator-kb-s3
# Candidate generation models, keyed by an informal cost/quality tier.
# Each value is an inference-profile ARN in us-east-1; the trailing comments
# record problems observed with that entry.
MODELS = {
    'premium': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/global.anthropic.claude-opus-4-5-20251101-v1:0', # not available
    'good': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0', # times out
    'balanced': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/us.anthropic.claude-sonnet-4-20250514-v1:0',
    'fast_cheap': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/us.anthropic.claude-haiku-4-5-20251001-v1:0',
    'aws_native_premier': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/us.amazon.nova-premier-v1:0',
    'aws_native_pro': 'arn:aws:bedrock:us-east-1:183023889407:inference-profile/us.amazon.nova-pro-v1:0'
}
# Model used by default for scenario generation.
# NOTE(review): the 'good' entry is annotated "times out" above — confirm this is
# still the intended default, or switch to another tier.
MODEL_ARN = MODELS['good']

# Runtime client used for the knowledge-base retrieve_and_generate calls.
# The region matches the one embedded in the ARNs above.
bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name='us-east-1')

In [68]:
def _build_scenario_prompt(scenario_count: int, scenario_type: str, start_id: int) -> str:
    """Render the RAG prompt for one batch of scenarios.

    Args:
        scenario_count: Number of scenarios the model is asked to produce.
        scenario_type: Label inserted into the prompt, e.g. "TRUE (compliant)".
        start_id: Numeric ID of the first scenario in the batch (rendered
            zero-padded to four digits).

    Returns:
        The complete prompt text.
    """
    # The prompt asks for markdown only: the response is saved verbatim as a
    # .md file, so the earlier "JSON array" instruction contradicted the
    # requested output format. The format example also uses the batch's real
    # starting ID instead of a fixed SCENARIO-0001.
    return f"""
You are an expert compliance analyst with deep knowledge of regulatory frameworks and corporate policy evaluation. Your task is to generate complex, realistic compliance scenarios using the company policies and NIST controls stored in this knowledge base.

## OBJECTIVE
Generate {scenario_count} compliance evaluation scenarios that evaluate to {scenario_type}. These scenarios will be used to evaluate and train compliance assessment systems.

## SCENARIO REQUIREMENTS

### Complexity Requirements
Each scenario MUST:
1. Reference and integrate AT LEAST 8 different company policies from the knowledge base
2. Include the specific NIST 800-53 controls that underpin each referenced policy
3. Present a realistic business situation that a compliance officer might encounter
4. Include specific details: employee roles, departments, systems, data types, timeframes, and actions taken
5. Contain nuanced elements that require careful policy interpretation
6. Cross multiple compliance domains (e.g., access control + data protection + incident response + audit logging)

### Balance Requirements
- For FALSE scenarios: clearly identify which specific policy/control is violated and why
- For TRUE scenarios: demonstrate how all referenced policies are satisfied

### Realism Requirements
Scenarios should reflect real-world situations such as:
- Employee onboarding/offboarding processes
- Data access requests and approvals
- Security incident handling
- Third-party vendor interactions
- System configuration changes
- Audit and monitoring activities
- Exception request processes
- Cross-departmental data sharing

## GENERATION REQUIREMENTS
- Starting scenario ID: SCENARIO-{start_id:04d}
- Output exactly {scenario_count} scenario(s), numbered consecutively from the starting ID

## OUTPUT FORMAT
Output markdown with the following structure:

# Scenario ID: SCENARIO-{start_id:04d}
### Scenario Description: [Detailed narrative - minimum 200 words]
### Scenario evaluates to true [or false]

"""


def generate_compliance_scenarios(
    knowledge_base_id: str = KNOWLEDGE_BASE_ID,
    model_arn: str = MODEL_ARN,
    batch_size: int = 1,
    total_scenarios: int = 1000,
    output_dir: str = "/home/sagemaker-user",
) -> List[Dict]:
    """
    Generate compliance scenarios using the knowledge base and RAG.

    Scenarios are requested in batches to manage token limits. Batches
    alternate between compliant (even batch index) and non-compliant (odd
    batch index) requests. Each batch response is printed and written verbatim
    to ``<output_dir>/<batch number>.md``.

    Args:
        knowledge_base_id: Bedrock knowledge base to retrieve from.
        model_arn: ARN of the model / inference profile used for generation.
        batch_size: Scenarios requested per call; must be at least 1.
        total_scenarios: Total scenarios to request. When it is not a multiple
            of ``batch_size`` the final batch requests only the remainder.
        output_dir: Directory that receives the per-batch markdown files.

    Returns:
        One dict per successfully generated batch with the keys ``batch``,
        ``first_scenario_id``, ``scenario_count``, ``is_compliant`` (the label
        that was *requested*, not a verified outcome), ``file`` and
        ``markdown``. Empty if nothing was generated.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    batch_records: List[Dict] = []

    # Ceiling division so a trailing partial batch is not silently dropped.
    total_batches = max(0, -(-total_scenarios // batch_size))

    for batch_num in range(total_batches):
        already_requested = batch_num * batch_size
        start_id = already_requested + 1
        scenario_count = min(batch_size, total_scenarios - already_requested)

        is_compliant = batch_num % 2 == 0
        scenario_type = "TRUE (compliant)" if is_compliant else "FALSE (non-compliant)"

        prompt = _build_scenario_prompt(scenario_count, scenario_type, start_id)

        response = bedrock_agent_runtime.retrieve_and_generate(
            input={'text': prompt},
            retrieveAndGenerateConfiguration={
                'type': 'KNOWLEDGE_BASE',
                'knowledgeBaseConfiguration': {
                    'knowledgeBaseId': knowledge_base_id,
                    'modelArn': model_arn,
                    'retrievalConfiguration': {
                        'vectorSearchConfiguration': {
                            'numberOfResults': 20
                        }
                    }
                }
            }
        )

        response_text = response['output']['text'].strip()

        # This exact sentence is treated as a refusal; the whole run stops
        # rather than retrying, and batches collected so far are returned.
        if response_text == "Sorry, I am unable to assist you with this request.":
            print("Model declined to generate scenarios. Exiting gracefully.")
            break

        print(f"Batch {batch_num + 1} response length: {len(response_text)}")
        print(response_text)

        filename = f"{output_dir}/{batch_num + 1}.md"
        # Explicit encoding: generated text may contain non-ASCII characters.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(response_text)

        batch_records.append({
            'batch': batch_num + 1,
            'first_scenario_id': f"SCENARIO-{start_id:04d}",
            'scenario_count': scenario_count,
            'is_compliant': is_compliant,
            'file': filename,
            'markdown': response_text,
        })

        # Pause between calls; there is nothing to wait for after the last batch.
        if batch_num + 1 < total_batches:
            time.sleep(3)

    return batch_records

In [69]:
def save_scenarios_to_file(scenarios: List[Dict], output_path: str):
    """Print the scenarios and save them, with summary counts, to a JSON file.

    The summary is built and serialized *before* the output file is opened, so
    invalid input (e.g. a scenario without ``is_compliant``) raises without
    truncating a file that already exists at ``output_path``.

    Args:
        scenarios: Scenario dicts; each must contain an ``is_compliant`` key
            whose truthiness marks the scenario as compliant.
        output_path: Destination path of the JSON file (overwritten).

    Raises:
        KeyError: If a scenario lacks the ``is_compliant`` key.
        TypeError: If a scenario contains values that are not JSON-serializable.
    """
    print(json.dumps(scenarios, indent=2))

    compliant_count = sum(1 for s in scenarios if s['is_compliant'])
    summary = {
        'total_scenarios': len(scenarios),
        'compliant_count': compliant_count,
        # Every scenario is either truthy or falsy on 'is_compliant'.
        'non_compliant_count': len(scenarios) - compliant_count,
        'scenarios': scenarios
    }
    serialized = json.dumps(summary, indent=2)

    # Only touch the file once the full document is known to be valid.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)


In [70]:
# Trial run: request a single scenario (total_scenarios=1) using the default
# knowledge base, model and batch size.
scenarios = generate_compliance_scenarios(total_scenarios=1)
# Writing the returned list to a JSON file is currently disabled.
# save_scenarios_to_file(scenarios, '/home/sagemaker-user/ScenariosGenerated.json')

Model declined to generate scenarios. Exiting gracefully.
