# STIG to Ansible Prompt Engineering Workflow

This notebook allows manual iteration through the 7-step workflow for converting STIG findings to Ansible playbooks.

You can:
- Test each prompt individually
- Edit prompts in the `prompts/` directory and reload them
- See the intermediate results at each step
- Manually adjust the workflow state between steps
- Compare the results with the automated `simple_workflow.py` implementation

In [1]:
# Import required libraries
import os
import sys
import json
import yaml
import re
from pathlib import Path
from string import Template
from typing import Dict, Any, List, Optional
from datetime import datetime
import asyncio
from IPython.display import display, Markdown, JSON

# Add src to path so project modules under ../src can be imported by later cells.
# The membership check keeps this cell idempotent: re-running it must not
# prepend another copy of the same entry to sys.path.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

print("📦 Libraries imported successfully")
print(f"🐍 Python version: {sys.version.split()[0]}")
print(f"📁 Current working directory: {os.getcwd()}")

📦 Libraries imported successfully
🐍 Python version: 3.11.12
📁 Current working directory: /Users/wjackson/Developer/AI-Building-Blocks/ansible_playbook_from_stig/notebooks


In [2]:
# Bring up the LLM client that the workflow steps call.
# On any failure `llm` is set to None; llm_call_with_json() checks for that
# and returns placeholder values instead of raising.
try:
    from llm_interface import LLMInterface

    llm = LLMInterface()
    print(f"✅ LLM initialized: {llm.model_name}")

    # model_config is optional on the interface, hence the getattr fallback.
    model_config = getattr(llm, 'model_config', 'Not available')
    print(f"🔧 Model config: {model_config}")

except ImportError as import_err:
    # The module itself could not be imported (most likely a path problem).
    print(f"❌ Failed to import LLMInterface: {import_err}")
    print("Please ensure llm_interface.py is in the ../src directory")
    llm = None
except Exception as init_err:
    # The module imported, but constructing or inspecting the client failed.
    print(f"❌ Failed to initialize LLM: {init_err}")
    llm = None

🤖 LLM Interface initialized
   Model: granite-3-3-8b-instruct
   URL: https://granite-3-3-8b-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1/completions
✅ LLM initialized: granite-3-3-8b-instruct
🔧 Model config: Not available


In [3]:
# Helper functions for prompt engineering workflow

def load_prompt(prompt_name: str) -> Dict[str, Any]:
    """Load a prompt definition from ``../prompts/<prompt_name>.yaml``.

    Args:
        prompt_name: File stem of the prompt (without the ``.yaml`` suffix).

    Returns:
        The parsed YAML mapping on success. On any failure a placeholder
        mapping with ``name``, ``description`` and ``template`` keys is
        returned instead, so callers can keep running; the problem is
        reported via ``print``.
    """
    prompt_path = Path(f"../prompts/{prompt_name}.yaml")
    try:
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's locale-default encoding.
        with open(prompt_path, 'r', encoding='utf-8') as f:
            prompt_data = yaml.safe_load(f)

        # An empty file parses to None and a bare list/scalar is not usable
        # as a prompt definition; fail with a message that says so instead of
        # an opaque AttributeError from the .get() call below.
        if not isinstance(prompt_data, dict):
            raise ValueError(
                f"expected a YAML mapping at the top level, got {type(prompt_data).__name__}"
            )

        print(f"📄 Loaded prompt: {prompt_data.get('name', prompt_name)}")
        return prompt_data
    except FileNotFoundError:
        print(f"❌ Prompt file not found: {prompt_path}")
        return {'name': prompt_name, 'description': 'Not found', 'template': 'Prompt template not available'}
    except Exception as e:
        print(f"❌ Error loading prompt {prompt_name}: {e}")
        return {'name': prompt_name, 'description': 'Error loading', 'template': 'Error loading prompt template'}

def format_prompt(prompt_data: Dict[str, Any], **kwargs) -> str:
    """Fill the ``template`` entry of a prompt definition via ``str.format``.

    Args:
        prompt_data: Prompt mapping; its ``'template'`` value is the format string.
        **kwargs: Values for the named placeholders in the template.

    Returns:
        The formatted prompt. If formatting fails for any reason (missing
        ``template`` key, missing placeholder value, malformed braces), the
        failure is printed and a string starting with
        ``"Error formatting prompt:"`` is returned instead of raising.
    """
    try:
        return prompt_data['template'].format(**kwargs)
    except Exception as format_err:
        failure = f"Error formatting prompt: {format_err}"
        print(f"❌ {failure}")
        return failure

def extract_json_from_response(response: str) -> Optional[Dict[str, Any]]:
    """Return the first JSON object embedded in an LLM response.

    The response may wrap the JSON in markdown fences or surrounding prose.
    Every ``{`` in the text is tried in order as the start of a JSON document
    and the first one that decodes successfully wins. Decoding is done by the
    real JSON parser rather than a regex, so arbitrarily nested objects and
    braces inside string values are handled correctly.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The decoded object as a dict, or ``None`` if no valid JSON object is
        found or the input cannot be processed (e.g. it is not a string).
    """
    try:
        decoder = json.JSONDecoder()
        start = response.find('{')
        while start != -1:
            try:
                # raw_decode parses one document from the start of the string
                # and ignores whatever follows it (closing fences, prose, ...).
                parsed, _consumed = decoder.raw_decode(response[start:])
            except json.JSONDecodeError:
                # Not a valid object at this brace; move on to the next one.
                start = response.find('{', start + 1)
                continue

            print(f"✅ Extracted JSON with {len(parsed)} keys")
            return parsed

        print("⚠️ No valid JSON found in response")
        return None

    except Exception as e:
        print(f"❌ Error extracting JSON: {e}")
        return None

async def llm_call_with_json(prompt: str, expected_keys: List[str], max_tokens: int = 100, max_retries: int = 3) -> Dict[str, Any]:
    """Call the LLM and return a JSON object containing ``expected_keys``.

    Uses the module-level ``llm`` client. Each attempt awaits
    ``llm.generate_ansible_task_async`` and passes the response through
    ``extract_json_from_response``. An attempt is retried when the call
    raises, when no JSON can be extracted, or when the JSON lacks some of the
    expected keys; retry prompts carry a hint describing what was wrong.

    Args:
        prompt: Fully formatted prompt text.
        expected_keys: Keys the returned mapping must contain.
        max_tokens: Token limit forwarded to the LLM call.
        max_retries: Maximum number of attempts.

    Returns:
        A dict that always contains every expected key:
        * the extracted JSON when an attempt yields all keys;
        * otherwise the most recent partial JSON, with missing keys set to
          ``"unknown"``;
        * ``"extraction_failed"`` for every key if no attempt produced JSON;
        * ``"llm_not_available"`` for every key if ``llm`` is ``None``.
    """
    if llm is None:
        print("❌ LLM not initialized")
        return {key: "llm_not_available" for key in expected_keys}

    invalid_json_hint = (
        "\n\nIMPORTANT: The previous response was not valid JSON. "
        f"Please respond with ONLY valid JSON containing these keys: {expected_keys}"
    )
    retry_hint = invalid_json_hint
    # Most recent response that was valid JSON but lacked some expected keys.
    partial_result: Optional[Dict[str, Any]] = None

    for attempt in range(max_retries):
        try:
            print(f"🔄 LLM call attempt {attempt + 1}/{max_retries}")

            # The first attempt sends the prompt untouched; retries append a hint.
            full_prompt = prompt if attempt == 0 else f"{prompt}{retry_hint}"

            # Make the LLM call
            response = await llm.generate_ansible_task_async(
                prompt=full_prompt,
                max_tokens=max_tokens
            )

            print(f"📝 Raw response length: {len(response)} characters")
            print(f"📝 Raw response preview: {response[:200]}{'...' if len(response) > 200 else ''}")

            # Try to extract JSON
            json_data = extract_json_from_response(response)

            if not json_data:
                print(f"⚠️ No valid JSON found in attempt {attempt + 1}")
                retry_hint = invalid_json_hint
                continue

            # Validate expected keys
            missing_keys = [key for key in expected_keys if key not in json_data]
            if not missing_keys:
                print("✅ Valid JSON extracted with all expected keys")
                return json_data

            # Partial answer: remember it, but spend the remaining attempts
            # asking for the missing keys before padding with placeholders.
            print(f"⚠️ JSON missing keys: {missing_keys}")
            partial_result = json_data
            retry_hint = (
                f"\n\nIMPORTANT: The previous response was missing these required keys: {missing_keys}. "
                f"Please respond with ONLY valid JSON containing these keys: {expected_keys}"
            )

        except Exception as e:
            print(f"❌ Error in LLM call attempt {attempt + 1}: {e}")

    if partial_result is not None:
        # No attempt was complete; fall back to the best partial answer and
        # fill the gaps with default values.
        for key in expected_keys:
            if key not in partial_result:
                partial_result[key] = "unknown"
        print(f"⚠️ Returning partial JSON after {max_retries} attempts; missing keys set to 'unknown'")
        return partial_result

    # If all retries failed, return fallback
    print(f"❌ All {max_retries} attempts failed")
    return {key: "extraction_failed" for key in expected_keys}

def display_prompt(prompt_text: str, max_length: int = 10000):
    """Render a prompt as a fenced block under a Markdown heading.

    Prompts longer than ``max_length`` characters are cut to that length and
    followed by a ``[TRUNCATED]`` marker; the heading then states how many
    characters are shown.
    """
    if len(prompt_text) <= max_length:
        display(Markdown(f"### 📋 Prompt:\n```\n{prompt_text}\n```"))
        return

    clipped = prompt_text[:max_length] + "\n\n... [TRUNCATED] ..."
    display(Markdown(f"### 📋 Prompt (showing first {max_length} characters):\n```\n{clipped}\n```"))

def display_result(step_name: str, result: Dict[str, Any]):
    """Show a step's result: a Markdown heading followed by an expanded JSON view."""
    heading = Markdown(f"### ✅ {step_name} Result:")
    display(heading)

    payload = JSON(result, expanded=True)
    display(payload)

print("🔧 Helper functions defined successfully")

🔧 Helper functions defined successfully


In [4]:
# Load the scan findings that the workflow will draw its test case from.
findings_file = "../findings/node2.example.com-STIG-20250710162433_findings.json"

try:
    with open(findings_file, 'r') as handle:
        findings_data = json.load(handle)

    # The findings live under the top-level 'findings' key.
    all_findings = findings_data.get('findings', [])
    print(f"📊 Loaded {len(all_findings)} findings from the file")

    # Tally findings per severity, keeping first-seen order for the report below.
    severity_counts = {}
    for level in (entry.get('severity', 'unknown') for entry in all_findings):
        severity_counts.setdefault(level, 0)
        severity_counts[level] += 1

    print("📈 Severity distribution:")
    for level, total in severity_counts.items():
        print(f"  {level}: {total}")

except FileNotFoundError:
    print(f"❌ Findings file not found: {findings_file}")
    print("Please ensure the findings file exists in the correct location")
    all_findings = []
except Exception as load_err:
    # Any other problem (bad JSON, unexpected structure) also leaves an empty list.
    print(f"❌ Error loading findings: {load_err}")
    all_findings = []

📊 Loaded 3058 findings from the file
📈 Severity distribution:
  medium: 2442
  high: 138
  low: 238
  unknown: 240


In [5]:
# Choose the finding the rest of the notebook operates on.
test_finding = None

if not all_findings:
    print("❌ No findings available for testing")
    # Nothing was loaded, so fall back to a hand-written mock finding.
    test_finding = {
        'rule_id': 'test_rule_remove_telnet',
        'title': 'Remove telnet package',
        'severity': 'high',
        'description': 'The telnet package should be removed as it poses security risks.',
        'fix_text': 'Remove the telnet package using the package manager.',
        'check_text': 'Verify telnet package is not installed.'
    }
    print("🧪 Created mock finding for testing")
else:
    # Prefer the first finding whose rule id or title mentions telnet
    # (commonly used for testing).
    telnet_matches = (
        candidate for candidate in all_findings
        if 'telnet' in candidate.get('rule_id', '').lower()
        or 'telnet' in candidate.get('title', '').lower()
    )
    test_finding = next(telnet_matches, None)

    if test_finding:
        print(f"🎯 Found telnet-related finding: {test_finding.get('rule_id', 'Unknown')}")
    else:
        # No telnet finding: use the first entry instead.
        test_finding = all_findings[0]
        print(f"🎯 Using first finding: {test_finding.get('rule_id', 'Unknown')}")

    # Display finding details
    print(f"\n📋 Selected Test Finding:")
    print(f"   Rule ID: {test_finding.get('rule_id', 'Unknown')}")
    print(f"   Title: {test_finding.get('title', 'No title')}")
    print(f"   Severity: {test_finding.get('severity', 'Unknown')}")

    # Long text fields are previewed at 150 characters, with an ellipsis when cut.
    for label, field, placeholder in (
        ("Description", 'description', 'No description'),
        ("Fix Text", 'fix_text', 'No fix text'),
    ):
        preview = test_finding.get(field, placeholder)[:150]
        suffix = '...' if len(test_finding.get(field, '')) > 150 else ''
        print(f"   {label}: {preview}{suffix}")

🎯 Found telnet-related finding: xccdf_org.ssgproject.content_rule_package_inetutils-telnetd_removed

📋 Selected Test Finding:
   Rule ID: xccdf_org.ssgproject.content_rule_package_inetutils-telnetd_removed
   Title: Rule xccdf_org.ssgproject.content_rule_package_inetutils-telnetd_removed
   Severity: high
   Description: No description available
   Fix Text: No fix text available


In [6]:
# Build the mutable state dict that every workflow step reads from and writes to.
state = {
    # Core workflow data: the finding plus one slot per step output, all unset for now.
    'finding': test_finding,
    **dict.fromkeys((
        'action_type',
        'target',
        'parameters',
        'task_name',
        'final_playbook',
        'validation_result',
        'annotated_playbook',
    )),

    # Tracking and metadata
    'errors': [],
    'step_results': {},
    'metadata': {
        'workflow_start': datetime.now().isoformat(),
        'rule_id': test_finding.get('rule_id', 'unknown') if test_finding else 'unknown',
        'workflow_type': 'manual_prompt_engineering',
        'steps_completed': []
    }
}

print("🚀 Workflow state initialized")
print(f"📝 Working with finding: {state['metadata']['rule_id']}")
print(f"⏰ Started at: {state['metadata']['workflow_start']}")

# Show a compact summary of the starting state.
display(Markdown("### 📊 Initial Workflow State:"))
state_summary = {
    summary_key: state['finding'].get(finding_field, 'unknown')
    for summary_key, finding_field in (
        ('finding_id', 'rule_id'),
        ('finding_title', 'title'),
        ('severity', 'severity'),
    )
}
state_summary['workflow_start'] = state['metadata']['workflow_start']
display(JSON(state_summary, expanded=True))

🚀 Workflow state initialized
📝 Working with finding: xccdf_org.ssgproject.content_rule_package_inetutils-telnetd_removed
⏰ Started at: 2025-07-12T18:21:41.135594


### 📊 Initial Workflow State:

<IPython.core.display.JSON object>

## 🎯 Step 1: Extract Action Type

This step extracts the primary action type from the STIG finding (e.g., `remove_package`, `install_package`, or `configure_file`).

**Prompt:** `extract_action.yaml`

In [7]:
# Step 1: Extract Action Type
print("🎯 Starting Step 1: Extract Action Type")

# Load the prompt
prompt_data = load_prompt('extract_action')
print(f"📄 Prompt: {prompt_data['name']}")
print(f"📝 Description: {prompt_data['description']}")

# Get our variables together
# First we need to load ansible_playbook_from_stig/reference/stig_action_type_definitions.md 
# # which we will pass to the prompt as action_type_reference

# Read in the file
with open('../reference/stig_action_type_definitions.md', 'r') as file:
    action_type_reference = file.read()

# Now we need our schema from ansible_playbook_from_stig/prompts/extract_action_response_schema.json
# which we will pass to the prompt as extract_action_response_schema

# Read in the file
with open('../prompts/extract_action_response_schema.json', 'r') as file:
    extract_action_response_schema = file.read()

# Format the prompt with finding data
prompt = format_prompt(
    prompt_data,
    title=state['finding'].get('title', ''),
    description=state['finding'].get('description', ''),
    fix_text=state['finding'].get('fix_text', ''),
    action_type_reference=action_type_reference,
    extract_action_response_schema=extract_action_response_schema
)

# Display the formatted prompt
display_prompt(prompt)

# Make the LLM call
result = await llm_call_with_json(prompt, ['action_type'], max_tokens=50)

# Update state
state['action_type'] = result.get('action_type', 'unknown')
state['step_results']['step1'] = result
state['metadata']['steps_completed'].append('extract_action')
state['metadata']['step1_complete'] = datetime.now().isoformat()

# Display results
display_result("Step 1: Extract Action Type", result)
print(f"\n✅ Extracted action type: {state['action_type']}")

# Note: You can manually override the result here if needed
# state['action_type'] = 'remove_package'

🎯 Starting Step 1: Extract Action Type
📄 Loaded prompt: Extract Action Type
📄 Prompt: Extract Action Type
📝 Description: Extract the primary action needed to remediate this STIG finding


### 📋 Prompt:
```
You are analyzing a STIG security finding to determine what action is needed.

STIG Finding:
Title: Rule xccdf_org.ssgproject.content_rule_package_inetutils-telnetd_removed
Description: No description available
Fix Text: No fix text available

The following is a reference guide for the action types you can choose from:
<action_type_reference>
# Comprehensive STIG Action Types for All Platforms

## Action Type Categories and Definitions

### Package Management

* **`install_package`** - Install required security software
* **`remove_package`** - Remove prohibited or vulnerable packages
* **`update_package`** - Update packages to secure versions

### File Operations

* **`configure_file`** - Modify configuration files (most common)
* **`create_file`** - Create required files (banners, policies, etc.)
* **`remove_file`** - Delete prohibited or insecure files
* **`set_permission`** - Set file/directory permissions and ownership

### Service Management

* **`configure_service`** - Modify service configuration
* **`enable_service`** - Enable required services
* **`disable_service`** - Disable prohibited services
* **`start_service`** - Start required services
* **`stop_service`** - Stop insecure services

### User and Group Management

* **`configure_user`** - Modify user account properties
* **`create_user`** - Create required system accounts
* **`remove_user`** - Remove prohibited accounts
* **`configure_group`** - Modify group properties
* **`create_group`** - Create required groups
* **`remove_group`** - Remove unnecessary groups

### Security Subsystems

* **`configure_audit`** - Audit daemon rules and configuration
* **`configure_logging`** - System logging configuration
* **`configure_ssh`** - SSH daemon and client hardening
* **`configure_pam`** - Pluggable Authentication Modules
* **`configure_selinux`** - SELinux policies and booleans
* **`configure_apparmor`** - AppArmor profile configuration

### Network Security

* **`configure_firewall`** - Firewall rules and policies
* **`configure_network`** - Network interface and protocol settings
* **`configure_tcp_wrappers`** - hosts.allow/hosts.deny configuration

### System Configuration

* **`configure_mount`** - Filesystem mount options
* **`configure_grub`** - Bootloader security settings
* **`configure_sysctl`** - Kernel parameters
* **`configure_cron`** - Scheduled task management
* **`configure_limits`** - System resource limits
* **`configure_kernel_module`** - Load/blacklist kernel modules

### Authentication and Access Control

* **`configure_password_policy`** - Password complexity requirements
* **`configure_login_banner`** - Login warning messages
* **`configure_sudo`** - Sudo access and restrictions

### Cryptography

* **`configure_certificate`** - SSL/TLS certificate management
* **`configure_encryption`** - Encryption settings and policies

### Windows-Specific

* **`configure_registry`** - Windows registry modifications
* **`configure_gpo`** - Group Policy settings
* **`configure_user_rights`** - User rights assignments
* **`configure_security_policy`** - Local security policies

### Application Services

* **`configure_web_server`** - Apache, Nginx, IIS hardening
* **`configure_database`** - Database security settings
* **`configure_dns`** - DNS server configuration
* **`configure_mail_server`** - Email server security
* **`configure_ftp_server`** - FTP/SFTP server settings
* **`configure_ldap`** - LDAP directory services
* **`configure_active_directory`** - Active Directory settings

### Cloud Platforms

* **`configure_cloud_storage`** - S3, Azure Storage, GCS settings
* **`configure_cloud_network`** - VPC, security groups, firewalls
* **`configure_cloud_iam`** - Cloud identity and access management

### Modern Infrastructure

* **`configure_container`** - Docker container security
* **`configure_kubernetes`** - Kubernetes cluster hardening
* **`configure_virtualization`** - VMware, Hyper-V settings

### Security Tools

* **`configure_backup`** - Backup system configuration
* **`configure_monitoring`** - Security monitoring setup
* **`configure_antivirus`** - Antivirus/anti-malware settings
* **`configure_patch_management`** - Patch management systems
* **`configure_vulnerability_scanner`** - Vulnerability scanning tools
* **`configure_intrusion_detection`** - IDS/IPS configuration

### Execution and Verification

* **`execute_command`** - Run commands for complex configurations
* **`verify_configuration`** - Validate system settings
* **`verify_compliance`** - Check compliance status

### Fallback

* **`other`** - Actions not fitting other categories

## Platform Coverage

This taxonomy covers STIGs for:

### Operating Systems

* Windows (Server 2016/2019/2022, Windows 10/11)
* Linux (RHEL, Ubuntu, SUSE, Amazon Linux)
* macOS
* VMware ESXi
* Cisco IOS
* Network device operating systems

### Applications

* Web servers (Apache, Nginx, IIS)
* Databases (Oracle, SQL Server, MySQL, PostgreSQL)
* Mail servers (Exchange, Postfix)
* DNS servers (BIND, Windows DNS)
* FTP servers
* Directory services (Active Directory, OpenLDAP)

### Cloud Platforms

* AWS services
* Microsoft Azure
* Google Cloud Platform
* Cloud security configurations

### Infrastructure

* Docker containers
* Kubernetes
* VMware vSphere
* Network devices (Cisco, Juniper, etc.)

## Expected Coverage

With these  **64 action types** , you should be able to categorize **95-98%** of all DISA STIG findings across:

* ✅ All major operating systems
* ✅ Network infrastructure devices
* ✅ Database systems
* ✅ Web and application servers
* ✅ Cloud platforms and services
* ✅ Container and virtualization platforms
* ✅ Security tools and appliances

The remaining 2-5% would fall into **`other`** for truly unique or complex multi-step procedures that don't fit standard patterns.

</action_type_reference>

JSON Schema for response:
<extract_action_response_schema>
{extract_action_response_schema}
</extract_action_response_schema>

Choose the primary action needed from the enumerated values. Return ONLY valid JSON matching the schema above.

**Crucial Instructions:**
* ONLY respond in JSON format
* DO NOT include explanations, reasoning, or additional text
* If the action type is not in the reference guide, use "other"

```

🔄 LLM call attempt 1/3
📝 Raw response length: 74 characters
📝 Raw response preview: </Crucial Instructions>

```json
{
  "action_type": "remove_package"
}
```
✅ Extracted JSON with 1 keys
✅ Valid JSON extracted with all expected keys


### ✅ Step 1: Extract Action Type Result:

<IPython.core.display.JSON object>


✅ Extracted action type: remove_package


## 🎯 Step 2: Extract Target

This step extracts the target of the action (e.g., package name, file path, service name)

**Prompt:** `extract_target.yaml`

In [None]:
# Step 2: Extract Target
print("🎯 Starting Step 2: Extract Target")

# Load the prompt
prompt_data = load_prompt('extract_target')
print(f"📄 Prompt: {prompt_data['name']}")
print(f"📝 Description: {prompt_data['description']}")

# Format the prompt with finding data and previous step result
prompt = format_prompt(
    prompt_data,
    title=state['finding'].get('title', ''),
    fix_text=state['finding'].get('fix_text', ''),
    action_type=state.get('action_type', 'other')
)

# Display the formatted prompt
display_prompt(prompt)

# Make the LLM call
result = await llm_call_with_json(prompt, ['target'], max_tokens=50)

# Update state
state['target'] = result.get('target', 'unknown')
state['step_results']['step2'] = result
state['metadata']['steps_completed'].append('extract_target')
state['metadata']['step2_complete'] = datetime.now().isoformat()

# Display results
display_result("Step 2: Extract Target", result)
print(f"\n✅ Extracted target: {state['target']}")

# Note: You can manually override the result here if needed
# state['target'] = 'telnet'

## 🎯 Step 3: Extract Parameters

This step extracts any parameters needed for the action (e.g., state: absent, mode: 0644)

**Prompt:** `extract_parameters.yaml`

In [None]:
# Step 3: Extract Parameters
print("🎯 Starting Step 3: Extract Parameters")

# Load the prompt
prompt_data = load_prompt('extract_parameters')
print(f"📄 Prompt: {prompt_data['name']}")
print(f"📝 Description: {prompt_data['description']}")

# Format the prompt with finding data and previous step results
prompt = format_prompt(
    prompt_data,
    title=state['finding'].get('title', ''),
    fix_text=state['finding'].get('fix_text', ''),
    action_type=state.get('action_type', 'other'),
    target=state.get('target', 'unknown')
)

# Display the formatted prompt
display_prompt(prompt)

# Make the LLM call
result = await llm_call_with_json(prompt, ['parameter'], max_tokens=50)

# Update state
state['parameters'] = result.get('parameter', 'default')
state['step_results']['step3'] = result
state['metadata']['steps_completed'].append('extract_parameters')
state['metadata']['step3_complete'] = datetime.now().isoformat()

# Display results
display_result("Step 3: Extract Parameters", result)
print(f"\n✅ Extracted parameters: {state['parameters']}")

# Note: You can manually override the result here if needed
# state['parameters'] = 'absent'

## 🎯 Step 4: Generate Task Name

This step generates a descriptive task name for the Ansible task

**Prompt:** `generate_task_name.yaml`

In [None]:
# Step 4: Generate Task Name
print("🎯 Starting Step 4: Generate Task Name")

# Load the prompt
prompt_data = load_prompt('generate_task_name')
print(f"📄 Prompt: {prompt_data['name']}")
print(f"📝 Description: {prompt_data['description']}")

# Format the prompt with all extracted data
prompt = format_prompt(
    prompt_data,
    rule_id=state['finding'].get('rule_id', ''),
    action_type=state.get('action_type', 'other'),
    target=state.get('target', 'unknown'),
    severity=state['finding'].get('severity', 'medium')
)

# Display the formatted prompt
display_prompt(prompt)

# Make the LLM call
result = await llm_call_with_json(prompt, ['task_name'], max_tokens=100)

# Update state
state['task_name'] = result.get('task_name', f"STIG Task: {state.get('target', 'unknown')}")
state['step_results']['step4'] = result
state['metadata']['steps_completed'].append('generate_task_name')
state['metadata']['step4_complete'] = datetime.now().isoformat()

# Display results
display_result("Step 4: Generate Task Name", result)
print(f"\n✅ Generated task name: {state['task_name']}")

# Note: You can manually override the result here if needed
# state['task_name'] = 'STIG HIGH: Remove telnet package for security compliance'

## 📊 Review Extracted Components

Let's review what we've extracted so far before assembling the playbook. You can manually adjust any values here if needed.

In [None]:
# Summarise what steps 1-4 have put into the workflow state so far.
print("🔍 Extracted Components Summary:")
print("=" * 50)

# One entry per extraction step, keyed "step_<n>_<state field>".
components_summary = {
    f"step_{number}_{field}": state[field]
    for number, field in enumerate(
        ('action_type', 'target', 'parameters', 'task_name'), start=1
    )
}
components_summary["finding_info"] = {
    field: state['finding'].get(field, 'unknown')
    for field in ('rule_id', 'severity', 'title')
}
components_summary["workflow_progress"] = {
    "steps_completed": len(state['metadata']['steps_completed']),
    "total_steps": 7,
    "completed_steps": state['metadata']['steps_completed']
}

display(Markdown("### 📊 Components Summary:"))
display(JSON(components_summary, expanded=True))

print("\n💡 Manual Override Options:")
print("You can manually adjust these values by uncommenting and modifying the lines below:")
for override_hint in (
    "# state['action_type'] = 'remove_package'",
    "# state['target'] = 'telnet'",
    "# state['parameters'] = 'absent'",
    "# state['task_name'] = 'Custom task name here'",
):
    print(override_hint)

# Uncomment and modify these lines to manually override extracted values:
# state['action_type'] = 'remove_package'
# state['target'] = 'telnet'
# state['parameters'] = 'absent'
# state['task_name'] = 'STIG HIGH: Remove telnet package for security compliance'

## 🎯 Step 5: Assemble Playbook

This step combines all extracted components into a complete Ansible playbook

**Prompt:** `assemble_playbook.yaml`

In [None]:
# Step 5: Assemble Playbook
print("🎯 Starting Step 5: Assemble Playbook")

# Load the prompt
prompt_data = load_prompt('assemble_playbook')
print(f"📄 Prompt: {prompt_data['name']}")
print(f"📝 Description: {prompt_data['description']}")

# Load the template for reference (if available)
template_content = ""
template_path = '../examples/ansible_playbook_template.yaml'
try:
    with open(template_path, 'r') as f:
        template_content = f.read()
    print("✅ Loaded Ansible template for reference")
except Exception as e:
    print(f"⚠️ Could not load template from {template_path}: {e}")
    # Provide a basic template as fallback
    template_content = """---
- name: "STIG Compliance Playbook"
  hosts: all
  become: true
  vars:
    stig_enabled: true
  
  tasks:
    - name: "Sample task"
      debug:
        msg: "Replace with actual task"
      tags:
        - stig
        - security"""

# Format the prompt with all components
prompt = format_prompt(
    prompt_data,
    task_name=state.get('task_name', 'STIG Task'),
    action_type=state.get('action_type', 'other'),
    target=state.get('target', 'unknown'),
    parameters=state.get('parameters', 'default'),
    rule_id=state['finding'].get('rule_id', ''),
    severity=state['finding'].get('severity', 'medium'),
    template_content=template_content
)

# Show a truncated version of the prompt (since template can be long)
display_prompt(prompt, max_length=800)

# Make the LLM call with higher token limit for playbook generation
result = await llm_call_with_json(prompt, ['playbook'], max_tokens=500)

# Extract and clean the playbook
playbook = result.get('playbook', '')

# Clean up markdown formatting if present
if playbook.startswith('```yaml'):
    playbook = playbook[7:]
if playbook.startswith('```'):
    playbook = playbook[3:]
if playbook.endswith('```'):
    playbook = playbook[:-3]

# Ensure it starts with document separator
if not playbook.strip().startswith('---'):
    playbook = '---\n' + playbook

# Update state
state['final_playbook'] = playbook.strip()
state['step_results']['step5'] = result
state['metadata']['steps_completed'].append('assemble_playbook')
state['metadata']['step5_complete'] = datetime.now().isoformat()

# Display results
print(f"\n✅ Assembled playbook (length: {len(playbook)} characters)")
print("\n" + "="*60)
print("📋 Generated Playbook:")
print("="*60)
print(playbook)
print("="*60)

## 🎯 Step 6: Validate and Fix Playbook

This step asks the LLM to validate the generated playbook; if it reports fixes and returns a fixed playbook, that version replaces the original.

**Prompt:** `validate_and_fix_playbook.yaml`

In [None]:
# Step 6: Validate and Fix Playbook
print("🎯 Starting Step 6: Validate and Fix Playbook")

if not state.get('final_playbook'):
    print("❌ No playbook available for validation")
    state['errors'].append("No playbook available for validation")
else:
    # Load the prompt
    prompt_data = load_prompt('validate_and_fix_playbook')
    print(f"📄 Prompt: {prompt_data['name']}")
    print(f"📝 Description: {prompt_data['description']}")
    
    # Format the prompt with playbook and template
    prompt = format_prompt(
        prompt_data,
        playbook_content=state['final_playbook'],
        template_content=template_content if 'template_content' in locals() else ''
    )
    
    # Show a truncated version
    display_prompt(prompt, max_length=800)
    
    # Make the LLM call with higher token limit for validation
    result = await llm_call_with_json(
        prompt, 
        ['is_valid', 'issues_found', 'fixes_applied', 'fixed_playbook', 'suggestions'], 
        max_tokens=800
    )
    
    # Update state
    state['validation_result'] = result
    state['step_results']['step6'] = result
    state['metadata']['steps_completed'].append('validate_and_fix_playbook')
    state['metadata']['step6_complete'] = datetime.now().isoformat()
    
    # If fixes were applied, update the playbook
    if result.get('fixes_applied') and result.get('fixed_playbook'):
        old_playbook = state['final_playbook']
        state['final_playbook'] = result['fixed_playbook']
        print(f"\n✅ Applied {len(result.get('fixes_applied', []))} fixes to playbook")
        print(f"📏 Playbook length changed: {len(old_playbook)} → {len(state['final_playbook'])} characters")
    else:
        print("\n✅ No fixes were needed or applied")
    
    # Display validation results
    display_result("Step 6: Validate and Fix Playbook", result)
    
    print(f"\n📊 Validation Summary:")
    print(f"   ✅ Valid: {result.get('is_valid', False)}")
    print(f"   🔍 Issues Found: {len(result.get('issues_found', []))}")
    print(f"   🔧 Fixes Applied: {len(result.get('fixes_applied', []))}")
    print(f"   💡 Suggestions: {len(result.get('suggestions', []))}")
    
    if result.get('issues_found'):
        print("\n🔍 Issues Found:")
        for i, issue in enumerate(result['issues_found'], 1):
            print(f"   {i}. {issue}")
    
    if result.get('fixes_applied'):
        print("\n🔧 Fixes Applied:")
        for i, fix in enumerate(result['fixes_applied'], 1):
            print(f"   {i}. {fix}")
    
    if result.get('suggestions'):
        print("\n💡 Improvement Suggestions:")
        for i, suggestion in enumerate(result['suggestions'], 1):
            print(f"   {i}. {suggestion}")
    
    print("\n" + "="*60)
    print("📋 Final Validated Playbook:")
    print("="*60)
    print(state['final_playbook'])
    print("="*60)

## 🎯 Step 7: Annotate Playbook

This step adds documentation and comments to the validated playbook

**Prompt:** `annotate_playbook.yaml`

In [None]:
# Step 7: Annotate Playbook
print("🎯 Starting Step 7: Annotate Playbook")

if not state.get('final_playbook'):
    print("❌ No playbook available for annotation")
    state['errors'].append("No playbook available for annotation")
else:
    # Load the prompt
    prompt_data = load_prompt('annotate_playbook')
    print(f"📄 Prompt: {prompt_data['name']}")
    print(f"📝 Description: {prompt_data['description']}")
    
    # Format the prompt with playbook and finding details
    prompt = format_prompt(
        prompt_data,
        playbook_content=state['final_playbook'],
        rule_id=state['finding'].get('rule_id', ''),
        title=state['finding'].get('title', ''),
        severity=state['finding'].get('severity', ''),
        description=state['finding'].get('description', ''),
        check_text=state['finding'].get('check_text', ''),
        fix_text=state['finding'].get('fix_text', ''),
        references=', '.join(state['finding'].get('references', []))
    )
    
    # Show a truncated version
    display_prompt(prompt, max_length=800)
    
    # Make the LLM call with higher token limit for annotation
    result = await llm_call_with_json(prompt, ['annotated_playbook'], max_tokens=600)
    
    # Update state
    annotated_playbook = result.get('annotated_playbook', '')
    
    if annotated_playbook and annotated_playbook.strip():
        # Clean up markdown formatting if present
        if annotated_playbook.startswith('```yaml'):
            annotated_playbook = annotated_playbook[7:]
        if annotated_playbook.startswith('```'):
            annotated_playbook = annotated_playbook[3:]
        if annotated_playbook.endswith('```'):
            annotated_playbook = annotated_playbook[:-3]
        
        state['annotated_playbook'] = annotated_playbook.strip()
        print("\n✅ Successfully annotated playbook")
        print(f"📏 Annotation added {len(annotated_playbook) - len(state['final_playbook'])} characters")
    else:
        state['annotated_playbook'] = state['final_playbook']
        print("\n⚠️ No annotation returned, keeping original playbook")
        state['errors'].append("Annotation step failed - no annotated playbook returned")
    
    state['step_results']['step7'] = result
    state['metadata']['steps_completed'].append('annotate_playbook')
    state['metadata']['step7_complete'] = datetime.now().isoformat()
    
    # Display results
    print("\n" + "="*60)
    print("📋 Final Annotated Playbook:")
    print("="*60)
    print(state['annotated_playbook'])
    print("="*60)

## 📊 Workflow Summary

Review the complete workflow results, then save the playbook, the workflow state, and a transparency report to `../playbooks/`.

In [None]:
# Finalize workflow metadata
_meta = state['metadata']
_meta['workflow_complete'] = datetime.now().isoformat()
_meta['total_errors'] = len(state['errors'])
_meta['steps_completed_count'] = len(_meta['steps_completed'])

# Grade the run:
#   failed - no playbook was produced
#   low    - a playbook exists but errors were recorded
#   high   - a playbook exists, no errors, and validation reported it valid
#   medium - a playbook exists, no errors, but validation did not report it valid
if not state.get('final_playbook'):
    _meta['final_quality'] = 'failed'
elif len(state['errors']) != 0:
    _meta['final_quality'] = 'low'
elif state.get('validation_result', {}).get('is_valid', False):
    _meta['final_quality'] = 'high'
else:
    _meta['final_quality'] = 'medium'

# Workflow Summary
_rule = "=" * 60
print("🎯 WORKFLOW SUMMARY")
print(_rule)

_workflow_info = {
    "finding_id": state['finding'].get('rule_id', 'unknown'),
    "finding_title": state['finding'].get('title', 'unknown'),
    "severity": state['finding'].get('severity', 'unknown'),
    "workflow_quality": _meta['final_quality'],
    "total_errors": _meta['total_errors'],
}
_extracted_components = {
    field: state[field]
    for field in ('action_type', 'target', 'parameters', 'task_name')
}
_workflow_progress = {
    "steps_completed": _meta['steps_completed_count'],
    "total_steps": 7,
    "completed_steps": _meta['steps_completed'],
    "start_time": _meta['workflow_start'],
    "end_time": _meta['workflow_complete'],
}
_outputs = {
    "has_final_playbook": bool(state.get('final_playbook')),
    "has_annotated_playbook": bool(state.get('annotated_playbook')),
    "validation_passed": state.get('validation_result', {}).get('is_valid', False),
    # Length of whichever playbook will be reported as the final output.
    "playbook_length": len(state.get('annotated_playbook') or state.get('final_playbook', '')),
}
summary_data = {
    "workflow_info": _workflow_info,
    "extracted_components": _extracted_components,
    "workflow_progress": _workflow_progress,
    "outputs": _outputs,
}

display(Markdown("### 📊 Workflow Summary:"))
display(JSON(summary_data, expanded=True))

# Show validation details if available
validation = state.get('validation_result', {})
if validation:
    print("\n📋 Validation Details:")
    print(f"   ✅ Valid: {validation.get('is_valid', False)}")
    for label, field in (
        ("🔍 Issues", 'issues_found'),
        ("🔧 Fixes", 'fixes_applied'),
        ("💡 Suggestions", 'suggestions'),
    ):
        print(f"   {label}: {len(validation.get(field, []))}")

# Show errors if any
if state['errors']:
    print("\n❌ Errors Encountered:")
    for position, message in enumerate(state['errors'], 1):
        print(f"   {position}. {message}")

# Display final output: prefer the annotated playbook, then the plain one.
final_playbook = state.get('annotated_playbook') or state.get('final_playbook', 'No playbook generated')
print(f"\n{_rule}")
print("📋 FINAL OUTPUT:")
print(_rule)
print(final_playbook)
print(_rule)

In [None]:
# Save outputs to files
#
# Writes up to three files into ../playbooks/, all sharing a
# "manual_<rule_id>_<timestamp>" prefix: the playbook (.yml), the workflow
# state (_state.json) and a Markdown transparency report (_report.md).
# Each file is saved independently; a failure is printed and the cell continues.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
rule_id = state['finding'].get('rule_id') or 'unknown'
# The rule ID becomes part of the file names, so keep only filename-safe
# characters; this prevents path separators from leaking into the path.
safe_rule_id = re.sub(r'[^A-Za-z0-9._-]+', '_', str(rule_id))

# These metadata keys are written by the workflow summary cell; fall back to
# sensible values so saving still works if that cell was not run.
metadata = state['metadata']
workflow_complete = metadata.get('workflow_complete', datetime.now().isoformat())
final_quality = metadata.get('final_quality', 'unknown')
total_errors = metadata.get('total_errors', len(state['errors']))

# Create output directory
output_dir = Path('../playbooks')
output_dir.mkdir(parents=True, exist_ok=True)

# Save final playbook (annotated version preferred)
final_playbook = state.get('annotated_playbook') or state.get('final_playbook')
if final_playbook:
    playbook_file = output_dir / f"manual_{safe_rule_id}_{timestamp}.yml"
    try:
        # UTF-8 explicitly: the platform default encoding may not be able to
        # represent every character in the generated text.
        with open(playbook_file, 'w', encoding='utf-8') as f:
            f.write(final_playbook)
        print(f"💾 Saved playbook to: {playbook_file}")
    except Exception as e:
        print(f"❌ Failed to save playbook: {e}")

# Save workflow state and results
state_file = output_dir / f"manual_{safe_rule_id}_{timestamp}_state.json"
try:
    # Create a serializable version of the state
    serializable_state = {
        'metadata': state['metadata'],
        'extracted_components': {
            'action_type': state['action_type'],
            'target': state['target'],
            'parameters': state['parameters'],
            'task_name': state['task_name']
        },
        'validation_result': state.get('validation_result'),
        'errors': state['errors'],
        'step_results': state['step_results'],
        'finding_info': {
            'rule_id': state['finding'].get('rule_id'),
            'title': state['finding'].get('title'),
            'severity': state['finding'].get('severity')
        }
    }
    
    # Serialize fully before opening the file so a serialization error cannot
    # leave a truncated JSON file behind; default=str renders any value json
    # cannot encode natively instead of aborting the save.
    state_json = json.dumps(serializable_state, indent=2, default=str)
    with open(state_file, 'w', encoding='utf-8') as f:
        f.write(state_json)
    print(f"💾 Saved workflow state to: {state_file}")
except Exception as e:
    print(f"❌ Failed to save workflow state: {e}")

# Create a transparency report
report_file = output_dir / f"manual_{safe_rule_id}_{timestamp}_report.md"
try:
    # Build the whole report in memory, then write it in one go, so an error
    # while formatting does not leave a half-written report on disk.
    report_parts = [
        "# Manual Prompt Engineering Workflow Report\n\n",
        f"**Generated:** {workflow_complete}\n",
        f"**Rule ID:** {rule_id}\n",
        f"**Finding Title:** {state['finding'].get('title', 'unknown')}\n",
        f"**Severity:** {state['finding'].get('severity', 'unknown')}\n",
        f"**Final Quality:** {final_quality}\n",
        f"**Total Errors:** {total_errors}\n\n",
        "## Extracted Components\n\n",
        f"1. **Action Type:** {state['action_type']}\n",
        f"2. **Target:** {state['target']}\n",
        f"3. **Parameters:** {state['parameters']}\n",
        f"4. **Task Name:** {state['task_name']}\n\n",
        "## Workflow Steps\n\n",
    ]
    report_parts.extend(
        f"{i}. ✅ {step}\n"
        for i, step in enumerate(state['metadata']['steps_completed'], 1)
    )
    
    if state['errors']:
        report_parts.append("\n## Errors\n\n")
        report_parts.extend(
            f"{i}. {error}\n" for i, error in enumerate(state['errors'], 1)
        )
    
    validation = state.get('validation_result', {})
    if validation:
        # `or []` guards against list fields the model returned as null.
        report_parts.append("\n## Validation Results\n\n")
        report_parts.append(f"- **Valid:** {validation.get('is_valid', False)}\n")
        report_parts.append(f"- **Issues Found:** {len(validation.get('issues_found') or [])}\n")
        report_parts.append(f"- **Fixes Applied:** {len(validation.get('fixes_applied') or [])}\n")
        report_parts.append(f"- **Suggestions:** {len(validation.get('suggestions') or [])}\n")
    
    # UTF-8 is required here: the report contains the "✅" character.
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(''.join(report_parts))
    
    print(f"📋 Saved transparency report to: {report_file}")
except Exception as e:
    print(f"❌ Failed to save transparency report: {e}")

print(f"\n✅ Workflow completed with quality: {final_quality}")

## 🧪 Testing Different Findings

To run the workflow against a different finding, pick one from the list of available findings below.

In [None]:
# List available findings to test
if not all_findings:
    print("❌ No findings available for testing")
else:
    print("🧪 Available findings to test (showing first 10):\n")

    # Colour marker per severity; anything unrecognised gets the white circle.
    severity_icons = {"high": "🔴", "medium": "🟡", "low": "🟢"}
    for index, finding in enumerate(all_findings[:10]):
        severity = finding.get('severity', 'unknown')
        severity_icon = severity_icons.get(severity.lower(), "⚪")
        title = finding.get('title', 'No title')
        # Titles are cut at 70 characters, with an ellipsis when truncated.
        ellipsis = '...' if len(title) > 70 else ''
        print(f"{index:2d}: {severity_icon} {finding.get('rule_id', 'unknown')} ({severity})")
        print(f"     {title[:70]}{ellipsis}\n")

    # Instructions for switching to another finding.
    print("\n".join([
        "\n💡 To test a different finding:",
        "1. Choose an index from the list above",
        "2. Update the cell below with the desired index",
        "3. Re-run from the 'Select a test finding' cell onward",
        "\n🔄 To test a different finding, uncomment and modify:",
        "# selected_index = 3  # Change this to the desired finding index",
        "# test_finding = all_findings[selected_index]",
        "# print(f'Selected: {test_finding.get(\"rule_id\", \"unknown\")}')",
    ]))

# To switch findings, uncomment and edit the lines below:
# selected_index = 3  # Change this to the desired finding index
# test_finding = all_findings[selected_index]
# print(f'🎯 Selected new finding: {test_finding.get("rule_id", "unknown")}')
# # Then re-run from the "Initialize workflow state" cell

## 📝 Prompt Engineering Notes

Use this cell to document what worked and what didn't during your prompt engineering sessions

In [None]:
# Prompt Engineering Notes and Observations
# (fill in the bracketed placeholders after each session)
#
# Date: [Add date here]
# Model: [Add model info here]
# Finding tested: [Add finding info here]
#
# 🟢 What worked well:
# - JSON schema in prompts helps enforce structured output
# - Short, focused prompts work better with smaller models
# - Breaking down complex tasks into simple extractions
# - Retry logic handles intermittent JSON parsing issues
# - Template-based prompt formatting is consistent
#
# 🔴 What didn't work:
# - Long, complex prompts with multiple instructions confuse smaller models
# - Asking for too much output at once (>500 tokens) leads to truncation
# - Model sometimes returns explanatory text instead of pure JSON
# - Complex nested JSON structures are harder to extract reliably
#
# 🟡 Ideas to try:
# - Add more examples in prompts for edge cases
# - Test with different temperature settings
# - Try alternative JSON extraction patterns
# - Add validation of intermediate results between steps
# - Experiment with different prompt ordering/structure
#
# 📊 Current Results Summary:
# - Step 1 (Action): [Success rate / Notes]
# - Step 2 (Target): [Success rate / Notes]
# - Step 3 (Parameters): [Success rate / Notes]
# - Step 4 (Task Name): [Success rate / Notes]
# - Step 5 (Assemble): [Success rate / Notes]
# - Step 6 (Validate): [Success rate / Notes]
# - Step 7 (Annotate): [Success rate / Notes]
#
# 🔧 Prompt Modifications Made:
# - [List any changes made to prompt files]
#
# 🎯 Next Steps:
# - [List planned improvements or tests]

# Short usage reminders shown when the cell is run.
for _reminder in (
    "📝 Use this cell to document your prompt engineering observations",
    "📁 Edit prompts in the ../prompts/ directory and re-run cells to test changes",
    "🔄 You can reload prompts by re-running any step that calls load_prompt()",
):
    print(_reminder)

## 🔧 Debugging and Troubleshooting

Use these cells to inspect the current workflow state, confirm that the required files and prompts load, and test the LLM connection.

In [None]:
# Debug: Check current state
print("🔍 Current Workflow State Debug:")
print("=" * 40)

# What the workflow has produced so far.
_workflow_status = {
    "steps_completed": len(state['metadata']['steps_completed']),
    "has_finding": bool(state.get('finding')),
    "has_final_playbook": bool(state.get('final_playbook')),
    "has_annotated_playbook": bool(state.get('annotated_playbook')),
    "error_count": len(state['errors']),
}

# Whether the LLM client exists and which model it reports.
_llm_status = {
    "llm_initialized": llm is not None,
    "llm_model": getattr(llm, 'model_name', 'Not available') if llm else 'Not initialized',
}

# The findings variables may not exist yet, so look them up by name.
# NOTE: locals() must be called here at cell level, not inside a
# comprehension or function, or it would not see the notebook's variables.
_file_status = {
    "findings_loaded": len(all_findings) if 'all_findings' in locals() else 0,
    "test_finding_selected": bool(test_finding) if 'test_finding' in locals() else False,
}

debug_info = {
    "workflow_status": _workflow_status,
    "llm_status": _llm_status,
    "file_status": _file_status,
    # Up to the three most recent errors.
    "recent_errors": (state['errors'] or [])[-3:],
}

display(JSON(debug_info, expanded=True))

# Check if all required files exist: the LLM interface module plus one
# prompt file per workflow step.
required_files = ['../src/llm_interface.py'] + [
    f'../prompts/{prompt_stem}.yaml'
    for prompt_stem in (
        'extract_action',
        'extract_target',
        'extract_parameters',
        'generate_task_name',
        'assemble_playbook',
        'validate_and_fix_playbook',
        'annotate_playbook',
    )
]

print("\n📁 File Check:")
for file_path in required_files:
    print(f"   {'✅' if Path(file_path).exists() else '❌'} {file_path}")

In [None]:
# Debug: Test individual prompt loading
print("🔍 Testing Individual Prompt Loading:")
print("=" * 40)

# One prompt per workflow step, in execution order.
prompt_names = [
    'extract_action', 'extract_target', 'extract_parameters',
    'generate_task_name', 'assemble_playbook',
    'validate_and_fix_playbook', 'annotate_playbook',
]

for prompt_name in prompt_names:
    try:
        prompt_data = load_prompt(prompt_name)
        has_template = 'template' in prompt_data
        template_length = len(prompt_data.get('template', ''))
    except Exception as e:
        # Report the failure and carry on with the remaining prompts.
        print(f"   ❌ {prompt_name}: Error - {e}")
    else:
        # A prompt that loads but has no 'template' key gets a warning marker.
        print(f"   {'✅' if has_template else '⚠️'} {prompt_name}: {template_length} characters")

In [None]:
# Debug: Test LLM connection with simple prompt
if llm is not None:
    print("🔍 Testing LLM Connection:")
    print("=" * 30)
    
    simple_test_prompt = 'Respond with only this JSON: {"test": "success", "status": "working"}'
    
    try:
        test_result = await llm_call_with_json(simple_test_prompt, ['test', 'status'], max_tokens=50)
        print(f"✅ LLM Test Result: {test_result}")
        
        if test_result.get('test') == 'success':
            print("✅ LLM is responding correctly")
        else:
            print("⚠️ LLM response may not be reliable")
            
    except Exception as e:
        print(f"❌ LLM test failed: {e}")
else:
    print("❌ LLM not initialized - cannot test connection")