# Step 3: Batch Process LLM Findings

This notebook processes findings that require LLM classification and processing using the optimization strategy.

**Input:** 
- LLM needed findings JSON file (from Step 2)

**Output:**
- Ansible playbooks for successfully processed findings
- JSON file with findings requiring human review
- Processing summary and statistics

**Features:**
- Configurable batch size limiter for testing
- Batch processing of all test findings at once
- Comprehensive results inspection

In [1]:
# Import required libraries
import sys
import os
import json
import asyncio
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any

# Add src to path
sys.path.insert(0, '../src')

# Import our modules
from llm_interface import LLMInterface
from ansible_playbook_generator import DeterministicPlaybookGenerator
from shared.prompt_utils import (
    load_prompt, format_prompt, llm_call_with_json, 
    display_prompt, display_result, clean_playbook_response
)

print("📦 Libraries imported successfully")
print(f"🐍 Python version: {sys.version.split()[0]}")
print(f"📁 Current working directory: {os.getcwd()}")

📦 Libraries imported successfully
🐍 Python version: 3.11.12
📁 Current working directory: /Users/wjackson/Developer/AI-Building-Blocks/ansible_playbook_from_stig/notebooks


In [2]:
# Configuration - Update these from Step 2 output
RUN_TIMESTAMP = "20250714_120000"  # Update from Step 2
LLM_NEEDED_FILE = "../playbooks/20250714_120000/llm_needed/llm_needed_findings_20250714_120000.json"  # Update from Step 2

# Processing configuration
MAX_FINDINGS_TO_PROCESS = 5  # 🚀 Start with 5 for testing, increase later
BATCH_SIZE_CLASSIFICATION = 10  # How many findings to classify at once
BATCH_SIZE_PROCESSING = 5   # How many findings to process at once

# Output configuration
PLAYBOOKS_BASE_DIR = "../playbooks"
PLAYBOOKS_RUN_DIR = f"{PLAYBOOKS_BASE_DIR}/{RUN_TIMESTAMP}"
LLM_OUTPUT_DIR = f"{PLAYBOOKS_RUN_DIR}/llm_processed"
HUMAN_REVIEW_DIR = f"{PLAYBOOKS_RUN_DIR}/human_review"

print(f"🕐 Processing run: {RUN_TIMESTAMP}")
print(f"📂 Input file: {LLM_NEEDED_FILE}")
print(f"📊 Processing limits:")
print(f"   Max findings to process: {MAX_FINDINGS_TO_PROCESS}")
print(f"   Classification batch size: {BATCH_SIZE_CLASSIFICATION}")
print(f"   Processing batch size: {BATCH_SIZE_PROCESSING}")
print(f"📁 Output directories:")
print(f"   LLM processed: {LLM_OUTPUT_DIR}")
print(f"   Human review: {HUMAN_REVIEW_DIR}")

# Verify input file exists
if Path(LLM_NEEDED_FILE).exists():
    print(f"✅ Found LLM needed file: {LLM_NEEDED_FILE}")
else:
    print(f"❌ Missing LLM needed file: {LLM_NEEDED_FILE}")
    print("Please update LLM_NEEDED_FILE path from Step 2 output")

🕐 Processing run: 20250714_120000
📂 Input file: ../playbooks/20250714_120000/llm_needed/llm_needed_findings_20250714_120000.json
📊 Processing limits:
   Max findings to process: 5
   Classification batch size: 10
   Processing batch size: 5
📁 Output directories:
   LLM processed: ../playbooks/20250714_120000/llm_processed
   Human review: ../playbooks/20250714_120000/human_review
✅ Found LLM needed file: ../playbooks/20250714_120000/llm_needed/llm_needed_findings_20250714_120000.json


In [3]:
# Create output directory structure
llm_output_dir = Path(LLM_OUTPUT_DIR)
human_review_dir = Path(HUMAN_REVIEW_DIR)

llm_output_dir.mkdir(parents=True, exist_ok=True)
human_review_dir.mkdir(parents=True, exist_ok=True)

print(f"📁 Created directory structure:")
print(f"   LLM processed: {llm_output_dir}")
print(f"   Human review: {human_review_dir}")

📁 Created directory structure:
   LLM processed: ../playbooks/20250714_120000/llm_processed
   Human review: ../playbooks/20250714_120000/human_review


In [4]:
# Initialize LLM interface
print("🤖 Initializing LLM interface...")

try:
    llm = LLMInterface()
    print(f"✅ LLM initialized successfully")
    print(f"   Model: {llm.model_name}")
    print(f"   API URL: {llm.api_url}")
    
except Exception as e:
    print(f"❌ Failed to initialize LLM: {e}")
    print("Please check your .env file and API configuration")
    llm = None

🤖 Initializing LLM interface...
🤖 LLM Interface initialized
   Model: granite-3-3-8b-instruct
   URL: https://granite-3-3-8b-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1/completions
✅ LLM initialized successfully
   Model: granite-3-3-8b-instruct
   API URL: https://granite-3-3-8b-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1/completions


In [5]:
# Load LLM needed findings
print("📊 Loading LLM needed findings...")

with open(LLM_NEEDED_FILE, 'r') as f:
    llm_needed_data = json.load(f)

all_llm_findings = llm_needed_data.get('findings', [])
metadata = llm_needed_data.get('metadata', {})

print(f"📈 LLM needed findings loaded: {len(all_llm_findings)}")
print(f"📄 Source: {metadata.get('source', 'Unknown')}")

# Apply processing limit for testing
if MAX_FINDINGS_TO_PROCESS > 0 and len(all_llm_findings) > MAX_FINDINGS_TO_PROCESS:
    llm_findings = all_llm_findings[:MAX_FINDINGS_TO_PROCESS]
    print(f"🚀 Processing limited to first {MAX_FINDINGS_TO_PROCESS} findings for testing")
else:
    llm_findings = all_llm_findings
    print(f"🚀 Processing all {len(llm_findings)} findings")

# Show severity distribution
severity_counts = {}
for finding in llm_findings:
    severity = finding.get('severity', 'unknown')
    severity_counts[severity] = severity_counts.get(severity, 0) + 1

print(f"📈 Processing severity distribution: {severity_counts}")

📊 Loading LLM needed findings...
📈 LLM needed findings loaded: 1094
📄 Source: deterministic_processing_step
🚀 Processing limited to first 5 findings for testing
📈 Processing severity distribution: {'medium': 2, 'high': 3}


In [6]:
# Show sample findings for inspection
if llm_findings:
    print("🔍 Sample LLM Findings (first 2):")
    for i, finding in enumerate(llm_findings[:2]):
        print(f"\n📋 Finding {i+1}:")
        print(f"   Rule ID: {finding.get('rule_id', 'Unknown')}")
        print(f"   Severity: {finding.get('severity', 'Unknown')}")
        print(f"   Title: {finding.get('title', 'Unknown')[:80]}...")
        print(f"   Description: {finding.get('description', 'Unknown')[:100]}...")
        print(f"   Fix text length: {len(finding.get('fix_text', ''))} chars")
        
        # Show first 200 chars of fix_text for inspection
        fix_text = finding.get('fix_text', '')
        if fix_text:
            print(f"   Fix text preview: {fix_text[:200]}...")

🔍 Sample LLM Findings (first 2):

📋 Finding 1:
   Rule ID: xccdf_org.ssgproject.content_rule_prefer_64bit_os
   Severity: medium
   Title: Prefer to use a 64-bit Operating System when supported...
   Description: Prefer installation of 64-bit operating systems when the CPU supports it. Prefer installation of 64-...
   Fix text length: 0 chars

📋 Finding 2:
   Rule ID: xccdf_org.ssgproject.content_rule_disable_prelink
   Severity: medium
   Title: Disable Prelinking...
   Description: The prelinking feature changes binaries in an attempt to decrease their startup time. In order to di...
   Fix text length: 1025 chars
   Fix text preview: # prelink not installed if test -e /etc/sysconfig/prelink -o -e /usr/sbin/prelink; then if grep -q ^PRELINKING /etc/sysconfig/prelink then sed -i 's/^PRELINKING[:blank:]*=[:blank:]*[:alpha:]*/PRELINKI...


## Phase 1: Classification

Classify findings into complexity categories using the optimization strategy.

In [7]:
# Step 1: Classify findings using prompt_1_classification
print("🎯 Starting Phase 1: Classification")
print("=" * 40)

if not llm:
    print("❌ Cannot proceed - LLM not initialized")
else:
    # Load classification prompt
    classification_prompt = load_prompt('prompt_1_classification')
    print(f"📄 Loaded classification prompt: {classification_prompt['name']}")
    print(f"🌡️ Temperature: {classification_prompt['parameters']['temperature']}")
    print(f"🎯 Max tokens: {classification_prompt['parameters']['max_tokens']}")
    
    # Classification results storage
    classification_results = []
    classification_errors = []
    
    print(f"\n🔄 Classifying {len(llm_findings)} findings...")

🎯 Starting Phase 1: Classification
📄 Loaded prompt: STIG Finding Complexity Classification
   Temperature: 0.0
   Max tokens: 2000
📄 Loaded classification prompt: STIG Finding Complexity Classification
🌡️ Temperature: 0.0
🎯 Max tokens: 2000

🔄 Classifying 5 findings...


In [8]:
# Classify all test findings in batch

if llm and llm_findings:
    print(f"🚀 Batch classifying {len(llm_findings)} findings...\n")
    
    for finding_index, finding in enumerate(llm_findings):
        print(f"\n{'='*60}")
        print(f"🔍 Classifying Finding {finding_index + 1}/{len(llm_findings)}")
        print(f"📋 Rule ID: {finding.get('rule_id', 'Unknown')}")
        print(f"📝 Title: {finding.get('title', 'Unknown')[:80]}...")
        
        # Format the classification prompt
        formatted_prompt = format_prompt(
            classification_prompt,
            rule_id=finding.get('rule_id', ''),
            title=finding.get('title', ''),
            description=finding.get('description', ''),
            fix_text=finding.get('fix_text', '')[:2000]  # Limit fix_text length
        )
        
        # Make the LLM call
        try:
            result = await llm_call_with_json(
                llm, 
                formatted_prompt, 
                ['category'], 
                max_retries=3,
                prompt_params=classification_prompt['parameters']
            )
            
            # Store the result
            classification_result = {
                'finding_index': finding_index,
                'rule_id': finding.get('rule_id', ''),
                'classification': result.get('category', 'UNKNOWN'),
                'finding': finding,
                'classification_successful': result.get('category') not in ['extraction_failed', 'llm_not_available', None]
            }
            
            classification_results.append(classification_result)
            
            print(f"✅ Classification: {result.get('category', 'UNKNOWN')}")
            
        except Exception as e:
            print(f"❌ Classification error: {e}")
            classification_errors.append({
                'finding_index': finding_index,
                'rule_id': finding.get('rule_id', ''),
                'error': str(e),
                'finding': finding
            })
    
    print(f"\n{'='*60}")
    print(f"✅ Classification complete!")
    print(f"   Successful: {len(classification_results)}")
    print(f"   Errors: {len(classification_errors)}")
else:
    print(f"⚠️ No findings to classify or LLM not available")

🚀 Batch classifying 5 findings...


🔍 Classifying Finding 1/5
📋 Rule ID: xccdf_org.ssgproject.content_rule_prefer_64bit_os
📝 Title: Prefer to use a 64-bit Operating System when supported...
❌ Error formatting prompt: '"category"'
🎯 Using max_tokens: 2000
🔄 LLM call attempt 1/3
📝 Raw response length: 1070 characters
📝 Raw response preview: ")

def format_prompt(category, prompt):
    """
    Formats a given prompt into a markdown-compatible string with the specified category.

    Args:
        category (str): The category of the prompt...
⚠️ No valid JSON found in response
⚠️ No valid JSON found in attempt 1
🔄 LLM call attempt 2/3
📝 Raw response length: 101 characters
📝 Raw response preview: and their corresponding values. Response must be proper formatted.

{
  "category": "Uncategorized"
}
✅ Extracted JSON with 1 keys
✅ Valid JSON extracted with all expected keys
✅ Classification: Uncategorized

🔍 Classifying Finding 2/5
📋 Rule ID: xccdf_org.ssgproject.content_rule_disable_prelink
📝 

In [9]:
# Classification summary and analysis
if classification_results:
    print("📊 CLASSIFICATION SUMMARY")
    print("=" * 30)
    
    # Count classifications by category
    category_counts = {}
    successful_classifications = 0
    
    for result in classification_results:
        category = result['classification']
        category_counts[category] = category_counts.get(category, 0) + 1
        
        if result['classification_successful']:
            successful_classifications += 1
    
    print(f"Total classifications attempted: {len(classification_results)}")
    print(f"Successful classifications: {successful_classifications}")
    print(f"Failed classifications: {len(classification_results) - successful_classifications}")
    print(f"Classification errors: {len(classification_errors)}")
    
    print(f"\n📈 Categories identified:")
    for category, count in sorted(category_counts.items()):
        print(f"   {category}: {count}")
    
    # Show all classifications
    print(f"\n🔍 All classifications:")
    for i, result in enumerate(classification_results):
        print(f"   {i+1}. {result['rule_id'][:40]}... → {result['classification']}")
        
    if len(classification_results) >= len(llm_findings):
        print(f"\n✅ Ready for Phase 2: Processing by category")
    else:
        remaining = len(llm_findings) - len(classification_results)
        print(f"\n⏳ {remaining} findings still need classification")
else:
    print("⚠️ No classification results available")

📊 CLASSIFICATION SUMMARY
Total classifications attempted: 5
Successful classifications: 5
Failed classifications: 0
Classification errors: 0

📈 Categories identified:
   Computers: 1
   Programming: 1
   Uncategorized: 2
   programming: 1

🔍 All classifications:
   1. xccdf_org.ssgproject.content_rule_prefer... → Uncategorized
   2. xccdf_org.ssgproject.content_rule_disabl... → Programming
   3. xccdf_org.ssgproject.content_rule_rpm_ve... → programming
   4. xccdf_org.ssgproject.content_rule_rpm_ve... → Computers
   5. xccdf_org.ssgproject.content_rule_rpm_ve... → Uncategorized

✅ Ready for Phase 2: Processing by category


## Phase 2: Processing by Category

Process classified findings using category-specific prompts.

In [10]:
# Phase 2: Process findings by category
print("🎯 Starting Phase 2: Processing by Category")
print("=" * 45)

if not classification_results:
    print("❌ No classification results available - run Phase 1 first")
elif not llm:
    print("❌ LLM not initialized")
else:
    # Group successful classifications by category
    successful_classifications = [r for r in classification_results if r['classification_successful']]
    
    categories_to_process = {}
    for result in successful_classifications:
        category = result['classification']
        if category not in categories_to_process:
            categories_to_process[category] = []
        categories_to_process[category].append(result)
    
    print(f"📊 Categories to process: {len(categories_to_process)}")
    for category, findings in categories_to_process.items():
        print(f"   {category}: {len(findings)} findings")
    
    # Mapping of categories to prompt files
    category_prompts = {
        'SHELL_SCRIPT': 'prompt_2_shell_script',
        'PACKAGE_VERIFICATION': 'prompt_3_package_verification',
        'CONFIG_MODIFICATION': 'prompt_4_config_modification',
        'BOOT_CONFIGURATION': 'prompt_5_boot_configuration',
        'MULTI_STEP_PROCESS': 'prompt_6_multi_step_process',
        'CRON_SCHEDULING': 'prompt_7_cron_scheduling',
        'UNKNOWN': 'prompt_8_fallback'
    }
    
    # Processing results storage
    processing_results = []
    processing_errors = []

🎯 Starting Phase 2: Processing by Category
📊 Categories to process: 4
   Uncategorized: 2 findings
   Programming: 1 findings
   programming: 1 findings
   Computers: 1 findings


In [11]:
# Process all findings in all categories

if categories_to_process and llm:
    print(f"🚀 Batch processing all findings...\n")
    
    for category, category_findings in categories_to_process.items():
        print(f"\n{'='*60}")
        print(f"🔄 Processing category: {category}")
        print(f"📊 {len(category_findings)} findings in this category")
        
        # Get the prompt for this category
        prompt_name = category_prompts.get(category, 'prompt_8_fallback')
        processing_prompt = load_prompt(prompt_name)
        
        print(f"📄 Using prompt: {processing_prompt['name']}")
        print(f"🌡️ Temperature: {processing_prompt['parameters']['temperature']}")
        print(f"🎯 Max tokens: {processing_prompt['parameters']['max_tokens']}")
        
        # Process each finding in this category
        for idx, result_to_process in enumerate(category_findings):
            finding = result_to_process['finding']
            
            print(f"\n  📋 Processing Finding {idx + 1}/{len(category_findings)}:")
            print(f"     Rule ID: {finding.get('rule_id', 'Unknown')}")
            print(f"     Title: {finding.get('title', 'Unknown')[:60]}...")
            
            # Format the processing prompt
            formatted_prompt = format_prompt(
                processing_prompt,
                rule_id=finding.get('rule_id', ''),
                title=finding.get('title', ''),
                description=finding.get('description', ''),
                fix_text=finding.get('fix_text', '')[:3000]  # Limit for processing
            )
            
            try:
                # Expected keys vary by category but commonly include these
                expected_keys = ['target_type', 'target_name', 'ansible_module', 'ansible_params']
                
                # Make the LLM call
                processing_result = await llm_call_with_json(
                    llm,
                    formatted_prompt,
                    expected_keys,
                    max_retries=3,
                    prompt_params=processing_prompt['parameters']
                )
                
                # Store the result
                complete_result = {
                    'finding_index': result_to_process['finding_index'],
                    'rule_id': finding.get('rule_id', ''),
                    'classification': result_to_process['classification'],
                    'category': category,
                    'processing_result': processing_result,
                    'finding': finding,
                    'processing_successful': all(key in processing_result for key in ['target_type', 'ansible_module']),
                    'processed_at': datetime.now().isoformat()
                }
                
                processing_results.append(complete_result)
                
                success_status = "✅" if complete_result['processing_successful'] else "⚠️"
                print(f"     {success_status} Target: {processing_result.get('target_type', 'unknown')}")
                print(f"     Module: {processing_result.get('ansible_module', 'unknown')}")
                
            except Exception as e:
                print(f"     ❌ Processing error: {e}")
                processing_errors.append({
                    'finding_index': result_to_process['finding_index'],
                    'rule_id': finding.get('rule_id', ''),
                    'classification': result_to_process['classification'],
                    'category': category,
                    'error': str(e),
                    'finding': finding,
                    'error_at': datetime.now().isoformat()
                })
    
    print(f"\n{'='*60}")
    print(f"✅ Processing complete!")
    print(f"   Successful: {len(processing_results)}")
    print(f"   Errors: {len(processing_errors)}")
else:
    print(f"⚠️ No categories to process or LLM not available")

🚀 Batch processing all findings...


🔄 Processing category: Uncategorized
📊 2 findings in this category
📄 Loaded prompt: Fallback STIG Remediation
   Temperature: 0.2
   Max tokens: 5000
📄 Using prompt: Fallback STIG Remediation
🌡️ Temperature: 0.2
🎯 Max tokens: 5000

  📋 Processing Finding 1/2:
     Rule ID: xccdf_org.ssgproject.content_rule_prefer_64bit_os
     Title: Prefer to use a 64-bit Operating System when supported...
🎯 Using max_tokens: 5000
🔄 LLM call attempt 1/3
📝 Raw response length: 1105 characters
📝 Raw response preview: ```json
{
  "intent": "Prefer to use a 64-bit Operating System when supported",
  "ansible_module": "command",
  "parameters": {
    "cmd": "uname -m"
  },
  "manual_review_required": true,
  "complex...
✅ Extracted JSON with 5 keys
⚠️ JSON missing keys: ['target_type', 'target_name', 'ansible_params']
     ✅ Target: unknown
     Module: command

  📋 Processing Finding 2/2:
     Rule ID: xccdf_org.ssgproject.content_rule_rpm_verify_permissions
     Title

In [13]:
# Processing summary and results analysis
print("📊 PROCESSING SUMMARY")
print("=" * 25)

print(f"Phase 1 - Classification:")
print(f"   Successful: {len(classification_results)}")
print(f"   Errors: {len(classification_errors)}")

print(f"\nPhase 2 - Processing:")
print(f"   Successful: {len(processing_results)}")
print(f"   Errors: {len(processing_errors)}")

if processing_results:
    successful_processing = [r for r in processing_results if r['processing_successful']]
    partial_processing = [r for r in processing_results if not r['processing_successful']]
    
    print(f"   Fully successful: {len(successful_processing)}")
    print(f"   Partial results: {len(partial_processing)}")
    
    # Show processing by category
    category_success = {}
    for result in processing_results:
        category = result['category']
        if category not in category_success:
            category_success[category] = {'total': 0, 'successful': 0}
        category_success[category]['total'] += 1
        if result['processing_successful']:
            category_success[category]['successful'] += 1
    
    print(f"\n📈 Success by category:")
    for category, stats in category_success.items():
        success_rate = stats['successful'] / stats['total'] * 100
        print(f"   {category}: {stats['successful']}/{stats['total']} ({success_rate:.1f}%)")
    
    # Show all processing results
    print(f"\n🔍 All processing results:")
    for i, result in enumerate(processing_results):
        processing_data = result['processing_result']
        status = "✅" if result['processing_successful'] else "⚠️"
        print(f"   {i+1}. {status} {result['rule_id'][:30]}...")
        print(f"      Category: {result['category']}")
        print(f"      Target: {processing_data.get('target_type', 'Unknown')}")
        print(f"      Module: {processing_data.get('ansible_module', 'Unknown')}")

total_processed = len(processing_results) + len(processing_errors)
remaining = len(llm_findings) - total_processed

if remaining > 0:
    print(f"\n⏳ {remaining} findings still need processing")
else:
    print(f"\n✅ All {len(llm_findings)} findings processed!")
    print(f"🎯 Ready for playbook generation")

📊 PROCESSING SUMMARY
Phase 1 - Classification:
   Successful: 5
   Errors: 0

Phase 2 - Processing:
   Successful: 5
   Errors: 0
   Fully successful: 5
   Partial results: 0

📈 Success by category:
   Uncategorized: 2/2 (100.0%)
   Programming: 1/1 (100.0%)
   programming: 1/1 (100.0%)
   Computers: 1/1 (100.0%)

🔍 All processing results:
   1. ✅ xccdf_org.ssgproject.content_r...
      Category: Uncategorized
      Target: unknown
      Module: command
   2. ✅ xccdf_org.ssgproject.content_r...
      Category: Uncategorized
      Target: unknown
      Module: unknown
   3. ✅ xccdf_org.ssgproject.content_r...
      Category: Programming
      Target: unknown
      Module: unknown
   4. ✅ xccdf_org.ssgproject.content_r...
      Category: programming
      Target: unknown
      Module: unknown
   5. ✅ xccdf_org.ssgproject.content_r...
      Category: Computers
      Target: unknown
      Module: unknown

✅ All 5 findings processed!
🎯 Ready for playbook generation


## Generate Ansible Playbooks and Save Results

In [14]:
# Generate Ansible playbooks from successful processing results
print("🚀 Generating Ansible Playbooks")
print("=" * 30)

successful_processing = [r for r in processing_results if r['processing_successful']]
partial_processing = [r for r in processing_results if not r['processing_successful']]

playbooks_generated = 0
human_review_needed = []

if successful_processing:
    print(f"📊 Generating playbooks for {len(successful_processing)} successful results...")
    
    # Convert processing results to target format for deterministic generator
    llm_targets = []
    
    for result in successful_processing:
        processing_data = result['processing_result']
        finding = result['finding']
        
        # Fix ansible_params if it's a string
        ansible_params = processing_data.get('ansible_params', {})
        if isinstance(ansible_params, str):
            # Try to parse as JSON
            try:
                ansible_params = json.loads(ansible_params)
            except:
                # If parsing fails, create a simple dict
                ansible_params = {'value': ansible_params}
        
        # Create target in the format expected by deterministic generator
        target = {
            'rule_id': result['rule_id'],
            'severity': finding.get('severity', 'medium'),
            'status': finding.get('status', 'fail'),
            'title': finding.get('title', ''),
            'target_type': processing_data.get('target_type', 'unknown'),
            'target_name': processing_data.get('target_name', ''),
            'action_context': processing_data.get('action_context', ''),
            'ansible_module': processing_data.get('ansible_module', 'debug'),
            'ansible_params': ansible_params,
            'compliance': {
                'cci_refs': finding.get('compliance', {}).get('cci_refs', []),
                'nist_refs': finding.get('compliance', {}).get('nist_refs', []),
                'cis_refs': finding.get('compliance', {}).get('cis_refs', [])
            }
        }
        
        llm_targets.append(target)
    
    # Save LLM targets file
    llm_targets_file = llm_output_dir / f"llm_generated_targets_{RUN_TIMESTAMP}.json"
    llm_targets_data = {
        'metadata': {
            'total_actionable': len(llm_targets),
            'extraction_date': datetime.now().isoformat(),
            'source': 'llm_batch_processing',
            'run_timestamp': RUN_TIMESTAMP,
            'processing_summary': {
                'classified': len(classification_results),
                'processed': len(processing_results),
                'successful': len(successful_processing)
            }
        },
        'targets': llm_targets
    }
    
    with open(llm_targets_file, 'w') as f:
        json.dump(llm_targets_data, f, indent=2)
    
    print(f"💾 Saved LLM targets to: {llm_targets_file}")
    
    # Generate playbook using deterministic generator
    try:
        generator = DeterministicPlaybookGenerator()
        playbook_file = llm_output_dir / f"llm_generated_playbook_{RUN_TIMESTAMP}.yml"
        
        playbook = generator.generate_playbook_from_targets(
            str(llm_targets_file),
            str(playbook_file)
        )
        
        playbooks_generated = 1
        print(f"✅ Generated LLM playbook: {playbook_file}")
        
        # Show playbook stats
        total_tasks = sum(len(play.get('tasks', [])) for play in playbook)
        print(f"📊 LLM Playbook stats: {total_tasks} tasks")
        
    except Exception as e:
        print(f"❌ Error generating LLM playbook: {e}")
        import traceback
        traceback.print_exc()
else:
    print(f"⚠️ No successful processing results to convert to playbooks")

# Collect items needing human review
human_review_needed.extend(partial_processing)
human_review_needed.extend(processing_errors)
human_review_needed.extend(classification_errors)

print(f"\n📊 Generation Summary:")
print(f"   Playbooks generated: {playbooks_generated}")
print(f"   Items for human review: {len(human_review_needed)}")

🚀 Generating Ansible Playbooks
📊 Generating playbooks for 5 successful results...
💾 Saved LLM targets to: ../playbooks/20250714_120000/llm_processed/llm_generated_targets_20250714_120000.json
✅ Generated playbook with 5 tasks: ../playbooks/20250714_120000/llm_processed/llm_generated_playbook_20250714_120000.yml
✅ Generated LLM playbook: ../playbooks/20250714_120000/llm_processed/llm_generated_playbook_20250714_120000.yml
📊 LLM Playbook stats: 7 tasks

📊 Generation Summary:
   Playbooks generated: 1
   Items for human review: 0


In [15]:
# Save items requiring human review
if human_review_needed:
    print(f"💾 Saving {len(human_review_needed)} items for human review...")
    
    human_review_file = human_review_dir / f"human_review_needed_{RUN_TIMESTAMP}.json"
    
    human_review_data = {
        'metadata': {
            'total_items': len(human_review_needed),
            'created_date': datetime.now().isoformat(),
            'source': 'llm_batch_processing_failures',
            'run_timestamp': RUN_TIMESTAMP,
            'description': 'Findings that could not be processed successfully and require human review'
        },
        'items': human_review_needed
    }
    
    with open(human_review_file, 'w') as f:
        json.dump(human_review_data, f, indent=2)
    
    print(f"💾 Saved human review items to: {human_review_file}")
    
    # Analyze human review items
    review_types = {}
    for item in human_review_needed:
        if 'error' in item:
            item_type = 'processing_error'
        elif 'processing_successful' in item and not item['processing_successful']:
            item_type = 'partial_result'
        else:
            item_type = 'classification_error'
        
        review_types[item_type] = review_types.get(item_type, 0) + 1
    
    print(f"📈 Human review breakdown: {review_types}")
else:
    print(f"✅ No items need human review - all processing successful!")
    human_review_file = None

✅ No items need human review - all processing successful!


In [None]:
# Create final processing summary
final_summary = {
    'run_timestamp': RUN_TIMESTAMP,
    'processing_date': datetime.now().isoformat(),
    'input_file': LLM_NEEDED_FILE,
    'processing_limits': {
        'max_findings_processed': MAX_FINDINGS_TO_PROCESS,
        'actual_findings_processed': len(llm_findings),
        'total_findings_available': len(all_llm_findings)
    },
    'phase_1_classification': {
        'attempted': len(classification_results),
        'successful': len([r for r in classification_results if r['classification_successful']]),
        'errors': len(classification_errors),
        'categories_found': list(set([r['classification'] for r in classification_results if r['classification_successful']]))
    },
    'phase_2_processing': {
        'attempted': len(processing_results),
        'successful': len([r for r in processing_results if r['processing_successful']]),
        'partial': len([r for r in processing_results if not r['processing_successful']]),
        'errors': len(processing_errors)
    },
    'output_generation': {
        'playbooks_generated': playbooks_generated,
        'human_review_items': len(human_review_needed)
    },
    'output_files': {
        'llm_targets': str(llm_targets_file) if 'llm_targets_file' in locals() else None,
        'llm_playbook': str(playbook_file) if 'playbook_file' in locals() else None,
        'human_review': str(human_review_file) if human_review_needed else None
    },
    'success_rates': {
        'classification_rate': len([r for r in classification_results if r['classification_successful']]) / len(classification_results) * 100 if classification_results else 0,
        'processing_rate': len([r for r in processing_results if r['processing_successful']]) / len(processing_results) * 100 if processing_results else 0,
        'overall_success_rate': len([r for r in processing_results if r['processing_successful']]) / len(llm_findings) * 100 if llm_findings else 0
    }
}

# Save final summary
summary_file = Path(PLAYBOOKS_RUN_DIR) / f"llm_processing_summary_{RUN_TIMESTAMP}.json"
with open(summary_file, 'w') as f:
    json.dump(final_summary, f, indent=2)

print(f"💾 Saved final summary to: {summary_file}")

In [16]:
# Final summary and next steps
print("🎯 LLM BATCH PROCESSING COMPLETE")
print("=" * 40)
print(f"Run timestamp: {RUN_TIMESTAMP}")
print(f"Processing date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

print(f"\n📊 FINAL STATISTICS:")
print(f"Input findings: {len(llm_findings)} (limited from {len(all_llm_findings)})")
print(f"Classification success: {final_summary['phase_1_classification']['successful']}/{final_summary['phase_1_classification']['attempted']} ({final_summary['success_rates']['classification_rate']:.1f}%)")
print(f"Processing success: {final_summary['phase_2_processing']['successful']}/{final_summary['phase_2_processing']['attempted']} ({final_summary['success_rates']['processing_rate']:.1f}%)")
print(f"Overall success rate: {final_summary['success_rates']['overall_success_rate']:.1f}%")

print(f"\n📁 OUTPUT FILES:")
if final_summary['output_files']['llm_playbook']:
    print(f"   ✅ LLM Generated Playbook: {final_summary['output_files']['llm_playbook']}")
if final_summary['output_files']['llm_targets']:
    print(f"   📊 LLM Targets: {final_summary['output_files']['llm_targets']}")
if final_summary['output_files']['human_review']:
    print(f"   👤 Human Review Needed: {final_summary['output_files']['human_review']}")
print(f"   📋 Processing Summary: {summary_file}")

print(f"\n🎯 NEXT STEPS:")
if final_summary['processing_limits']['actual_findings_processed'] < final_summary['processing_limits']['total_findings_available']:
    remaining = final_summary['processing_limits']['total_findings_available'] - final_summary['processing_limits']['actual_findings_processed']
    print(f"   🔄 Increase MAX_FINDINGS_TO_PROCESS to process {remaining} more findings")
    print(f"   📝 Current limit: {MAX_FINDINGS_TO_PROCESS}, Total available: {final_summary['processing_limits']['total_findings_available']}")

if final_summary['output_generation']['human_review_items'] > 0:
    print(f"   👤 Review {final_summary['output_generation']['human_review_items']} items requiring manual attention")
    print(f"   🔧 Consider prompt engineering improvements based on failure patterns")

if final_summary['output_generation']['playbooks_generated'] > 0:
    print(f"   ✅ Test generated playbooks in a safe environment")
    print(f"   🚀 Deploy playbooks to target systems")

print(f"\n📈 OPTIMIZATION INSIGHTS:")
if final_summary['success_rates']['overall_success_rate'] < 80:
    print(f"   ⚠️ Success rate below 80% - consider prompt improvements")
if final_summary['phase_1_classification']['successful'] < final_summary['phase_1_classification']['attempted']:
    print(f"   🎯 Classification issues - review prompt_1_classification")
if final_summary['phase_2_processing']['partial'] > 0:
    print(f"   🔧 {final_summary['phase_2_processing']['partial']} partial results - review category prompts")

print(f"\n🎉 LLM batch processing workflow complete!")
print(f"📁 All outputs saved to: {PLAYBOOKS_RUN_DIR}")

🎯 LLM BATCH PROCESSING COMPLETE
Run timestamp: 20250714_120000
Processing date: 2025-07-14 11:41:19

📊 FINAL STATISTICS:
Input findings: 5 (limited from 1094)


NameError: name 'final_summary' is not defined