In [None]:
# FHIR Inferno Test Generator
import re
import os
import logging
import time
import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from collections import defaultdict

import pandas as pd
from dotenv import load_dotenv
from anthropic import Anthropic, RateLimitError
import google.generativeai as gemini
from openai import OpenAI
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type

# Logging: INFO-level, timestamped records on the root logger, plus a
# module-scoped logger used by the functions in this notebook.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Paths: the project root is taken to be the parent of the current working
# directory; generated test kits are written beneath it.
PROJECT_ROOT = Path.cwd().parent
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'inferno_test_output')


In [None]:

# API configuration, keyed by the provider name used throughout this notebook.
# Generation settings that are identical for every provider; the low
# temperature is chosen for code generation.
_COMMON_GENERATION_SETTINGS = {
    "max_tokens": 8192,
    "temperature": 0.3,
    "batch_size": 5,
}

# NOTE: the Claude entry stores its model under "model_name", while the
# Gemini and GPT entries use "model".
API_CONFIGS = {
    "claude": {
        "model_name": "claude-3-5-sonnet-20241022",
        **_COMMON_GENERATION_SETTINGS,
        "delay_between_chunks": 1,
        "delay_between_batches": 3,
        "requests_per_minute": 900,
        "max_requests_per_day": 20000,
        "delay_between_requests": 0.1,
    },
    "gemini": {
        "model": "models/gemini-1.5-pro-001",
        **_COMMON_GENERATION_SETTINGS,
        "delay_between_chunks": 2,
        "delay_between_batches": 5,
        "requests_per_minute": 900,
        "max_requests_per_day": 50000,
        "delay_between_requests": 0.1,
        "timeout": 60,
    },
    "gpt": {
        "model": "gpt-4o",
        **_COMMON_GENERATION_SETTINGS,
        "delay_between_chunks": 2,
        "delay_between_batches": 5,
        "requests_per_minute": 450,
        "max_requests_per_day": 20000,
        "delay_between_requests": 0.15,
    },
}


In [None]:

# System prompt for test generation: frames the model as a FHIR testing
# engineer that converts test-plan specifications into Ruby tests for the
# Inferno framework.
INFERNO_TEST_SYSTEM_PROMPT = """You are a specialized FHIR testing engineer with expertise in healthcare interoperability.
Your task is to convert test specifications from a test plan into executable Ruby tests using the Inferno testing framework.
You will generate valid, working Ruby code that follows Inferno test patterns and best practices."""

# User-prompt template for generating one Inferno test. It is filled with
# str.format(); the placeholders are {test_specification}, {requirement_id},
# {module_name} and {inferno_guidance}.
INFERNO_TEST_GENERATION_PROMPT = """
Convert the following FHIR test specification into executable Ruby code using the Inferno testing framework.

Test Specification:
{test_specification}

Create an Inferno test implementation that:
1. Follows the Inferno structure (TestGroup containing one or more tests)
2. Implements the test logic described in the specification
3. Makes appropriate FHIR API calls
4. Includes proper assertions to validate the requirements
5. Handles both success and error cases appropriately

Naming conventions:
- Use underscored lowercase names for files (e.g., patient_read_test.rb)
- Use CamelCase for class names (e.g., PatientReadTest)
- Class names should end with 'Test' or 'Group'
- Give each test a unique ID that's descriptive and related to the requirement

The test should be comprehensive but focused on exactly what's described in the specification.
Include proper documentation in the code and follow Inferno best practices.

Requirement ID: {requirement_id}
Module Name: {module_name}

Guidance on Inferno test development:
{inferno_guidance}

Return only the Ruby code for the test implementation, with no additional explanation.
"""

# User-prompt template asking the model whether a specification can be
# implemented as an automated test. It is filled with str.format(); the only
# placeholder is {test_specification}. The model is asked to reply with a JSON
# object holding "testable", "reason", "resource_types", "operations" and
# "complexity".
TEST_FEASIBILITY_PROMPT = """
Analyze the following test specification from a FHIR Implementation Guide and determine if it's feasible to implement as an automated test.

Test Specification:
{test_specification}

Respond with a JSON object containing the following fields:
1. "testable": boolean (true if it can be implemented as an automated test, false otherwise)
2. "reason": brief explanation of your assessment
3. "resource_types": array of FHIR resource types involved in the test (e.g., ["Patient", "Observation"])
4. "operations": array of FHIR operations required (e.g., ["read", "search"])
5. "complexity": one of ["simple", "moderate", "complex"]

Return only valid JSON with no additional text.
"""


In [None]:

def create_rate_limiter():
    """Build fresh rate-limit bookkeeping for every configured API.

    Returns:
        Dict keyed by API name (the keys of ``API_CONFIGS``). Each value holds
        ``'requests'`` (an initially empty list), ``'daily_requests'``
        (starting at 0) and ``'last_reset'`` (``time.time()`` at creation).
    """
    limiter_state = {}
    for api_name in API_CONFIGS:
        limiter_state[api_name] = {
            'requests': [],
            'daily_requests': 0,
            'last_reset': time.time(),
        }
    return limiter_state

def check_rate_limits(rate_limiter: dict, api: str):
    """Block until a request to ``api`` is allowed, then record it.

    Three limits from ``API_CONFIGS[api]`` are enforced, in this order:
    ``max_requests_per_day`` (raises when exhausted), ``requests_per_minute``
    (sleeps until the oldest request leaves the 60-second window) and
    ``delay_between_requests`` (sleeps for whatever part of the minimum gap
    has not yet elapsed since the previous request).

    Args:
        rate_limiter: State dictionary in the shape built by
            ``create_rate_limiter``; the entry for ``api`` is updated in place.
        api: API name; must be a key of ``rate_limiter``.

    Raises:
        ValueError: If ``api`` has no entry in ``rate_limiter``.
        RuntimeError: If the daily request limit has been reached.
    """
    if api not in rate_limiter:
        raise ValueError(f"Unknown API: {api}")

    window_seconds = 60
    day_seconds = 24 * 60 * 60

    state = rate_limiter[api]
    config = API_CONFIGS[api]
    now = time.time()

    # Start a new daily quota once a full day has passed since the last reset.
    if now - state['last_reset'] >= day_seconds:
        state['daily_requests'] = 0
        state['last_reset'] = now

    if state['daily_requests'] >= config['max_requests_per_day']:
        raise RuntimeError(f"{api} daily request limit exceeded")

    # Keep only the requests that still fall inside the sliding window.
    recent = [sent_at for sent_at in state['requests'] if now - sent_at < window_seconds]

    if recent and len(recent) >= config['requests_per_minute']:
        wait = window_seconds - (now - recent[0])
        if wait > 0:
            time.sleep(wait)
            # The clock moved while sleeping; everything below must use the
            # real current time, otherwise the request is recorded too early
            # and the window undercounts.
            now = time.time()
        # The oldest request has now left the window; drop it and any others
        # that expired during the wait.
        recent = [sent_at for sent_at in recent[1:] if now - sent_at < window_seconds]

    # Enforce the minimum gap, sleeping only for the part not yet elapsed.
    if recent:
        remaining_gap = config['delay_between_requests'] - (now - recent[-1])
        if remaining_gap > 0:
            time.sleep(remaining_gap)
            now = time.time()

    # Record this request at the time it is actually released.
    recent.append(now)
    state['requests'] = recent
    state['daily_requests'] += 1


In [None]:

def setup_clients():
    """Create one client object per supported LLM provider.

    API keys are read from the environment variables ``ANTHROPIC_API_KEY``,
    ``GEMINI_API_KEY`` and ``OPENAI_API_KEY``. The Gemini model name and
    generation settings come from ``API_CONFIGS["gemini"]``.

    Returns:
        Dict mapping ``"claude"``, ``"gpt"`` and ``"gemini"`` to their clients.

    Raises:
        ValueError: If the Gemini or OpenAI key is missing or empty.
        Exception: Any error raised during setup is logged and re-raised.
    """
    try:
        # Anthropic: the key is handed over as-is (it may be None here).
        anthropic_client = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

        # Gemini: configure the library globally, then build the model handle.
        gemini_key = os.getenv('GEMINI_API_KEY')
        if not gemini_key:
            raise ValueError("GEMINI_API_KEY not found")
        gemini.configure(api_key=gemini_key)
        gemini_settings = API_CONFIGS["gemini"]
        gemini_model = gemini.GenerativeModel(
            model_name=gemini_settings["model"],
            generation_config={
                "max_output_tokens": gemini_settings["max_tokens"],
                "temperature": gemini_settings["temperature"],
            },
        )

        # OpenAI: fixed 60-second request timeout.
        openai_key = os.getenv('OPENAI_API_KEY')
        if not openai_key:
            raise ValueError("OPENAI_API_KEY not found")
        gpt_client = OpenAI(api_key=openai_key, timeout=60.0)

        clients = {}
        clients["claude"] = anthropic_client
        clients["gpt"] = gpt_client
        clients["gemini"] = gemini_model
        return clients

    except Exception as e:
        logging.error(f"Error setting up clients: {e}")
        raise


In [None]:

@retry(
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    retry=retry_if_exception_type((RateLimitError, TimeoutError))
)
def make_llm_request(client, api_type: str, prompt: str, system_prompt: str, rate_limit_func) -> str:
    """Send one prompt to the selected LLM and return the generated text.

    The call is retried (up to 5 attempts, exponential backoff between 4 and
    60 seconds) when the imported ``RateLimitError`` or the built-in
    ``TimeoutError`` is raised.
    NOTE(review): ``RateLimitError`` is imported from ``anthropic`` only; the
    rate-limit/timeout exceptions of the other SDKs are presumably different
    classes and would not trigger a retry - confirm.

    Args:
        client: Provider client matching ``api_type``.
        api_type: One of ``"claude"``, ``"gemini"`` or ``"gpt"``.
        prompt: User prompt.
        system_prompt: System-level instructions for the model.
        rate_limit_func: Zero-argument callable invoked before every attempt.

    Returns:
        The text of the model's reply.

    Raises:
        KeyError: If ``api_type`` has no entry in ``API_CONFIGS``.
        ValueError: If Gemini returns no usable text, or ``api_type`` is
            configured but not handled here.
        Exception: Any provider error is logged and re-raised.
    """
    rate_limit_func()

    config = API_CONFIGS[api_type]

    try:
        if api_type == "claude":
            response = client.messages.create(
                model=config["model_name"],
                max_tokens=config["max_tokens"],
                # The configured temperature was previously never sent, so
                # Claude ran at the API default instead.
                temperature=config["temperature"],
                system=system_prompt,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.content[0].text

        if api_type == "gemini":
            # generate_content() takes no separate system prompt here, so the
            # system instructions are prepended instead of being dropped.
            full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
            response = client.generate_content(
                full_prompt,
                generation_config={
                    "max_output_tokens": config["max_tokens"],
                    "temperature": config["temperature"],
                },
            )
            # `.text` is an accessor that raises ValueError when the response
            # carries no valid part; hasattr() cannot guard against that.
            try:
                return response.text
            except (AttributeError, ValueError):
                pass
            candidates = getattr(response, "candidates", None)
            if candidates and candidates[0].content.parts:
                return candidates[0].content.parts[0].text
            raise ValueError("No response generated from Gemini API")

        if api_type == "gpt":
            response = client.chat.completions.create(
                model=config["model"],
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=config["max_tokens"],
                temperature=config["temperature"],
            )
            return response.choices[0].message.content

        # Configured in API_CONFIGS but not handled above: fail loudly rather
        # than silently returning None.
        raise ValueError(f"Unsupported API type: {api_type}")

    except Exception as e:
        logging.error(f"Error in {api_type} API request: {str(e)}")
        raise


In [None]:

def parse_test_plan(file_path: str) -> List[Dict[str, Any]]:
    """
    Parse a consolidated test plan into a list of test specifications.

    The plan is split into sections at horizontal-rule lines (a line holding
    only three or more dashes). Sections without a requirement ID are skipped.

    Args:
        file_path: Path to the test plan markdown file (read as UTF-8)

    Returns:
        List of dictionaries, one per requirement, with the keys 'id'
        (upper-cased, e.g. 'REQ-01'), 'description', 'actor', 'conformance',
        'testability' ('Unknown' when absent), 'strategy', 'validation' and
        'full_spec' (the whole stripped section). Missing text fields are ''.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split on separator LINES only. A plain split('---') also cuts through
    # markdown table rows such as "|---|---|", truncating the section.
    sections = re.split(r'^[ \t]*-{3,}[ \t]*$', content, flags=re.MULTILINE)

    def extract(section, pattern, flags=re.DOTALL, default=""):
        """Return the stripped first capture group of pattern, or default."""
        match = re.search(pattern, section, flags)
        return match.group(1).strip() if match else default

    heading_flags = re.DOTALL | re.MULTILINE
    test_specs = []

    for section in sections:
        if not section.strip():
            continue

        # Requirement ID: either an HTML anchor (<a id='req-01'>, with or
        # without a self-closing slash, escaped or not) or a "### REQ-01:"
        # heading.
        id_match = (
            re.search(r"<a\s+id=['\"](req-\d+)['\"]\s*\\?/?>", section, re.IGNORECASE)
            or re.search(r'###\s*(REQ-\d+):', section, re.IGNORECASE)
        )
        if not id_match:
            continue

        test_specs.append({
            'id': id_match.group(1).upper(),
            # Each field ends at a blank line, at the next known label, or at
            # the end of the section (so a trailing field is not lost).
            'description': extract(
                section, r'\*\*Description\*\*:\s*(.*?)(?:\n\n|\*\*Actor\*\*|\Z)'),
            'actor': extract(
                section, r'\*\*Actor\*\*:\s*(.*?)(?:\n\n|\*\*Conformance\*\*|\Z)'),
            'conformance': extract(
                section, r'\*\*Conformance\*\*:\s*(.*?)(?:\n\n|## Test|\Z)'),
            'testability': extract(
                section,
                r'\*\*Testability Assessment:\*\*\s*(Automatic|Manual|Hybrid)',
                flags=re.DOTALL | re.IGNORECASE,
                default="Unknown",
            ),
            # A level-3 section runs until the next heading of level 1-3, so
            # its own "####" subsections stay part of the strategy text.
            'strategy': extract(
                section,
                r'### 3\. Test Implementation Strategy\s*(.*?)(?=^#{1,3} |\Z)',
                flags=heading_flags,
            ),
            # A level-4 subsection runs until the next heading of level 1-4.
            'validation': extract(
                section,
                r'#### 3\.\d+\. Validation Criteria\s*(.*?)(?=^#{1,4} |\Z)',
                flags=heading_flags,
            ),
            'full_spec': section.strip(),
        })

    return test_specs


In [None]:

def get_inferno_guidance() -> str:
    """
    Load the Inferno guidance document

    The document is looked up as 'inferno-guidance.md' directly under
    PROJECT_ROOT and read as UTF-8. When that file does not exist, a short
    built-in guidance text is returned instead.

    Returns:
        String containing Inferno test development guidance
    """
    guidance_path = os.path.join(PROJECT_ROOT, 'inferno-guidance.md')
    
    # Use a default guidance if file not found
    if not os.path.exists(guidance_path):
        return """# Inferno Test Development Guidance
        
        Inferno is a Ruby-based testing framework for FHIR implementations. Tests should follow the structure:
        
        ```ruby
        module YourTestKit
          class YourTestGroup < Inferno::TestGroup
            id :unique_id
            title 'Test Group Title'
            description 'Detailed description'
            
            test do
              id :test_unique_id
              title 'Test Title'
              description 'Test description'
              
              run do
                # Test implementation
                assert condition, 'Failure message'
              end
            end
          end
        end
        ```
        """
    
    # Explicit encoding: relying on the platform default can fail on
    # non-ASCII characters in the markdown file.
    with open(guidance_path, 'r', encoding='utf-8') as f:
        return f.read()


In [None]:

def _mentioned_fhir_resources(text: str, resource_names: List[str]) -> List[str]:
    """Return the names from resource_names that occur in text, in list order."""
    # Longest names first: the regex engine takes the first alternative that
    # matches, so "PractitionerRole" is not also reported as "Practitioner".
    by_length = sorted(resource_names, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(name) for name in by_length))
    found = set(pattern.findall(text))
    return [name for name in resource_names if name in found]


def _operation_keyword_pattern(keywords: List[str]):
    """Compile a whole-word pattern for keywords and their common inflections."""
    forms = []
    for keyword in keywords:
        forms.append(keyword + r"(?:s|es|d|ed|ing)?")
        if keyword.endswith("e"):
            forms.append(keyword[:-1] + "ing")       # create -> creating
        elif keyword.endswith("y"):
            forms.append(keyword[:-1] + r"ie[sd]")   # modify -> modifies/modified
    return re.compile(r"\b(?:" + "|".join(forms) + r")\b")


def _parse_feasibility_json(response) -> Optional[Dict[str, Any]]:
    """Extract the JSON object from an LLM reply; None if there is none."""
    text = (response or "").strip()
    # Take everything from the first "{" to the last "}" so that markdown
    # code fences or stray prose around the object do not break parsing.
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        parsed = json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def assess_test_feasibility(
    client,
    api_type: str,
    test_spec: Dict[str, Any],
    rate_limit_func
) -> Dict[str, Any]:
    """
    Assess if a test specification is feasible to implement as an automated test

    Specifications marked 'Manual' or 'Automatic' are answered locally without
    an LLM call; for 'Automatic' the resource types and operations are guessed
    from the specification text. Anything else is sent to the LLM.

    Args:
        client: The API client
        api_type: API type (claude, gemini, gpt)
        test_spec: Test specification dictionary; reads 'id', 'testability'
            and 'full_spec' (and 'complexity' when present)
        rate_limit_func: Function to check rate limits

    Returns:
        Dictionary with the keys 'testable' (always a bool), 'reason',
        'resource_types', 'operations' and 'complexity'. ('reason' may be
        absent if the LLM reply omitted it.)
    """
    logger.info(f"Assessing feasibility of {test_spec['id']}...")

    testability = test_spec['testability'].lower()

    # For specifications already marked as Manual, we can skip the assessment
    if testability == 'manual':
        return {
            'testable': False,
            'reason': 'Requirement is explicitly marked as Manual',
            'resource_types': [],
            'operations': [],
            'complexity': 'N/A'
        }

    # For specifications marked as Automatic, we can skip the assessment
    if testability == 'automatic':
        full_spec = test_spec['full_spec']

        # Simple heuristic to determine resource types from the specification
        fhir_resources = [
            "Patient", "Practitioner", "Organization", "Location", "Endpoint",
            "HealthcareService", "PractitionerRole", "OrganizationAffiliation",
            "InsurancePlan", "Network"
        ]
        resource_types = _mentioned_fhir_resources(full_spec, fhir_resources)

        # Simple heuristic to determine operations. Keywords must match as
        # whole words: plain substring tests fire on "input" (put),
        # "target" (get) or "address" (add).
        operation_keywords = {
            'read': ['read', 'retrieve', 'get'],
            'search': ['search', 'find', 'query'],
            'create': ['create', 'post', 'add'],
            'update': ['update', 'put', 'modify'],
            'delete': ['delete', 'remove']
        }
        lowered_spec = full_spec.lower()
        # Each operation appears at most once, in the fixed order above.
        operations = [
            op for op, keywords in operation_keywords.items()
            if _operation_keyword_pattern(keywords).search(lowered_spec)
        ]

        return {
            'testable': True,
            'reason': 'Requirement is explicitly marked as Automatic',
            'resource_types': resource_types,
            'operations': operations,
            'complexity': test_spec.get('complexity', 'moderate')
        }

    # For Hybrid or unknown testability, use LLM to assess
    prompt = TEST_FEASIBILITY_PROMPT.format(
        test_specification=test_spec['full_spec']
    )

    system_prompt = "You are a FHIR testing expert who evaluates the feasibility of implementing automated tests for FHIR requirements."

    response = make_llm_request(client, api_type, prompt, system_prompt, rate_limit_func)

    assessment = _parse_feasibility_json(response)
    if assessment is None:
        # If we can't parse the response, fallback to a conservative assessment
        logger.warning(f"Failed to parse feasibility assessment for {test_spec['id']}")
        return {
            'testable': False,
            'reason': 'Failed to determine feasibility',
            'resource_types': [],
            'operations': [],
            'complexity': 'unknown'
        }

    # Callers index 'testable' directly, so make sure it exists and is a bool
    # (a model may answer with the string "true"/"false").
    testable = assessment.get('testable', False)
    if isinstance(testable, str):
        testable = testable.strip().lower() == 'true'
    assessment['testable'] = bool(testable)
    assessment.setdefault('resource_types', [])
    assessment.setdefault('operations', [])
    assessment.setdefault('complexity', 'unknown')
    return assessment


In [None]:

def determine_test_groups(test_specs: List[Dict[str, Any]]) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
    """
    Group test specifications based on common characteristics

    Each specification is filed under exactly one actor and one category.
    The actor is "Client" when the spec's 'actor' text contains "client"
    (case-insensitive), otherwise "Server". The category is the first known
    FHIR resource (in the priority order listed below) mentioned in
    'full_spec'; specs mentioning none fall back to 'Search', 'Profiles' or
    'General' depending on whether the text contains "search" or "profile".

    Args:
        test_specs: List of test specification dictionaries; reads 'actor'
            and 'full_spec'

    Returns:
        Nested dictionary of grouped test specifications
        The first level of keys is the actor (Client/Server)
        The second level of keys is the resource type or category
    """
    # FHIR resources to look for, in priority order
    fhir_resources = [
        "Patient", "Practitioner", "Organization", "Location", "Endpoint",
        "HealthcareService", "PractitionerRole", "OrganizationAffiliation",
        "InsurancePlan", "Network"
    ]
    # Longest names first: the regex engine takes the first alternative that
    # matches, so text about "PractitionerRole" is not mistaken for
    # "Practitioner" (nor "OrganizationAffiliation" for "Organization").
    resource_pattern = re.compile(
        "|".join(re.escape(name) for name in sorted(fhir_resources, key=len, reverse=True))
    )

    result_groups = {}

    for spec in test_specs:
        actor = "Client" if "client" in spec['actor'].lower() else "Server"
        full_spec = spec['full_spec']

        mentioned = set(resource_pattern.findall(full_spec))
        category = next((name for name in fhir_resources if name in mentioned), None)

        # If no specific resource identified, put in a general group
        if category is None:
            lowered = full_spec.lower()
            if 'search' in lowered:
                category = 'Search'
            elif 'profile' in lowered:
                category = 'Profiles'
            else:
                category = 'General'

        result_groups.setdefault(actor, {}).setdefault(category, []).append(spec)

    return result_groups


In [None]:

def _strip_markdown_code_fence(text: str) -> str:
    """Return text without a surrounding markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith('```'):
        return text

    # Drop the opening fence line, whatever language tag it carries.
    lines = stripped.splitlines()[1:]

    # Cut at the last closing fence; anything after it is commentary, not
    # code. A reply truncated before its closing fence keeps all its lines.
    for index in range(len(lines) - 1, -1, -1):
        if lines[index].strip() == '```':
            lines = lines[:index]
            break

    return "\n".join(lines)


def generate_inferno_test(
    client, 
    api_type: str,
    test_spec: Dict[str, Any],
    inferno_guidance: str,
    module_name: str,
    rate_limit_func
) -> str:
    """
    Generate an Inferno test implementation for a test specification

    Args:
        client: The API client
        api_type: API type (claude, gemini, gpt)
        test_spec: Test specification dictionary; reads 'id' and 'full_spec'
        inferno_guidance: Inferno test development guidance
        module_name: Name of the module for the test
        rate_limit_func: Function to check rate limits

    Returns:
        Ruby code implementing the test, with any surrounding markdown code
        fence removed

    Raises:
        ValueError: If the model returned no text at all
    """
    logger.info(f"Generating Inferno test for {test_spec['id']}...")

    # Prepare the prompt
    prompt = INFERNO_TEST_GENERATION_PROMPT.format(
        test_specification=test_spec['full_spec'],
        requirement_id=test_spec['id'],
        module_name=module_name,
        inferno_guidance=inferno_guidance
    )

    # Make the API request
    response = make_llm_request(client, api_type, prompt, INFERNO_TEST_SYSTEM_PROMPT, rate_limit_func)

    # An empty reply must not be written out as a "generated" test file.
    if not response or not response.strip():
        raise ValueError(f"Empty response received for {test_spec['id']}")

    # Clean up any markdown formatting if the LLM added code blocks. This
    # tolerates leading whitespace, any language tag, CRLF line endings and
    # text after the closing fence.
    return _strip_markdown_code_fence(response)


In [None]:

def create_file_structure(grouped_tests: Dict[str, Dict[str, List[Dict[str, Any]]]], module_name: str, output_dir: str):
    """
    Create the directory tree for the Inferno tests and plan the file names

    Directories are created as <output_dir>/<module>/<actor>/<resource>, all
    lower-cased. No test files are written here; only their paths are chosen.

    Args:
        grouped_tests: Nested dictionary of grouped test specs
            (actor -> resource -> list of specs, each with an 'id')
        module_name: Name of the module for the tests
        output_dir: Directory for output files

    Returns:
        Dictionary mapping each spec's 'id' to the path of its test file,
        named <id lower-cased with '-' replaced by '_'>_test.rb
    """
    module_root = os.path.join(output_dir, module_name.lower())
    os.makedirs(module_root, exist_ok=True)

    paths_by_requirement = {}

    for actor_name, groups in grouped_tests.items():
        actor_root = os.path.join(module_root, actor_name.lower())
        os.makedirs(actor_root, exist_ok=True)

        for resource_name, group_specs in groups.items():
            group_root = os.path.join(actor_root, resource_name.lower())
            os.makedirs(group_root, exist_ok=True)

            # One Ruby file per requirement inside its resource directory.
            paths_by_requirement.update(
                (
                    spec['id'],
                    os.path.join(
                        group_root,
                        spec['id'].lower().replace('-', '_') + "_test.rb",
                    ),
                )
                for spec in group_specs
            )

    return paths_by_requirement


In [None]:

def generate_module_file(module_name: str, output_dir: str, grouped_tests: Dict[str, Dict[str, List[Dict[str, Any]]]]):
    """
    Generate the main module file that includes all test groups

    The file is written to <output_dir>/<module>/<module>.rb (names
    lower-cased, UTF-8). It contains a require_relative line for every test
    file, followed by a module with one Inferno::TestSuite class holding a
    group per actor, a nested group per resource, and one
    "test from: :<req_id>_test" line per specification.

    Args:
        module_name: Name of the module for the tests
        output_dir: Directory for output files
        grouped_tests: Nested dictionary of grouped test specs
            (actor -> resource -> list of specs, each with an 'id')

    Returns:
        Path to the generated module file
    """
    module_key = module_name.lower()
    module_dir = os.path.join(output_dir, module_key)
    module_file_path = os.path.join(module_dir, f"{module_key}.rb")

    def test_stem(spec):
        """Requirement ID as used in file names and test IDs (e.g. req_01)."""
        return spec['id'].lower().replace('-', '_')

    # Header
    lines = [
        f"# {module_name} Inferno Test Suite",
        "# Generated on: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "",
        "require 'inferno/dsl/test_suite'",
        "",
    ]

    # Add requires for all test groups
    for actor, resource_groups in grouped_tests.items():
        for resource, test_specs in resource_groups.items():
            lines.append(f"# {actor} {resource} tests")
            lines.extend(
                f"require_relative '{actor.lower()}/{resource.lower()}/{test_stem(spec)}_test'"
                for spec in test_specs
            )
            lines.append("")

    # Add module definition and test suite class
    lines += [
        f"module {module_name}",
        f"  class {module_name}TestSuite < Inferno::TestSuite",
        f"    id :{module_key}_suite",
        f"    title '{module_name} Test Suite'",
        f"    description 'Test suite for validating {module_name} Implementation Guide conformance'",
        "",
    ]

    # Add actor groups
    for actor, resource_groups in grouped_tests.items():
        lines += [
            f"    # {actor} tests",
            "    group do",
            f"      id :{actor.lower()}_group",
            f"      title '{actor} Tests'",
            "",
        ]

        # Add resource groups
        for resource, test_specs in resource_groups.items():
            lines += [
                f"      # {resource} tests",
                "      group do",
                f"        id :{actor.lower()}_{resource.lower()}_group",
                f"        title '{actor} {resource} Tests'",
                "",
            ]
            # Add references to individual tests
            lines.extend(f"        test from: :{test_stem(spec)}_test" for spec in test_specs)
            lines += ["      end", ""]

        lines += ["    end", ""]

    lines += ["  end", "end"]

    # Do not rely on the caller having created the module directory already.
    os.makedirs(module_dir, exist_ok=True)
    with open(module_file_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines) + "\n")

    return module_file_path


In [None]:

def _write_generation_report(
    report_path: str,
    module_name: str,
    test_specs: List[Dict[str, Any]],
    testable_specs: List[Dict[str, Any]],
    untestable_specs: List[Dict[str, Any]],
    generated_tests: List[str],
    skipped_tests: List[str],
) -> None:
    """Write the markdown summary of one generation run to report_path."""
    parts = [
        f"# {module_name} Inferno Test Generation Report\n\n",
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## Summary\n\n",
        f"- Total specifications: {len(test_specs)}\n",
        f"- Testable specifications: {len(testable_specs)}\n",
        f"- Untestable specifications: {len(untestable_specs)}\n",
        f"- Successfully generated tests: {len(generated_tests)}\n",
        f"- Failed to generate tests: {len(skipped_tests)}\n\n",
        "## Generated Tests\n\n",
    ]
    parts.extend(f"- {spec_id}\n" for spec_id in generated_tests)
    parts.append("\n## Skipped Tests\n\n")
    parts.extend(f"- {spec_id}\n" for spec_id in skipped_tests)
    parts.append("\n## Untestable Requirements\n\n")
    parts.extend(
        f"- {spec['id']}: {spec['assessment'].get('reason', 'Unknown reason')}\n"
        for spec in untestable_specs
    )

    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("".join(parts))


def generate_inferno_test_kit(
    api_type: str,
    test_plan_file: str,
    guidance_file: str = None,
    module_name: str = "PlanNet",
    output_dir: str = OUTPUT_DIR
) -> Dict[str, Any]:
    """
    Process a test plan and generate an Inferno test kit

    Steps: parse the test plan, assess each specification's feasibility,
    group the testable ones by actor and resource, generate one Ruby test
    file per specification, write the main module file, and write a markdown
    report of the run.

    Args:
        api_type: API type (claude, gemini, gpt)
        test_plan_file: Path to test plan markdown file
        guidance_file: Path to Inferno guidance file (optional)
        module_name: Name of the module for the tests
        output_dir: Directory for output files

    Returns:
        Dictionary containing statistics and paths: 'total_specs',
        'testable_specs', 'untestable_specs', 'generated_tests',
        'skipped_tests' (all counts), 'module_dir', 'module_file' and
        'report_file'

    Raises:
        Exception: Any error outside the per-test generation step is logged
            and re-raised. A failure while generating a single test is
            logged and that test is counted as skipped instead.
    """
    logger.info(f"Starting Inferno test generation with {api_type} for {module_name}")

    # Initialize API clients and rate limiters
    clients = setup_clients()
    client = clients[api_type]
    config = API_CONFIGS[api_type]
    rate_limiter = create_rate_limiter()

    def check_limits():
        check_rate_limits(rate_limiter, api_type)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Module name normalization for Ruby
    module_name = ''.join(word.capitalize() for word in module_name.split())

    try:
        # Parse test plan
        test_specs = parse_test_plan(test_plan_file)
        logger.info(f"Parsed {len(test_specs)} test specifications from {test_plan_file}")

        # Get Inferno guidance
        if guidance_file and os.path.exists(guidance_file):
            with open(guidance_file, 'r', encoding='utf-8') as f:
                inferno_guidance = f.read()
            logger.info(f"Loaded Inferno guidance from {guidance_file}")
        else:
            inferno_guidance = get_inferno_guidance()
            logger.info("Using default Inferno guidance")

        # Assess feasibility of each test spec
        testable_specs = []
        untestable_specs = []

        for spec in test_specs:
            assessment = assess_test_feasibility(client, api_type, spec, check_limits)
            spec['assessment'] = assessment

            # An LLM-produced assessment may omit 'testable'; treat that as
            # not testable rather than failing the whole run.
            if assessment.get('testable'):
                testable_specs.append(spec)
            else:
                untestable_specs.append(spec)

        logger.info(f"Identified {len(testable_specs)} testable and {len(untestable_specs)} untestable requirements")

        # Group testable specifications
        grouped_tests = determine_test_groups(testable_specs)

        # Log the grouping results
        logger.info(f"Requirements grouped into {len(grouped_tests)} actor categories")
        for actor, resource_groups in grouped_tests.items():
            logger.info(f"Actor '{actor}': {len(resource_groups)} resource groups")
            for resource, specs in resource_groups.items():
                logger.info(f"  Resource '{resource}': {len(specs)} specifications")

        # Create the file structure
        test_file_map = create_file_structure(grouped_tests, module_name, output_dir)
        logger.info(f"Created file structure in {output_dir}")

        # Generate and save tests
        generated_tests = []
        skipped_tests = []

        for actor, resource_groups in grouped_tests.items():
            for resource, specs in resource_groups.items():
                for i, spec in enumerate(specs):
                    try:
                        # Generate the test
                        test_code = generate_inferno_test(
                            client, api_type, spec, inferno_guidance, module_name, check_limits
                        )

                        # Write the test to file (UTF-8: generated code may
                        # contain non-ASCII characters)
                        file_path = test_file_map[spec['id']]
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(test_code)

                        generated_tests.append(spec['id'])
                        logger.info(f"Generated test for {spec['id']} in {file_path}")

                    except Exception as e:
                        logger.error(f"Error generating test for {spec['id']}: {str(e)}")
                        skipped_tests.append(spec['id'])

                    # Add delay between requests
                    if i < len(specs) - 1:  # No need to delay after the last request
                        time.sleep(config["delay_between_chunks"])

        # Only reference tests whose file was actually written; a
        # require_relative for a missing file would break loading the suite.
        generated_ids = set(generated_tests)
        generated_groups = {}
        for actor, resource_groups in grouped_tests.items():
            kept_groups = {}
            for resource, specs in resource_groups.items():
                kept_specs = [spec for spec in specs if spec['id'] in generated_ids]
                if kept_specs:
                    kept_groups[resource] = kept_specs
            if kept_groups:
                generated_groups[actor] = kept_groups

        # Generate the main module file
        module_file_path = generate_module_file(module_name, output_dir, generated_groups)
        logger.info(f"Generated main module file at {module_file_path}")

        # Generate a report
        report_path = os.path.join(output_dir, f"{module_name.lower()}_test_report_{timestamp}.md")
        _write_generation_report(
            report_path,
            module_name,
            test_specs,
            testable_specs,
            untestable_specs,
            generated_tests,
            skipped_tests,
        )
        logger.info(f"Generated report at {report_path}")

        return {
            "total_specs": len(test_specs),
            "testable_specs": len(testable_specs),
            "untestable_specs": len(untestable_specs),
            "generated_tests": len(generated_tests),
            "skipped_tests": len(skipped_tests),
            "module_dir": os.path.join(output_dir, module_name.lower()),
            "module_file": module_file_path,
            "report_file": report_path
        }

    except Exception as e:
        logger.error(f"Error generating Inferno tests: {str(e)}")
        raise


In [None]:

def run_inferno_test_generator():
    """
    Run the Inferno test generator with user input

    Prompts for the API, the test plan path, an optional guidance file, the
    module name and the output directory, then runs the generation and
    prints a summary. Surrounding whitespace is stripped from every answer.

    Returns:
        Dictionary with generation results, or None if an error occurred
    """
    # Load environment variables
    load_dotenv()

    # Get input from user or set default values
    print("\nFHIR Inferno Test Generator")
    print("=" * 50)

    # Let user select the API
    print("\nSelect the API to use:")
    print("1. Claude")
    print("2. Gemini")
    print("3. GPT-4")
    api_choice = input("Enter your choice (1-3, default 1): ").strip() or "1"

    api_mapping = {
        "1": "claude",
        "2": "gemini",
        "3": "gpt"
    }

    api_type = api_mapping.get(api_choice)
    if api_type is None:
        # Tell the user instead of silently switching provider.
        logger.warning(f"Unrecognized API choice: {api_choice}")
        print(f"Warning: unrecognized choice '{api_choice}'. Using Claude.")
        api_type = "claude"

    # Get test plan file path
    test_plan_file = input("\nEnter path to test plan markdown file: ").strip()

    # Check if test plan file exists
    if not os.path.exists(test_plan_file):
        logger.error(f"Test plan file not found: {test_plan_file}")
        print(f"Error: Test plan file not found at {test_plan_file}")
        return None

    # Get Inferno guidance file path (optional)
    guidance_file = input("\nEnter path to Inferno guidance file (optional, press Enter to skip): ").strip()

    if guidance_file and not os.path.exists(guidance_file):
        logger.warning(f"Inferno guidance file not found: {guidance_file}")
        print(f"Warning: Inferno guidance file not found at {guidance_file}. Using built-in guidance.")
        guidance_file = None

    # Get module name
    module_name = input("\nEnter module name (default 'PlanNet'): ").strip() or "PlanNet"

    # Get output directory
    output_dir = input(f"\nEnter output directory (default '{OUTPUT_DIR}'): ").strip() or OUTPUT_DIR

    print(f"\nGenerating Inferno tests with {api_type.capitalize()}...")
    if guidance_file:
        print(f"Using Inferno guidance from {guidance_file}")
    print("This may take several minutes depending on the number of requirements.")

    try:
        # Process test plan and generate Inferno tests
        result = generate_inferno_test_kit(
            api_type=api_type,
            test_plan_file=test_plan_file,
            guidance_file=guidance_file,
            module_name=module_name,
            output_dir=output_dir
        )

        # Output results
        print("\n" + "="*80)
        print("Inferno test generation complete!")
        print(f"Total specifications: {result['total_specs']}")
        print(f"Testable specifications: {result['testable_specs']}")
        print(f"Successfully generated tests: {result['generated_tests']}")
        print(f"Module directory: {result['module_dir']}")
        print(f"Main module file: {result['module_file']}")
        print(f"Generation report: {result['report_file']}")
        print("="*80)

        return result

    except Exception as e:
        logger.error(f"Error: {str(e)}")
        print(f"\nError occurred during processing: {str(e)}")
        print("Check the log for more details.")
        return None

# Execute the main function when run from a notebook cell
if __name__ == "__main__":
    run_inferno_test_generator()