In [204]:
# FHIR Inferno Test Generator
import re
import os
import logging
import time
import json
import httpx
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from collections import defaultdict

import pandas as pd
from dotenv import load_dotenv
from anthropic import Anthropic, RateLimitError
import google.generativeai as gemini
from openai import OpenAI
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type

# Set up logging
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
CURRENT_DIR = Path.cwd()  # Current working directory
OUTPUT_DIR = os.path.join(CURRENT_DIR, 'test_output')


In [205]:
# API Configuration
API_CONFIGS = {
    "claude": {
        "model_name": "claude-3-5-sonnet-20241022", 
        "max_tokens": 8192,
        "temperature": 0.3,
        "batch_size": 5,
        "delay_between_chunks": 1,
        "delay_between_batches": 3,
        "requests_per_minute": 900,
        "max_requests_per_day": 20000,
        "delay_between_requests": 0.1
    },
    "gemini": {
        "model": "models/gemini-2.5-pro-preview-03-25",
        "max_tokens": 8192,
        "temperature": 0.3,
        "batch_size": 5,
        "delay_between_chunks": 2,
        "delay_between_batches": 5,
        "requests_per_minute": 900,
        "max_requests_per_day": 50000,
        "delay_between_requests": 0.1,
        "timeout": 60
    },
    "gpt": {
        "model": "gpt-4o",
        "max_tokens": 8192,
        "temperature": 0.3,
        "batch_size": 5,
        "delay_between_chunks": 2,
        "delay_between_batches": 5,
        "requests_per_minute": 450,
        "max_requests_per_day": 20000,
        "delay_between_requests": 0.15
    }
}

In [206]:

# System prompts for test generation
INFERNO_TEST_SYSTEM_PROMPT = """You are a specialized FHIR testing engineer with expertise in healthcare interoperability.
Your task is to convert test specifications from a test plan into executable Ruby tests using the Inferno testing framework.
You will generate valid, working Ruby code that follows Inferno test patterns and best practices."""

# Prompt template for test generation
INFERNO_TEST_GENERATION_PROMPT = """
Convert the following FHIR test specification into executable Ruby code using the Inferno testing framework.

Test Specification:
{test_specification}

Create an Inferno test implementation that:
1. Follows the Inferno structure (TestGroup containing one or more tests)
2. Implements the test logic described in the specification
3. Makes appropriate FHIR API calls
4. Includes proper assertions to validate the requirements
5. Handles both success and error cases appropriately

Naming conventions:
- Use underscored lowercase names for files (e.g., patient_read_test.rb)
- Use CamelCase for class names (e.g., PatientReadTest)
- Class names should end with 'Test' or 'Group'
- Give each test a unique ID that's descriptive and related to the requirement

The test should be comprehensive but focused on exactly what's described in the specification.
Include proper documentation in the code and follow Inferno best practices.

Requirement ID: {requirement_id}
Module Name: {module_name}

The test should also be developed in accordance with this guidance on Inferno test development:
{inferno_guidance}

When developing the test:

1. Define a TestGroup with a descriptive ID based on the requirement
2. Create individual test cases within the group for each specific aspect to test
3. Include detailed documentation in the description fields
4. Use proper assertions that directly validate the requirement
5. Provide meaningful error and success messages
6. Consider both positive and negative test scenarios
7. Add appropriate inputs for configurable test parameters

Follow this pattern for the TestGroup structure:
```ruby
module {module_name}
  class YourTestGroup < Inferno::TestGroup
    id :your_unique_id
    title 'Clear Requirement Title'
    description %(
      Detailed description of what this test validates, including:
      - The specific requirement being tested
      - How the test works
      - What conformance is being verified
    )
    
    # Tests go here
  end
end

Return only the Ruby code for the test implementation, with no additional explanation.
"""


In [207]:
def create_rate_limiter():
    """Create a rate limiter state dictionary for all APIs"""
    return {
        api: {
            'requests': [],
            'daily_requests': 0,
            'last_reset': time.time()
        }
        for api in API_CONFIGS.keys()
    }

def check_rate_limits(rate_limiter: dict, api: str):
    """Check and wait if rate limits would be exceeded"""
    if api not in rate_limiter:
        raise ValueError(f"Unknown API: {api}")
        
    now = time.time()
    state = rate_limiter[api]
    config = API_CONFIGS[api]
    
    # Reset daily counts if needed
    day_seconds = 24 * 60 * 60
    if now - state['last_reset'] >= day_seconds:
        state['daily_requests'] = 0
        state['last_reset'] = now
    
    # Check daily limit
    if state['daily_requests'] >= config['max_requests_per_day']:
        raise Exception(f"{api} daily request limit exceeded")
    
    # Remove old requests outside the current minute
    state['requests'] = [
        req_time for req_time in state['requests']
        if now - req_time < 60
    ]
    
    # Wait if at rate limit
    if len(state['requests']) >= config['requests_per_minute']:
        sleep_time = 60 - (now - state['requests'][0])
        if sleep_time > 0:
            time.sleep(sleep_time)
        state['requests'] = state['requests'][1:] 
    
    # Add minimum delay between requests
    if state['requests'] and now - state['requests'][-1] < config['delay_between_requests']:
        time.sleep(config['delay_between_requests'])
    
    # Record this request
    state['requests'].append(now)
    state['daily_requests'] += 1


In [208]:
def setup_clients():
    """Initialize clients for each LLM service"""
    try:
        #Claude setup
        verify_path = '/opt/homebrew/etc/openssl@3/cert.pem'
        http_client = httpx.Client(
            verify=verify_path if os.path.exists(verify_path) else True,
            timeout=60.0
        )
        claude_client = Anthropic(
            api_key=os.getenv('ANTHROPIC_API_KEY'),
            http_client=http_client
        )
        
        # Gemini setup
        gemini_api_key = os.getenv('GEMINI_API_KEY')
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY not found")
        gemini.configure(api_key=gemini_api_key)
        gemini_client = gemini.GenerativeModel(
            model_name=API_CONFIGS["gemini"]["model"],
            generation_config={
                "max_output_tokens": API_CONFIGS["gemini"]["max_tokens"],
                "temperature": API_CONFIGS["gemini"]["temperature"]
            }
        )
        
        # OpenAI setup
        openai_api_key = os.getenv('OPENAI_API_KEY')
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY not found")
        openai_client = OpenAI(
            api_key=openai_api_key,
            timeout=60.0
        )
        
        return {
            "claude": claude_client,
            "gpt": openai_client,
            "gemini": gemini_client
        }
        
    except Exception as e:
        logging.error(f"Error setting up clients: {str(e)}")
        raise

In [209]:
@retry(
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    retry=retry_if_exception_type((RateLimitError, TimeoutError))
)
def make_llm_request(client, api_type: str, prompt: str, system_prompt: str, rate_limit_func) -> str:
    """Make rate-limited API request with retries"""
    rate_limit_func()
    
    config = API_CONFIGS[api_type]
    
    try:
        if api_type == "claude":
            response = client.messages.create(
                model=config["model_name"],
                max_tokens=config["max_tokens"],
                messages=[{
                    "role": "user", 
                    "content": prompt
                }],
                system=system_prompt
            )
            return response.content[0].text
            
        elif api_type == "gemini":
            response = client.generate_content(
                prompt,
                generation_config={
                    "max_output_tokens": config["max_tokens"],
                    "temperature": config["temperature"]
                }
            )
            if hasattr(response, 'text'):
                return response.text
            elif response.candidates:
                return response.candidates[0].content.parts[0].text
            else:
                raise ValueError("No response generated from Gemini API")
                    
        elif api_type == "gpt":
            response = client.chat.completions.create(
                model=config["model"],
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=config["max_tokens"],
                temperature=config["temperature"]
            )
            return response.choices[0].message.content
            
    except Exception as e:
        logging.error(f"Error in {api_type} API request: {str(e)}")
        raise


In [210]:
def parse_test_plan(file_path: str) -> Dict[str, Any]:
    """
    Parse a consolidated test plan into sections and requirements
    focusing on the main requirements
    """
    with open(file_path, 'r') as f:
        content = f.read()
    
    # First identify main sections
    sections = {}
    
    # Define the main section names we're interested in
    main_section_names = [
        "Application-Level Requirements",
        "Authentication",
        "Base Requirements",
        "CORE Conformance"
    ]
    
    # Find all requirement headers in the document
    req_pattern = r'### (REQ-\d+): (.*?)\n\n\*\*Description\*\*: "(.*?)"\n\n\*\*Actor\*\*: (.*?)\n\n\*\*Conformance\*\*: (.*?)(?:\n\n|$)'
    req_matches = re.findall(req_pattern, content, re.DOTALL)
    
    print(f"Found {len(req_matches)} potential requirements")
    
    # Initialize the sections structure
    for section_name in main_section_names:
        section_id = section_name.lower().replace(' ', '-')
        sections[section_name] = {
            'id': section_id,
            'name': section_name,
            'content': "",
            'requirements': []
        }
    
    # Process each requirement
    for req_id, req_title, req_desc, req_actor, req_conf in req_matches:
        print(f"Processing requirement: {req_id}")
        
        # Find the full test specification for this requirement
        test_spec_pattern = f"# Test Specification for {req_id}(.*?)(?:---|\n## )"
        test_spec_match = re.search(test_spec_pattern, content, re.DOTALL)
        test_spec = test_spec_match.group(1).strip() if test_spec_match else ""
        
        # Determine which section this requirement belongs to
        for section_name in main_section_names:
            section_start = content.find(f"## {section_name}")
            next_section = None
            for other_section in main_section_names:
                if other_section != section_name:
                    other_start = content.find(f"## {other_section}")
                    if other_start > section_start and (next_section is None or other_start < next_section):
                        next_section = other_start
            
            section_end = next_section if next_section else len(content)
            section_content = content[section_start:section_end]
            
            if f"### {req_id}" in section_content:
                # This requirement belongs to this section
                requirement = {
                    'id': req_id,
                    'title': req_title.strip(),
                    'description': req_desc.strip(),
                    'actor': req_actor.strip(),
                    'conformance': req_conf.strip(),
                    'full_content': f"### {req_id}: {req_title}\n\n**Description**: \"{req_desc}\"\n\n**Actor**: {req_actor}\n\n**Conformance**: {req_conf}",
                    'full_spec': test_spec,
                    'section': section_name,
                    'testability': 'Automatic'  # Default value
                }
                
                sections[section_name]['requirements'].append(requirement)
                print(f"Added requirement {req_id} to section {section_name}")
                
    # Remove empty sections
    sections = {k: v for k, v in sections.items() if v['requirements']}
    
    return sections

In [211]:
def get_inferno_guidance() -> str:
    """
    Load the Inferno guidance document
    
    Returns:
        String containing Inferno test development guidance
    """
    guidance_path = os.path.join(PROJECT_ROOT, 'inferno-guidance.md')
    
    # Use a default guidance if file not found
    if not os.path.exists(guidance_path):
        return """# Inferno Test Development Guidance
        
        Inferno is a Ruby-based testing framework for FHIR implementations. Tests should follow the structure:
        
        ```ruby
        module YourTestKit
          class YourTestGroup < Inferno::TestGroup
            id :unique_id
            title 'Test Group Title'
            description 'Detailed description'
            
            test do
              id :test_unique_id
              title 'Test Title'
              description 'Test description'
              
              run do
                # Test implementation
                assert condition, 'Failure message'
              end
            end
          end
        end
        ```
        """
    
    with open(guidance_path, 'r') as f:
        return f.read()

In [212]:
def determine_test_groups(sections: Dict[str, Dict[str, Any]], expected_actors: List[str]) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
    """
    Group test specifications based on the sections and actors from the test plan
    
    Args:
        sections: Dictionary of sections from the parsed test plan
        expected_actors: List of expected actors from user input
        
    Returns:
        Nested dictionary of grouped test specifications
    """
    # First, group by actor
    actor_groups = defaultdict(list)
    
    # Create a mapping from normalized actor names to expected actor names
    actor_mapping = {actor.lower().replace(' ', '_'): actor for actor in expected_actors}
    
    # Collect all requirements across all sections
    for section_name, section in sections.items():
        for req in section['requirements']:
            # Use the exact actor name from the requirement
            actor = req['actor'].strip()
            if not actor:
                actor = "Unknown Actor"
            else:
                # Try to match with expected actors (case-insensitive)
                actor_lower = actor.lower().replace(' ', '_')
                if actor_lower in actor_mapping:
                    actor = actor_mapping[actor_lower]
            
            # Store the requirement with its section information
            req_with_section = req.copy()
            req_with_section['section_name'] = section_name
            req_with_section['section_id'] = section['id']
            
            actor_groups[actor].append(req_with_section)
    
    # Only include actors that have requirements
    result_groups = {}
    for actor, reqs in actor_groups.items():
        if reqs:  # Only include non-empty actor groups
            if actor not in result_groups:
                result_groups[actor] = {}
            
            # Group by original section
            section_groups = defaultdict(list)
            for req in reqs:
                section_groups[req['section_name']].append(req)
            
            # Add the section groups to the result
            result_groups[actor] = dict(section_groups)
    
    return result_groups

In [213]:
def generate_tests_for_section(
    client, 
    api_type: str,
    section: Dict[str, Any],
    inferno_guidance: str,
    module_name: str,
    rate_limit_func,
    max_token_limit: int = 16000
):
    """
    Generate tests for an entire section or individual requirements based on token limits
    
    Args:
        client: The API client
        api_type: API type (claude, gemini, gpt)
        section: Section dictionary containing requirements
        inferno_guidance: Inferno test development guidance
        module_name: Name of the module for the test
        rate_limit_func: Function to check rate limits
        max_token_limit: Maximum tokens for the model
        
    Returns:
        Dictionary mapping requirement IDs to generated tests
    """
    logger.info(f"Generating tests for section: {section['name']}")
    
    # First, try to generate tests for the entire section
    if len(section['requirements']) > 1:
        # Construct a prompt for the entire section
        section_prompt = f"""
        Generate Inferno tests for the following section of requirements from a FHIR implementation guide.
        
        Section: {section['name']}
        
        Requirements:
        {section['content']}
        
        For each requirement, create a separate Inferno test class following the naming convention:
        - Class name: {module_name}[Actor][Resource][REQ-ID]Test
        - File name: req_[id]_test.rb
        
        Module Name: {module_name}
        
        Follow this Inferno development guidance:
        {inferno_guidance}
        
        Return the Ruby code for each test implementation, separated by clear markers indicating the requirement ID.
        """
        
        # Estimate token count (rough approximation)
        estimated_tokens = len(section_prompt) / 4  # Approximate 4 chars per token
        
        if estimated_tokens < max_token_limit:
            try:
                logger.info(f"Attempting to generate tests for entire section: {section['name']}")
                response = make_llm_request(
                    client, 
                    api_type, 
                    section_prompt, 
                    INFERNO_TEST_SYSTEM_PROMPT, 
                    rate_limit_func
                )
                
                # Parse the response to extract individual tests
                tests = {}
                current_req = None
                current_test = []
                
                for line in response.split('\n'):
                    # Look for markers indicating requirement boundaries
                    req_marker = re.search(r'(REQ-\d+)', line)
                    if req_marker and ("Test for" in line or "Implementation for" in line or "# Requirement" in line):
                        if current_req and current_test:
                            tests[current_req] = '\n'.join(current_test)
                            current_test = []
                        
                        current_req = req_marker.group(1)
                    
                    if current_req:
                        current_test.append(line)
                
                # Don't forget the last test
                if current_req and current_test:
                    tests[current_req] = '\n'.join(current_test)
                
                # If we successfully parsed tests for all requirements, return them
                if len(tests) == len(section['requirements']):
                    logger.info(f"Successfully generated tests for all requirements in section: {section['name']}")
                    return tests
                
                logger.warning(f"Generated tests for only {len(tests)} of {len(section['requirements'])} requirements in section: {section['name']}")
                # Continue to individual requirement processing
            
            except Exception as e:
                logger.warning(f"Failed to generate tests for entire section: {str(e)}")
                # Continue to individual requirement processing
    
    # If section-level generation failed or wasn't attempted, generate tests for individual requirements
    tests = {}
    for requirement in section['requirements']:
        try:
            logger.info(f"Generating test for requirement: {requirement['id']}")
            
            # Prepare the prompt for this requirement with full context
            req_prompt = INFERNO_TEST_GENERATION_PROMPT.format(
                test_specification=requirement['full_content'],
                requirement_id=requirement['id'],
                module_name=module_name,
                inferno_guidance=inferno_guidance
            )
            
            # Generate the test
            test_code = make_llm_request(
                client, 
                api_type, 
                req_prompt, 
                INFERNO_TEST_SYSTEM_PROMPT, 
                rate_limit_func
            )
            
            # Clean up any markdown formatting
            if test_code.startswith('```ruby'):
                test_code = re.sub(r'^```ruby\n', '', test_code)
                test_code = re.sub(r'\n```$', '', test_code)
            elif test_code.startswith('```'):
                test_code = re.sub(r'^```\n', '', test_code)
                test_code = re.sub(r'\n```$', '', test_code)
            
            tests[requirement['id']] = test_code
            logger.info(f"Successfully generated test for requirement: {requirement['id']}")
            
        except Exception as e:
            logger.error(f"Error generating test for requirement {requirement['id']}: {str(e)}")
    
    return tests

In [214]:
def create_file_structure(grouped_tests: Dict[str, Dict[str, List[Dict[str, Any]]]], module_name: str, output_dir: str):
    """
    Create the file structure for the Inferno tests
    
    Args:
        grouped_tests: Nested dictionary of grouped test specs
        module_name: Name of the module for the tests
        output_dir: Directory for output files
        
    Returns:
        Dictionary mapping test specs to file paths
    """
    # Create base directories
    module_dir = os.path.join(output_dir, module_name.lower())
    os.makedirs(module_dir, exist_ok=True)
    
    # Create test group directories
    test_file_map = {}
    
    for actor, resource_groups in grouped_tests.items():
        actor_dir = os.path.join(module_dir, actor.lower())
        os.makedirs(actor_dir, exist_ok=True)
        
        for resource, test_specs in resource_groups.items():
            resource_dir = os.path.join(actor_dir, resource.lower())
            os.makedirs(resource_dir, exist_ok=True)
            
            for spec in test_specs:
                # Create a file name based on the requirement ID
                req_id = spec['id'].lower().replace('-', '_')
                file_name = f"{req_id}_test.rb"
                file_path = os.path.join(resource_dir, file_name)
                
                test_file_map[spec['id']] = file_path
    
    return test_file_map

In [215]:
def generate_module_file(module_name: str, output_dir: str, grouped_tests: Dict[str, Dict[str, List[Dict[str, Any]]]]):
    """
    Generate the main module file that includes all test groups
    
    Args:
        module_name: Name of the module for the tests
        output_dir: Directory for output files
        grouped_tests: Nested dictionary of grouped test specs
        
    Returns:
        Path to the generated module file
    """
    module_dir = os.path.join(output_dir, module_name.lower())
    module_file_path = os.path.join(module_dir, f"{module_name.lower()}.rb")
    
    # Generate module file content
    module_content = f"# {module_name} Inferno Test Suite\n"
    module_content += "# Generated on: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n\n"
    module_content += "require 'inferno/dsl/test_suite'\n\n"
    
    # Add requires for all test groups
    for actor, resource_groups in grouped_tests.items():
        # Create safe versions for file paths
        actor_safe = re.sub(r'[^a-zA-Z0-9_]', '_', actor.lower())
        
        for resource, test_specs in resource_groups.items():
            # Create safe versions for file paths
            resource_safe = re.sub(r'[^a-zA-Z0-9_]', '_', resource.lower())
            
            module_content += f"# {actor} {resource} tests\n"
            for spec in test_specs:
                req_id = spec['id'].lower().replace('-', '_')
                module_content += f"require_relative '{actor_safe}/{resource_safe}/{req_id}_test'\n"
            module_content += "\n"
    
    # Add module definition
    module_content += f"module {module_name}\n"
    
    # Add test suite class
    class_name = f"{module_name}TestSuite"
    module_content += f"  class {class_name} < Inferno::TestSuite\n"
    module_content += f"    id :{module_name.lower()}_suite\n"
    module_content += f"    title '{module_name} Test Suite'\n"
    module_content += f"    description 'Test suite for validating {module_name} Implementation Guide conformance'\n\n"
    
    # Add actor groups
    for actor, resource_groups in grouped_tests.items():
        # Create safe version for Ruby identifiers
        actor_safe = re.sub(r'[^a-zA-Z0-9_]', '_', actor.lower())
        
        module_content += f"    # {actor} tests\n"
        module_content += f"    group do\n"
        module_content += f"      id :{actor_safe}_group\n"
        module_content += f"      title '{actor} Tests'\n\n"
        
        # Add resource groups
        for resource, test_specs in resource_groups.items():
            # Skip empty resource groups
            if not test_specs:
                continue
                
            # Create safe version for Ruby identifiers
            resource_safe = re.sub(r'[^a-zA-Z0-9_]', '_', resource.lower())
            
            module_content += f"      # {resource} tests\n"
            module_content += f"      group do\n"
            module_content += f"        id :{actor_safe}_{resource_safe}_group\n"
            module_content += f"        title '{resource} Tests'\n\n"
            
            # Add references to individual tests
            for spec in test_specs:
                req_id = spec['id'].lower().replace('-', '_')
                module_content += f"        test from: :{req_id}_test\n"
            
            module_content += "      end\n\n"
        
        module_content += "    end\n\n"
    
    module_content += "  end\n"
    module_content += "end\n"
    
    # Write to file
    with open(module_file_path, 'w') as f:
        f.write(module_content)
    
    return module_file_path

In [216]:
def generate_inferno_test_kit(
    api_type: str,
    test_plan_file: str,
    guidance_file: str = None,
    module_name: str = "PlanNet",
    output_dir: str = OUTPUT_DIR,
    expected_actors: List[str] = None
) -> Dict[str, Any]:
    """
    Process a test plan and generate an Inferno test kit
    
    Args:
        api_type: API type (claude, gemini, gpt)
        test_plan_file: Path to test plan markdown file
        guidance_file: Path to Inferno guidance file (optional)
        module_name: Name of the module for the tests
        output_dir: Directory for output files
        expected_actors: List of expected actors in the test plan
        
    Returns:
        Dictionary containing statistics and paths
    """
    logger.info(f"Starting Inferno test generation with {api_type} for {module_name}")
    
    # Initialize API clients and rate limiters
    clients = setup_clients()
    client = clients[api_type]
    config = API_CONFIGS[api_type]
    rate_limiter = create_rate_limiter()
    
    def check_limits():
        check_rate_limits(rate_limiter, api_type)
    
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Module name normalization for Ruby
    module_name = ''.join(word.capitalize() for word in module_name.split())
    
    # Set default actors if none provided
    if not expected_actors:
        expected_actors = ["Health Plan API Actor", "Application Actor"]
    
    try:
        # Parse test plan into sections
        sections = parse_test_plan(test_plan_file)
        logger.info(f"Parsed test plan into {len(sections)} sections")
        
        # Count total requirements
        total_requirements = sum(len(section['requirements']) for section in sections.values())
        logger.info(f"Found {total_requirements} total requirements")
        
        # Get Inferno guidance
        if guidance_file and os.path.exists(guidance_file):
            with open(guidance_file, 'r') as f:
                inferno_guidance = f.read()
            logger.info(f"Loaded Inferno guidance from {guidance_file}")
        else:
            inferno_guidance = get_inferno_guidance()
            logger.info("Using default Inferno guidance")
        
        # Generate tests by section
        all_tests = {}
        for section_name, section in sections.items():
            logger.info(f"Processing section: {section_name} with {len(section['requirements'])} requirements")
            
            # Skip empty sections
            if not section['requirements']:
                continue
                
            # Generate tests for this section
            section_tests = generate_tests_for_section(
                client, 
                api_type, 
                section, 
                inferno_guidance, 
                module_name, 
                check_limits,
                config['max_tokens']
            )
            
            # Add to our collection
            all_tests.update(section_tests)
            
            # Add delay between sections
            time.sleep(config["delay_between_batches"])
            
        logger.info(f"Generated tests for {len(all_tests)} requirements")
        
        # Group requirements by actor and section (original groups from test plan)
        grouped_reqs = determine_test_groups(sections, expected_actors)
        
        # Ensure all expected actors are represented, even if empty
        for actor in expected_actors:
            if actor not in grouped_reqs:
                grouped_reqs[actor] = {}
        
        # Create file structure and write tests
        os.makedirs(os.path.join(output_dir, module_name.lower()), exist_ok=True)
        
        file_paths = {}
        for actor, section_groups in grouped_reqs.items():
            actor_safe = re.sub(r'[^a-zA-Z0-9_]', '_', actor.lower())
            actor_dir = os.path.join(output_dir, module_name.lower(), actor_safe)
            os.makedirs(actor_dir, exist_ok=True)
            
            for section_name, reqs in section_groups.items():
                section_safe = re.sub(r'[^a-zA-Z0-9_]', '_', section_name.lower())
                section_dir = os.path.join(actor_dir, section_safe)
                os.makedirs(section_dir, exist_ok=True)
                
                for req in reqs:
                    # Skip requirements we couldn't generate tests for
                    if req['id'] not in all_tests:
                        continue
                        
                    file_name = f"{req['id'].lower().replace('-', '_')}_test.rb"
                    file_path = os.path.join(section_dir, file_name)
                    
                    with open(file_path, 'w') as f:
                        f.write(all_tests[req['id']])
                    
                    file_paths[req['id']] = file_path
                    logger.info(f"Wrote test for {req['id']} to {file_path}")
        
        # Generate module file
        module_file = generate_module_file(module_name, output_dir, grouped_reqs)
        
        return {
            "total_sections": len(sections),
            "total_requirements": total_requirements,
            "generated_tests": len(all_tests),
            "module_dir": os.path.join(output_dir, module_name.lower()),
            "module_file": module_file
        }
        
    except Exception as e:
        logger.error(f"Error generating Inferno tests: {str(e)}")
        raise

In [217]:
def generate_module_file(module_name: str, output_dir: str, grouped_reqs) -> str:
    """
    Generate the main module file that includes all test groups
    
    Args:
        module_name: Name of the module for the tests
        output_dir: Directory for output files
        grouped_reqs: Dictionary of requirements grouped by actor and section
        
    Returns:
        Path to the generated module file
    """
    module_dir = os.path.join(output_dir, module_name.lower())
    module_file_path = os.path.join(module_dir, f"{module_name.lower()}.rb")
    
    # Generate module file content
    module_content = f"# {module_name} Inferno Test Suite\n"
    module_content += "# Generated on: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n\n"
    module_content += "require 'inferno/dsl/test_suite'\n\n"
    
    # Add requires for all test groups
    for actor, section_groups in grouped_reqs.items():
        actor_safe = actor.replace(" ", "_").lower()
        for section_name, reqs in section_groups.items():
            section_safe = section_name.replace(" ", "_").lower()
            module_content += f"# {actor} - {section_name} tests\n"
            for req in reqs:
                req_id_safe = req['id'].lower().replace('-', '_')
                module_content += f"require_relative '{actor_safe}/{section_safe}/req_{req_id_safe}_test'\n"
            module_content += "\n"
    
    # Add module definition
    module_content += f"module {module_name}\n"
    
    # Add test suite class
    class_name = f"{module_name}TestSuite"
    module_content += f"  class {class_name} < Inferno::TestSuite\n"
    module_content += f"    id :{module_name.lower()}_suite\n"
    module_content += f"    title '{module_name} Test Suite'\n"
    module_content += f"    description 'Test suite for validating {module_name} Implementation Guide conformance'\n\n"
    
    # Add actor groups
    for actor, section_groups in grouped_reqs.items():
        actor_safe = actor.replace(" ", "_").lower()
        module_content += f"    # {actor} tests\n"
        module_content += f"    group do\n"
        module_content += f"      id :{actor_safe}_group\n"
        module_content += f"      title '{actor} Tests'\n\n"
        
        # Add section groups
        for section_name, reqs in section_groups.items():
            section_safe = section_name.replace(" ", "_").lower()
            module_content += f"      # {section_name} tests\n"
            module_content += f"      group do\n"
            module_content += f"        id :{actor_safe}_{section_safe}_group\n"
            module_content += f"        title '{section_name} Tests'\n\n"
            
            # Add references to individual tests
            for req in reqs:
                req_id_safe = req['id'].lower().replace('-', '_')
                module_content += f"        test from: :req_{req_id_safe}_test\n"
            
            module_content += "      end\n\n"
        
        module_content += "    end\n\n"
    
    module_content += "  end\n"
    module_content += "end\n"
    
    # Write to file
    with open(module_file_path, 'w') as f:
        f.write(module_content)
    
    return module_file_path

In [218]:
def run_inferno_test_generator():
    """
    Run the Inferno test generator with user input
    
    Returns:
        Dictionary with generation results, or None if an error occurred
    """
    # Load environment variables
    load_dotenv()
    
    # Get input from user or set default values
    print("\nFHIR Inferno Test Generator")
    print("=" * 50)
    
    # Let user select the API
    print("\nSelect the API to use:")
    print("1. Claude")
    print("2. Gemini")
    print("3. GPT-4")
    api_choice = input("Enter your choice (1-3, default 1): ") or "1"
    
    api_mapping = {
        "1": "claude",
        "2": "gemini",
        "3": "gpt"
    }
    
    api_type = api_mapping.get(api_choice, "claude")
    
    # Get test plan file path
    test_plan_file = input("\nEnter path to test plan markdown file: ")
    
    # Check if test plan file exists
    if not os.path.exists(test_plan_file):
        logger.error(f"Test plan file not found: {test_plan_file}")
        print(f"Error: Test plan file not found at {test_plan_file}")
        return None
    
    # the path to the guidance file
    guidance_file = os.path.join(CURRENT_DIR, "inferno-guidance.md")
    
    # Check if guidance file exists
    if not os.path.exists(guidance_file):
        logger.warning(f"Inferno guidance file not found: {guidance_file}")
        print(f"Warning: Inferno guidance file not found at {guidance_file}. Using built-in guidance.")
        guidance_file = None
    else:
        print(f"Using guidance file: {guidance_file}")
    
    # Get module name
    module_name = input("\nEnter module name (default 'PlanNet'): ") or "PlanNet"
    
    # Get actor information
    print("\nEnter the actors from the test plan (comma-separated, e.g., 'Health Plan API Actor, Application Actor'):")
    actors_input = input("Actors: ")
    
    if actors_input:
        expected_actors = [actor.strip() for actor in actors_input.split(',')]
    else:
        # Default actors if none provided
        expected_actors = ["Health Plan API Actor", "Application Actor"]
    
    # Get output directory
    output_dir = input(f"\nEnter output directory (default '{OUTPUT_DIR}'): ") or OUTPUT_DIR
    
    print(f"\nGenerating Inferno tests with {api_type.capitalize()}...")
    print(f"Using actors: {', '.join(expected_actors)}")
    if guidance_file:
        print(f"Using Inferno guidance from {guidance_file}")
    print(f"This may take several minutes depending on the number of requirements.")
    
    try:
        # Process test plan and generate Inferno tests
        result = generate_inferno_test_kit(
            api_type=api_type,
            test_plan_file=test_plan_file,
            guidance_file=guidance_file,
            module_name=module_name,
            output_dir=output_dir,
            expected_actors=expected_actors
        )
        
        # Output results
        print("\n" + "="*80)
        print(f"Inferno test generation complete!")
        print(f"Total sections: {result['total_sections']}")
        print(f"Total requirements: {result['total_requirements']}")
        print(f"Successfully generated tests: {result['generated_tests']}")
        print(f"Module directory: {result['module_dir']}")
        print(f"Main module file: {result['module_file']}")
        #print(f"Generation report: {result['report_file']}")
        print("="*80)
        
        return result
        
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        print(f"\nError occurred during processing: {str(e)}")
        print("Check the log for more details.")
        return None

In [219]:
# Run the test generator
run_inferno_test_generator()


FHIR Inferno Test Generator

Select the API to use:
1. Claude
2. Gemini
3. GPT-4
Using guidance file: /Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/inferno-guidance.md

Enter the actors from the test plan (comma-separated, e.g., 'Health Plan API Actor, Application Actor'):


2025-04-24 10:52:10,722 - __main__ - INFO - Starting Inferno test generation with claude for test3_claude
2025-04-24 10:52:10,750 - __main__ - INFO - Parsed test plan into 4 sections
2025-04-24 10:52:10,751 - __main__ - INFO - Found 4 total requirements
2025-04-24 10:52:10,752 - __main__ - INFO - Loaded Inferno guidance from /Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/inferno-guidance.md
2025-04-24 10:52:10,752 - __main__ - INFO - Processing section: Application-Level Requirements with 1 requirements
2025-04-24 10:52:10,752 - __main__ - INFO - Generating tests for section: Application-Level Requirements
2025-04-24 10:52:10,752 - __main__ - INFO - Generating test for requirement: REQ-08



Generating Inferno tests with Claude...
Using actors: Application Actor, Health Plan API
Using Inferno guidance from /Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/inferno-guidance.md
This may take several minutes depending on the number of requirements.
Found 4 potential requirements
Processing requirement: REQ-08
Added requirement REQ-08 to section Application-Level Requirements
Processing requirement: REQ-01
Added requirement REQ-01 to section Authentication
Processing requirement: REQ-09
Added requirement REQ-09 to section Base Requirements
Processing requirement: REQ-07
Added requirement REQ-07 to section CORE Conformance


2025-04-24 10:52:24,610 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-04-24 10:52:24,613 - __main__ - INFO - Successfully generated test for requirement: REQ-08
2025-04-24 10:52:27,619 - __main__ - INFO - Processing section: Authentication with 1 requirements
2025-04-24 10:52:27,621 - __main__ - INFO - Generating tests for section: Authentication
2025-04-24 10:52:27,621 - __main__ - INFO - Generating test for requirement: REQ-01
2025-04-24 10:52:36,562 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-04-24 10:52:36,564 - __main__ - INFO - Successfully generated test for requirement: REQ-01
2025-04-24 10:52:39,570 - __main__ - INFO - Processing section: Base Requirements with 1 requirements
2025-04-24 10:52:39,571 - __main__ - INFO - Generating tests for section: Base Requirements
2025-04-24 10:52:39,572 - __main__ - INFO - Generating test for requirement: REQ-09
2025-04-24 10:52:51,343 - 


Inferno test generation complete!
Total sections: 4
Total requirements: 4
Successfully generated tests: 4
Module directory: /Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/test_output/test3_claude
Main module file: /Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/test_output/test3_claude/test3_claude.rb


{'total_sections': 4,
 'total_requirements': 4,
 'generated_tests': 4,
 'module_dir': '/Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/test_output/test3_claude',
 'module_file': '/Users/ceadams/Documents/onclaive/onclaive/test_kit_dev/test_output/test3_claude/test3_claude.rb'}