In [None]:
# Cell [1] - Imports and Setup
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

import os
import sys
import logging
import xml.etree.ElementTree as ET
from xml.dom import minidom
import re
import javalang
from typing import Dict, List, Optional, Tuple, Set
from datetime import datetime

# Configure the root logger: DEBUG level, with timestamped records that
# include the emitting logger's name and the severity.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Named logger shared by the helper functions defined in this notebook.
logger = logging.getLogger('comments_headers_rtf')

def handle_exception(func):
    """
    Decorator that logs any exception raised by ``func`` and re-raises it.

    The wrapper is created with ``functools.wraps`` so the decorated function
    keeps its own ``__name__`` and ``__doc__`` (otherwise every decorated
    helper would introspect as ``wrapper``).

    Args:
        func: Function to wrap with exception handling

    Returns:
        Wrapped function with exception handling

    Raises:
        Exception: Whatever ``func`` raises is propagated unchanged after
            being logged (message at ERROR, stack trace at DEBUG).
    """
    # Local import keeps the decorator usable without touching the import cell.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.error(f"Exception in {func.__name__}: {str(e)}")
            logger.debug("Stack trace:", exc_info=True)
            # Re-raise so callers still see the original failure.
            raise
    return wrapper

@handle_exception
def prettify_xml(elem: ET.Element) -> str:
    """Serialise an element tree to an indented (4-space) XML string."""
    # Round-trip through minidom, which provides the pretty printer.
    serialized = ET.tostring(elem, encoding='utf-8')
    document = minidom.parseString(serialized)
    pretty = document.toprettyxml(indent="    ")
    return pretty

@handle_exception
def validate_xml(xml_string: bytes) -> bool:
    """Return True when the given XML parses as well-formed, False otherwise."""
    try:
        ET.fromstring(xml_string)
    except ET.ParseError as parse_error:
        # Malformed XML is reported, not raised.
        logger.error(f"XML validation failed: {str(parse_error)}")
        return False
    else:
        return True

In [2]:
# Cell [2] - XML Structure Functions
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def create_xml_structure(collection_id: str, collection_name: str, collection_desc: str) -> ET.Element:
    """
    Build the skeleton of an artifacts collection document.

    The returned root holds a populated ``collection_info`` element followed
    by an empty ``artifacts`` container.

    Args:
        collection_id: ID for the collection
        collection_name: Name of the collection
        collection_desc: Description of the collection

    Returns:
        Root element of the XML structure
    """
    logger.debug(f"Creating XML structure for collection: {collection_id}")

    root = ET.Element("artifacts_collection")
    info = ET.SubElement(root, "collection_info")

    # (tag, text) pairs in the order they must appear under collection_info.
    info_fields = (
        ("id", collection_id),
        ("name", collection_name),
        ("version", "1.1"),
        ("description", collection_desc),
        ("content_location", "external"),
    )
    for tag, value in info_fields:
        child = ET.SubElement(info, tag)
        child.text = value

    # Empty container that add_artifact() fills later.
    ET.SubElement(root, "artifacts")

    return root

@handle_exception
def add_artifact(artifacts_elem: ET.Element, artifact_id: str, content: str, parent_id: str = "") -> None:
    """
    Append one ``artifact`` entry (id, content, parent_id) to the container.

    Args:
        artifacts_elem: Parent artifacts element
        artifact_id: ID for the artifact
        content: Content of the artifact
        parent_id: Optional parent ID
    """
    logger.debug(f"Adding artifact: {artifact_id}")

    entry = ET.SubElement(artifacts_elem, "artifact")
    # Children are created in this fixed order: id, content, parent_id.
    for tag, text in (("id", artifact_id), ("content", content), ("parent_id", parent_id)):
        node = ET.SubElement(entry, tag)
        node.text = text

In [3]:
# Cell [3] - Java Code Processing Functions
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def clean_method_signature(signature: str) -> str:
    """
    Reduce a raw Java method header to a bare signature terminated by ';'.

    Everything from the first opening brace that is not inside parentheses is
    discarded. This covers both a complete inline body (``{ return x; }``) and
    the dangling ``{`` of a method whose body continues on later lines. Braces
    inside parentheses (e.g. annotation array values) are left untouched.

    Args:
        signature: Raw method signature possibly containing implementation

    Returns:
        Clean method signature ending with semicolon
    """
    # Find the brace that opens the method body: the first '{' at
    # parenthesis depth 0.
    paren_depth = 0
    cut = len(signature)
    for pos, ch in enumerate(signature):
        if ch == '(':
            paren_depth += 1
        elif ch == ')':
            paren_depth = max(paren_depth - 1, 0)
        elif ch == '{' and paren_depth == 0:
            cut = pos
            break
    signature = signature[:cut]

    # Remove the 'default' modifier of interface default methods. The word
    # boundary prevents mangling identifiers that merely end in "default".
    signature = re.sub(r'\bdefault\s+', '', signature)

    # Ensure signature ends with semicolon if it doesn't already
    signature = signature.strip()
    if not signature.endswith(';'):
        signature += ';'

    return signature

@handle_exception
def extract_java_documentation(content: str) -> str:
    """
    Extract documentation elements from Java code including:
    - Package declaration
    - Imports
    - Class/Interface documentation and declaration
    - Method documentation and signatures (no implementation)
    - Field documentation

    The scan is line based and heuristic: method bodies are skipped by
    counting '{' and '}' characters, so braces inside string literals or
    comments are counted as well. A closing '}' is always appended to the
    result.

    Args:
        content: Java source code content

    Returns:
        Extracted documentation content
    """
    logger.debug("Extracting Java documentation elements")

    # Normalize Windows line endings
    content = content.replace('\r\n', '\n')

    modifiers = ('public ', 'private ', 'protected ')

    # Store processed lines
    processed_lines = []

    # Process the content line by line
    lines = content.split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()

        # Skip empty lines
        if not line:
            i += 1
            continue

        # Handle package declaration
        if line.startswith('package '):
            processed_lines.append(line)
            processed_lines.append('')
            i += 1
            continue

        # Handle imports
        if line.startswith('import '):
            processed_lines.append(line)
            i += 1
            continue

        # Handle Javadoc-style block comments (opening with /**)
        if line.startswith('/**'):
            comment_lines = [line]
            # A comment closed on its opening line ("/** text */") is already
            # complete; scanning on would swallow code up to the next "*/".
            # The search starts at index 2 so that "/**/" counts as closed.
            if '*/' not in line[2:]:
                i += 1
                while i < len(lines) and '*/' not in lines[i]:
                    comment_lines.append(lines[i])
                    i += 1
                if i < len(lines):  # Add closing comment line
                    comment_lines.append(lines[i])
            processed_lines.extend(comment_lines)
            i += 1
            continue

        # Handle single-line comments
        if line.startswith('//'):
            processed_lines.append(line)
            i += 1
            continue

        # Handle class/interface declaration
        if any(keyword in line for keyword in ['class ', 'interface ']):
            # Get full declaration (might span multiple lines)
            declaration = line
            while not declaration.strip().endswith('{') and i + 1 < len(lines):
                i += 1
                declaration += ' ' + lines[i].strip()

            # Clean up declaration
            declaration = re.sub(r'\s*\{.*$', '', declaration)  # Remove opening brace and anything after
            processed_lines.append(declaration + ' {')
            processed_lines.append('')
            i += 1
            continue

        has_modifier = any(modifier in line for modifier in modifiers)

        # Handle field declarations
        if has_modifier and ';' in line and '(' not in line:
            # Keep only field declaration, remove any initialization
            field_decl = re.sub(r'=.*?;', ';', line)
            processed_lines.append(field_decl)
            i += 1
            continue

        # Handle method declarations
        if has_modifier and '(' in line:
            # Collect full method signature (up to the first '{' or ';')
            raw_signature = line
            while not any(c in raw_signature for c in ('{', ';')) and i + 1 < len(lines):
                i += 1
                raw_signature += ' ' + lines[i].strip()

            # Clean up method signature
            processed_lines.append(clean_method_signature(raw_signature))
            processed_lines.append('')

            # Skip method implementation. Depth is measured on the RAW text,
            # because the cleaned signature no longer contains the body brace.
            depth = raw_signature.count('{') - raw_signature.count('}')
            # "i + 1 < len(lines)" keeps lines[i] in range when braces are
            # unbalanced (e.g. a brace inside a string literal).
            while depth > 0 and i + 1 < len(lines):
                i += 1
                depth += lines[i].count('{') - lines[i].count('}')
            i += 1
            continue

        i += 1

    # Add closing brace for class/interface
    processed_lines.append('}')

    # Combine processed lines
    return '\n'.join(processed_lines)

@handle_exception
def process_java_file(filepath: str) -> Optional[Tuple[str, str]]:
    """
    Turn one Java source file into an (artifact id, documentation) pair.

    The artifact id is ``CC_`` followed by the file name without extension.
    Any failure is logged and reported as ``None`` rather than raised.

    Args:
        filepath: Path to the Java file

    Returns:
        Tuple of (artifact_id, content) or None if processing fails
    """
    try:
        logger.info(f"Processing Java file: {filepath}")

        # File name without directory or extension doubles as the class name.
        stem, _extension = os.path.splitext(os.path.basename(filepath))
        artifact_id = f"CC_{stem}"

        with open(filepath, 'r', encoding='utf-8') as source:
            java_source = source.read()

        return artifact_id, extract_java_documentation(java_source)

    except Exception as e:
        logger.error(f"Failed to process {filepath}: {str(e)}")
        return None

In [4]:
# Cell [4] - Main Processing Function
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def process_java_documentation(source_dir: str, output_file: str) -> None:
    """
    Process Java files to create XML with documentation artifacts

    Walks ``source_dir`` recursively, converts every ``.java`` file with
    ``process_java_file`` and writes the collected artifacts to
    ``output_file``. Directories and files are visited in sorted order so the
    artifact order in the output does not depend on the filesystem.

    Args:
        source_dir: Directory containing Java source files
        output_file: Path for output XML file
    """
    logger.info(f"Processing Java documentation from: {source_dir}")

    # Create XML structure
    root = create_xml_structure(
        collection_id="CC",
        collection_name="iTrust Documentation Artifacts",
        collection_desc="Java documentation and interfaces"
    )
    artifacts_elem = root.find("artifacts")

    # os.walk yields nothing for a missing directory; say so explicitly
    # instead of silently producing an empty collection.
    if not os.path.isdir(source_dir):
        logger.warning(f"Source directory not found: {source_dir}")

    # Process all Java files
    file_count = 0
    for root_dir, dirs, files in os.walk(source_dir):
        # Sorting in place makes the (top-down) walk descend in a
        # deterministic order.
        dirs.sort()
        for filename in sorted(files):
            if filename.endswith('.java'):
                filepath = os.path.join(root_dir, filename)
                result = process_java_file(filepath)

                # None means the file could not be processed; skip it.
                if result:
                    artifact_id, content = result
                    add_artifact(artifacts_elem, artifact_id, content)
                    file_count += 1

    logger.info(f"Processed {file_count} Java files")

    # Generate and save XML
    xml_string = prettify_xml(root)
    if validate_xml(xml_string.encode('utf-8')):
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(xml_string)
        logger.info(f"Successfully created documentation XML: {output_file}")
    else:
        logger.error("Failed to validate XML output")

In [5]:
# Cell [4] - File Reading Function
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def read_file(filepath: str) -> Optional[str]:
    """
    Read and return the content of a file, with special handling for Java files.

    The file is decoded as UTF-8 first and as latin-1 if that fails. Content
    that looks like Java (contains a package/import statement or a class
    declaration with an access modifier) is reduced with
    ``extract_java_documentation``; other content is returned stripped of
    leading and trailing whitespace.

    Args:
        filepath: Path to the file to read

    Returns:
        Content of the file or None if error
    """
    java_indicators = ['package ', 'import ', 'public class', 'private class', 'protected class']

    def _load(encoding: str) -> str:
        # Read the file with the given encoding and post-process Java content.
        with open(filepath, 'r', encoding=encoding) as f:
            content = f.read().strip()

        # Check if this is likely Java code
        if any(indicator in content for indicator in java_indicators):
            logger.debug(f"Detected Java-like content in {filepath}")
            # Uses the extractor defined in this notebook.
            content = extract_java_documentation(content)

        return content

    try:
        return _load('utf-8')

    except UnicodeDecodeError:
        logger.warning(f"File {filepath} appears to use non-UTF-8 encoding, trying latin-1")
        try:
            return _load('latin-1')
        except Exception as e:
            logger.error(f"Error reading file {filepath} with latin-1 encoding: {str(e)}")
            return None

    except Exception as e:
        logger.error(f"Error reading file {filepath}: {str(e)}")
        return None

In [6]:
# Cell [5] - Process Requirements Function
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def process_requirements(source_dir: str, output_file: str) -> None:
    """Build the "UC" artifacts XML from the .txt/.java files of a directory.

    Only the directory itself is listed (no recursion). Each artifact id is
    the file name without its extension.

    Args:
        source_dir: Directory containing requirement files
        output_file: Path to output XML file

    Raises:
        FileNotFoundError: If ``source_dir`` does not exist.
    """
    logger.info(f"Processing requirements from directory: {source_dir}")

    root = create_xml_structure(
        collection_id="UC",
        collection_name="iTrust Source Artifacts",
        collection_desc="Use cases"
    )
    artifacts_elem = root.find("artifacts")

    try:
        # Ensure source directory exists
        if not os.path.exists(source_dir):
            raise FileNotFoundError(f"Source directory not found: {source_dir}")

        accepted_suffixes = ('.txt', '.java')
        for filename in sorted(os.listdir(source_dir)):
            # Extension match is case-insensitive.
            if not filename.lower().endswith(accepted_suffixes):
                continue

            filepath = os.path.join(source_dir, filename)
            logger.debug(f"Processing file: {filepath}")

            content = read_file(filepath)
            if not content:
                logger.warning(f"Skipping file due to empty/unreadable content: {filepath}")
                continue

            # Artifact id is the file name without extension.
            req_id = os.path.splitext(filename)[0]
            add_artifact(artifacts_elem, req_id, content)

        # Generate and save XML
        xml_string = prettify_xml(root)
        if not validate_xml(xml_string.encode('utf-8')):
            logger.error("Failed to validate XML output")
        else:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(xml_string)
            logger.info(f"Successfully created XML file: {output_file}")

    except Exception as e:
        logger.error(f"Error processing requirements: {str(e)}", exc_info=True)
        raise

In [None]:
# Cell [6] - Main Execution
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def verify_file_creation(filepath: str) -> None:
    """Log whether ``filepath`` exists, including its size in bytes if it does."""
    if not os.path.exists(filepath):
        logger.error(f"Failed to create {filepath}")
        return

    size = os.path.getsize(filepath)
    logger.info(f"Successfully created {filepath} (size: {size} bytes)")

# Process Java documentation
# Both paths are relative, so they resolve against the kernel's current
# working directory.
java_code_dir = "itrust_v10_code/iTrust/src"  # root of the Java sources to scan
comments_artifacts = "iTrust-comments-functions-targetArtifacts.xml"  # output XML

process_java_documentation(
    source_dir=java_code_dir,
    output_file=comments_artifacts
)
# Log whether the output file now exists and how large it is.
verify_file_creation(comments_artifacts)

logger.info("Completed processing Java documentation")

In [None]:
# Cell [7] - Answer Set Validation
# iTrust/iTrust/02_requirements_to_artifacts_comments_headers_rtf_xml.ipynb

@handle_exception
def load_xml_artifacts(filepath: str) -> Set[str]:
    """
    Load artifact IDs from an XML file

    Artifacts whose ``<id>`` element is missing or empty are ignored, so the
    returned set only ever contains non-empty strings.

    Args:
        filepath: Path to XML file

    Returns:
        Set of artifact IDs; an empty set if the file cannot be read or
        parsed, or has no ``artifacts`` element
    """
    logger.info(f"Loading artifacts from {filepath}")
    try:
        tree = ET.parse(filepath)
        root = tree.getroot()
        artifacts = root.find('artifacts')
        if artifacts is None:
            logger.warning(f"No artifacts found in {filepath}")
            return set()

        artifact_ids = set()
        for artifact in artifacts.findall('artifact'):
            id_elem = artifact.find('id')
            # An empty <id/> has text None; a None member would make the set
            # unsortable alongside the string ids.
            if id_elem is not None and id_elem.text:
                artifact_ids.add(id_elem.text)

        logger.debug(f"Found {len(artifact_ids)} artifacts in {filepath}")
        return artifact_ids

    except Exception as e:
        # Best effort: a missing or malformed file is reported, not raised.
        logger.error(f"Error loading artifacts from {filepath}: {str(e)}")
        return set()

@handle_exception
def load_answer_set_links(filepath: str) -> Tuple[Set[str], Set[str]]:
    """
    Collect the artifact IDs referenced by the links of an answer set XML.

    Args:
        filepath: Path to answer set XML

    Returns:
        Tuple of (source artifact IDs, target artifact IDs); two empty sets
        if the file cannot be loaded or has no ``links`` element
    """
    logger.info(f"Loading answer set from {filepath}")
    try:
        links = ET.parse(filepath).getroot().find('links')
        if links is None:
            logger.warning("No links found in answer set")
            return set(), set()

        source_ids = set()
        target_ids = set()
        # Each link contributes at most one id to each set.
        endpoints = (
            ('source_artifact_id', source_ids),
            ('target_artifact_id', target_ids),
        )

        for link in links.findall('link'):
            for tag, bucket in endpoints:
                node = link.find(tag)
                # Ignore missing elements and empty text.
                if node is not None and node.text:
                    bucket.add(node.text)

        logger.debug(f"Found {len(source_ids)} source IDs and {len(target_ids)} target IDs in answer set")
        return source_ids, target_ids

    except Exception as e:
        logger.error(f"Error loading answer set from {filepath}: {str(e)}")
        return set(), set()

@handle_exception
def validate_answer_set(
    source_artifacts_file: str = "iTrust-comments-functions-sourceArtifacts.xml",
    target_artifacts_file: str = "iTrust-comments-functions-targetArtifacts.xml",
    answer_set_file: str = "iTrust-answerSet.xml",
) -> None:
    """
    Validate that all artifacts referenced in the answer set exist in either
    source or target artifacts files

    Results are only logged: missing artifacts at ERROR, unused artifacts at
    WARNING, followed by a summary of the counts.

    Args:
        source_artifacts_file: Path to the source artifacts XML
        target_artifacts_file: Path to the target artifacts XML
        answer_set_file: Path to the answer set XML
    """
    logger.info("Starting answer set validation")

    # Load all artifacts
    source_artifacts = load_xml_artifacts(source_artifacts_file)
    target_artifacts = load_xml_artifacts(target_artifacts_file)
    answer_sources, answer_targets = load_answer_set_links(answer_set_file)

    # Validate source artifacts: referenced by the answer set but not present
    missing_sources = answer_sources - source_artifacts
    if missing_sources:
        logger.error(f"Missing source artifacts: {sorted(missing_sources)}")
    else:
        logger.info("All source artifacts found")

    # Validate target artifacts
    missing_targets = answer_targets - target_artifacts
    if missing_targets:
        logger.error(f"Missing target artifacts: {sorted(missing_targets)}")
    else:
        logger.info("All target artifacts found")

    # Report unused artifacts (present in the files, never referenced)
    unused_sources = source_artifacts - answer_sources
    if unused_sources:
        logger.warning(f"Unused source artifacts: {sorted(unused_sources)}")

    unused_targets = target_artifacts - answer_targets
    if unused_targets:
        logger.warning(f"Unused target artifacts: {sorted(unused_targets)}")

    # Summary statistics
    logger.info("\nValidation Summary:")
    logger.info(f"Total source artifacts: {len(source_artifacts)}")
    logger.info(f"Total target artifacts: {len(target_artifacts)}")
    logger.info(f"Total answer set sources: {len(answer_sources)}")
    logger.info(f"Total answer set targets: {len(answer_targets)}")
    logger.info(f"Missing source artifacts: {len(missing_sources)}")
    logger.info(f"Missing target artifacts: {len(missing_targets)}")
    logger.info(f"Unused source artifacts: {len(unused_sources)}")
    logger.info(f"Unused target artifacts: {len(unused_targets)}")

# Run validation; called without arguments, so validate_answer_set() uses its
# built-in artifact and answer-set file names.
validate_answer_set()