In [1]:
# Cell [1] - Import required libraries
# @title: Import Libraries and Setup
# @description: Import necessary libraries for XML processing and file handling

import logging
import os
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional, Tuple

# Logging setup: INFO level, each record prefixed with timestamp and level name
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

In [2]:
# Cell [2] - Define helper functions
# @title: Helper Functions
# @description: Functions to parse XML files and create mappings

def parse_xml_file(file_path: str) -> ET.Element:
    """
    Load an XML document from disk and hand back its top-level element.

    Any failure while reading or parsing is logged with the offending path
    and then re-raised unchanged.

    Args:
        file_path (str): Location of the XML file to read

    Returns:
        ET.Element: The document's root element
    """
    try:
        return ET.parse(file_path).getroot()
    except Exception as exc:
        logger.error(f"Error parsing XML file {file_path}: {str(exc)}")
        raise

def create_artifact_mapping(root: ET.Element) -> Dict[str, str]:
    """
    Create a mapping between artifact IDs and their content paths.

    Every ``<artifact>`` element found below ``root`` contributes one entry:
    the text of its ``<id>`` child maps to the text of its ``<content>``
    child. If the same ID occurs more than once, the last occurrence wins.

    Args:
        root (ET.Element): Root element of the XML tree

    Returns:
        Dict[str, str]: Mapping of artifact IDs to content paths

    Raises:
        ValueError: If an artifact has no ``<id>`` or no ``<content>`` child.
            The message identifies the offending artifact.
    """
    mapping = {}
    try:
        for position, artifact in enumerate(root.findall('.//artifact'), start=1):
            # Compare against None explicitly: an Element without children is
            # falsy, so a plain truthiness test would reject valid leaf nodes.
            id_elem = artifact.find('id')
            if id_elem is None:
                raise ValueError(f"Artifact #{position} has no <id> element")

            content_elem = artifact.find('content')
            if content_elem is None:
                raise ValueError(
                    f"Artifact #{position} (id={id_elem.text!r}) has no <content> element"
                )

            mapping[id_elem.text] = content_elem.text
        return mapping
    except Exception as e:
        logger.error(f"Error creating artifact mapping: {str(e)}")
        raise

def parse_links(answer_set_root: ET.Element) -> List[Tuple[str, str]]:
    """
    Parse requirement-code links from the answer set XML.

    Every ``<link>`` element found below ``answer_set_root`` yields one tuple
    built from the text of its ``<source_artifact_id>`` and
    ``<target_artifact_id>`` children, in document order.

    Args:
        answer_set_root (ET.Element): Root element of the answer set XML

    Returns:
        List[Tuple[str, str]]: List of (requirement_id, code_id) tuples

    Raises:
        ValueError: If a link lacks its source or target child element.
            The message identifies the offending link by position.
    """
    try:
        links = []
        for position, link in enumerate(answer_set_root.findall('.//link'), start=1):
            endpoints = []
            for tag in ('source_artifact_id', 'target_artifact_id'):
                child = link.find(tag)
                # Explicit None check: childless Elements evaluate as falsy.
                if child is None:
                    raise ValueError(f"Link #{position} has no <{tag}> element")
                endpoints.append(child.text)
            req_id, code_id = endpoints
            links.append((req_id, code_id))
        return links
    except Exception as e:
        logger.error(f"Error parsing links: {str(e)}")
        raise

In [None]:
# Cell [3] - Load and process XML files
# @title: Load XML Files
# @description: Load and process the input XML files

# Input file paths (relative, so they resolve against the kernel's working directory)
ANSWER_SET_PATH = "answer_req_javacode.xml"
TARGET_CODE_PATH = "target_JavaCode.xml"
SOURCE_REQ_PATH = "source_req.xml"

# The names bound below (code_mapping, req_mapping, links) are module-level
# globals that create_updated_answer_set() reads in a later cell, so this cell
# must run successfully before that one.
try:
    # Parse the three XML files; the first failure aborts the cell
    answer_set_root = parse_xml_file(ANSWER_SET_PATH)
    target_code_root = parse_xml_file(TARGET_CODE_PATH)
    source_req_root = parse_xml_file(SOURCE_REQ_PATH)

    # Create artifact-ID -> content mappings for code and requirement artifacts
    code_mapping = create_artifact_mapping(target_code_root)
    req_mapping = create_artifact_mapping(source_req_root)
    
    # Get the (requirement_id, code_id) links from the answer set
    links = parse_links(answer_set_root)
    
    # Report how much was loaded
    logger.info(f"Processed {len(links)} links")
    logger.info(f"Found {len(code_mapping)} code artifacts")
    logger.info(f"Found {len(req_mapping)} requirement artifacts")

except Exception as e:
    # Log a cell-level summary, then re-raise so the notebook run stops here
    logger.error(f"Error in processing XML files: {str(e)}")
    raise

In [None]:
# Cell [4] - Create updated answer set
# @title: Generate Updated Answer Set
# @description: Create the updated answer set with CC_ prefix and confidence scores

def create_updated_answer_set(
    link_pairs: Optional[List[Tuple[str, str]]] = None,
    code_artifacts: Optional[Dict[str, str]] = None,
    req_artifacts: Optional[Dict[str, str]] = None,
) -> ET.Element:
    """
    Create the updated answer set XML with CC_ prefix and confidence scores.

    The result is an ``<answer_set>`` element holding an ``<answer_info>``
    header (source collection "UC", target collection "CC") and a ``<links>``
    container. A ``<link>`` is emitted only for pairs whose requirement ID is
    a key of ``req_artifacts`` and whose code ID is a key of
    ``code_artifacts``; other pairs are skipped without notice. Each emitted
    link carries the requirement ID, the code ID prefixed with ``CC_``, and a
    confidence score of "1".

    Args:
        link_pairs: (requirement_id, code_id) tuples. When omitted, the
            module-level ``links`` variable is used.
        code_artifacts: Mapping keyed by code artifact ID. When omitted, the
            module-level ``code_mapping`` variable is used.
        req_artifacts: Mapping keyed by requirement artifact ID. When omitted,
            the module-level ``req_mapping`` variable is used.

    Returns:
        ET.Element: Root element of the updated answer set XML
    """
    try:
        # Fall back to the notebook globals only for arguments not supplied,
        # so the historical zero-argument call keeps working.
        if link_pairs is None:
            link_pairs = links
        if code_artifacts is None:
            code_artifacts = code_mapping
        if req_artifacts is None:
            req_artifacts = req_mapping

        # Create root structure
        root = ET.Element("answer_set")

        # Add answer info
        info = ET.SubElement(root, "answer_info")
        ET.SubElement(info, "source_artifacts_collection").text = "UC"
        ET.SubElement(info, "target_artifacts_collection").text = "CC"

        # Add links
        links_elem = ET.SubElement(root, "links")

        for req_id, code_id in link_pairs:
            # Keep only links whose two endpoints are both known artifacts
            if code_id not in code_artifacts or req_id not in req_artifacts:
                continue

            link = ET.SubElement(links_elem, "link")
            ET.SubElement(link, "source_artifact_id").text = req_id
            ET.SubElement(link, "target_artifact_id").text = f"CC_{code_id}"
            ET.SubElement(link, "confidence_score").text = "1"

        return root
    except Exception as e:
        logger.error(f"Error creating updated answer set: {str(e)}")
        raise

# Create and save the updated answer set
def indent(elem: ET.Element, level: int = 0) -> None:
    """
    Add proper indentation to XML elements for pretty printing.

    Rewrites whitespace-only ``text``/``tail`` values in place, using two
    spaces per nesting level. Text or tails that contain non-whitespace
    characters are left untouched.

    Args:
        elem (ET.Element): The XML element to indent
        level (int): Current indentation level
    """
    i = "\n" + level * "  "
    if len(elem):
        # Whitespace before the first child: one level deeper than elem
        if not elem.text or not elem.text.strip():
            elem.text = i + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for subelem in elem:
            indent(subelem, level + 1)
        # The last child's tail precedes elem's closing tag, so it must drop
        # back to elem's own level; otherwise the closing tag is over-indented.
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


try:
    updated_root = create_updated_answer_set()

    # Apply indentation
    indent(updated_root)

    # Create the XML tree
    tree = ET.ElementTree(updated_root)

    # Write the XML declaration by hand, then the tree without its own
    # declaration, both as UTF-8 bytes
    with open("iTrust-answerSet.xml", 'wb') as f:
        f.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        tree.write(f, encoding="UTF-8", xml_declaration=False)

    logger.info("Successfully created iTrust answer set file")

except Exception as e:
    logger.error(f"Error saving answer set: {str(e)}")
    raise

In [None]:
# Cell [5] - Validate output
# @title: Validate Generated Answer Set
# @description: Verify the contents of the generated answer set file

def validate_answer_set(file_path: str) -> None:
    """
    Validate the generated answer set file.

    Parses the file and logs: the total number of ``<link>`` elements, how
    many of them contain all three required children, how many target IDs
    start with ``CC_``, and how many confidence scores equal "1". Warnings are
    logged when some links are incomplete or lack the prefix. Links with
    missing or empty children are counted as failing a check rather than
    aborting the validation.

    Args:
        file_path (str): Path to the generated answer set file
    """
    required_tags = ('source_artifact_id', 'target_artifact_id', 'confidence_score')
    try:
        root = ET.parse(file_path).getroot()

        # Count links
        links = root.findall('.//link')

        # Verify required elements
        valid_links = [
            link for link in links
            if all(link.find(tag) is not None for tag in required_tags)
        ]

        logger.info(f"Total links in output: {len(links)}")
        logger.info(f"Valid links with all elements: {len(valid_links)}")

        if len(valid_links) != len(links):
            logger.warning("Not all links contain the required elements!")

        # Verify CC_ prefix. findtext yields None for a missing child and ""
        # for an empty one; both are treated as "no prefix".
        code_ids = [link.findtext('target_artifact_id') for link in links]
        with_prefix = [
            code_id for code_id in code_ids
            if code_id and code_id.startswith('CC_')
        ]
        logger.info(f"Links with CC_ prefix: {len(with_prefix)}")

        if len(with_prefix) != len(links):
            logger.warning("Not all code IDs have CC_ prefix!")

        # Verify confidence scores
        confidence_scores = [link.findtext('confidence_score') for link in links]
        valid_scores = [score for score in confidence_scores if score == "1"]
        logger.info(f"Links with valid confidence scores: {len(valid_scores)}")

    except Exception as e:
        logger.error(f"Error validating answer set: {str(e)}")
        raise

# Validate the generated file
# (the same path the save step in the previous cell writes to)
validate_answer_set("iTrust-answerSet.xml")