# In [None]:
import xml.etree.ElementTree as ET
from xml.dom import minidom
import re
import logging

# Set up logging for the whole script: DEBUG threshold, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Logger named after this module; used by every function below.
logger = logging.getLogger(__name__)

def parse_handtrace(handtrace_text):
    """Turn raw handtrace text into a list of source/target records.

    The text is cut into chunks on the ``%`` character and whitespace-only
    chunks are discarded. Only the first non-blank line of each chunk is
    used: its first whitespace-separated token becomes the source artifact
    ID and every following token becomes a target artifact ID.

    Returns:
        A list of dicts, each with the keys ``'source_id'`` (str) and
        ``'target_ids'`` (list of str, possibly empty), in input order.
    """
    logger.debug(f"Starting to parse handtrace text of length: {len(handtrace_text)}")

    # Keep only the chunks that still contain something after trimming.
    chunks = []
    for piece in handtrace_text.split('%'):
        trimmed = piece.strip()
        if trimmed:
            chunks.append(trimmed)
    logger.debug(f"Found {len(chunks)} records after splitting on %")

    results = []
    for number, chunk in enumerate(chunks, start=1):
        logger.debug(f"Processing record {number}:")
        logger.debug(f"Raw record: {chunk}")

        # Trim every line of the chunk, then drop the blank ones.
        trimmed_lines = (raw_line.strip() for raw_line in chunk.split('\n'))
        content_lines = [text for text in trimmed_lines if text]
        logger.debug(f"Found {len(content_lines)} non-empty lines in record")

        if len(content_lines) == 0:
            logger.debug("Skipping empty record")
            continue

        # Only the first line carries the IDs.
        tokens = content_lines[0].split()
        if len(tokens) == 0:
            logger.debug("No words found in record")
            continue

        # Head token is the source; the tail (maybe empty) holds the targets.
        source_id, *target_ids = tokens

        logger.debug(f"Source ID: {source_id}")
        logger.debug(f"Target IDs: {target_ids}")

        entry = {'source_id': source_id, 'target_ids': target_ids}
        results.append(entry)

    logger.debug(f"Finished parsing. Found {len(results)} valid records")
    return results

def create_xml_answer_set(records, source_collection='cm1-high',
                          target_collection='cm1-low', confidence_score='1'):
    """Build the ``answer_set`` XML tree from parsed handtrace records.

    The resulting tree has two children under the ``answer_set`` root:
    ``answer_info`` (naming the source and target artifact collections) and
    ``links`` (one ``link`` element per source/target pair).

    Args:
        records: Iterable of dicts with the keys ``'source_id'`` (str) and
            ``'target_ids'`` (iterable of str). A record with no targets
            contributes no links.
        source_collection: Text for ``source_artifacts_collection``.
            Defaults to ``'cm1-high'``.
        target_collection: Text for ``target_artifacts_collection``.
            Defaults to ``'cm1-low'``.
        confidence_score: Value written into every link's
            ``confidence_score`` element; converted with ``str()``.
            Defaults to ``'1'``.

    Returns:
        The root ``xml.etree.ElementTree.Element`` (tag ``answer_set``).
    """
    logger.debug("Creating XML answer set")

    root = ET.Element('answer_set')

    # Header section: which collections the links connect.
    answer_info = ET.SubElement(root, 'answer_info')
    ET.SubElement(answer_info, 'source_artifacts_collection').text = source_collection
    ET.SubElement(answer_info, 'target_artifacts_collection').text = target_collection

    links = ET.SubElement(root, 'links')

    # Element text must be a string for serialization; convert once up front.
    score_text = str(confidence_score)

    link_count = 0
    for record in records:
        source_id = record['source_id']
        logger.debug("Processing record with source %s", source_id)
        # Each (source, target) pair becomes its own <link> element.
        for target_id in record['target_ids']:
            link = ET.SubElement(links, 'link')
            ET.SubElement(link, 'source_artifact_id').text = source_id
            ET.SubElement(link, 'target_artifact_id').text = target_id
            ET.SubElement(link, 'confidence_score').text = score_text
            link_count += 1

    logger.debug("Created %d links in XML", link_count)
    return root

def pretty_print_xml(element):
    """Render an ElementTree element as indented XML text.

    The element is serialized, re-parsed with minidom and pretty-printed
    using a two-space indent. Any output line containing ``<?xml`` is then
    dropped unless, once stripped, it is exactly
    ``<?xml version="1.0" encoding="utf-8"?>``. The surviving lines are
    joined with ``'\\n'`` (no trailing newline).
    """
    allowed_declaration = '<?xml version="1.0" encoding="utf-8"?>'

    serialized = ET.tostring(element, 'utf-8')
    document = minidom.parseString(serialized)

    kept_lines = []
    for line in document.toprettyxml(indent="  ").splitlines():
        # Skip XML declarations other than the one allowed form.
        if '<?xml' in line and line.strip() != allowed_declaration:
            continue
        kept_lines.append(line)
    return '\n'.join(kept_lines)

def main(input_path='handtrace.txt', output_path='CM1-answerSet.xml'):
    """Convert a handtrace text file into an XML answer-set file.

    Reads ``input_path`` as UTF-8, parses it with ``parse_handtrace``,
    builds the XML tree with ``create_xml_answer_set``, pretty-prints it
    with ``pretty_print_xml``, prepends a UTF-8 XML declaration and writes
    the result to ``output_path`` as UTF-8.

    Args:
        input_path: Path of the handtrace file to read. Defaults to
            ``'handtrace.txt'``.
        output_path: Path of the XML file to write. Defaults to
            ``'CM1-answerSet.xml'``.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        Exception: Any other error raised while reading or writing is
            logged and re-raised unchanged.
    """
    logger.info("Opening %s", input_path)
    # Keep the try body to the I/O itself so only read failures are caught here.
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            handtrace_text = f.read()
    except FileNotFoundError:
        logger.error("%s not found", input_path)
        raise
    except Exception as e:
        logger.error("Error reading %s: %s", input_path, e)
        raise
    logger.debug("Read %d characters from %s", len(handtrace_text), input_path)
    # A short preview of the input helps when debugging format problems.
    logger.debug("First 100 characters of file: %s", handtrace_text[:100])

    records = parse_handtrace(handtrace_text)
    xml_root = create_xml_answer_set(records)

    # The declaration is added here explicitly so that it names utf-8,
    # matching the encoding used to write the file below.
    xml_string = '<?xml version="1.0" encoding="utf-8"?>\n' + pretty_print_xml(xml_root)

    logger.info("Writing to %s", output_path)
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(xml_string)
    except Exception as e:
        logger.error("Error writing to %s: %s", output_path, e)
        raise
    logger.debug("Successfully wrote %d characters to %s", len(xml_string), output_path)

    logger.info("Processing complete")

# Entry-point guard: run the conversion only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()