# In [None]:
##Description:
# This file takes XML incidents as they are created, removes their irrelevant
# tags, removes any narratives (ideally they wouldn't be there), removes the first line
# to help with recoding, and finally recodes all known codes.

# Note you will need to modify the file/dir paths and any names in your env.


from xml.etree import ElementTree as ET
import csv
import os
import xml.etree.ElementTree as ET

# Report the current working directory: the relative file and folder paths
# configured further down in this script are resolved against it.
current_directory = os.getcwd()
print("Current working directory:", current_directory)

# Function to remove irrelevant XML tags
def remove_tags_from_xml(xml_file_path):
    """Strip a fixed set of tags from an XML file and rewrite it in place.

    Every element below the root whose tag is in the removal list is
    detached from its parent, at any nesting depth. The root element itself
    is never removed. The cleaned tree is written back to ``xml_file_path``
    as UTF-8 with an XML declaration.

    Args:
        xml_file_path: Path of the XML file to clean; it is overwritten.

    Returns:
        None. Progress and errors are reported with ``print``; parse errors
        and file-system errors are reported and not re-raised.
    """
    # Tags to remove (a set: constant-time membership, duplicates collapse)
    tags_to_remove = frozenset([
        'CreatorId', 'CreatorDate', 'UpdatorId', 'UpdatorDate',
        'IncidentReportNum', 'AgencyCode', 'ReportTime', 'StartTime',
        'SupplementSequence', 'IncSupplementStatusCode', 'SuppReportDate',
        'SuppReportTime', 'AgencyOnly', 'SecurityLevel', 'SuppAgencyCode',
        'IncidentId', 'OffenseNumber', 'SupplementSeq', 'UCRNumber',
        'OffenseStatusDate', 'UnknownNoOffenders', 'DsIcrSubmissionStatus',
        'DsIcrDate', 'ORINumber', 'InternalID', 'NarrativeNumber',
        'NarrativeTitle', 'SSN', 'DateOfInfo', 'SLastName', 'SFirstName',
    ])

    try:
        # Load the XML file
        tree = ET.parse(xml_file_path)
        root = tree.getroot()

        # Visit every element as a potential parent and drop its unwanted
        # children. Both sequences are snapshotted with list() because the
        # tree is modified while it is being walked.
        for parent in list(root.iter()):
            for child in list(parent):
                if child.tag in tags_to_remove:
                    parent.remove(child)

        # Save the cleaned XML file
        tree.write(xml_file_path, encoding='utf-8', xml_declaration=True)
        print(f"Successfully cleaned: {xml_file_path}")
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_file_path}: {e}")
    except OSError as e:
        # Missing/unreadable/unwritable file: report and carry on.
        print(f"Error: {e}")

# Function to load codes from a CSV file
def load_codes_from_csv(csv_file_path):
    """Load the code-to-description lookup table from a CSV file.

    The CSV must have a header row containing the columns ``Category``,
    ``Code`` and ``Description``.

    Args:
        csv_file_path: Path to the CSV file.

    Returns:
        A dict mapping ``(Category, Code)`` tuples to the ``Description``
        string. Surrounding whitespace is stripped from ``Code`` only. If a
        key occurs more than once, the last row wins.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the required columns is missing.
    """
    # 'utf-8-sig' transparently drops a leading byte-order mark (which would
    # otherwise end up inside the first header name); files without a BOM
    # decode exactly as with plain UTF-8. newline='' is what the csv module
    # requires so that it can handle line endings itself.
    with open(csv_file_path, 'r', encoding='utf-8-sig', newline='') as file:
        return {
            (row['Category'], row['Code'].strip()): row['Description']
            for row in csv.DictReader(file)
        }

# Function to replace codes in the XML content based on the loaded code descriptions
def replace_codes_in_xml(xml_content, code_descriptions):
    """Replace coded element values with their descriptions.

    Every element whose tag contains the substring ``'Code'`` has its text
    looked up in ``code_descriptions`` under the key
    ``(tag, text stripped of surrounding whitespace)``. When a non-empty
    description is found, it replaces the element's text. Elements with no
    text, unknown codes, or an empty description are left unchanged.

    Args:
        xml_content: The XML document as a string.
        code_descriptions: Dict mapping ``(tag, code)`` tuples to description
            strings.

    Returns:
        The updated XML document serialized as a string.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    for elem in root.iter():
        # Empty elements such as <SomeCode/> have text None; there is no
        # code to look up, so skip them rather than fail on None.strip().
        if 'Code' not in elem.tag or elem.text is None:
            continue
        description = code_descriptions.get((elem.tag, elem.text.strip()))
        if description:
            elem.text = description
    return ET.tostring(root, encoding='unicode')

# Folder paths
csv_file_path = 'reformatted_codes.csv'  # Path to the CSV file
stripped_narrative_folder = 'CPSCP_Project/Incidents/StrippedNarratives'  # Path to the StrippedNarrative folder
recoded_folder = 'CPSCP_Project/Incidents/Recoded'  # Path to the folder where recoded XML files will be saved

# Load code descriptions from the CSV file
code_descriptions = load_codes_from_csv(csv_file_path)

# Make sure the output folder exists; otherwise the first write below would
# fail with FileNotFoundError on a fresh checkout.
os.makedirs(recoded_folder, exist_ok=True)

# Process each XML file in the StrippedNarrative folder
for filename in os.listdir(stripped_narrative_folder):
    # Only XML files are recoded; anything else in the folder is ignored
    if not filename.endswith('.xml'):
        continue

    # Read XML content from the file
    with open(os.path.join(stripped_narrative_folder, filename), 'r', encoding='utf-8') as file:
        xml_content = file.read()

    # Replace codes in the XML content
    updated_xml_content = replace_codes_in_xml(xml_content, code_descriptions)

    # Write the updated XML content to a file of the same name in the Recoded folder
    output_filename = os.path.join(recoded_folder, filename)
    with open(output_filename, 'w', encoding='utf-8') as file:
        file.write(updated_xml_content)

    print(f"Updated XML file saved: {output_filename}")
