In [17]:
from xml.etree import ElementTree as ET

# Load and parse the XML file
xml_file_path = r"I:\20x_Single_4Ch_MN_screen_PP_JW\20x_Single_4Ch_MN_screen_PP_JW_FIV920A1_2\ImageCheckList B - 2.XML"
tree = ET.parse(xml_file_path)
root = tree.getroot()

# Since the XML structure is unknown, let's print the tag of the root and the first child, 
# as well as the first occurrence of <ImageCheck_Point> to understand its structure
print(f"Root tag: {root.tag}")
if root:
    print(f"First child tag: {root[0].tag}")

# Try to find the first <ImageCheck_Point> to understand its structure
for image_check_point in root.findall('.//ImageCheck_Point'):
    # Print details of the first <ImageCheck_Point> to understand its structure
    ET.dump(image_check_point)
    break  # Stop after printing the first one to not overload the output


Root tag: ImageCheck_PointList
First child tag: Errors
<ImageCheck_Point xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <Plate_Xum>64908.6015625</Plate_Xum>
      <Plate_Yum>53544.1015625</Plate_Yum>
      <PixelX_Fraction>0.9294141</PixelX_Fraction>
      <PixelY_Fraction>0.3078455</PixelY_Fraction>
      <WaveLength>0</WaveLength>
      <Well_Row>B</Well_Row>
      <Well_Col>2</Well_Col>
      <FOV>17</FOV>
      <RaftID>K5A9</RaftID>
      <PlateID>FIV920A1_2</PlateID>
      <Created>2023-08-17T08:07:04.962126-05:00</Created>
      <Annotations>
        <ImageCheck_Annotation>
          <Name> acc_0.7969 CNN_AC_R CNNs_7 kn_4 flt_18 iRt_1.5 ds__270_ nE_169 bSz_64 bNm_0 i_22</Name>
          <Value xsi:type="xsd:string">2 Nuc 2 Neurons</Value>
          <Default_Key>0</Default_Key>
          <Score>0.60877126</Score>
          <Note>2 Nuc 2 Neurons = 0.609
2 Nuc 1+ Neuron Poor Qual = 0.194
3-4 Nuclei = 0.144
5+ Nuclei = 0.028
2 Nuc 1 Neuron = 0.012
6 Focus (only if you r

In [18]:
import pandas as pd
# Create a dictionary from the Excel sheet for easier lookup
excel_data = pd.read_csv(r"R:\FIVE\EXP\FIV946\Pre Check\Pre Check v1 LA CorrectedAnnotation mlClassLayout.csv")
correction_dict = {}
for index, row in excel_data.dropna(subset=['CorrectedAnnotation']).iterrows():
    key = (row['PlateID'], row['RaftID'])
    correction_dict[key] = row['CorrectedAnnotation']

# Function to update the XML based on corrections specified in the Excel sheet
def update_xml_annotations(root):
    updated = False  # Flag to indicate if any updates were made
    for image_check_point in root.findall('.//ImageCheck_Point'):
        plate_id = image_check_point.find('PlateID').text
        raft_id = image_check_point.find('RaftID').text
        
        # Check if there's a correction for the PlateID and RaftID
        if (plate_id, raft_id) in correction_dict:
            # Get the corrected annotation value
            corrected_value = correction_dict[(plate_id, raft_id)]
            
            # Find the Annotations to update
            for annotation in image_check_point.findall('.//Annotations/ImageCheck_Annotation'):
                # Update the Value and set Name to "Manual"
                annotation.find('Value').text = corrected_value
                annotation.find('Name').text = "Manual"
                updated = True  # Mark that we've made an update
                
    return updated

# Update the XML annotations based on the Excel sheet corrections
update_made = update_xml_annotations(root)

# If updates were made, save the modified XML to a new file
if update_made:
    updated_xml_file_path = r'I:\20x_Single_4Ch_MN_screen_PP_JW\20x_Single_4Ch_MN_screen_PP_JW_FIV920A1_2\Updated_ImageCheckList B - 2v2.XML'
    tree.write(updated_xml_file_path)
    output_message = f"Updates were made. The updated XML file is saved at: {updated_xml_file_path}"
else:
    output_message = "No updates were necessary."

output_message


'Updates were made. The updated XML file is saved at: I:\\20x_Single_4Ch_MN_screen_PP_JW\\20x_Single_4Ch_MN_screen_PP_JW_FIV920A1_2\\Updated_ImageCheckList B - 2v2.XML'

Edit XML files based on corrected annotations

In [2]:
import os
from xml.etree import ElementTree as ET
import pandas as pd

def load_corrections(excel_file_path):
    excel_data = pd.read_csv(excel_file_path)
    correction_dict = {}
    for index, row in excel_data.dropna(subset=['CorrectedAnnotation']).iterrows():
        key = (row['PlateID'], row['RaftID'])
        correction_dict[key] = row['CorrectedAnnotation']
    return correction_dict

def update_xml_annotations(root, correction_dict):
    updated = False
    for image_check_point in root.findall('.//ImageCheck_Point'):
        plate_id = image_check_point.find('PlateID').text
        raft_element = image_check_point.find('RaftID')
        
        # Continue if RaftID element is missing or RaftID text is empty
        if raft_element is None or not raft_element.text:
            continue
        
        raft_id = raft_element.text
        
        if (plate_id, raft_id) in correction_dict:
            corrected_value = correction_dict[(plate_id, raft_id)]
            for annotation in image_check_point.findall('.//Annotations/ImageCheck_Annotation'):
                annotation.find('Value').text = corrected_value
                annotation.find('Name').text = "Manual"
                updated = True
                
    return updated

def process_folder(directory, excel_file_path):
    correction_dict = load_corrections(excel_file_path)
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            if file.startswith("ImageCheckList") and file.endswith(".XML"):
                xml_file_path = os.path.join(subdir, file)
                tree = ET.parse(xml_file_path)
                root = tree.getroot()
                
                if update_xml_annotations(root, correction_dict):
                    new_file_path = xml_file_path.rsplit('.', 1)[0] + '.' + xml_file_path.rsplit('.', 1)[1]
                    tree.write(new_file_path)
                    

# Example usage
# directory = r"I:\20x_Single_4Ch_MN_screen_PP_JW"
directory = r"I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW"
excel_file_path = r"R:\FIVE\EXP\FIV946\Pre Check\Pre Check v1 LA CorrectedAnnotation mlClassLayout.csv"
process_folder(directory, excel_file_path)


Delete updated xml files

In [3]:
import os

def delete_updated_xml_files(directory):
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file is an XML file and contains 'updated' in its name
            if file.endswith(".XML") and 'updated' in file:
                file_path = os.path.join(subdir, file)
                try:
                    os.remove(file_path)
                    print(f"Deleted file: {file_path}")
                except OSError as e:
                    print(f"Error deleting file {file_path}: {e}")

# Example usage
directory = r'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW'
delete_updated_xml_files(directory)


Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 2_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList pre 04-04 update\ImageCheckList B - 1_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList pre 04-04 update\ImageCheckList B - 2_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\Old ImgCheckList\ImageCheckList B - 1v1v1_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\Old ImgCheckList\ImageCheckList B - 1v1_updated.XML
Deleted file: I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_Tubuli

Change name of updated XML files

In [2]:
import os

def rename_xml_files(directory):
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file is an XML file and contains 'ImageCheckList' in its name
            if file.endswith(".XML") and 'ImageCheckList' in file:
                old_file_path = os.path.join(subdir, file)
                
                # If the file name contains '_updated', remove '_updated'
                if '_updated' in file:
                    new_file_name = file.replace('_updated', '')
                else:
                    # For files without '_updated', append 'v1' before the .XML extension
                    new_file_name = file.rsplit('.', 1)[0] + 'v1.' + file.rsplit('.', 1)[1]
                
                new_file_path = os.path.join(subdir, new_file_name)
                
                # Rename the file
                os.rename(old_file_path, new_file_path)
                print(f"Renamed '{old_file_path}' to '{new_file_path}'")

# Example usage
directory = r'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW'
rename_xml_files(directory)


Renamed 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v1.XML' to 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v1v1.XML'
Renamed 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v1_updated.XML' to 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v1.XML'
Renamed 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v2.XML' to 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v2v1.XML'
Renamed 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckList B - 1v2_updated.XML' to 'I:\20X_Quad_3Ch_TubulinHoechstMito_PP_JW\20X_Quad_3Ch_TubulinHoechstMito_PP_JW_FIV910A4_1\ImageCheckLi