In [7]:
import os
import zipfile
import xml.etree.ElementTree as ET
import shutil
import glob

In [None]:
source_dir = "need_to_be_converted"  # Directory containing the MXL files
xml_output_dir = "converted_to_xml"  # Directory to save the extracted XML files
moved_mxl_dir = "mxl_backup"  # Directory to move the original MXL files

# create output directories if they do not exist
os.makedirs(xml_output_dir, exist_ok=True)
os.makedirs(moved_mxl_dir, exist_ok=True)

# Find all .mxl files in the source directory
mxl_files = glob.glob(os.path.join(source_dir, "*.mxl"))

# unzip them all!
for mxl_file in mxl_files:
    print(f"Processing: {mxl_file}")
    try:
        with zipfile.ZipFile(mxl_file, "r") as zip_ref:
            container_file = None
            if "META-INF/container.xml" in zip_ref.namelist():
                container_file = "META-INF/container.xml"
            elif "container.xml" in zip_ref.namelist():
                container_file = "container.xml"
            if container_file:
                container_data = zip_ref.read(container_file)
                root = ET.fromstring(container_data)
                rootfile_elem = root.find(".//rootfile")
                if rootfile_elem is not None:
                    musicxml_path = rootfile_elem.attrib.get("full-path")
                    if musicxml_path in zip_ref.namelist():
                        zip_ref.extract(musicxml_path, xml_output_dir)

                        original_base = os.path.basename(mxl_file)
                        new_name = os.path.splitext(original_base)[0] + ".xml"
                        extracted_file = os.path.join(xml_output_dir, musicxml_path)
                        final_file = os.path.join(xml_output_dir, new_name)
                        os.rename(extracted_file, final_file)
                        print(
                            f"Extracted and renamed '{musicxml_path}' to '{final_file}'"
                        )
                    else:
                        print(
                            f"Referenced file '{musicxml_path}' not found in the archive."
                        )
                else:
                    print(
                        f"Could not find a <rootfile> element in container for {mxl_file}."
                    )
            else:
                for file_name in zip_ref.namelist():
                    if file_name.lower().endswith(".xml"):
                        zip_ref.extract(file_name, xml_output_dir)
                        original_base = os.path.basename(mxl_file)
                        new_name = os.path.splitext(original_base)[0] + ".xml"
                        os.rename(
                            os.path.join(xml_output_dir, file_name),
                            os.path.join(xml_output_dir, new_name),
                        )
                        print(
                            f"Extracted (fallback) and renamed '{file_name}' to '{new_name}'"
                        )
                        break
                else:
                    print(f"No XML file found in {mxl_file}.")
    except Exception as e:
        print(f"Error processing {mxl_file}: {e}")

    try:
        shutil.move(mxl_file, moved_mxl_dir)
        print(f"Moved original file '{mxl_file}' to '{moved_mxl_dir}'")
    except Exception as e:
        print(f"Error moving {mxl_file}: {e}")

Processing: need_to_be_converted/bwv1003.mxl
Extracted and renamed 'bwv1003.xml' to 'converted_to_xml/bwv1003.xml'
Moved original file 'need_to_be_converted/bwv1003.mxl' to 'mxl_backup'
Processing: need_to_be_converted/bwv1005.mxl
Extracted and renamed 'bwv1005.xml' to 'converted_to_xml/bwv1005.xml'
Moved original file 'need_to_be_converted/bwv1005.mxl' to 'mxl_backup'
Processing: need_to_be_converted/bwv1002.mxl
Extracted and renamed 'bwv1002.xml' to 'converted_to_xml/bwv1002.xml'
Moved original file 'need_to_be_converted/bwv1002.mxl' to 'mxl_backup'
Processing: need_to_be_converted/bwv1004.mxl
Extracted and renamed 'bwv1004.xml' to 'converted_to_xml/bwv1004.xml'
Moved original file 'need_to_be_converted/bwv1004.mxl' to 'mxl_backup'
Processing: need_to_be_converted/bwv1001.mxl
Extracted and renamed 'bwv1001.xml' to 'converted_to_xml/bwv1001.xml'
Moved original file 'need_to_be_converted/bwv1001.mxl' to 'mxl_backup'
Processing: need_to_be_converted/bwv1006.mxl
Extracted and renamed 'bw