In [2]:
import os
import zipfile
from datetime import datetime
import shutil
import csv
import re
import deiden_new

ALLOWED_EXTENSIONS = {'txt', 'xml', 'XML'}

def sanitize_filename(name):
    return re.sub(r'[<>:"/\\|?*]', '_', name)

# === STEP 1: Define input folders ===
base_path = os.path.join("H:", "Waitemata ECG XML database", "Philips")
folder_list = [
    os.path.join(base_path, "2021", "1"),
    os.path.join(base_path, "2021", "2"),
    os.path.join(base_path, "2021", "3"),
    os.path.join(base_path, "2021", "4"),
    os.path.join(base_path, "2021", "5"),
    os.path.join(base_path, "2021", "6"),
    os.path.join(base_path, "2021", "7"),
    os.path.join(base_path, "2022", "1"),
    os.path.join(base_path, "2022", "2"),
    os.path.join(base_path, "2022", "3"),
    os.path.join(base_path, "2022", "4"),
    os.path.join(base_path, "2022", "5"),
    os.path.join(base_path, "2022", "6"),
    os.path.join(base_path, "2022", "7"),
    os.path.join(base_path, "2022", "8"),
    os.path.join(base_path, "2022", "9"),
    os.path.join(base_path, "2022", "10"),
    os.path.join(base_path, "2023", "1"),
    os.path.join(base_path, "2023", "2"),
    os.path.join(base_path, "2023", "3"),
    os.path.join(base_path, "2023", "4"),
    os.path.join(base_path, "2023", "5"),
    os.path.join(base_path, "2023", "6"),
    os.path.join(base_path, "2023", "7"),
    os.path.join(base_path, "2023", "8"),
    os.path.join(base_path, "2023", "9"),
    os.path.join(base_path, "2023", "10"),
    os.path.join(base_path, "2024", "1"),
    os.path.join(base_path, "2024", "2"),
    os.path.join(base_path, "2024", "3"),
    os.path.join(base_path, "2024", "4"),
    os.path.join(base_path, "2024", "5"),
    os.path.join(base_path, "2024", "6"),
    os.path.join(base_path, "2024", "7"),
    os.path.join(base_path, "2024", "8"),
    os.path.join(base_path, "2024", "9"),
    os.path.join(base_path, "2024", "10"),
    os.path.join(base_path, "2024", "11"),
    os.path.join(base_path, "2024", "12"),
    os.path.join(base_path, "2024", "13"),
    os.path.join(base_path, "2024", "14"),
    os.path.join(base_path, "2024", "15"),
    os.path.join(base_path, "2025", "1"),
    os.path.join(base_path, "2025", "2"),
]

# Output root
output_root = os.path.abspath("output")
os.makedirs(output_root, exist_ok=True)

# === STEP 2: Process each folder ===
for input_folder in folder_list:
    if not os.path.isdir(input_folder):
        print(f"❌ Skipping missing folder: {input_folder}")
        continue

    # Setup temp output folder
    temp_output = os.path.join(output_root, "temp")
    os.makedirs(temp_output, exist_ok=True)

    # Clear temp folder
    for f in os.listdir(temp_output):
        fp = os.path.join(temp_output, f)
        try:
            if os.path.isfile(fp):
                os.unlink(fp)
            elif os.path.isdir(fp):
                shutil.rmtree(fp)
        except Exception as e:
            print(f"⚠️ Could not delete {fp}: {e}")

    print(f"\n🔄 Processing folder: {input_folder}")
    file_counters = {"philips": 0, "mindray": 0, "mortara": 0}

    # Create zip-friendly name
    folder_parts = os.path.abspath(input_folder).split(os.sep)[-3:]
    zip_prefix = sanitize_filename("_".join(folder_parts))

    # Prepare CSV file for ID mapping
    csv_file_path = os.path.join(temp_output, f"id_mappings_{zip_prefix}.csv")
    with open(csv_file_path, mode='w', newline='') as csvfile:
        fieldnames = ['original_id', 'replaced_id']
        csv_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        csv_writer.writeheader()

        # Get valid files
        files = [f for f in os.listdir(input_folder) if f.split(".")[-1] in ALLOWED_EXTENSIONS]
        if not files:
            print(f"⚠️ No valid files in {input_folder}")
            continue

        for filename in files:
            file_path = os.path.join(input_folder, filename)
            device_name = deiden_new.detect_type.detect_device_type(file_path).lower()
            if device_name not in file_counters:
                print(f"⚠️ Skipping unknown device type: {device_name} for file {filename}")
                continue

            count = file_counters[device_name]
            file_counters[device_name] += 1
            output_filename = f"{zip_prefix}_{device_name}_{count}"

            if device_name == 'philips':
                deiden_new.process_philips_file_internal(output_filename, file_path, temp_output, csv_writer, deiden_new.key)
            elif device_name == 'mindray':
                deiden_new.process_mindray_file_internal(output_filename, file_path, temp_output, csv_writer, deiden_new.key)
            elif device_name == 'mortara':
                deiden_new.process_mortara_file_internal(output_filename, file_path, temp_output, csv_writer, deiden_new.key)

    # Step 3: Zip processed files
    zip_filename = f"{zip_prefix}.zip"
    zip_filepath = os.path.join(output_root, zip_filename)
    with zipfile.ZipFile(zip_filepath, 'w') as zipf:
        for root, dirs, files in os.walk(temp_output):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, temp_output)
                zipf.write(file_path, arcname)

    print(f"✅ ZIP created: {zip_filepath}")



🔄 Processing folder: H:Waitemata ECG XML database\Philips\2021\1
✅ ZIP created: C:\Users\guoya\Dropbox\WDHB\Projects\Pfizer_Echocardiography AI\ecg_deiden_v2.3\application\output\Philips_2021_1.zip

🔄 Processing folder: H:Waitemata ECG XML database\Philips\2021\2
✅ ZIP created: C:\Users\guoya\Dropbox\WDHB\Projects\Pfizer_Echocardiography AI\ecg_deiden_v2.3\application\output\Philips_2021_2.zip

🔄 Processing folder: H:Waitemata ECG XML database\Philips\2021\3
✅ ZIP created: C:\Users\guoya\Dropbox\WDHB\Projects\Pfizer_Echocardiography AI\ecg_deiden_v2.3\application\output\Philips_2021_3.zip

🔄 Processing folder: H:Waitemata ECG XML database\Philips\2021\4
✅ ZIP created: C:\Users\guoya\Dropbox\WDHB\Projects\Pfizer_Echocardiography AI\ecg_deiden_v2.3\application\output\Philips_2021_4.zip

🔄 Processing folder: H:Waitemata ECG XML database\Philips\2021\5
✅ ZIP created: C:\Users\guoya\Dropbox\WDHB\Projects\Pfizer_Echocardiography AI\ecg_deiden_v2.3\application\output\Philips_2021_5.zip

🔄 Pr