In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Convert every XML file in `xml_directory` into a CSV file of the same base
# name in `csv_directory`. Each CSV has one row per <prem>/<conc> element with
# the columns "text" and "label".

# Directories
xml_directory = "xml_files"
csv_directory = "csv_files"

# Column layout of every generated CSV. Passed explicitly to the DataFrame so
# that an XML file without any matching tag still produces a header row.
csv_columns = ["text", "label"]


def extract_labeled_text(root, tags=("prem", "conc")):
    """Collect the stripped text of every element named in ``tags``.

    Args:
        root: Parsed ``xml.etree.ElementTree.Element`` to search (descendants
            at any depth are considered).
        tags: Tag names to export. Rows are grouped by tag in the order given
            here, not in document order.

    Returns:
        A list of ``{"text": ..., "label": ...}`` dicts, where ``label`` is
        the tag name. Elements without text yield an empty string.
    """
    records = []
    for tag in tags:
        for element in root.findall(f".//{tag}"):
            # NOTE: Element.text only holds the text before the first child
            # element; text nested inside or following child tags is ignored.
            text = element.text.strip() if element.text else ""
            records.append({"text": text, "label": tag})
    return records


# Create 'csv_files' directory if it doesn't exist (exist_ok avoids the
# check-then-create race of a separate os.path.exists test).
os.makedirs(csv_directory, exist_ok=True)

# Check if XML directory exists
if not os.path.isdir(xml_directory):
    print(f"Error: Directory '{xml_directory}' does not exist.")
else:
    # Names of XML files that could not be parsed and were skipped.
    failed_files = []

    # Iterate through all XML files in the directory; sorted so that runs are
    # reproducible (os.listdir returns entries in arbitrary order).
    for filename in sorted(os.listdir(xml_directory)):
        file_path = os.path.join(xml_directory, filename)

        # Skip anything that is not a regular '.xml' file (e.g. a directory
        # that merely happens to be named '*.xml').
        if not (filename.endswith(".xml") and os.path.isfile(file_path)):
            continue

        # Parse the XML file; one malformed file must not abort the batch.
        try:
            tree = ET.parse(file_path)
        except ET.ParseError as exc:
            print(f"Warning: skipping '{file_path}': {exc}")
            failed_files.append(filename)
            continue
        root = tree.getroot()

        # Extract data from 'prem' and 'conc' tags
        csv_data = extract_labeled_text(root)

        # Convert the data to a DataFrame with a fixed column layout
        df = pd.DataFrame(csv_data, columns=csv_columns)

        # Save the DataFrame to a CSV file
        csv_file_name = os.path.splitext(filename)[0] + ".csv"
        csv_file_path = os.path.join(csv_directory, csv_file_name)
        df.to_csv(csv_file_path, index=False)

    if failed_files:
        print(f"Finished, but {len(failed_files)} XML file(s) could not be parsed and were skipped.")
    else:
        print(f"All CSV files have been created successfully in the '{csv_directory}' directory.")


All CSV files have been created successfully in the 'csv_files' directory.
