In [3]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Collect every <part> element from the XML files in one folder into a single
# table and export that table to an Excel workbook.

# Folder containing the XML files
folder_path = 'xml_invoice_report_8_23'

# Child tags read from each <part>. They become the spreadsheet columns, in
# this order, after the leading 'filename' column.
part_fields = (
    'date',
    'group',
    'group-id',
    'description',
    'amount',
    'days',
    'group-two',
    'start-date',
    'end-date',
    'site',
    'rate',
)

# Declared explicitly so the sheet still gets its header row when no rows
# were collected (a DataFrame built from an empty list has no columns).
columns = ['filename', *part_fields]

# List to store data: one dict per <part> element
data = []

# Process each XML file in the folder. os.listdir() yields names in arbitrary
# order, so sort them to make the row order of the output reproducible.
for filename in sorted(os.listdir(folder_path)):
    # Skip anything that is not an .xml file, and any name containing '$'
    # (presumably temporary/lock files -- TODO confirm with the data owner).
    if not filename.endswith('.xml') or '$' in filename:
        continue

    file_path = os.path.join(folder_path, filename)

    try:
        tree = ET.parse(file_path)
        root = tree.getroot()

        # findall('part') matches only direct children of the root element.
        for part in root.findall('part'):
            # A missing child tag becomes 'N/A'; a tag that is present but
            # empty yields '' (ElementTree's documented findtext behaviour).
            row = {'filename': filename}
            for field in part_fields:
                row[field] = part.findtext(field, default='N/A')

            # Append the row to the data list
            data.append(row)

    except ET.ParseError as e:
        # Malformed XML: report it and carry on with the remaining files.
        print(f"Error parsing {filename}: {e}")
    except Exception as e:
        # Best-effort batch run: one unreadable file must not abort the rest.
        print(f"Unexpected error with {filename}: {e}")

# Convert data list to DataFrame with a fixed column layout
df = pd.DataFrame(data, columns=columns)

# Save the DataFrame to an Excel file
output_file = 'output.xlsx'
df.to_excel(output_file, index=False)

print(f'Data has been written to {output_file}')


Data has been written to output.xlsx
