In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

# Load the XML file produced by Informatica PowerCenter.
# NOTE(review): the path is hard-coded and looks like a Colab-style
# /content upload location — confirm before running elsewhere.
xml_file_path = '/content/PROJECT_CDR_OHI.XML'
# Parse the whole document into an ElementTree and keep its root element,
# which is what gets passed to get_element_data() below.
tree = ET.parse(xml_file_path)
root = tree.getroot()

# Define a function to recursively extract data from elements
def get_element_data(element):
    """Convert an XML element and all of its descendants to nested dicts.

    Args:
        element: An element exposing ``tag``, ``text`` and ``attrib`` and
            yielding its direct children when iterated.

    Returns:
        A dictionary with the keys ``'tag'``, ``'text'``, ``'attributes'``
        and ``'children'``. ``'attributes'`` is the element's own ``attrib``
        mapping (not a copy), and ``'children'`` is a list holding the same
        kind of dictionary for every direct child, in document order.
    """
    return {
        'tag': element.tag,
        'text': element.text,
        'attributes': element.attrib,
        # Recurse into each direct child; a leaf yields an empty list.
        'children': list(map(get_element_data, element)),
    }

# Extract data from the root element: the result is a nested dictionary
# (tag, text, attributes, children) that mirrors the parsed XML tree
xml_data = get_element_data(root)

# Flatten the extracted data into a list of dictionaries with parent node names
def flatten_data(data, path=""):
    """Flatten a nested element dictionary into one single-level dictionary.

    The node's own scalar fields keep their original keys (for dictionaries
    built by ``get_element_data`` these are ``'tag'``, ``'text'`` and
    ``'attributes'``), and ``'Path'`` holds the ``path`` argument.  Every
    item of a list-valued field (i.e. each child node) is flattened
    recursively and its entries are stored under keys prefixed with
    ``"<child tag>[<index in the list>]."``, for example ``'B[1].C[0].text'``.

    Previously the children's results were merged with ``dict.update``, so
    each child overwrote the ``'Path'``/``'tag'``/``'text'``/``'attributes'``
    of its parent and of its earlier siblings, and only the last, deepest
    descendant survived.  Qualifying the keys keeps every node's data.

    NOTE: the result has one group of keys per descendant, so a large tree
    yields a very wide dictionary (and a very wide DataFrame row).

    Args:
        data: Dictionary describing a node; items of any list-valued entry
            must themselves be such dictionaries and carry a ``'tag'`` key.
        path: Dot-separated tag path recorded under ``'Path'`` for this node.
            Children receive ``path`` extended by their own tag.

    Returns:
        A flat dictionary containing this node's fields and the qualified
        fields of all of its descendants.
    """
    flattened_data = {'Path': path}
    for key, value in data.items():
        if not isinstance(value, list):
            flattened_data[key] = value
            continue
        for i, item in enumerate(value):
            child_tag = item['tag']
            child_path = f"{path}.{child_tag}" if path else child_tag
            # The sibling index disambiguates repeated tags under one parent.
            prefix = f"{child_tag}[{i}]"
            for child_key, child_value in flatten_data(item, child_path).items():
                flattened_data[f"{prefix}.{child_key}"] = child_value
    return flattened_data

# Flatten each direct child of the root into its own dictionary
flattened_data_list = list(map(flatten_data, xml_data['children']))

# Build a Pandas DataFrame with one row per flattened dictionary
df = pd.DataFrame(data=flattened_data_list)

# Write the DataFrame to an Excel file, leaving out the index column
excel_file_path = 'output_data.xlsx'
df.to_excel(excel_file_path, index=False)

print(f"Data exported to '{excel_file_path}'")


Data exported to 'output_data.xlsx'


