In [2]:
import pandas as pd
import xml.etree.ElementTree as ET
from xml.dom import minidom
import os

In [3]:
# Input configuration: which worksheet to convert and where the PSD workbook lives.
# NOTE(review): myDir is a machine-specific absolute path; it must be edited to run elsewhere.
tabName = "Impeller"
file = "Lbom-ES.xlsx"
myDir = r"C:\Users\104092\OneDrive - Grundfos\Documents\git\grundfos-express-tools\psd to xml\PSD files for testing"

# Full path to the workbook, built from the directory and file name above
psd_file = os.path.join(myDir, file)

In [4]:
# Load the worksheet of interest. header=1 takes the column names from the second
# spreadsheet row; index_col=False keeps every column as data.
raw_data = pd.read_excel(psd_file, sheet_name=tabName, header=1, index_col=False)

# Boolean mask of columns whose label starts with "Unnamed" (the placeholder label pandas
# gives to columns with a blank header cell). na=False treats non-string labels as
# "not unnamed" instead of producing NaN entries that cannot be used as a mask.
unnamed_cols = raw_data.columns.str.match("Unnamed", na=False)

# Drop the unnamed columns by label.
# TODO: confirm whether these columns should be preserved.
psd_data = raw_data.drop(columns=raw_data.columns[unnamed_cols])

In [5]:
def get_row_index(search_query: str, df):
    """Return the index label of the first row whose first-column value equals ``search_query``.

    Args:
        search_query: Exact value to look for in the first column of ``df``.
        df: DataFrame to search.

    Returns:
        The index label of the first matching row.

    Raises:
        IndexError: If no row in the first column equals ``search_query``.
    """
    matching_labels = df.index[df.iloc[:, 0] == search_query]
    if len(matching_labels) == 0:
        # Same exception type the bare ``[0]`` lookup raised, but the message now
        # names the value that could not be found.
        raise IndexError(f"{search_query!r} not found in the first column of the dataframe")
    # tolist() converts the label to a native Python value
    return matching_labels.tolist()[0]
    
# Look up the rows holding the [START] and [END] markers in the first column;
# they separate the header rows from the data rows.
start_row, end_row = (get_row_index(marker, psd_data) for marker in ("[START]", "[END]"))

In [6]:
def prettify(elem):
    """Print the Element as pretty-printed XML (two-space indent).

    The element is serialized as UTF-8, re-parsed with minidom, and the
    indented text is written to stdout. Nothing is returned.
    """
    document = minidom.parseString(ET.tostring(elem, encoding='UTF-8'))
    pretty_xml = document.toprettyxml(indent="  ")
    print(pretty_xml)

In [7]:
# Map each column name to the data type stored for it in the row labelled 1.
# The first PSD column is left out.
header_dict = {name: psd_data.at[1, name] for name in psd_data.columns[1:]}

# The PSD declares ID as "pointer-merge", but it ends up as "text" in the XML anyway,
# so force it here.
header_dict["ID"] = "text"
print(header_dict)

{'ID': 'text', 'Model': 'text', 'CodeX': 'text', 'ImpellerMaterial': 'text', 'PacoMatlCode': 'text', 'CapScrewandWasher': 'text', 'ImpellerKey': 'text', 'Coating': 'text', 'BOM': 'text', 'PriceID': 'pointer', 'LeadtimeID': 'pointer'}


In [11]:
# Build the XML tree: one <instance> element per PSD row, in dataframe order
root = ET.Element('top')  # Root element

# Class reference shared by every instance; the class name is the PSD's first column header
class_ref_name = psd_data.columns[0]
class_attr = {"name": class_ref_name}

# chartype is always doublebyte
chartype_attr = {"value": "doublebyte"}

# Rows from start_row up to (not including) end_row
for index, row in psd_data[start_row:end_row].iterrows():
    # <instance name="...">, named after the row's ID. str() guards against non-string
    # IDs (e.g. numeric cells), which ElementTree refuses to serialize as attribute values.
    instance = ET.SubElement(root, "instance", attrib={"name": str(row["ID"])})

    # <class><ref name="..."/></class>
    class_entry = ET.SubElement(instance, "class")
    ET.SubElement(class_entry, "ref", attrib=class_attr)

    # <chartype value="doublebyte"/>
    ET.SubElement(instance, "chartype", chartype_attr)

    # One <attribute> per entry of header_dict, in dictionary order
    for attr_name, attr_type in header_dict.items():
        # XML attribute values must be strings, so the cell is stringified up front.
        # NOTE(review): empty cells read as NaN would be written as "nan" — confirm this is intended.
        cell_text = str(psd_data.at[index, attr_name])

        if attr_type == "text":
            # Text attributes carry their value inline
            ET.SubElement(instance, "attribute", {"name": attr_name, "type": attr_type, "value": cell_text})
        elif "pointer" in attr_type:
            # Pointer attributes carry their value in a child <ref> element.
            # TODO: confirm whether pointer and pointer-merge need different handling.
            attr_entry = ET.SubElement(instance, "attribute", {"name": attr_name, "type": attr_type})
            ET.SubElement(attr_entry, "ref", attrib={"value": cell_text})
        # NOTE(review): columns of any other type are not written to the XML — confirm this is intended.

In [None]:
# print(ET.tostring(root, encoding='utf8').decode('utf8'))

In [None]:
# Print the assembled XML tree as indented text
prettify(root)