##### Pseudo Code

- Read in XML
- Capture header, footer data 
- Capture product line data
- Iterate through each new product curve number
    - Duplicate and edit the corresponding original element in the XML
    - Update sort order
    - Add root, 1st level tags around new xml file.

##### File Setup

In [49]:
import xml.etree.ElementTree as ET
import os

# Folder and file name of the XML export that serves as input.
# The raw string keeps the Windows backslashes literal.
fileDir = r"C:\Users\104092\OneDrive - Grundfos\Documents\10-19 Projects\12 NBS Curve PSD Separation\12.01 Original Files"
filename = "nbs_std_export.xml"
# Full path to the input file; later used as the source of the working copy.
filepath = os.path.join(fileDir, filename)

##### Create working copy

In [50]:
# Create a working copy of the XML file so the original is never modified.
import shutil

# Split the source name into stem and extension. Both are derived from
# `filepath`, which is never reassigned, so this cell can be re-run safely;
# os.path.splitext also copes with names that contain more than one dot.
filename, file_ext = os.path.splitext(os.path.basename(filepath))
file_ext = file_ext.lstrip(os.extsep)  # drop the leading dot; it is re-added below

outputDir = r"C:\Users\104092\OneDrive - Grundfos\Documents\10-19 Projects\12 NBS Curve PSD Separation\12.02 Output Files"
outputFilename = filename + " - Ready for Review." + file_ext
outputPath = os.path.join(outputDir, outputFilename)

shutil.copyfile(filepath, outputPath) # Create copy to leave original untouched

'C:\\Users\\104092\\OneDrive - Grundfos\\Documents\\10-19 Projects\\12 NBS Curve PSD Separation\\12.02 Output Files\\nbs_std_export - Ready for Review.xml'

##### Create list of all new curve numbers from curve header data tab

In [51]:
import pandas as pd
import openpyxl

# Workbook and tab that hold the curve header data
psd_dir = r"C:\Users\104092\OneDrive - Grundfos\Documents\10-19 Projects\12 NBS Curve PSD Separation\12.02 Output Files"
psd_file = "GXS Curve_Conexus_V2 - std models.xlsx"
psd_path = os.path.join(psd_dir, psd_file)

tabName = "Curve Header Data"

# Load the tab (column labels are taken from 0-indexed row 8) and discard
# every row that has no curve number.
raw_data = pd.read_excel(
    psd_path,
    sheet_name=tabName,
    header=8,
).dropna(subset=['Curve number'])

# Plain Python list of the remaining curve numbers
new_curve_numbers = raw_data['Curve number'].values.tolist()

##### Setup root element

In [52]:
# Parse the working copy (not the original file) and keep its root element.
tree = ET.parse(outputPath)
root = tree.getroot()

# Re-indent the in-memory tree with tabs, then print it to stdout for inspection.
ET.indent(root, space='\t', level=0)
ET.dump(root)

<SKBData xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
	<ProductLine priceFactorAppliedToAllIndexes="false" searchWithSDCEnabled="false" selectorVersion="8.0.0" skbVersion="22.2.0.220418.623" sortOrder="3">
		<supplierID>NBS_Std</supplierID>
		<name>NBS_Std</name>
		<description language="English">NBS – Split Coupled End Suction</description>
		<productLineSummary>
			<description language="English">Grundfos NBS is an end-suction split coupled pump which requires no alignment or grouting.  This multipurpose pump is design for optimized efficiency, easy installation with low maintenance costs, and is suitable for a variety of applications.</description>
		</productLineSummary>
		<productLineSummaryExtended>
			<description language="English">&lt;div&gt; &lt;h6&gt;Grundfos NBS is an end-suction split coupled pump which requires no alignment or grouting.  This multipurpose pump is design for optimized efficiency, easy installation with low maintenance costs, and is suitable for a

##### Custom Functions

In [53]:
def append_node_as_str(node, str_list):
    """ Serialize ``node`` together with all of its sub elements and append the
        resulting text to ``str_list``.

        The list is modified in place; nothing is returned.
    """
    str_list.append(ET.tostring(node, encoding='unicode', method='xml'))

In [54]:
def updateSortOrder(node, currentSort):
    """ Set the ``sortOrder`` attribute of ``node`` to ``currentSort * 5``,
        stored as a string.

        Open question left by the original author: should a generator keep the count?
    """
    spaced_index = currentSort * 5
    node.set('sortOrder', str(spaced_index))

In [55]:
def copy_element(element_to_copy):
    """ Return an independent copy of ``element_to_copy`` and its whole subtree.

        The result is an ``Element`` (not a string). Changing the copy leaves the
        source element untouched.
    """
    import copy  # local import keeps this cell self-contained

    # A deep copy avoids the serialize-and-reparse round trip, which raised a
    # ParseError whenever the element carried non-whitespace tail text.
    duplicate = copy.deepcopy(element_to_copy)
    # The round trip never kept the tail of the copied element, so clear it here
    # to produce the same serialized output as before.
    duplicate.tail = None
    return duplicate

In [56]:
def create_new_pump_xml(pump_node, new_curves_list):
    """ Build the serialized pump entries for every new curve that belongs to a pump model.

        For each entry of ``new_curves_list`` that contains the pump's original
        <pumpSizeAndType> text:
            - the sortOrder attribute of ``pump_node`` is updated from the module-level
              ``sortOrderIndex`` counter, which is then incremented
            - the text of the direct children <pumpSizeAndType> and <curveNumber>
              is replaced by the curve number
            - ``pump_node`` is serialized to a string

        ``pump_node`` itself is modified in place. Returns the serialized entries
        joined by single spaces (an empty string when nothing matched).
    """
    global sortOrderIndex

    # Original text of the tags that are compared against / overwritten below
    tracked_tags = ['pumpSizeAndType', 'curveNumber']
    original_values = {}
    for sub in pump_node:
        if sub.tag in tracked_tags:
            original_values[sub.tag] = sub.text

    serialized_entries = []

    for curve_num in new_curves_list:
        # Matching always uses the value captured above, not the overwritten element text
        if original_values['pumpSizeAndType'] not in curve_num:
            continue

        updateSortOrder(pump_node, sortOrderIndex)
        sortOrderIndex += 1

        # Point every tracked tag at the new curve number
        for sub in pump_node:
            if sub.tag in original_values:
                sub.text = curve_num

        # Serialize right away: the next match overwrites the same node again
        serialized_entries.append(ET.tostring(pump_node, encoding='unicode'))

    return " ".join(serialized_entries)


##### Main

In [57]:
# Walk the tree two levels down and collect the serialized XML of every grandchild:
#   Root       = SKBData
#   Child      = ProductLine
#   Grandchild = interesting data
# pumpCommon grandchildren are replaced by duplicates with modified curve number
# and pump size name; everything else is carried over unchanged.

xml_string_list = []  # serialized fragments that are later joined into the new xml
sortOrderIndex = 1    # running counter read and advanced by create_new_pump_xml

for child in root:
    for grandchild in child:

        if grandchild.tag == "pumpCommon":
            # Work on a copy so the parsed tree itself is not modified
            element_copy = copy_element(grandchild)
            new_grandchildren = create_new_pump_xml(element_copy, new_curve_numbers)
            xml_string_list.append(new_grandchildren)

        else:
            append_node_as_str(grandchild, xml_string_list)
        


#### Creates SKBData, ProductLine tags to wrap around new xml

In [58]:
# Build the two outer wrapper elements (SKBData, ProductLine) as text and put
# them around the collected fragments.

# Opening tags
root_tag_open = f'<{root.tag} xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'

# `child` is the loop variable left over from the traversal cell, i.e. the last
# child of root that was visited. Only its tag and attributes are reused here.
child_elem = ET.Element(child.tag, attrib=child.attrib)
child_elem_str_open = ET.tostring(child_elem, encoding='unicode')
# An element without content serializes self-closed ("<tag ... />"). Turn only
# that trailing marker into ">": removing every "/" would also corrupt attribute
# values or namespace URIs that contain a slash.
if child_elem_str_open.endswith('/>'):
    child_elem_str_open = child_elem_str_open[:-2] + '>'
print(child_elem_str_open)

# Closing tags
root_tag_close = f'</{root.tag}>'
child_elem_str_close = f'</{child.tag}>'

# Add to XML String List before concatenation below
xml_string_list = [root_tag_open] + [child_elem_str_open] + xml_string_list + [child_elem_str_close] + [root_tag_close]


<ProductLine priceFactorAppliedToAllIndexes="false" searchWithSDCEnabled="false" selectorVersion="8.0.0" skbVersion="22.2.0.220418.623" sortOrder="3" >


#### Remove xsi namespaces from pump common entries

In [59]:
# Remove the xsi namespace declaration from the pumpCommon entries.
import re  # no longer used by this cell; kept in case other cells rely on it

# Exact text to remove. It is a literal, not a regular expression: passing it to
# re.sub would treat each "." as a wildcard, so plain str.replace is used.
pattern = r'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'

for index, item in enumerate(xml_string_list):
    if 'pumpCommon' in item:
        xml_string_list[index] = item.replace(pattern, '')

#### Concatenate strings, and Write to file

In [60]:
# Flatten all fragments into a single, space-separated XML string
concat_data = " ".join(map(str, xml_string_list))

<SKBData xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <ProductLine priceFactorAppliedToAllIndexes="false" searchWithSDCEnabled="false" selectorVersion="8.0.0" skbVersion="22.2.0.220418.623" sortOrder="3" > <supplierID>NBS_Std</supplierID>
		 <name>NBS_Std</name>
		 <description language="English">NBS – Split Coupled End Suction</description>
		 <productLineSummary>
			<description language="English">Grundfos NBS is an end-suction split coupled pump which requires no alignment or grouting.  This multipurpose pump is design for optimized efficiency, easy installation with low maintenance costs, and is suitable for a variety of applications.</description>
		</productLineSummary>
		 <productLineSummaryExtended>
			<description language="English">&lt;div&gt; &lt;h6&gt;Grundfos NBS is an end-suction split coupled pump which requires no alignment or grouting.  This multipurpose pump is design for optimized efficiency, easy installation with low maintenance costs, and is suitable for

In [61]:
# Write the assembled XML as UTF-8. The text has no XML declaration, so parsers
# will assume UTF-8, whereas open() without `encoding` uses the platform default.
# The context manager closes the file even if the write fails.
# NOTE(review): the file is written to the current working directory, not to
# outputPath - confirm this is intended.
with open("modified_product_line.xml", "wt", encoding="utf-8") as text_file:
    n = text_file.write(concat_data)