# XML manipulation by etree

In [1]:
from xml.etree.ElementTree import Element, SubElement, dump
import xml.etree.ElementTree as et

# Indentation function
def indent(elem, level=0):
    """Add pretty-printing whitespace to ``elem`` and its descendants, in place.

    Each nesting level is indented by two spaces. Only ``text``/``tail``
    values that are missing or whitespace-only are overwritten, so real
    character data (e.g. ``<to>Tove<addr/></to>``) is never altered.

    elem  -- the element to indent (modified in place; nothing is returned)
    level -- nesting depth of ``elem``; callers normally leave it at 0
    """
    def _is_blank(value):
        # None, "" and whitespace-only strings all count as "free to overwrite"
        return not value or not value.strip()

    pad = "\n" + "  " * level
    children = list(elem)

    # Leaf element: only its tail may change, and never for the root itself.
    if not children:
        if level and _is_blank(elem.tail):
            elem.tail = pad
        return

    # Whitespace before the first child sits one level deeper than elem.
    if _is_blank(elem.text):
        elem.text = pad + "  "
    if _is_blank(elem.tail):
        elem.tail = pad

    for child in children:
        indent(child, level + 1)

    # The last child's tail precedes elem's closing tag, so pull it back
    # out to elem's own indentation level.
    last_child = children[-1]
    if _is_blank(last_child.tail):
        last_child.tail = pad
            
def idump(root):
    """Indent ``root`` in place with ``indent`` and then print it with ``dump``.

    Note that the tree is modified: the whitespace added by ``indent``
    stays in the elements' ``text``/``tail`` after the call.
    """
    indent(root)
    dump(root)

# Make simple XML dom

In [2]:
# Root element of the document.
note = Element("note")

# A child element with text content, attached to the root with append().
to = Element("to")
to.text = "Tove"
note.append(to)

# Indent in place, then write the tree to stdout.
idump(note)

<note>
  <to>Tove</to>
</note>


# add SubElement to Element

In [27]:
# Root element carrying a single attribute.
note = Element("note")
note.set("date", "20120104")

# First child: created standalone, then attached explicitly with append().
to = Element("to")
to.text = "Tove"
note.append(to)

# Remaining children: SubElement() creates the child and attaches it to the
# parent in one call; the optional attrib dict sets attributes on the child.
SubElement(note, "from", attrib={"id": "from", "name": "form"}).text = "Jani"
SubElement(note, "heading").text = "Reminder"
SubElement(note, "body").text = "Don't forget me this weekend!"

In [28]:
# Indent in place and print, via the helper defined in the first cell.
idump(note)

<note date="20120104">
  <to>Tove</to>
  <from id="from" name="form">Jani</from>
  <heading>Reminder</heading>
  <body>Don't forget me this weekend!</body>
</note>


# add SubElement at a selected level of the hierarchy

In [31]:
# Locate the existing <to> child of <note> ...
location = note.find('to')

# ... and create a new <address> element directly underneath it.
addr = SubElement(location, 'address')
addr.text = 'yemiji'

In [32]:
idump(note)

<note date="20120104">
  <to>Tove<address>yemiji</address>
  </to>
  <from id="from" name="form">Jani</from>
  <heading>Reminder</heading>
  <body>Don't forget me this weekend!</body>
</note>


# read xml file

In [5]:
# Read the document inside a `with` block so the file handle is closed as
# soon as the text has been read (the bare open().read() left it open until
# garbage collection).
with open('input.xml') as xml_file:
    xml = xml_file.read()

# fromstring() parses the XML text and returns the root Element.
root = et.fromstring(xml)

# Indent in place, then write the tree to stdout.
idump(root)

<note>
  <info id="1st">
    <name>Tove</name>
    <addr>11</addr>
    <addr>12</addr>
    <addr>13</addr>
  </info>
  <info id="2nd">
    <name>Jane</name>
    <addr>21</addr>
    <addr>22</addr>
    <addr>23</addr>
  </info>
</note>


# add SubElement to root

In [6]:
# Template for the element to add under every matching <addr>:
#   <age loc="Boston">23</age>
_Element = Element('age', {'loc':'Boston'})
_Element.text = '23'

info = root.findall("info[@id='2nd']")   # find by tag name & attributes

for _info in info:
    addrs = _info.findall("addr")
    for addr in addrs:
        # Build a separate <age> node for each <addr> from the template.
        # Appending the single _Element object to several parents would make
        # them all share one node, so a later edit to any one <age> (text,
        # attributes, tail) would silently show up under every <addr>.
        SubElement(addr, _Element.tag, dict(_Element.attrib)).text = _Element.text

In [7]:
idump(root)

<note>
  <info id="1st">
    <name>Tove</name>
    <addr>11</addr>
    <addr>12</addr>
    <addr>13</addr>
  </info>
  <info id="2nd">
    <name>Jane</name>
    <addr>21<age loc="Boston">23</age>
    </addr>
    <addr>22<age loc="Boston">23</age>
    </addr>
    <addr>23<age loc="Boston">23</age>
    </addr>
  </info>
</note>


# set attributes

In [26]:
# Attributes can be given as keyword arguments when the element is created.
note = Element("note", name="CH Lee")

# Children, created and attached in one step with SubElement().
to = SubElement(note, "to")
to.text = "Tove"
SubElement(note, "from").text = "Jani"

# Attributes can also be added afterwards; set(key, value) stores the pair
# in the element's attrib dictionary.
note.set("date", "20120104")
note.set("id", "Unique")

In [9]:
idump(note)

<note date="20120104" id="Unique" name="CH Lee">
  <to>Tove</to>
  <from>Jani</from>
</note>


# remove elements

In [10]:
# Fresh tree for this section: <note><to>Tove</to><From>Jani</From></note>
note = Element("note")

to = SubElement(note, "to")   # create and attach in one step
to.text = "Tove"

SubElement(note, "From").text = "Jani"

In [11]:
idump(note)

<note>
  <to>Tove</to>
  <From>Jani</From>
</note>


In [12]:
# Detach the <to> child from <note>. remove() looks for the element object
# itself (instance identity), not for a matching tag name or content.
note.remove(to)

In [13]:
idump(note)

<note>
  <From>Jani</From>
</note>


# remove all sub elements

In [49]:
# Fresh tree for this section: <note><to>Tove</to><From>Jani</From></note>
note = Element("note")

to = SubElement(note, "to")   # create and attach in one step
to.text = "Tove"

SubElement(note, "From").text = "Jani"

In [50]:
idump(note)

<note>
  <to>Tove</to>
  <From>Jani</From>
</note>


In [51]:
# Populate <to> with five <addr> children whose text is "1" .. "5".
to_loc = note.find('to')

for i in range(5):
    addr = SubElement(to_loc, 'addr')   # created and appended to <to> in one call
    addr.text = str(i + 1)

In [52]:
idump(note)

<note>
  <to>Tove<addr>1</addr>
    <addr>2</addr>
    <addr>3</addr>
    <addr>4</addr>
    <addr>5</addr>
  </to>
  <From>Jani</From>
</note>


# remove all children of a selected tag
## using getchildren (removed in Python 3.9; `list(element)` is the replacement)

In [47]:
loc_to = note.find('to')

# Take a snapshot of the direct children of loc_to. list(element) replaces
# Element.getchildren(), which was removed in Python 3.9, and looping over
# a separate list keeps the iteration safe while remove() mutates loc_to.
iter_child = list(loc_to)

for child in iter_child:
    loc_to.remove(child)

In [48]:
idump(note)

<note>
  <to>Tove</to>
  <From>Jani</From>
</note>


## using getiterator (removed in Python 3.9; `element.iter()` is the replacement)

In [85]:
loc_to = note.find('to')

# Collect loc_to itself followed by all of its descendants, in document order.
# Element.getiterator() was removed in Python 3.9; Element.iter() is the
# replacement. The result is materialised as a list so it can be looped over
# more than once.
# NOTE: this name shadows the builtin iter(); it is kept because the
# following cell reads it.
iter = list(loc_to.iter())

In [86]:
# One line per element: the element's repr, its text, and its attributes
# as returned by items().
for tag in iter:
    print('{0} : {1} - {2}'.format(tag, tag.text, tag.items()))

<Element 'to' at 0x0000022E195CCB38> : Tove - []
<Element 'addr' at 0x0000022E1A1D3368> : 1 - []
<Element 'addr' at 0x0000022E1A1D34F8> : 2 - []
<Element 'addr' at 0x0000022E1A1D3548> : 3 - []
<Element 'addr' at 0x0000022E1A1D3638> : 4 - []
<Element 'addr' at 0x0000022E1A1D37C8> : 5 - []


# Insert elements

In [14]:
# Fresh tree for this section: <note><to>Tove</to><From>Jani</From></note>
note = Element("note")

to = SubElement(note, "to")   # create and attach in one step
to.text = "Tove"

SubElement(note, "From").text = "Jani"

In [15]:
idump(note)

<note>
  <to>Tove</to>
  <From>Jani</From>
</note>


In [16]:
dummy = Element("dummy")

# insert(index, element): index 0 puts <dummy> in front of the existing children.
note.insert(0, dummy)  # first arg is the position to insert at

In [17]:
idump(note)

<note>
  <dummy />
  <to>Tove</to>
  <From>Jani</From>
</note>


# Insert contents at a specific location

In [18]:
# Fresh tree for this section: <note><to>Tove</to><From>Jani</From></note>
note = Element("note")

to = SubElement(note, "to")   # create and attach in one step
to.text = "Tove"

SubElement(note, "From").text = "Jani"

In [19]:
idump(note)

<note>
  <to>Tove</to>
  <From>Jani</From>
</note>


In [20]:
search = note.find('to')

In [21]:
search

<Element 'to' at 0x0000022E195A9AE8>

## using insert

In [22]:
content = Element('mailcontents')

# insert(0, ...) makes the new element the first child of `search`
# (the <to> element looked up with note.find('to')).
search.insert(0, content)

In [23]:
idump(note)

<note>
  <to>Tove<mailcontents />
  </to>
  <From>Jani</From>
</note>


## using append

In [24]:
search2 = note.find('From')
# `content` is the same Element object that was already inserted under <to>,
# so this text change is visible there as well.
content.text = 'mail contents'

# append() adds the element as the last child of <From>. No copy is made:
# the one `content` object is now a child of both <to> and <From>.
search2.append(content)

In [25]:
idump(note)

<note>
  <to>Tove<mailcontents>mail contents</mailcontents>
  </to>
  <From>Jani<mailcontents>mail contents</mailcontents>
  </From>
</note>
