In [28]:
#PURPOSE: to demonstrate how to access and change information in an XML file

import xml.etree.ElementTree as ET
tree = ET.parse('books.xml') #get entire file as a tree object
root = tree.getroot() #the top-level element of that tree; the cells below navigate from here

In [29]:
#inspect the root element: the object itself, its tag name, and its attributes
print(root) #default repr of the Element object
print("Root:", root.tag) #find out what the root tag is for this file
print(root.attrib) #root attributes as a dict
print(root.items()) #the same attributes as (name, value) pairs

<Element 'bookstore' at 0x107934958>
Root: bookstore
{'name': "Peter's Pretty Pieces", 'id': 'PPP'}
[('name', "Peter's Pretty Pieces"), ('id', 'PPP')]


In [30]:
#list every direct child of root as "tag : attributes"
for node in root: #iterating an Element yields its direct children only
    print(f"{node.tag} : {node.attrib}")

book : {'category': 'cooking'}
book : {'category': 'children'}
book : {'category': 'web'}
book : {'cover': 'paperback', 'category': 'web'}
magazine : {}


In [31]:
#access specific nodes by position: root[4] is the fifth child of root,
#and [0] / [1] are that child's first two children
for position in (0, 1):
    node = root[4][position]
    print(node.tag, ":", node.text)

title : La chica bonita
author : Fulano


In [32]:
#for every book directly under root, show its title followed by each of its authors
for entry in root.findall('book'):
    print(entry.find('title').text)
    writers = entry.findall('author') #search only inside this book, not the whole tree
    for writer in writers:
        print(writer.text)
    print("______________")

Everyday Italian
Giada De Laurentiis
______________
Harry Potter
J K. Rowling
______________
XQuery Kick Start
James McGovern
Per Bothner
Kurt Cagle
James Linn
Vaidyanathan Nagarajan
______________
Learning XML
Erik T. Ray
______________


In [35]:
#get the value of a specific attribute, filling in a default where it is missing
for magazine in root.findall("magazine"):
    for element in magazine.iter(): #note that the iter() method gets all levels of elements, starting with the magazine itself
        if element.get("lang") is None: #get() returns None when the attribute is absent
            element.set("lang","ru") #set an attribute; name and value are both plain strings
        #print the language now stored on the element, so the output is the same on every run of this cell
        print (element.text, element.get("lang"))


     ru
La chica bonita es
Fulano ru
2015 ru
1 ru
2 ru
demasiado ru


In [37]:
#build a new, still-empty book record and attach it to the tree
newBook = ET.Element('book')
#create the four child nodes in document order; each one starts with no text
newTitle, newAuthor, newYear, newPrice = (
    ET.SubElement(newBook, tag) for tag in ('title', 'author', 'year', 'price')
)
ET.dump(newBook) #write the new structure to stdout so we can see what it looks like
root.append(newBook) #the new book becomes the last child of root

<book><title /><author /><year /><price /></book>


In [40]:
#iterate over the new structure to show the change
def showAllText(parent=None):
    """Print the text of every 'book' element found under an element.

    Each book is printed as a numbered header line, followed by the text of
    the book element itself and of every element nested inside it (one per
    line; elements without text print as None), followed by a separator line.

    parent -- element to search; when omitted, the notebook's global `root`
              is used, so existing calls with no argument keep working.
    """
    if parent is None:
        parent = root #fall back to the tree loaded at the top of the notebook
    #iter('book') matches book elements at any depth, so nested books are numbered too
    for count, element in enumerate(parent.iter('book'), start=1):
        print (f"{count}. ",element.tag)
        for child in element.iter(): #iter() yields the book itself first, then its descendants
            print (child.text)
        print ("________________")
showAllText() #the appended book is listed last; its still-empty nodes print as None

1.  book

    
mwa ha ha ha ha
Everyday Italian
Giada De Laurentiis
2005
30.00
________________
2.  book
mwa ha ha ha ha
________________
3.  book

    
Harry Potter
J K. Rowling
2005
29.99
________________
4.  book

    
XQuery Kick Start
James McGovern
Per Bothner
Kurt Cagle
James Linn
Vaidyanathan Nagarajan
2003
49.99
________________
5.  book

    
Learning XML
Erik T. Ray
2003
39.95
________________
6.  book
None
None
None
None
None
________________


In [43]:
#Let's add some text to these new nodes
#work on the book through the newBook reference rather than a positional index such as root[5],
#which would point at a different element if the tree layout ever changed
newBook.set("title","a category") # this sets an attribute, not the "title" child node
print (newBook.attrib)
#text for each of the empty child nodes, keyed by the child's tag
new_text = {'title': "A new book", 'author': "Peter Rich", 'year': "2015", 'price': "134.45"}
for tag, value in new_text.items():
    newBook.find(tag).text = value #find() returns the first direct child with this tag

{'title': 'a category'}


In [44]:
showAllText() #run the listing again to confirm the new text is now in the tree

1.  book

    
mwa ha ha ha ha
Everyday Italian
Giada De Laurentiis
2005
30.00
________________
2.  book
mwa ha ha ha ha
________________
3.  book

    
Harry Potter
J K. Rowling
2005
29.99
________________
4.  book

    
XQuery Kick Start
James McGovern
Per Bothner
Kurt Cagle
James Linn
Vaidyanathan Nagarajan
2003
49.99
________________
5.  book

    
Learning XML
Erik T. Ray
2003
39.95
________________
6.  book
None
A new book
Peter Rich
2015
134.45
________________


In [45]:
#save it all to a new file (a different name, so books.xml is not overwritten)
#write UTF-8 with an XML declaration so the file states its own encoding;
#the default is US-ASCII with no declaration
tree.write("Books_New.xml", encoding="utf-8", xml_declaration=True)