In [5]:
from lxml import etree

In [6]:
# Parse the sample bibliography into an lxml ElementTree.
# 'bibs.xml' is a relative path, so the file must sit in the working directory.
tree = etree.parse('bibs.xml')
# Bare expression: echoes the object's repr as the cell output.
tree

<lxml.etree._ElementTree at 0x20b23c5dcc0>

In [7]:
from helper import printf

In [8]:
# Print the whole document so that the XPath results below can be checked
# against the data (one <cd> and three <book> elements under <bib>).
printf(tree)

<bib>
<cd>abc</cd>
<book price="35">
	<publisher>Addison-Wesley</publisher>
        <author>Serge Abiteboul</author>
        <author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        <author age="20">Victor Vianu</author>
        <title>Foundations of Databases</title>
        <year>1995</year>
	<price>38.8</price>
</book>
<book price="55">
        <publisher>Freeman</publisher>
        <author>Jeffrey D. Ullman</author>
        <title>Principles of Database and Knowledge Base Systems</title>
        <year>1998</year>
</book>
<book>
	<title>xyz</title>
	<author age="25"/>
</book>
</bib>


In [9]:
# Positional predicate: book[1] selects the FIRST <book> child of <bib>.
# XPath positions are 1-based, not 0-based.
printf(tree.xpath('/bib/book[1]/publisher/text()'))

Addison-Wesley


In [10]:
# Without a predicate the step matches every <book>.
# The third book has no <publisher>, so only two values are returned.
printf(tree.xpath('/bib/book/publisher/text()'))

Addison-Wesley
Freeman


In [11]:
# Titles of books published in 1998.
# The predicate [year = 1998] filters the books; the trailing text() returns
# the title string itself rather than the <title> element.
printf(tree.xpath('/bib/book[year = 1998]/title/text()'))

Principles of Database and Knowledge Base Systems


In [12]:
# 'and': both comparisons must hold.
# The sample years are 1995 and 1998, and neither is strictly greater than
# 1998, so the result is empty and nothing is printed.
printf(tree.xpath('/bib/book[year > 1998 and year < 2000]/title/text()'))

In [13]:
# 'or': one true comparison is enough.
# Both books that have a <year> satisfy year < 2000. The third book has no
# <year>, so both comparisons are false for it and it is left out.
printf(tree.xpath('/bib/book[year > 1998 or year < 2000]/title/text()'))

Foundations of Databases
Principles of Database and Knowledge Base Systems


In [14]:
# not(...) negates the condition inside the parentheses.
# The result is still empty: no book has year > 1998, so the 'and' can never
# be true regardless of the negated part.
printf(tree.xpath('/bib/book[year > 1998 and not(year < 2000)]/title/text()'))

In [15]:
# logical operators: and, or, not(...)
# they are used inside a predicate, i.e. between the square brackets [...]

In [16]:
# text(), *, node(), @*
# contains
# //
# @
# / (axis)

# find empty nodes, i.e. nodes that have no content (no text and no sub-elements)

In [17]:
# Print the document again as a reference for the node-test queries below.
printf(tree)

<bib>
<cd>abc</cd>
<book price="35">
	<publisher>Addison-Wesley</publisher>
        <author>Serge Abiteboul</author>
        <author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        <author age="20">Victor Vianu</author>
        <title>Foundations of Databases</title>
        <year>1995</year>
	<price>38.8</price>
</book>
<book price="55">
        <publisher>Freeman</publisher>
        <author>Jeffrey D. Ullman</author>
        <title>Principles of Database and Knowledge Base Systems</title>
        <year>1998</year>
</book>
<book>
	<title>xyz</title>
	<author age="25"/>
</book>
</bib>


In [18]:
# text() selects only the text-node children of each <author>.
# The author written with <first-name>/<last-name> sub-elements and the empty
# <author age="25"/> have no direct text, so they contribute nothing.
printf(tree.xpath('/bib/book/author/text()'))

Serge Abiteboul
Victor Vianu
Jeffrey D. Ullman


In [19]:
# * selects only the ELEMENT children of each <author>.
# Just one author has sub-elements (<first-name> and <last-name>).
printf(tree.xpath('/bib/book/author/*'))

<first-name>Rick</first-name>
<last-name>Hull</last-name>


In [20]:
# node() selects children of any kind: text nodes and elements alike.
# The result is the combination of the text() and * results, in document order;
# the empty <author age="25"/> still contributes nothing.
printf(tree.xpath('/bib/book/author/node()'))

Serge Abiteboul
<first-name>Rick</first-name>
<last-name>Hull</last-name>
Victor Vianu
Jeffrey D. Ullman


In [21]:
# Find books whose price ATTRIBUTE is greater than 40.
# @price refers to the attribute price="..." on <book>, not to the <price>
# child element (the first book has both: price="35" and <price>38.8</price>).
printf(tree.xpath('/bib/book[@price > 40]'))

<book price="55">
        <publisher>Freeman</publisher>
        <author>Jeffrey D. Ullman</author>
        <title>Principles of Database and Knowledge Base Systems</title>
        <year>1998</year>
</book>



In [22]:
# Find publishers whose name contains 'es'.
# '.' is the context node, so contains(., "es") tests the text of the
# <publisher> element itself.
printf(tree.xpath('/bib/book/publisher[contains(., "es")]'))

<publisher>Addison-Wesley</publisher>
        


In [23]:
# Find the authors of books whose publisher name contains 'es'.
# Here the predicate sits on <book>, so the step after it returns the
# <author> children of every matching book.
printf(tree.xpath('/bib/book[contains(publisher, "es")]/author'))

<author>Serge Abiteboul</author>
        
<author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        
<author age="20">Victor Vianu</author>
        


In [24]:
# Find the authors of any record under <bib>.
# // matches descendants at any depth, not only direct children, so the
# intermediate <book> step does not have to be spelled out.
printf(tree.xpath('/bib//author'))

<author>Serge Abiteboul</author>
        
<author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        
<author age="20">Victor Vianu</author>
        
<author>Jeffrey D. Ullman</author>
        
<author age="25"/>



In [25]:
# * matches every element child of <bib> regardless of its name
# (here the <cd> and the three <book> elements).
printf(tree.xpath('/bib/*'))

<cd>abc</cd>

<book price="35">
	<publisher>Addison-Wesley</publisher>
        <author>Serge Abiteboul</author>
        <author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        <author age="20">Victor Vianu</author>
        <title>Foundations of Databases</title>
        <year>1995</year>
	<price>38.8</price>
</book>

<book price="55">
        <publisher>Freeman</publisher>
        <author>Jeffrey D. Ullman</author>
        <title>Principles of Database and Knowledge Base Systems</title>
        <year>1998</year>
</book>

<book>
	<title>xyz</title>
	<author age="25"/>
</book>



In [26]:
# A bare element name inside a predicate is an existence test:
# keep only the authors that have a <first-name> child.
printf(tree.xpath('//author[first-name]'))

<author><first-name>Rick</first-name><last-name>Hull</last-name></author>
        


In [27]:
# Empty nodes: not(node()) keeps the authors that have no child node at all
# (neither text nor sub-elements). Attributes are not child nodes, so
# <author age="25"/> still counts as empty.
printf(tree.xpath('//author[not(node())]'))

<author age="25"/>

