PyQuery (https://github.com/gawel/pyquery/) lets you run jQuery-style (https://jquery.com/) queries on XML and HTML documents.

### Synopsis

In [1]:
from pyquery import PyQuery as pq

### attr()

In [2]:
d = pq('<div my_attr="myattr_val">my txt</div>')
# read the element's my_attr, then select by attribute value: contains (*=), starts with (^=), ends with ($=)
d.attr('my_attr'), d('div[my_attr*="attr"]'), d('[my_attr^="my"]').attr('my_attr'), d('[my_attr$="val"]').text()

('myattr_val', [<div>], 'myattr_val', 'my txt')

### children

In [3]:
d = pq('<span><p class="hello">Hi!</p> and <p>Bye!</p></span>')
# select immediate children
d.children(), d.children('.hello')

([<p.hello>, <p>], [<p.hello>])

### closest()

In [4]:
d = pq('<div class="hello"><p>This is a <strong class="hello">test</strong></p></div>')
# grab the closest matching element, starting with the element itself and then walking up its ancestors
d('strong').closest('div'), d('strong').closest('.hello')

([<div.hello>], [<strong.hello>])

### :contains()

In [5]:
d = pq("<p><abbr title='World Health Organization'>WHO</abbr> was founded ...</p>")
# grab the element whose text contains 'WHO'
d("abbr:contains('WHO')").attr('title')

'World Health Organization'

### contents()

In [6]:
d = pq('<h1>hello <b>bold</b></h1>')
# show the contents
d.contents()

['hello ', <Element b at 0x7fbb2168a248>]

In [7]:
# render each content node as HTML (the bare text node comes back wrapped in <p>)
d.contents().map(lambda j, e: pq(e).outer_html())

['<p>hello </p>', '<b>bold</b>']

### d()

In [8]:
d = pq("""
        <html>
        <body>
        <div id='abc'>ABC</div>
        </body>
        </html>
        """)

# grab the element with id 'abc'
d('#abc')

[<div#abc>]

### each()

In [9]:
d = pq('<p class="hello">Hi there</p><p>Bye</p><br />')
# loop over each element and apply lambda func
d('p').each(lambda i, e: pq(e).attr('nc','mc_'+str(i)) )

[<p.hello>, <p>]

### end()

In [10]:
d = pq('<p><span><em>Whoah!</em></span></p><p><em> there</em></p>')
# break out of a traversal level and return to the parent level
d('p').eq(1).find('em').end().end()

[<p>, <p>]

### eq()

In [11]:
d = pq('<p class="hello">Hi</p><p>Bye</p><div></div>')
# grab the n-th element with eq(n); a negative n counts from the end
d('p').eq(0), d('p').eq(-1).text()

([<p.hello>], 'Bye')

### filter()

In [12]:
d = pq('<p class="hello">Hi</p><p>Bye</p>')
# filter by class hello
d('p').filter('.hello')

[<p.hello>]

In [13]:
# filter by item i=0
d('p').filter(lambda i: i == 0).text()

'Hi'

In [14]:
# filter by text = 'Hi'
d('p').filter(lambda i, el: pq(el).text() == 'Hi')

[<p.hello>]

### find()

In [15]:
d = pq('<p><span><em>hunky</em></span></p><p><em>dory!</em></p>')
# find the elements beneath
d('p').find('em')

[<em>, <em>]

In [16]:
# grab the last <p> and find its <em>
d('p').eq(-1).find('em').text()

'dory!'

### has_class()

In [17]:
d = pq('<div class="class1 class2 class3"></div>')
# checks the element class
d.has_class('class2')

True

### html()

In [18]:
d = pq('<div><span>toto</span></div>')
# show the inner html, the text, the child nodes, and the child nodes' outer html
d.html(), d.text(), d.contents(), d.contents().outer_html()

('<span>toto</span>', 'toto', [<span>], '<span>toto</span>')

### is_()

In [19]:
d = pq('<p class="cls1 cls2">Hi</p><p id="bye" >Bye</p><div attr1="attr1_val">txt1</div>')
# check whether the first <p> has class cls2 and the second has id bye; select the div whose attr1 contains 'tr1'
d('p').eq(0).is_('.cls2'), d('p').eq(1).is_('#bye'), d('div[attr1*="tr1"]').attr('attr1')

(True, True, 'attr1_val')

### items()

In [20]:
d = pq('<div><span>foo</span><span>bar</span></div>')
# iterate over elements returning PyQuery objects
[i.text() for i in d('span').items()]

['foo', 'bar']

### map()

In [21]:
d = pq('<p class="hello">Hi there</p><p>Bye</p><br />')
# iterate over elements applying lambda func
d('p').map(lambda i, e: (pq(e).outer_html(), pq(e).text()) )

[('<p class="hello">Hi there</p>', 'Hi there'), ('<p>Bye</p>', 'Bye')]

In [22]:
# map each <p> to its text length; a callback that returns a list (split()) has its items merged into one flat result
d('p').map(lambda i, e: len(pq(e).text())), d('p').map(lambda i, e: pq(e).text().split())

([8, 3], ['Hi', 'there', 'Bye'])

### next_all()

In [23]:
html_doc = '''
    <span>
    <p class="hello">Hi</p>
    <p>Bye</p>
    <img src=""/>
    <code>abc=2</code>
    <span>the end</span>
    </span>
    '''
d = pq(html_doc)
# grab all the sibling elements that come after the last <p>
d('p:last').next_all()

[<img>, <code>, <span>]

### outer_html()

In [24]:
d = pq('<div><span class="red">toto</span> rocks</div>')
# show html representation
d('div').outer_html(), d('span').outer_html()

('<div><span class="red">toto</span> rocks</div>',
 '<span class="red">toto</span>')

### parents()

In [25]:
d = pq('<h1><span><p class="hello">Hi</p><div>Bye</div></span></h1>')
# grab all ancestors, only the h1 ancestors, and the closest h1
d('.hello').parents(), d('div').parents('h1'), d('.hello').closest('h1')

([<h1>, <span>], [<h1>], [<h1>])

### prev_all()

In [26]:
h = '''<span>
        <p id="firstid" class="first">Im first</p>
        <p class="hello">Hi</p>
        <p>Bye</p>
        <img src=""/>
        </span>'''
d = pq(h)
# grab all the sibling elements that come before the last <p>
d('p:last').prev_all()

[<p#firstid.first>, <p.hello>]

### siblings()

In [27]:
d = pq('<span><p class="hello">Hi</p><p>Bye</p><img src=""/></span>')
# grab all the siblings of .hello, then only the siblings that match 'img'
d('.hello').siblings(), d('.hello').siblings('img')

([<p>, <img>], [<img>])

### text()

In [28]:
doc = pq('<div><span>toto</span><span>tata</span></div>')
# show the text representation
doc.text()

'tototata'

### val()

In [29]:
d = pq('<input value="old value"/>')
# get or set the value of <input>
d.val(), d.val('new value'), d.outer_html()

('old value', [<input>], '<input value="new value">')

### THE END