Skip to content

Commit

Permalink
Use XPath 'lang()' in XML docs.
Browse files Browse the repository at this point in the history
  • Loading branch information
sjp committed Nov 15, 2012
1 parent 3e5abd8 commit 9846271
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 6 deletions.
38 changes: 38 additions & 0 deletions cssselect/tests.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -464,6 +464,30 @@ def series(css):
assert series('foo') == None assert series('foo') == None
assert series('n+') == None assert series('n+') == None


def test_lang(self):
    """Check that :lang() selectors match the XMLLANG_IDS fixture.

    Matched elements are put back in document order and reduced to
    their ``id`` attributes before being compared.
    """
    document = etree.fromstring(XMLLANG_IDS)
    # Record each element's position in document order for sorting.
    position_of = {}
    for index, node in enumerate(document.getiterator()):
        position_of[node] = index
    translator = GenericTranslator()

    def langid(selector):
        matches = document.xpath(translator.css_to_xpath(selector))
        ordered = sorted(matches, key=position_of.__getitem__)
        return [node.get('id', 'nil') for node in ordered]

    cases = [
        (':lang("EN")', ['first', 'second', 'third', 'fourth']),
        (':lang("en-us")', ['second', 'fourth']),
        (':lang(en-nz)', ['third']),
        (':lang(fr)', ['fifth']),
        (':lang(ru)', ['sixth']),
        (":lang('ZH')", ['eighth']),
        (':lang(de) :lang(zh)', ['eighth']),
        (':lang(en), :lang(zh)', [
            'first', 'second', 'third', 'fourth', 'eighth']),
        (':lang(es)', []),
    ]
    for selector, expected in cases:
        assert langid(selector) == expected

def test_select(self): def test_select(self):
document = etree.fromstring(HTML_IDS) document = etree.fromstring(HTML_IDS)
sort_key = dict( sort_key = dict(
Expand Down Expand Up @@ -675,6 +699,20 @@ def count(selector):
assert count('div[class!=madeup]') == 243 # ? Seems right assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right assert count('div[class~=dialog]') == 51 # ? Seems right


# XML fixture parsed by test_lang. Every element carries an xml:lang value,
# some in mixed case (en-Nz, En-us), and "eighth" (zh) is nested inside
# "seventh" (de).
XMLLANG_IDS = '''
<test>
<a id="first" xml:lang="en">a</a>
<b id="second" xml:lang="en-US">b</b>
<c id="third" xml:lang="en-Nz">c</c>
<d id="fourth" xml:lang="En-us">d</d>
<e id="fifth" xml:lang="fr">e</e>
<f id="sixth" xml:lang="ru">f</f>
<g id="seventh" xml:lang="de">
<h id="eighth" xml:lang="zh"/>
</g>
</test>
'''

HTML_IDS = ''' HTML_IDS = '''
<html id="html"><head> <html id="html"><head>
<link id="link-href" href="foo" /> <link id="link-href" href="foo" />
Expand Down
21 changes: 15 additions & 6 deletions cssselect/xpath.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -410,12 +410,7 @@ def xpath_lang_function(self, xpath, function):
% function.arguments) % function.arguments)
value = function.arguments[0].value value = function.arguments[0].value
return xpath.add_condition( return xpath.add_condition(
"ancestor-or-self::*[@lang][1][starts-with(concat(" "lang(%s)" % (self.xpath_literal(value)))
# XPath 1.0 has no lower-case function...
"translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
"'abcdefghijklmnopqrstuvwxyz'), "
"'-'), %s)]"
% (self.lang_attribute, self.xpath_literal(value.lower() + '-')))




# Pseudo: dispatch by pseudo-class name # Pseudo: dispatch by pseudo-class name
Expand Down Expand Up @@ -575,6 +570,20 @@ def xpath_checked_pseudo(self, xpath):
"and (name(.) = 'input' or name(.) = 'command')" "and (name(.) = 'input' or name(.) = 'command')"
"and (@type = 'checkbox' or @type = 'radio'))") "and (@type = 'checkbox' or @type = 'radio'))")


def xpath_lang_function(self, xpath, function):
    """Translate ``:lang(...)`` into a test on the language attribute.

    The condition picks the nearest ancestor-or-self element that has
    the ``self.lang_attribute`` attribute, lower-cases its value (ASCII
    letters only), appends ``'-'`` and checks that the result starts with
    the lower-cased argument followed by ``'-'``. So ``en`` matches
    ``en`` and ``en-US`` but not ``eng``.

    Raises ExpressionError unless the function has exactly one STRING
    or IDENT argument. Returns the result of ``xpath.add_condition``.
    """
    if function.argument_types() not in (['STRING'], ['IDENT']):
        raise ExpressionError(
            "Expected a single string or ident for :lang(), got %r"
            % function.arguments)
    value = function.arguments[0].value
    # Use the configured attribute both to locate the ancestor and to
    # read its value; a hard-coded "lang" in the first predicate would
    # disagree with any other lang_attribute setting.
    attribute = self.lang_attribute
    return xpath.add_condition(
        "ancestor-or-self::*[@%s][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
                           "'abcdefghijklmnopqrstuvwxyz'), "
            "'-'), %s)]"
        % (attribute, attribute, self.xpath_literal(value.lower() + '-')))

def xpath_link_pseudo(self, xpath): def xpath_link_pseudo(self, xpath):
return xpath.add_condition("@href and " return xpath.add_condition("@href and "
"(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
Expand Down

0 comments on commit 9846271

Please sign in to comment.