Permalink
Browse files

Add support for :lang(), close #3

  • Loading branch information...
1 parent acb6c39 commit 856621a21479fd51daebfce42d5d94ddabef13e3 @SimonSapin SimonSapin committed Apr 24, 2012
Showing with 37 additions and 13 deletions.
  1. +11 −0 CHANGES
  2. +1 −1 cssselect/__init__.py
  3. +8 −5 cssselect/tests.py
  4. +16 −5 cssselect/xpath.py
  5. +1 −2 docs/index.rst
View
11 CHANGES
@@ -1,6 +1,17 @@
Changelog
=========
+Version 0.6
+-----------
+
+* In ``setup.py`` use setuptools/distribute if available, but fall back
+ on distutils.
+* Implement the ``:lang()`` pseudo-class, although it is only based on
+ ``xml:lang`` or ``lang`` attributes. If the document language is known from
+ some other meta-data (like a ``Content-Language`` HTTP header or ``<meta>``
+ element), a workaround is to set a lang attribute on the root element.
+
+
Version 0.5
-----------
View
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.5'
+VERSION = '0.6'
__version__ = VERSION
View
@@ -18,7 +18,6 @@
"""
import sys
-import operator
import unittest
from lxml import etree, html
@@ -392,7 +391,6 @@ def xpath(css):
self.assertRaises(ExpressionError, xpath, ':last-of-type')
self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)')
self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)')
- self.assertRaises(ExpressionError, xpath, ':lang(fr)')
self.assertRaises(ExpressionError, xpath, ':nth-child(n-)')
self.assertRaises(ExpressionError, xpath, ':after')
self.assertRaises(ExpressionError, xpath, ':lorem-ipsum')
@@ -497,8 +495,14 @@ def pcss(main, *selectors, **kwargs):
assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
'foobar-div']
assert pcss('div[foobar~="cd"]') == []
- assert pcss('*[lang|="en"]', '[lang|="en-US"]') == ['second-li']
+ assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
+ # Attribute values are case sensitive
+ assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
assert pcss('*[lang|="e"]') == []
+ # ... :lang() is not.
+ assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
+ 'second-li', 'li-div']
+ assert pcss(':lang("e")', html_only=True) == []
assert pcss('li:nth-child(3)') == ['third-li']
assert pcss('li:nth-child(10)') == []
assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
@@ -524,7 +528,6 @@ def pcss(main, *selectors, **kwargs):
assert pcss('li div:only-child') == ['li-div']
assert pcss('div *:only-child') == ['li-div', 'foobar-span']
self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
- self.assertRaises(ExpressionError, pcss, 'p:lang(fr)')
assert pcss('p:only-of-type') == ['paragraph']
assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
assert pcss('li:empty') == [
@@ -661,7 +664,7 @@ def count(selector):
link</a>
<ol id="first-ol" class="a b c">
<li id="first-li">content</li>
- <li id="second-li" lang="en-US">
+ <li id="second-li" lang="En-us">
<div id="li-div">
</div>
</li>
View
@@ -116,6 +116,10 @@ class GenericTranslator(object):
#: http://www.w3.org/TR/selectors/#id-selectors
id_attribute = 'id'
+ #: The attribute used for ``:lang()`` depends on the document language:
+ #: http://www.w3.org/TR/selectors/#lang-pseudo
+ lang_attribute = 'xml:lang'
+
#: The case sensitivity of document language element names,
#: attribute names, and attribute values in selectors depends
#: on the document language.
@@ -366,11 +370,15 @@ def xpath_contains_function(self, xpath, function):
return xpath.add_condition('contains(string(.), %s)'
% self.xpath_literal(function.arguments))
- def function_unsupported(self, xpath, pseudo):
- raise ExpressionError(
- "The pseudo-class :%s() is not supported" % pseudo.name)
-
- xpath_lang_function = function_unsupported
+ def xpath_lang_function(self, xpath, function):
+     """Translate the ``:lang()`` pseudo-class into an XPath condition.
+
+     The condition selects the nearest ancestor-or-self element that
+     carries the ``self.lang_attribute`` attribute, then checks that its
+     value, lower-cased and suffixed with ``'-'``, starts with the
+     lower-cased argument suffixed with ``'-'``. The ``'-'`` suffix makes
+     ``en`` match ``en`` and ``en-US`` but not ``eng``.
+
+     :param xpath: the expression being built; must provide
+         ``add_condition()``.
+     :param function: the parsed functional pseudo-class;
+         ``function.arguments`` is used as a string (the language tag).
+     :returns: whatever ``xpath.add_condition()`` returns.
+     """
+     # The ancestor lookup and the comparison must use the SAME attribute.
+     # Hard-coding ``@lang`` in the lookup breaks translators whose
+     # ``lang_attribute`` differs (e.g. ``xml:lang``).
+     return xpath.add_condition(
+         "ancestor-or-self::*[@%(attr)s][1][starts-with(concat("
+         # XPath 1.0 has no lower-case function...
+         "translate(@%(attr)s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+         "'abcdefghijklmnopqrstuvwxyz'), "
+         "'-'), %(lang)s)]"
+         % {'attr': self.lang_attribute,
+            'lang': self.xpath_literal(
+                function.arguments.lower() + '-')})
# Pseudo: dispatch by pseudo-class name
@@ -497,6 +505,9 @@ class HTMLTranslator(GenericTranslator):
are case-insensitive.
"""
+
+ lang_attribute = 'lang'
+
def __init__(self, xhtml=False):
self.xhtml = xhtml # Might be useful for sub-classes?
if not xhtml:
View
@@ -92,7 +92,6 @@ they never match:
These applicable pseudo-classes are not yet implemented:
-* ``:lang(language)``
* ``*:first-of-type``, ``*:last-of-type``, ``*:nth-of-type``,
``*:nth-last-of-type``, ``*:only-of-type``. All of these work when
you specify an element type, but not with ``*``
@@ -136,7 +135,7 @@ implemented without forking or monkey-patching cssselect.
The "customization API" is the set of methods in translation classes
and their signature. You can look at the `source code`_ to see how it works.
However, be aware that this API is not very stable yet. It might change
-and break you sub-class.
+and break your sub-class.
.. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py

0 comments on commit 856621a

Please sign in to comment.