diff --git a/AUTHORS b/AUTHORS index 70ca409..66dcc22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,3 +10,4 @@ Simon Sapin Stefan Behnel Thomas Grainger Varialus +Arthur Darcet diff --git a/cssselect/parser.py b/cssselect/parser.py index 9bb039c..1aed6f8 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -76,7 +76,7 @@ def __init__(self, tree, pseudo_element=None): #: +-------------------------+----------------+--------------------------------+ #: | Invalid pseudo-class | ``li:marker`` | ``None`` | #: +-------------------------+----------------+--------------------------------+ - #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` | + #: | Functional | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` | #: +-------------------------+----------------+--------------------------------+ #: #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement @@ -92,6 +92,20 @@ def __repr__(self): return '%s[%r%s]' % ( self.__class__.__name__, self.parsed_tree, pseudo_element) + def canonical(self): + """Return a CSS representation for this selector (a string) + """ + if isinstance(self.pseudo_element, FunctionalPseudoElement): + pseudo_element = '::%s' % self.pseudo_element.canonical() + elif self.pseudo_element: + pseudo_element = '::%s' % self.pseudo_element + else: + pseudo_element = '' + res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element) + if len(res) > 1: + res = res.lstrip('*') + return res + def specificity(self): """Return the specificity_ of this selector as a tuple of 3 integers. @@ -116,6 +130,9 @@ def __repr__(self): return '%s[%r.%s]' % ( self.__class__.__name__, self.selector, self.class_name) + def canonical(self): + return '%s.%s' % (self.selector.canonical(), self.class_name) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -151,6 +168,10 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] + def canonical(self): + args = ''.join(token.css() for token in self.arguments) + return '%s(%s)' % (self.name, args) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -174,6 +195,10 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] + def canonical(self): + args = ''.join(token.css() for token in self.arguments) + return '%s:%s(%s)' % (self.selector.canonical(), self.name, args) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -192,6 +217,9 @@ def __repr__(self): return '%s[%r:%s]' % ( self.__class__.__name__, self.selector, self.ident) + def canonical(self): + return '%s:%s' % (self.selector.canonical(), self.ident) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -210,6 +238,12 @@ def __repr__(self): return '%s[%r:not(%r)]' % ( self.__class__.__name__, self.selector, self.subselector) + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s:not(%s)' % (self.selector.canonical(), subsel) + def specificity(self): a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() @@ -238,7 +272,20 @@ def __repr__(self): else: return '%s[%r[%s %s %r]]' % ( self.__class__.__name__, self.selector, attrib, - self.operator, self.value) + self.operator, self.value.value) + + def canonical(self): + if self.namespace: + attrib = '%s|%s' % (self.namespace, self.attrib) + else: + attrib = self.attrib + + if self.operator == 'exists': + op = attrib + else: + op = '%s%s%s' % (attrib, self.operator, self.value.css()) + + return '%s[%s]' % (self.selector.canonical(), op) def specificity(self): a, b, c = self.selector.specificity() @@ -258,10 +305,13 @@ def __init__(self, namespace=None, element=None): self.element = element def __repr__(self): + return '%s[%s]' % (self.__class__.__name__, self.canonical()) + + def canonical(self): element = self.element or '*' if self.namespace: element = '%s|%s' % (self.namespace, element) - return '%s[%s]' % (self.__class__.__name__, element) + return element def specificity(self): if self.element: @@ -282,6 +332,9 @@ def __repr__(self): return '%s[%r#%s]' % ( self.__class__.__name__, self.selector, self.id) + def canonical(self): + return '%s#%s' % (self.selector.canonical(), self.id) + def specificity(self): a, b, c = self.selector.specificity() a += 1 @@ -303,6 +356,13 @@ def __repr__(self): return '%s[%r %s %r]' % ( self.__class__.__name__, self.selector, comb, self.subselector) + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s %s %s' % ( + self.selector.canonical(), self.combinator, subsel) + def specificity(self): a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() @@ -536,7 +596,7 @@ def parse_attrib(selector, stream): if next != ('DELIM', ']'): raise SelectorSyntaxError( "Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, attrib, op, value.value) + return Attrib(selector, namespace, attrib, op, value) def parse_series(tokens): @@ -591,6 +651,12 @@ def is_delim(self, *values): type = property(operator.itemgetter(0)) value = property(operator.itemgetter(1)) + def css(self): + if self.type == 'STRING': + return repr(self.value) + else: + return self.value + class EOFToken(Token): def __new__(cls, pos): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 22cd029..ad2ccbd 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -308,10 +308,12 @@ def xpath_attrib(self, selector): attrib = '@' + name else: attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name) - if self.lower_case_attribute_values: - value = selector.value.lower() + if selector.value is None: + value = None + elif self.lower_case_attribute_values: + value = selector.value.value.lower() else: - value = selector.value + value = selector.value.value return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f01aa7f..0819f25 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -244,6 +244,39 @@ def specificity(css): assert specificity('#lorem + foo#ipsum:first-child > bar:first-line' ) == (2, 1, 3) + def test_css_export(self): + def css2css(css, res=None): + selectors = parse(css) + assert len(selectors) == 1 + assert selectors[0].canonical() == (res or css) + + css2css('*') + css2css(' foo', 'foo') + css2css('Foo', 'Foo') + css2css(':empty ', ':empty') + css2css(':before', '::before') + css2css(':beFOre', '::before') + css2css('*:before', '::before') + css2css(':nth-child(2)') + css2css('.bar') + css2css('[baz]') + css2css('[baz="4"]', "[baz='4']") + css2css('[baz^="4"]', "[baz^='4']") + css2css("[ns|attr='4']") + css2css('#lipsum') + css2css(':not(*)') + css2css(':not(foo)') + css2css(':not(*.foo)', ':not(.foo)') + css2css(':not(*[foo])', ':not([foo])') + css2css(':not(:empty)') + css2css(':not(#foo)') + css2css('foo:empty') + css2css('foo::before') + css2css('foo:empty::before') + css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)") + css2css('#lorem + foo#ipsum:first-child > bar::first-line') + css2css('foo > *') + def test_parse_errors(self): def get_error(css): try: