diff --git a/cssselect/parser.py b/cssselect/parser.py index 9bb039c..3be71bb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -452,6 +452,13 @@ def parse_simple_selector(stream, inside_negation=False): continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) + if result.__repr__() == 'Pseudo[Element[*]:scope]': + if not (len(stream.used) == 2 or + (len(stream.used) == 3 + and stream.used[0].type == 'S')): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + 'not at the start of a selector') continue stream.next() stream.skip_whitespace() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 22cd029..d5bbf72 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -541,6 +541,14 @@ def xpath_lang_function(self, xpath, function): def xpath_root_pseudo(self, xpath): return xpath.add_condition("not(parent::*)") + # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div") + # Works only at the start of a selector + # Needed to get immediate children of a processed selector in Scrapy + # for product in response.css('.product'): + # description = product.css(':scope > div::text').get() + def xpath_scope_pseudo(self, xpath): + return xpath.add_condition("1") + def xpath_first_child_pseudo(self, xpath): return xpath.add_condition('count(preceding-sibling::*) = 0') diff --git a/docs/index.rst b/docs/index.rst index fe473f7..c7f0c1a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -108,8 +108,10 @@ in the Level 3 specification: * ``:not()`` accepts a *sequence of simple selectors*, not just single *simple selector*. For example, ``:not(a.important[rel])`` is allowed, even though the negation contains 3 *simple selectors*. +* ``:scope`` allows accessing immediate children of a selector: ``product.css(':scope > div::text')``, similar to XPath ``child::div``. Must be used at the start of a selector. Simplified version of `level 4 reference`_. .. 
_an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors +.. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope .. The following claim was copied from lxml: diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f01aa7f..8b562da 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -146,6 +146,20 @@ def parse_many(first, *others): 'Negation[Element[div]:not(Class[Element[div].foo])]'] assert parse_many('td ~ th') == [ 'CombinedSelector[Element[td] ~ Element[th]]'] + assert parse_many(':scope > foo') == [ + 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + ] + assert parse_many(' :scope > foo') == [ + 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + ] + assert parse_many(':scope > foo bar > div') == [ + 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' + 'Element[foo]] Element[bar]] > Element[div]]' + ] + assert parse_many(':scope > #foo #bar') == [ + 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' + 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]' + ] def test_pseudo_elements(self): def parse_pseudo(css): @@ -164,9 +178,16 @@ def parse_one(css): assert len(result) == 1 return result[0] + def test_pseudo_repr(css): + result = parse(css) + assert len(result) == 1 + selector = result[0] + return selector.parsed_tree.__repr__() + assert parse_one('foo') == ('Element[foo]', None) assert parse_one('*') == ('Element[*]', None) assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) + assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None) # Special cases for CSS 2.1 pseudo-elements assert parse_one(':BEfore') == ('Element[*]', 'before') @@ -190,11 +211,14 @@ def parse_one(css): 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ ' 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]', 'selection') - - parse_pseudo('foo:before, bar, baz:after') == [ - ('Element[foo]', 'before'), - ('Element[bar]', 
None), - ('Element[baz]', 'after')] + assert parse_pseudo(':scope > div, foo bar') == [ + ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), + ('CombinedSelector[Element[foo] Element[bar]]', None) + ] + assert parse_pseudo('foo:before, bar, baz:after') == [ + ('Element[foo]', 'before'), ('Element[bar]', None), + ('Element[baz]', 'after') + ] # Special cases for CSS 2.1 pseudo-elements are ignored by default for pseudo in ('after', 'before', 'first-line', 'first-letter'): @@ -211,6 +235,11 @@ def parse_one(css): self.assertRaises(ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True) + # Special test for the unicode symbols and ':scope' element if check + # Errors if use repr() instead of __repr__() + assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]' + assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]' + def test_specificity(self): def specificity(css): selectors = parse(css) @@ -310,6 +339,13 @@ def get_error(css): "Got pseudo-element ::before inside :not() at 12") assert get_error(':not(:not(a))') == ( "Got nested :not()") + assert get_error(':scope > div :scope header') == ( + 'Got immediate child pseudo-element ":scope" not at the start of a selector' + ) + assert get_error('div :scope header') == ( + 'Got immediate child pseudo-element ":scope" not at the start of a selector' + ) + assert get_error('> div p') == ("Expected selector, got <DELIM '>' at 0>") def test_translation(self): def xpath(css): @@ -483,6 +519,8 @@ def test_quoting(self): '''descendant-or-self::*[@aval = '"']''') assert css_to_xpath('*[aval=\'"""\']') == ( '''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath(':scope > div[dataimg=""]') == ( + "descendant-or-self::*[1]/div[@dataimg = '']") def test_unicode_escapes(self): # \22 == '"' \20 == ' ' @@ -560,6 +598,7 @@ def xpath(css): assert xpath('::attr-href') == "descendant-or-self::*/@href" assert xpath('p img::attr(src)') == ( 
"descendant-or-self::p/descendant-or-self::*/img/@src") + assert xpath(':scope') == "descendant-or-self::*[1]" def test_series(self): def series(css): @@ -672,6 +711,11 @@ def pcss(main, *selectors, **kwargs): assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ 'second-li', 'li-div'] assert pcss(':lang("e")', html_only=True) == [] + assert pcss(':scope > div') == [] + assert pcss(':scope body') == ['nil'] + assert pcss(':scope body > div') == ['outer-div', 'foobar-div'] + assert pcss(':scope head') == ['nil'] + assert pcss(':scope html') == [] # --- nth-* and nth-last-* ------------------------------------- @@ -853,6 +897,9 @@ def count(selector): assert count('div[class|=dialog]') == 50 # ? Seems right assert count('div[class!=madeup]') == 243 # ? Seems right assert count('div[class~=dialog]') == 51 # ? Seems right + assert count(':scope > div') == 1 + assert count(':scope > div > div[class=dialog]') == 1 + assert count(':scope > div div') == 242 XMLLANG_IDS = '''