Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cssselect/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,13 @@ def parse_simple_selector(stream, inside_negation=False):
continue
if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
if result.__repr__() == 'Pseudo[Element[*]:scope]':
if not (len(stream.used) == 2 or
(len(stream.used) == 3
and stream.used[0].type == 'S')):
raise SelectorSyntaxError(
'Got immediate child pseudo-element ":scope" '
'not at the start of a selector')
continue
stream.next()
stream.skip_whitespace()
Expand Down
8 changes: 8 additions & 0 deletions cssselect/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,14 @@ def xpath_lang_function(self, xpath, function):
def xpath_root_pseudo(self, xpath):
    """Restrict ``xpath`` to elements that have no parent element.

    The condition is appended through ``xpath.add_condition`` and the
    value returned by that call is handed back to the caller.
    """
    no_parent_condition = "not(parent::*)"
    return xpath.add_condition(no_parent_condition)

# ":scope" gives access to immediate children in CSS: ":scope > div"
# corresponds to XPath "child::div" (or "./div").
# It is only accepted at the start of a selector.
# Motivating use case: immediate children of an already-selected node in
# Scrapy, e.g.
#     for product in response.css('.product'):
#         description = product.css(':scope > div::text').get()
def xpath_scope_pseudo(self, xpath):
    """Append the positional predicate ``1`` to ``xpath`` for ``:scope``.

    The condition is appended through ``xpath.add_condition`` and the
    value returned by that call is handed back to the caller.
    """
    first_position = "1"
    return xpath.add_condition(first_position)

def xpath_first_child_pseudo(self, xpath):
    """Restrict ``xpath`` to elements with no preceding element sibling.

    The condition is appended through ``xpath.add_condition`` and the
    value returned by that call is handed back to the caller.
    """
    no_earlier_sibling = 'count(preceding-sibling::*) = 0'
    return xpath.add_condition(no_earlier_sibling)

Expand Down
2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ in the Level 3 specification:
* ``:not()`` accepts a *sequence of simple selectors*, not just single
*simple selector*. For example, ``:not(a.important[rel])`` is allowed,
even though the negation contains 3 *simple selectors*.
* ``:scope`` allows accessing the immediate children of a selector: ``product.css(':scope > div::text')``, similar to XPath ``child::div``. It must be used at the start of a selector. This is a simplified version of the `level 4 reference`_.

.. _an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
.. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope

..
The following claim was copied from lxml:
Expand Down
57 changes: 52 additions & 5 deletions tests/test_cssselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,20 @@ def parse_many(first, *others):
'Negation[Element[div]:not(Class[Element[div].foo])]']
assert parse_many('td ~ th') == [
'CombinedSelector[Element[td] ~ Element[th]]']
assert parse_many(':scope > foo') == [
'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
]
assert parse_many(' :scope > foo') == [
'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
]
assert parse_many(':scope > foo bar > div') == [
'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
'Element[foo]] <followed> Element[bar]] > Element[div]]'
]
assert parse_many(':scope > #foo #bar') == [
'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
'Hash[Element[*]#foo]] <followed> Hash[Element[*]#bar]]'
]

def test_pseudo_elements(self):
def parse_pseudo(css):
Expand All @@ -164,9 +178,16 @@ def parse_one(css):
assert len(result) == 1
return result[0]

def test_pseudo_repr(css):
    """Parse ``css`` (expected to hold one selector) and return its tree repr."""
    selectors = parse(css)
    # A selector group would yield several results; this helper handles one.
    assert len(selectors) == 1
    # NOTE(review): __repr__() is called directly on purpose; repr() is
    # reported to error for non-ASCII input -- confirm before changing it.
    return selectors[0].parsed_tree.__repr__()

assert parse_one('foo') == ('Element[foo]', None)
assert parse_one('*') == ('Element[*]', None)
assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None)

# Special cases for CSS 2.1 pseudo-elements
assert parse_one(':BEfore') == ('Element[*]', 'before')
Expand All @@ -190,11 +211,14 @@ def parse_one(css):
'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
'selection')

parse_pseudo('foo:before, bar, baz:after') == [
('Element[foo]', 'before'),
('Element[bar]', None),
('Element[baz]', 'after')]
assert parse_pseudo(':scope > div, foo bar') == [
('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
('CombinedSelector[Element[foo] <followed> Element[bar]]', None)
]
assert parse_pseudo('foo:before, bar, baz:after') == [
('Element[foo]', 'before'), ('Element[bar]', None),
('Element[baz]', 'after')
]

# Special cases for CSS 2.1 pseudo-elements are ignored by default
for pseudo in ('after', 'before', 'first-line', 'first-letter'):
Expand All @@ -211,6 +235,11 @@ def parse_one(css):
self.assertRaises(ExpressionError, tr.selector_to_xpath, selector,
translate_pseudo_elements=True)

# Special test for Unicode symbols and for the ':scope' check in the parser.
# These assertions error if repr() is used instead of __repr__().
assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]'
assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]'

def test_specificity(self):
def specificity(css):
selectors = parse(css)
Expand Down Expand Up @@ -310,6 +339,13 @@ def get_error(css):
"Got pseudo-element ::before inside :not() at 12")
assert get_error(':not(:not(a))') == (
"Got nested :not()")
assert get_error(':scope > div :scope header') == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
assert get_error('div :scope header') == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
assert get_error('> div p') == ("Expected selector, got <DELIM '>' at 0>")

def test_translation(self):
def xpath(css):
Expand Down Expand Up @@ -483,6 +519,8 @@ def test_quoting(self):
'''descendant-or-self::*[@aval = '"']''')
assert css_to_xpath('*[aval=\'"""\']') == (
'''descendant-or-self::*[@aval = '"""']''')
assert css_to_xpath(':scope > div[dataimg="<testmessage>"]') == (
"descendant-or-self::*[1]/div[@dataimg = '<testmessage>']")

def test_unicode_escapes(self):
# \22 == '"' \20 == ' '
Expand Down Expand Up @@ -560,6 +598,7 @@ def xpath(css):
assert xpath('::attr-href') == "descendant-or-self::*/@href"
assert xpath('p img::attr(src)') == (
"descendant-or-self::p/descendant-or-self::*/img/@src")
assert xpath(':scope') == "descendant-or-self::*[1]"

def test_series(self):
def series(css):
Expand Down Expand Up @@ -672,6 +711,11 @@ def pcss(main, *selectors, **kwargs):
assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
'second-li', 'li-div']
assert pcss(':lang("e")', html_only=True) == []
assert pcss(':scope > div') == []
assert pcss(':scope body') == ['nil']
assert pcss(':scope body > div') == ['outer-div', 'foobar-div']
assert pcss(':scope head') == ['nil']
assert pcss(':scope html') == []

# --- nth-* and nth-last-* -------------------------------------

Expand Down Expand Up @@ -853,6 +897,9 @@ def count(selector):
assert count('div[class|=dialog]') == 50 # ? Seems right
assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right
assert count(':scope > div') == 1
assert count(':scope > div > div[class=dialog]') == 1
assert count(':scope > div div') == 242

XMLLANG_IDS = '''
<test>
Expand Down