diff --git a/.bandit.yml b/.bandit.yml
new file mode 100644
index 0000000..7fcde04
--- /dev/null
+++ b/.bandit.yml
@@ -0,0 +1,2 @@
+# B101 is Bandit's "assert_used" check; it is skipped because
+# cssselect/parser.py uses assert statements for internal sanity checks.
+skips:
+- B101
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..89e6e07
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,15 @@
+[flake8]
+# Raised from the flake8 default of 79.
+# NOTE(review): presumably matches the line length given to the formatter - confirm.
+max-line-length = 99
+# W503 = "line break before binary operator".
+ignore = W503
+exclude =
+    .git
+    .tox
+    venv*
+
+    # pending revision
+    cssselect/__init__.py
+    cssselect/parser.py
+    cssselect/xpath.py
+    docs/conf.py
+    setup.py
+    tests/test_cssselect.py
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
new file mode 100644
index 0000000..db380bb
--- /dev/null
+++ b/.github/workflows/checks.yml
@@ -0,0 +1,36 @@
+name: Checks
+on: [push, pull_request]
+
+jobs:
+  checks:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # One job per static-analysis tox environment. Each entry's `env`
+        # mapping is handed to the "Run check" step, so tox selects the
+        # environment through TOXENV.
+        # Versions are quoted so they are always read as strings.
+        include:
+          - python-version: "3"
+            env:
+              TOXENV: black
+          - python-version: "3"
+            env:
+              TOXENV: flake8
+          - python-version: "3"
+            env:
+              TOXENV: pylint
+          - python-version: "3"
+            env:
+              TOXENV: security
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Run check
+        env: ${{ matrix.env }}
+        run: |
+          pip install -U pip
+          pip install -U tox
+          tox
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 817d824..799f52f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,19 +6,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- include:
- - python-version: 2.7
- env:
- TOXENV: py
- - python-version: 3.5
- env:
- TOXENV: py
- - python-version: 3.6
- env:
- TOXENV: py
- - python-version: 3.7
- env:
- TOXENV: py
+        # Quoted: unquoted versions are parsed as floats ("3.10" would become 3.1).
+        python-version: ["3.6", "3.7", "3.8", "3.9"]
steps:
- uses: actions/checkout@v2
@@ -29,10 +17,10 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Run tests
- env: ${{ matrix.env }}
run: |
+ pip install -U pip
pip install -U tox
- tox
+ tox -e py
- name: Upload coverage report
- run: bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
+ run: bash <(curl -s https://codecov.io/bash)
diff --git a/.gitignore b/.gitignore
index b0ab86a..c276bd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
/dist
/docs/_build
/.coverage
-.idea
\ No newline at end of file
+.idea
+htmlcov/
+coverage.xml
diff --git a/README.rst b/README.rst
index 9bcd648..9708616 100644
--- a/README.rst
+++ b/README.rst
@@ -10,32 +10,30 @@ cssselect: CSS Selectors for Python
:target: https://pypi.python.org/pypi/cssselect
:alt: Supported Python Versions
-.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg
- :target: https://travis-ci.org/scrapy/cssselect
- :alt: Build Status
+.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg
+ :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml
+ :alt: Tests
.. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg
:target: https://codecov.io/github/scrapy/cssselect?branch=master
:alt: Coverage report
-*cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_
-expressions. Such expressions can be used in lxml_ or another XPath engine
-to find the matching elements in an XML or HTML document.
+**cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and
+translate them to `XPath 1.0`_ expressions.
-This module used to live inside of lxml as ``lxml.cssselect`` before it was
-extracted as a stand-alone project.
-
-.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/
-.. _XPath 1.0: https://www.w3.org/TR/xpath/
-.. _lxml: http://lxml.de/
+`XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find
+the matching elements in an XML or HTML document.
+Find the cssselect online documentation at https://cssselect.readthedocs.io.
Quick facts:
-* Free software: BSD licensed
-* Compatible with Python 2.7 and 3.4+
-* Latest documentation `on Read the Docs `_
* Source, issues and pull requests `on GitHub
  <https://github.com/scrapy/cssselect>`_
-* Releases `on PyPI `_
+* Releases `on PyPI <https://pypi.org/project/cssselect/>`_
* Install with ``pip install cssselect``
+
+
+.. _CSS3 selectors: https://www.w3.org/TR/selectors-3/
+.. _XPath 1.0: https://www.w3.org/TR/xpath/all/
+.. _lxml: https://lxml.de/
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index b41cef9..2e4f824 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -13,10 +13,15 @@
"""
-from cssselect.parser import (parse, Selector, FunctionalPseudoElement,
- SelectorError, SelectorSyntaxError)
+from cssselect.parser import (
+ parse,
+ Selector,
+ FunctionalPseudoElement,
+ SelectorError,
+ SelectorSyntaxError,
+)
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.1.0'
+VERSION = "1.1.0"
__version__ = VERSION
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 74e1501..7493d02 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -27,7 +27,7 @@
def ascii_lower(string):
"""Lower-case, but only in the ASCII range."""
- return string.encode('utf8').lower().decode('utf8')
+ return string.encode("utf8").lower().decode("utf8")
class SelectorError(Exception):
@@ -39,12 +39,14 @@ class SelectorError(Exception):
"""
+
class SelectorSyntaxError(SelectorError, SyntaxError):
"""Parsing a selector that does not match the grammar."""
#### Parsed objects
+
class Selector(object):
"""
Represents a parsed selector.
@@ -55,10 +57,10 @@ class Selector(object):
or unsupported pseudo-elements.
"""
+
def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
- if pseudo_element is not None and not isinstance(
- pseudo_element, FunctionalPseudoElement):
+ if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
#: A :class:`FunctionalPseudoElement`,
#: or the identifier for the pseudo-element as a string,
@@ -86,24 +88,22 @@ def __repr__(self):
if isinstance(self.pseudo_element, FunctionalPseudoElement):
pseudo_element = repr(self.pseudo_element)
elif self.pseudo_element:
- pseudo_element = '::%s' % self.pseudo_element
+ pseudo_element = "::%s" % self.pseudo_element
else:
- pseudo_element = ''
- return '%s[%r%s]' % (
- self.__class__.__name__, self.parsed_tree, pseudo_element)
+ pseudo_element = ""
+ return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element)
def canonical(self):
- """Return a CSS representation for this selector (a string)
- """
+ """Return a CSS representation for this selector (a string)"""
if isinstance(self.pseudo_element, FunctionalPseudoElement):
- pseudo_element = '::%s' % self.pseudo_element.canonical()
+ pseudo_element = "::%s" % self.pseudo_element.canonical()
elif self.pseudo_element:
- pseudo_element = '::%s' % self.pseudo_element
+ pseudo_element = "::%s" % self.pseudo_element
else:
- pseudo_element = ''
- res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element)
+ pseudo_element = ""
+ res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element)
if len(res) > 1:
- res = res.lstrip('*')
+ res = res.lstrip("*")
return res
def specificity(self):
@@ -122,16 +122,16 @@ class Class(object):
"""
Represents selector.class_name
"""
+
def __init__(self, selector, class_name):
self.selector = selector
self.class_name = class_name
def __repr__(self):
- return '%s[%r.%s]' % (
- self.__class__.__name__, self.selector, self.class_name)
+ return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name)
def canonical(self):
- return '%s.%s' % (self.selector.canonical(), self.class_name)
+ return "%s.%s" % (self.selector.canonical(), self.class_name)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -156,6 +156,7 @@ class FunctionalPseudoElement(object):
Use at your own risks.
"""
+
def __init__(self, name, arguments):
self.name = ascii_lower(name)
self.arguments = arguments
@@ -213,8 +214,8 @@ def argument_types(self):
return [token.type for token in self.arguments]
def canonical(self):
- args = ''.join(token.css() for token in self.arguments)
- return '%s:%s(%s)' % (self.selector.canonical(), self.name, args)
+ args = "".join(token.css() for token in self.arguments)
+ return "%s:%s(%s)" % (self.selector.canonical(), self.name, args)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -226,16 +227,16 @@ class Pseudo(object):
"""
Represents selector:ident
"""
+
def __init__(self, selector, ident):
self.selector = selector
self.ident = ascii_lower(ident)
def __repr__(self):
- return '%s[%r:%s]' % (
- self.__class__.__name__, self.selector, self.ident)
+ return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident)
def canonical(self):
- return '%s:%s' % (self.selector.canonical(), self.ident)
+ return "%s:%s" % (self.selector.canonical(), self.ident)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -247,19 +248,19 @@ class Negation(object):
"""
Represents selector:not(subselector)
"""
+
def __init__(self, selector, subselector):
self.selector = selector
self.subselector = subselector
def __repr__(self):
- return '%s[%r:not(%r)]' % (
- self.__class__.__name__, self.selector, self.subselector)
+ return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector)
def canonical(self):
subsel = self.subselector.canonical()
if len(subsel) > 1:
- subsel = subsel.lstrip('*')
- return '%s:not(%s)' % (self.selector.canonical(), subsel)
+ subsel = subsel.lstrip("*")
+ return "%s:not(%s)" % (self.selector.canonical(), subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
@@ -267,10 +268,38 @@ def specificity(self):
return a1 + a2, b1 + b2, c1 + c2
+class Matching(object):
+    """
+    Represents selector:is(selector_list)
+
+    ``selector`` is the parsed tree the pseudo-class is attached to and
+    ``selector_list`` is the list of parsed trees given as arguments.
+    """
+
+    def __init__(self, selector, selector_list):
+        self.selector = selector
+        self.selector_list = selector_list
+
+    def __repr__(self):
+        return "%s[%r:is(%s)]" % (
+            self.__class__.__name__,
+            self.selector,
+            ", ".join(map(repr, self.selector_list)),
+        )
+
+    def canonical(self):
+        """Return a CSS representation for this node (a string)"""
+        selector_arguments = []
+        for s in self.selector_list:
+            selarg = s.canonical()
+            # Drop a redundant leading universal selector, but keep a bare
+            # "*" argument intact (same guard as Negation.canonical()).
+            if len(selarg) > 1:
+                selarg = selarg.lstrip("*")
+            selector_arguments.append(selarg)
+        return "%s:is(%s)" % (self.selector.canonical(), ", ".join(selector_arguments))
+
+    def specificity(self):
+        """Return the (a, b, c) specificity triple.
+
+        The pseudo-class contributes the specificity of its most specific
+        argument, which is added to the specificity of the selector it is
+        attached to (the same summing scheme Negation.specificity() uses).
+        """
+        a1, b1, c1 = self.selector.specificity()
+        # Tuples compare lexicographically, which is the specificity order.
+        a2, b2, c2 = max(x.specificity() for x in self.selector_list)
+        return a1 + a2, b1 + b2, c1 + c2
+
+
class Attrib(object):
"""
Represents selector[namespace|attrib operator value]
"""
+
def __init__(self, selector, namespace, attrib, operator, value):
self.selector = selector
self.namespace = namespace
@@ -280,29 +309,32 @@ def __init__(self, selector, namespace, attrib, operator, value):
def __repr__(self):
if self.namespace:
- attrib = '%s|%s' % (self.namespace, self.attrib)
+ attrib = "%s|%s" % (self.namespace, self.attrib)
else:
attrib = self.attrib
- if self.operator == 'exists':
- return '%s[%r[%s]]' % (
- self.__class__.__name__, self.selector, attrib)
+ if self.operator == "exists":
+ return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib)
else:
- return '%s[%r[%s %s %r]]' % (
- self.__class__.__name__, self.selector, attrib,
- self.operator, self.value.value)
+ return "%s[%r[%s %s %r]]" % (
+ self.__class__.__name__,
+ self.selector,
+ attrib,
+ self.operator,
+ self.value.value,
+ )
def canonical(self):
if self.namespace:
- attrib = '%s|%s' % (self.namespace, self.attrib)
+ attrib = "%s|%s" % (self.namespace, self.attrib)
else:
attrib = self.attrib
- if self.operator == 'exists':
+ if self.operator == "exists":
op = attrib
else:
- op = '%s%s%s' % (attrib, self.operator, self.value.css())
+ op = "%s%s%s" % (attrib, self.operator, self.value.css())
- return '%s[%s]' % (self.selector.canonical(), op)
+ return "%s[%s]" % (self.selector.canonical(), op)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -317,17 +349,18 @@ class Element(object):
`None` is for the universal selector '*'
"""
+
def __init__(self, namespace=None, element=None):
self.namespace = namespace
self.element = element
def __repr__(self):
- return '%s[%s]' % (self.__class__.__name__, self.canonical())
+ return "%s[%s]" % (self.__class__.__name__, self.canonical())
def canonical(self):
- element = self.element or '*'
+ element = self.element or "*"
if self.namespace:
- element = '%s|%s' % (self.namespace, element)
+ element = "%s|%s" % (self.namespace, element)
return element
def specificity(self):
@@ -341,16 +374,16 @@ class Hash(object):
"""
Represents selector#id
"""
+
def __init__(self, selector, id):
self.selector = selector
self.id = id
def __repr__(self):
- return '%s[%r#%s]' % (
- self.__class__.__name__, self.selector, self.id)
+ return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id)
def canonical(self):
- return '%s#%s' % (self.selector.canonical(), self.id)
+ return "%s#%s" % (self.selector.canonical(), self.id)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -366,19 +399,17 @@ def __init__(self, selector, combinator, subselector):
self.subselector = subselector
def __repr__(self):
- if self.combinator == ' ':
- comb = ''
+ if self.combinator == " ":
+ comb = ""
else:
comb = self.combinator
- return '%s[%r %s %r]' % (
- self.__class__.__name__, self.selector, comb, self.subselector)
+ return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector)
def canonical(self):
subsel = self.subselector.canonical()
if len(subsel) > 1:
- subsel = subsel.lstrip('*')
- return '%s %s %s' % (
- self.selector.canonical(), self.combinator, subsel)
+ subsel = subsel.lstrip("*")
+ return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
@@ -389,14 +420,13 @@ def specificity(self):
#### Parser
# foo
-_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
+_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")
# foo#bar or #bar
-_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
+_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")
# foo.bar or .bar
-_class_re = re.compile(
- r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
+_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$")
def parse(css):
@@ -420,16 +450,16 @@ def parse(css):
return [Selector(Element(element=match.group(1)))]
match = _id_re.match(css)
if match is not None:
- return [Selector(Hash(Element(element=match.group(1) or None),
- match.group(2)))]
+ return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))]
match = _class_re.match(css)
if match is not None:
- return [Selector(Class(Element(element=match.group(1) or None),
- match.group(2)))]
+ return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))]
stream = TokenStream(tokenize(css))
stream.source = css
return list(parse_selector_group(stream))
+
+
# except SelectorSyntaxError:
# e = sys.exc_info()[1]
# message = "%s at %s -> %r" % (
@@ -443,31 +473,32 @@ def parse_selector_group(stream):
stream.skip_whitespace()
while 1:
yield Selector(*parse_selector(stream))
- if stream.peek() == ('DELIM', ','):
+ if stream.peek() == ("DELIM", ","):
stream.next()
stream.skip_whitespace()
else:
break
+
def parse_selector(stream):
result, pseudo_element = parse_simple_selector(stream)
while 1:
stream.skip_whitespace()
peek = stream.peek()
- if peek in (('EOF', None), ('DELIM', ',')):
+ if peek in (("EOF", None), ("DELIM", ",")):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s not at the end of a selector'
- % pseudo_element)
- if peek.is_delim('+', '>', '~'):
+ "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
+ )
+ if peek.is_delim("+", ">", "~"):
# A combinator
combinator = stream.next().value
stream.skip_whitespace()
else:
# By exclusion, the last parse_simple_selector() ended
# at peek == ' '
- combinator = ' '
+ combinator = " "
next_selector, pseudo_element = parse_simple_selector(stream)
result = CombinedSelector(result, combinator, next_selector)
return result, pseudo_element
@@ -477,13 +508,13 @@ def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == 'IDENT' or peek == ('DELIM', '*'):
- if peek.type == 'IDENT':
+ if peek.type == "IDENT" or peek == ("DELIM", "*"):
+ if peek.type == "IDENT":
namespace = stream.next().value
else:
stream.next()
namespace = None
- if stream.peek() == ('DELIM', '|'):
+ if stream.peek() == ("DELIM", "|"):
stream.next()
element = stream.next_ident_or_star()
else:
@@ -495,75 +526,83 @@ def parse_simple_selector(stream, inside_negation=False):
pseudo_element = None
while 1:
peek = stream.peek()
- if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
- inside_negation and peek == ('DELIM', ')')):
+ if (
+ peek.type in ("S", "EOF")
+ or peek.is_delim(",", "+", ">", "~")
+ or (inside_negation and peek == ("DELIM", ")"))
+ ):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s not at the end of a selector'
- % pseudo_element)
- if peek.type == 'HASH':
+ "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
+ )
+ if peek.type == "HASH":
result = Hash(result, stream.next().value)
- elif peek == ('DELIM', '.'):
+ elif peek == ("DELIM", "."):
stream.next()
result = Class(result, stream.next_ident())
- elif peek == ('DELIM', '|'):
+ elif peek == ("DELIM", "|"):
stream.next()
result = Element(None, stream.next_ident())
- elif peek == ('DELIM', '['):
+ elif peek == ("DELIM", "["):
stream.next()
result = parse_attrib(result, stream)
- elif peek == ('DELIM', ':'):
+ elif peek == ("DELIM", ":"):
stream.next()
- if stream.peek() == ('DELIM', ':'):
+ if stream.peek() == ("DELIM", ":"):
stream.next()
pseudo_element = stream.next_ident()
- if stream.peek() == ('DELIM', '('):
+ if stream.peek() == ("DELIM", "("):
stream.next()
pseudo_element = FunctionalPseudoElement(
- pseudo_element, parse_arguments(stream))
+ pseudo_element, parse_arguments(stream)
+ )
continue
ident = stream.next_ident()
- if ident.lower() in ('first-line', 'first-letter',
- 'before', 'after'):
+ if ident.lower() in ("first-line", "first-letter", "before", "after"):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
pseudo_element = _unicode(ident)
continue
- if stream.peek() != ('DELIM', '('):
+ if stream.peek() != ("DELIM", "("):
result = Pseudo(result, ident)
- if result.__repr__() == 'Pseudo[Element[*]:scope]':
- if not (len(stream.used) == 2 or
- (len(stream.used) == 3
- and stream.used[0].type == 'S')):
+ if result.__repr__() == "Pseudo[Element[*]:scope]":
+ if not (
+ len(stream.used) == 2
+ or (len(stream.used) == 3 and stream.used[0].type == "S")
+ ):
raise SelectorSyntaxError(
'Got immediate child pseudo-element ":scope" '
- 'not at the start of a selector')
+ "not at the start of a selector"
+ )
continue
stream.next()
stream.skip_whitespace()
- if ident.lower() == 'not':
+ if ident.lower() == "not":
if inside_negation:
- raise SelectorSyntaxError('Got nested :not()')
+ raise SelectorSyntaxError("Got nested :not()")
argument, argument_pseudo_element = parse_simple_selector(
- stream, inside_negation=True)
+ stream, inside_negation=True
+ )
next = stream.next()
if argument_pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s inside :not() at %s'
- % (argument_pseudo_element, next.pos))
- if next != ('DELIM', ')'):
+ "Got pseudo-element ::%s inside :not() at %s"
+ % (argument_pseudo_element, next.pos)
+ )
+ if next != ("DELIM", ")"):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
+ elif ident.lower() in ("matches", "is"):
+ selectors = parse_simple_selector_arguments(stream)
+ result = Matching(result, selectors)
else:
arguments, of_type = parse_arguments(stream)
result = Function(result, ident, arguments, of_type)
else:
- raise SelectorSyntaxError(
- "Expected selector, got %s" % (peek,))
+ raise SelectorSyntaxError("Expected selector, got %s" % (peek,))
if len(stream.used) == selector_start:
- raise SelectorSyntaxError(
- "Expected selector, got %s" % (stream.peek(),))
+ raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),))
return result, pseudo_element
@@ -583,9 +622,31 @@ def parse_arguments(stream):
arguments.append(next)
elif next == ('DELIM', ')'):
return arguments, None
+
else:
+ raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
+
+
+def parse_simple_selector_arguments(stream):
+    """Parse the comma-separated simple selectors passed to a functional
+    pseudo-class such as ``:is()``, up to and including the closing ")".
+
+    The caller has already consumed the opening "(".
+
+    Returns the list of parsed selector trees, one per argument.
+
+    Raises SelectorSyntaxError if an argument carries a pseudo-element or
+    is followed by anything other than "," or ")".
+    """
+    arguments = []
+    while 1:
+        result, pseudo_element = parse_simple_selector(stream, True)
+        if pseudo_element:
             raise SelectorSyntaxError(
-                "Expected an argument, got %s" % (next,))
+                "Got pseudo-element ::%s inside function" % (pseudo_element,)
+            )
+        stream.skip_whitespace()
+        next = stream.next()
+        if next == ("DELIM", ","):
+            # The comma has just been consumed. Only optional whitespace may
+            # be skipped here; consuming one more token unconditionally would
+            # swallow the start of the next argument, e.g. "b" in ":is(a,b)".
+            stream.skip_whitespace()
+            arguments.append(result)
+        elif next == ("DELIM", ")"):
+            arguments.append(result)
+            break
+        else:
+            # Includes EOF: an unterminated argument list is a syntax error.
+            raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
+    return arguments
def parse_of_type(stream):
@@ -602,15 +663,14 @@ def parse_of_type(stream):
def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
- if attrib is None and stream.peek() != ('DELIM', '|'):
- raise SelectorSyntaxError(
- "Expected '|', got %s" % (stream.peek(),))
- if stream.peek() == ('DELIM', '|'):
+ if attrib is None and stream.peek() != ("DELIM", "|"):
+ raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),))
+ if stream.peek() == ("DELIM", "|"):
stream.next()
- if stream.peek() == ('DELIM', '='):
+ if stream.peek() == ("DELIM", "="):
namespace = None
stream.next()
- op = '|='
+ op = "|="
else:
namespace = attrib
attrib = stream.next_ident()
@@ -620,27 +680,23 @@ def parse_attrib(selector, stream):
if op is None:
stream.skip_whitespace()
next = stream.next()
- if next == ('DELIM', ']'):
- return Attrib(selector, namespace, attrib, 'exists', None)
- elif next == ('DELIM', '='):
- op = '='
- elif next.is_delim('^', '$', '*', '~', '|', '!') and (
- stream.peek() == ('DELIM', '=')):
- op = next.value + '='
+ if next == ("DELIM", "]"):
+ return Attrib(selector, namespace, attrib, "exists", None)
+ elif next == ("DELIM", "="):
+ op = "="
+ elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")):
+ op = next.value + "="
stream.next()
else:
- raise SelectorSyntaxError(
- "Operator expected, got %s" % (next,))
+ raise SelectorSyntaxError("Operator expected, got %s" % (next,))
stream.skip_whitespace()
value = stream.next()
- if value.type not in ('IDENT', 'STRING'):
- raise SelectorSyntaxError(
- "Expected string or ident, got %s" % (value,))
+ if value.type not in ("IDENT", "STRING"):
+ raise SelectorSyntaxError("Expected string or ident, got %s" % (value,))
stream.skip_whitespace()
next = stream.next()
- if next != ('DELIM', ']'):
- raise SelectorSyntaxError(
- "Expected ']', got %s" % (next,))
+ if next != ("DELIM", "]"):
+ raise SelectorSyntaxError("Expected ']', got %s" % (next,))
return Attrib(selector, namespace, attrib, op, value)
@@ -659,18 +715,18 @@ def parse_series(tokens):
s = ''.join(token.value for token in tokens).strip()
if s == 'odd':
return 2, 1
- elif s == 'even':
+ elif s == "even":
return 2, 0
- elif s == 'n':
+ elif s == "n":
return 1, 0
- if 'n' not in s:
+ if "n" not in s:
# Just b
return 0, int(s)
a, b = s.split("n", 1)
if not a:
a = 1
- elif a == '-' or a == '+':
- a = int(a+'1')
+ elif a == "-" or a == "+":
+ a = int(a + "1")
else:
a = int(a)
if not b:
@@ -683,6 +739,7 @@ def parse_series(tokens):
#### Token objects
+
class Token(tuple):
def __new__(cls, type_, value, pos):
obj = tuple.__new__(cls, (type_, value))
@@ -693,13 +750,13 @@ def __repr__(self):
return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
def is_delim(self, *values):
- return self.type == 'DELIM' and self.value in values
+ return self.type == "DELIM" and self.value in values
type = property(operator.itemgetter(0))
value = property(operator.itemgetter(1))
def css(self):
- if self.type == 'STRING':
+ if self.type == "STRING":
return repr(self.value)
else:
return self.value
@@ -707,41 +764,44 @@ def css(self):
class EOFToken(Token):
def __new__(cls, pos):
- return Token.__new__(cls, 'EOF', None, pos)
+ return Token.__new__(cls, "EOF", None, pos)
def __repr__(self):
- return '<%s at %i>' % (self.type, self.pos)
+ return "<%s at %i>" % (self.type, self.pos)
#### Tokenizer
class TokenMacros:
- unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
- escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
- string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
- nonascii = r'[^\0-\177]'
- nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
- nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
+ unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
+ escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
+ string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
+ nonascii = r"[^\0-\177]"
+ nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii)
+ nmstart = "[_a-z]|%s|%s" % (escape, nonascii)
+
def _compile(pattern):
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
-_match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
-_match_hash = _compile('#(?:%(nmchar)s)+')
-_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
+
+_match_whitespace = _compile(r"[ \t\r\n\f]+")
+_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
+_match_hash = _compile("#(?:%(nmchar)s)+")
+_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
_match_string_by_quote = {
"'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
'"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}
-_sub_simple_escape = re.compile(r'\\(.)').sub
+_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
-_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
+_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub
# Same as r'\1', but faster on CPython
-_replace_simple = operator.methodcaller('group', 1)
+_replace_simple = operator.methodcaller("group", 1)
+
def _replace_unicode(match):
codepoint = int(match.group(1), 16)
@@ -762,59 +822,62 @@ def tokenize(s):
while pos < len_s:
match = _match_whitespace(s, pos=pos)
if match:
- yield Token('S', ' ', pos)
+ yield Token("S", " ", pos)
pos = match.end()
continue
match = _match_ident(s, pos=pos)
if match:
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode, match.group()))
- yield Token('IDENT', value, pos)
+ value = _sub_simple_escape(
+ _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
+ )
+ yield Token("IDENT", value, pos)
pos = match.end()
continue
match = _match_hash(s, pos=pos)
if match:
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode, match.group()[1:]))
- yield Token('HASH', value, pos)
+ value = _sub_simple_escape(
+ _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:])
+ )
+ yield Token("HASH", value, pos)
pos = match.end()
continue
quote = s[pos]
if quote in _match_string_by_quote:
match = _match_string_by_quote[quote](s, pos=pos + 1)
- assert match, 'Should have found at least an empty match'
+ assert match, "Should have found at least an empty match"
end_pos = match.end()
if end_pos == len_s:
- raise SelectorSyntaxError('Unclosed string at %s' % pos)
+ raise SelectorSyntaxError("Unclosed string at %s" % pos)
if s[end_pos] != quote:
- raise SelectorSyntaxError('Invalid string at %s' % pos)
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode,
- _sub_newline_escape('', match.group())))
- yield Token('STRING', value, pos)
+ raise SelectorSyntaxError("Invalid string at %s" % pos)
+ value = _sub_simple_escape(
+ _replace_simple,
+ _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())),
+ )
+ yield Token("STRING", value, pos)
pos = end_pos + 1
continue
match = _match_number(s, pos=pos)
if match:
value = match.group()
- yield Token('NUMBER', value, pos)
+ yield Token("NUMBER", value, pos)
pos = match.end()
continue
pos2 = pos + 2
- if s[pos:pos2] == '/*':
- pos = s.find('*/', pos2)
+ if s[pos:pos2] == "/*":
+ pos = s.find("*/", pos2)
if pos == -1:
pos = len_s
else:
pos += 2
continue
- yield Token('DELIM', s[pos], pos)
+ yield Token("DELIM", s[pos], pos)
pos += 1
assert pos == len_s
@@ -852,21 +915,20 @@ def peek(self):
def next_ident(self):
next = self.next()
- if next.type != 'IDENT':
- raise SelectorSyntaxError('Expected ident, got %s' % (next,))
+ if next.type != "IDENT":
+ raise SelectorSyntaxError("Expected ident, got %s" % (next,))
return next.value
def next_ident_or_star(self):
next = self.next()
- if next.type == 'IDENT':
+ if next.type == "IDENT":
return next.value
- elif next == ('DELIM', '*'):
+ elif next == ("DELIM", "*"):
return None
else:
- raise SelectorSyntaxError(
- "Expected ident or '*', got %s" % (next,))
+ raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,))
def skip_whitespace(self):
peek = self.peek()
- if peek.type == 'S':
+ if peek.type == "S":
self.next()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 0feab6b..7c5c2ef 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -28,7 +28,7 @@
def _unicode_safe_getattr(obj, name, default=None):
# getattr() with a non-ASCII name fails on Python 2.x
- name = name.encode('ascii', 'replace').decode('ascii')
+ name = name.encode("ascii", "replace").decode("ascii")
return getattr(obj, name, default)
@@ -38,48 +38,47 @@ class ExpressionError(SelectorError, RuntimeError):
#### XPath Helpers
-class XPathExpr(object):
- def __init__(self, path='', element='*', condition='', star_prefix=False):
+class XPathExpr(object):
+ def __init__(self, path="", element="*", condition="", star_prefix=False):
self.path = path
self.element = element
self.condition = condition
def __str__(self):
- path = _unicode(self.path) + _unicode(self.element)
+ path = _unicode(self.path) + _unicode(self.element)
if self.condition:
- path += '[%s]' % self.condition
+ path += "[%s]" % self.condition
return path
def __repr__(self):
- return '%s[%s]' % (self.__class__.__name__, self)
+ return "%s[%s]" % (self.__class__.__name__, self)
- def add_condition(self, condition):
+ def add_condition(self, condition, conjuction="and"):
if self.condition:
- self.condition = '(%s) and (%s)' % (self.condition, condition)
+ self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
else:
self.condition = condition
return self
def add_name_test(self):
- if self.element == '*':
+ if self.element == "*":
# We weren't doing a test anyway
return
- self.add_condition(
- "name() = %s" % GenericTranslator.xpath_literal(self.element))
- self.element = '*'
+ self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element))
+ self.element = "*"
def add_star_prefix(self):
"""
Append '*/' to the path to keep the context constrained
to a single parent.
"""
- self.path += '*/'
+ self.path += "*/"
def join(self, combiner, other):
path = _unicode(self) + combiner
# Any "star prefix" is redundant when joining.
- if other.path != '*/':
+ if other.path != "*/":
path += other.path
self.path = path
self.element = other.element
@@ -92,14 +91,15 @@ def join(self, combiner, other):
# The spec is actually more permissive than that, but don’t bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
-is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
+is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match
# Test that the string is not empty and does not contain whitespace
-is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
+is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match
#### Translation
+
class GenericTranslator(object):
"""
Translator for "generic" XML documents.
@@ -122,30 +122,30 @@ class GenericTranslator(object):
####
combinator_mapping = {
- ' ': 'descendant',
- '>': 'child',
- '+': 'direct_adjacent',
- '~': 'indirect_adjacent',
+ " ": "descendant",
+ ">": "child",
+ "+": "direct_adjacent",
+ "~": "indirect_adjacent",
}
attribute_operator_mapping = {
- 'exists': 'exists',
- '=': 'equals',
- '~=': 'includes',
- '|=': 'dashmatch',
- '^=': 'prefixmatch',
- '$=': 'suffixmatch',
- '*=': 'substringmatch',
- '!=': 'different', # XXX Not in Level 3 but meh
+ "exists": "exists",
+ "=": "equals",
+ "~=": "includes",
+ "|=": "dashmatch",
+ "^=": "prefixmatch",
+ "$=": "suffixmatch",
+ "*=": "substringmatch",
+ "!=": "different", # XXX Not in Level 3 but meh
}
#: The attribute used for ID selectors depends on the document language:
#: http://www.w3.org/TR/selectors/#id-selectors
- id_attribute = 'id'
+ id_attribute = "id"
#: The attribute used for ``:lang()`` depends on the document language:
#: http://www.w3.org/TR/selectors/#lang-pseudo
- lang_attribute = 'xml:lang'
+ lang_attribute = "xml:lang"
#: The case sensitivity of document language element names,
#: attribute names, and attribute values in selectors depends
@@ -168,7 +168,7 @@ class GenericTranslator(object):
# class used to represent and xpath expression
xpathexpr_cls = XPathExpr
- def css_to_xpath(self, css, prefix='descendant-or-self::'):
+ def css_to_xpath(self, css, prefix="descendant-or-self::"):
"""Translate a *group of selectors* to XPath.
Pseudo-elements are not supported here since XPath only knows
@@ -187,12 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
- return ' | '.join(self.selector_to_xpath(selector, prefix,
- translate_pseudo_elements=True)
- for selector in parse(css))
+ return " | ".join(
+ self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
+ for selector in parse(css)
+ )
- def selector_to_xpath(self, selector, prefix='descendant-or-self::',
- translate_pseudo_elements=False):
+ def selector_to_xpath(
+ self, selector, prefix="descendant-or-self::", translate_pseudo_elements=False
+ ):
"""Translate a parsed selector to XPath.
@@ -213,14 +215,14 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
The equivalent XPath 1.0 expression as an Unicode string.
"""
- tree = getattr(selector, 'parsed_tree', None)
+ tree = getattr(selector, "parsed_tree", None)
if not tree:
- raise TypeError('Expected a parsed selector, got %r' % (selector,))
+ raise TypeError("Expected a parsed selector, got %r" % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
- return (prefix or '') + _unicode(xpath)
+ return (prefix or "") + _unicode(xpath)
def xpath_pseudo_element(self, xpath, pseudo_element):
"""Translate a pseudo-element.
@@ -229,7 +231,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element):
but can be overridden by sub-classes.
"""
- raise ExpressionError('Pseudo-elements are not supported.')
+ raise ExpressionError("Pseudo-elements are not supported.")
@staticmethod
def xpath_literal(s):
@@ -239,75 +241,82 @@ def xpath_literal(s):
elif '"' not in s:
s = '"%s"' % s
else:
- s = "concat(%s)" % ','.join([
- (("'" in part) and '"%s"' or "'%s'") % part
- for part in split_at_single_quotes(s) if part
- ])
+ s = "concat(%s)" % ",".join(
+ [
+ (("'" in part) and '"%s"' or "'%s'") % part
+ for part in split_at_single_quotes(s)
+ if part
+ ]
+ )
return s
def xpath(self, parsed_selector):
"""Translate any parsed selector object."""
type_name = type(parsed_selector).__name__
- method = getattr(self, 'xpath_%s' % type_name.lower(), None)
+ method = getattr(self, "xpath_%s" % type_name.lower(), None)
if method is None:
- raise ExpressionError('%s is not supported.' % type_name)
+ raise ExpressionError("%s is not supported." % type_name)
return method(parsed_selector)
-
# Dispatched by parsed object type
def xpath_combinedselector(self, combined):
"""Translate a combined selector."""
combinator = self.combinator_mapping[combined.combinator]
- method = getattr(self, 'xpath_%s_combinator' % combinator)
- return method(self.xpath(combined.selector),
- self.xpath(combined.subselector))
+ method = getattr(self, "xpath_%s_combinator" % combinator)
+ return method(self.xpath(combined.selector), self.xpath(combined.subselector))
def xpath_negation(self, negation):
xpath = self.xpath(negation.selector)
sub_xpath = self.xpath(negation.subselector)
sub_xpath.add_name_test()
if sub_xpath.condition:
- return xpath.add_condition('not(%s)' % sub_xpath.condition)
+ return xpath.add_condition("not(%s)" % sub_xpath.condition)
else:
- return xpath.add_condition('0')
+ return xpath.add_condition("0")
+
+ def xpath_matching(self, matching):
+ xpath = self.xpath(matching.selector)
+ exprs = [self.xpath(selector) for selector in matching.selector_list]
+ for e in exprs:
+ e.add_name_test()
+ if e.condition:
+ xpath.add_condition(e.condition, "or")
+ return xpath
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
- method = 'xpath_%s_function' % function.name.replace('-', '_')
+ method = "xpath_%s_function" % function.name.replace("-", "_")
method = _unicode_safe_getattr(self, method, None)
if not method:
- raise ExpressionError(
- "The pseudo-class :%s() is unknown" % function.name)
+ raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
return method(self.xpath(function.selector), function)
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
- method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
+ method = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
method = _unicode_safe_getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
- raise ExpressionError(
- "The pseudo-class :%s is unknown" % pseudo.ident)
+ raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
return method(self.xpath(pseudo.selector))
-
def xpath_attrib(self, selector):
"""Translate an attribute selector."""
operator = self.attribute_operator_mapping[selector.operator]
- method = getattr(self, 'xpath_attrib_%s' % operator)
+ method = getattr(self, "xpath_attrib_%s" % operator)
if self.lower_case_attribute_names:
name = selector.attrib.lower()
else:
name = selector.attrib
safe = is_safe_name(name)
if selector.namespace:
- name = '%s:%s' % (selector.namespace, name)
+ name = "%s:%s" % (selector.namespace, name)
safe = safe and is_safe_name(selector.namespace)
if safe:
- attrib = '@' + name
+ attrib = "@" + name
else:
- attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
+ attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
if selector.value is None:
value = None
elif self.lower_case_attribute_values:
@@ -320,19 +329,18 @@ def xpath_class(self, class_selector):
"""Translate a class selector."""
# .foo is defined as [class~=foo] in the spec.
xpath = self.xpath(class_selector.selector)
- return self.xpath_attrib_includes(
- xpath, '@class', class_selector.class_name)
+ return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)
def xpath_hash(self, id_selector):
"""Translate an ID selector."""
xpath = self.xpath(id_selector.selector)
- return self.xpath_attrib_equals(xpath, '@id', id_selector.id)
+ return self.xpath_attrib_equals(xpath, "@id", id_selector.id)
def xpath_element(self, selector):
"""Translate a type or universal selector."""
element = selector.element
if not element:
- element = '*'
+ element = "*"
safe = True
else:
safe = is_safe_name(element)
@@ -341,39 +349,36 @@ def xpath_element(self, selector):
if selector.namespace:
# Namespace prefixes are case-sensitive.
# http://www.w3.org/TR/css3-namespace/#prefixes
- element = '%s:%s' % (selector.namespace, element)
+ element = "%s:%s" % (selector.namespace, element)
safe = safe and is_safe_name(selector.namespace)
xpath = self.xpathexpr_cls(element=element)
if not safe:
xpath.add_name_test()
return xpath
-
# CombinedSelector: dispatch by combinator
def xpath_descendant_combinator(self, left, right):
"""right is a child, grand-child or further descendant of left"""
- return left.join('/descendant-or-self::*/', right)
+ return left.join("/descendant-or-self::*/", right)
def xpath_child_combinator(self, left, right):
"""right is an immediate child of left"""
- return left.join('/', right)
+ return left.join("/", right)
def xpath_direct_adjacent_combinator(self, left, right):
"""right is a sibling immediately after left"""
- xpath = left.join('/following-sibling::', right)
+ xpath = left.join("/following-sibling::", right)
xpath.add_name_test()
- return xpath.add_condition('position() = 1')
+ return xpath.add_condition("position() = 1")
def xpath_indirect_adjacent_combinator(self, left, right):
"""right is a sibling after left, immediately or not"""
- return left.join('/following-sibling::', right)
-
+ return left.join("/following-sibling::", right)
# Function: dispatch by function/pseudo-class name
- def xpath_nth_child_function(self, xpath, function, last=False,
- add_name_test=True):
+ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True):
try:
a, b = parse_series(function.arguments)
except ValueError:
@@ -427,14 +432,14 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
# and since n ∈ {0, 1, 2, ...}, if b-1<=0,
# there is always an "n" matching any number of siblings (maybe none)
- if a == 1 and b_min_1 <=0:
+ if a == 1 and b_min_1 <= 0:
return xpath
# early-exit condition 2:
# ~~~~~~~~~~~~~~~~~~~~~~~
# an+b-1 siblings with a<0 and (b-1)<0 is not possible
if a < 0 and b_min_1 < 0:
- return xpath.add_condition('0')
+ return xpath.add_condition("0")
# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
@@ -444,20 +449,20 @@ def xpath_nth_child_function(self, xpath, function, last=False,
elif add_name_test:
nodetest = '*'
else:
- nodetest = '%s' % xpath.element
+ nodetest = "%s" % xpath.element
# count siblings before or after the element
if not last:
- siblings_count = 'count(preceding-sibling::%s)' % nodetest
+ siblings_count = "count(preceding-sibling::%s)" % nodetest
else:
- siblings_count = 'count(following-sibling::%s)' % nodetest
+ siblings_count = "count(following-sibling::%s)" % nodetest
# special case of fixed position: nth-*(0n+b)
# if a == 0:
# ~~~~~~~~~~
# count(***-sibling::***) = b-1
if a == 0:
- return xpath.add_condition('%s = %s' % (siblings_count, b_min_1))
+ return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))
expressions = []
@@ -466,12 +471,12 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
# therefore, the predicate is only interesting if (b-1)>0
if b_min_1 > 0:
- expressions.append('%s >= %s' % (siblings_count, b_min_1))
+ expressions.append("%s >= %s" % (siblings_count, b_min_1))
else:
# if a<0, and (b-1)<0, no "n" satisfies this,
# this is tested above as an early exist condition
# otherwise,
- expressions.append('%s <= %s' % (siblings_count, b_min_1))
+ expressions.append("%s <= %s" % (siblings_count, b_min_1))
# operations modulo 1 or -1 are simpler, one only needs to verify:
#
@@ -494,56 +499,48 @@ def xpath_nth_child_function(self, xpath, function, last=False,
b_neg = (-b_min_1) % abs(a)
if b_neg != 0:
- b_neg = '+%s' % b_neg
- left = '(%s %s)' % (left, b_neg)
+ b_neg = "+%s" % b_neg
+ left = "(%s %s)" % (left, b_neg)
- expressions.append('%s mod %s = 0' % (left, a))
+ expressions.append("%s mod %s = 0" % (left, a))
if len(expressions) > 1:
- template = '(%s)'
+ template = "(%s)"
else:
- template = '%s'
- xpath.add_condition(' and '.join(template % expression
- for expression in expressions))
+ template = "%s"
+ xpath.add_condition(" and ".join(template % expression for expression in expressions))
return xpath
def xpath_nth_last_child_function(self, xpath, function):
return self.xpath_nth_child_function(xpath, function, last=True)
def xpath_nth_of_type_function(self, xpath, function):
- if xpath.element == '*':
- raise ExpressionError(
- "*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(xpath, function,
- add_name_test=False)
+ if xpath.element == "*":
+ raise ExpressionError("*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function, add_name_test=False)
def xpath_nth_last_of_type_function(self, xpath, function):
- if xpath.element == '*':
- raise ExpressionError(
- "*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(xpath, function, last=True,
- add_name_test=False)
+ if xpath.element == "*":
+ raise ExpressionError("*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False)
def xpath_contains_function(self, xpath, function):
# Defined there, removed in later drafts:
# http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :contains(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :contains(), got %r" % function.arguments
+ )
value = function.arguments[0].value
- return xpath.add_condition(
- 'contains(., %s)' % self.xpath_literal(value))
+ return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))
def xpath_lang_function(self, xpath, function):
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :lang(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :lang(), got %r" % function.arguments
+ )
value = function.arguments[0].value
- return xpath.add_condition(
- "lang(%s)" % (self.xpath_literal(value)))
-
+ return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))
# Pseudo: dispatch by pseudo-class name
@@ -559,31 +556,28 @@ def xpath_scope_pseudo(self, xpath):
return xpath.add_condition("1")
def xpath_first_child_pseudo(self, xpath):
- return xpath.add_condition('count(preceding-sibling::*) = 0')
+ return xpath.add_condition("count(preceding-sibling::*) = 0")
def xpath_last_child_pseudo(self, xpath):
- return xpath.add_condition('count(following-sibling::*) = 0')
+ return xpath.add_condition("count(following-sibling::*) = 0")
def xpath_first_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:first-of-type is not implemented")
- return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:first-of-type is not implemented")
+ return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)
def xpath_last_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:last-of-type is not implemented")
- return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:last-of-type is not implemented")
+ return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)
def xpath_only_child_pseudo(self, xpath):
- return xpath.add_condition('count(parent::*/child::*) = 1')
+ return xpath.add_condition("count(parent::*/child::*) = 1")
def xpath_only_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:only-of-type is not implemented")
- return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:only-of-type is not implemented")
+ return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)
def xpath_empty_pseudo(self, xpath):
return xpath.add_condition("not(*) and not(string-length())")
@@ -610,61 +604,63 @@ def xpath_attrib_exists(self, xpath, name, value):
return xpath
def xpath_attrib_equals(self, xpath, name, value):
- xpath.add_condition('%s = %s' % (name, self.xpath_literal(value)))
+ xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
return xpath
def xpath_attrib_different(self, xpath, name, value):
# FIXME: this seems like a weird hack...
if value:
- xpath.add_condition('not(%s) or %s != %s'
- % (name, name, self.xpath_literal(value)))
+ xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value)))
else:
- xpath.add_condition('%s != %s'
- % (name, self.xpath_literal(value)))
+ xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
return xpath
def xpath_attrib_includes(self, xpath, name, value):
if is_non_whitespace(value):
xpath.add_condition(
"%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
- % (name, name, self.xpath_literal(' '+value+' ')))
+ % (name, name, self.xpath_literal(" " + value + " "))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_dashmatch(self, xpath, name, value):
# Weird, but true...
- xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % (
- name,
- name, self.xpath_literal(value),
- name, self.xpath_literal(value + '-')))
+ xpath.add_condition(
+ "%s and (%s = %s or starts-with(%s, %s))"
+ % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-"))
+ )
return xpath
def xpath_attrib_prefixmatch(self, xpath, name, value):
if value:
- xpath.add_condition('%s and starts-with(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ xpath.add_condition(
+ "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_suffixmatch(self, xpath, name, value):
if value:
# Oddly there is a starts-with in XPath 1.0, but not ends-with
xpath.add_condition(
- '%s and substring(%s, string-length(%s)-%s) = %s'
- % (name, name, name, len(value)-1, self.xpath_literal(value)))
+ "%s and substring(%s, string-length(%s)-%s) = %s"
+ % (name, name, name, len(value) - 1, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_substringmatch(self, xpath, name, value):
if value:
# Attribute selectors are case sensitive
- xpath.add_condition('%s and contains(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ xpath.add_condition(
+ "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
@@ -685,7 +681,7 @@ class HTMLTranslator(GenericTranslator):
"""
- lang_attribute = 'lang'
+ lang_attribute = "lang"
def __init__(self, xhtml=False):
self.xhtml = xhtml # Might be useful for sub-classes?
@@ -699,33 +695,36 @@ def xpath_checked_pseudo(self, xpath):
return xpath.add_condition(
"(@selected and name(.) = 'option') or "
"(@checked "
- "and (name(.) = 'input' or name(.) = 'command')"
- "and (@type = 'checkbox' or @type = 'radio'))")
+ "and (name(.) = 'input' or name(.) = 'command')"
+ "and (@type = 'checkbox' or @type = 'radio'))"
+ )
def xpath_lang_function(self, xpath, function):
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :lang(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :lang(), got %r" % function.arguments
+ )
value = function.arguments[0].value
return xpath.add_condition(
"ancestor-or-self::*[@lang][1][starts-with(concat("
- # XPath 1.0 has no lower-case function...
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
- "'abcdefghijklmnopqrstuvwxyz'), "
- "'-'), %s)]"
- % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-"))
+ )
def xpath_link_pseudo(self, xpath):
- return xpath.add_condition("@href and "
- "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
+ return xpath.add_condition(
+ "@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
+ )
# Links are never visited, the implementation for :visited is the same
# as in GenericTranslator
def xpath_disabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition('''
+ return xpath.add_condition(
+ """
(
@disabled and
(
@@ -747,13 +746,15 @@ def xpath_disabled_pseudo(self, xpath):
)
and ancestor::fieldset[@disabled]
)
- ''')
+ """
+ )
# FIXME: in the second half, add "and is not a descendant of that
# fieldset element's first legend element child, if any."
def xpath_enabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition('''
+ return xpath.add_condition(
+ """
(
@href and (
name(.) = 'a' or
@@ -781,7 +782,8 @@ def xpath_enabled_pseudo(self, xpath):
@disabled or ancestor::optgroup[@disabled]
)
)
- ''')
+ """
+ )
# FIXME: ... or "li elements that are children of menu elements,
# and that have a child element that defines a command, if the first
# such element's Disabled State facet is false (not disabled)".
diff --git a/docs/conf.py b/docs/conf.py
index aa897ef..9dc2575 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -50,9 +50,9 @@
# built documents.
#
# The full version, including alpha/beta/rc tags.
-init_py = open(os.path.join(os.path.dirname(__file__),
- '..', 'cssselect', '__init__.py')).read()
-release = re.search("VERSION = '([^']+)'", init_py).group(1)
+with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file:
+ init_py = init_file.read()
+release = re.search('VERSION = "([^"]+)"', init_py).group(1)
# The short X.Y version.
version = release.rstrip('dev')
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..7da580b
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,34 @@
+[MASTER]
+persistent=no
+
+[MESSAGES CONTROL]
+disable=assignment-from-no-return,
+ bad-continuation,
+ bad-whitespace,
+ c-extension-no-member,
+ consider-using-in,
+ fixme,
+ inconsistent-return-statements,
+ invalid-name,
+ missing-class-docstring,
+ missing-function-docstring,
+ missing-module-docstring,
+ multiple-imports,
+ no-else-return,
+ no-member,
+ no-self-use,
+ raise-missing-from,
+ redefined-builtin,
+ redefined-outer-name,
+ too-few-public-methods,
+ too-many-arguments,
+ too-many-branches,
+ too-many-function-args,
+ too-many-lines,
+ too-many-public-methods,
+ too-many-statements,
+ undefined-variable,
+ unidiomatic-typecheck,
+ unused-argument,
+ unused-import,
+ useless-object-inheritance # Required for Python 2 support
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..57a5583
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.black]
+line-length = 99
diff --git a/setup.py b/setup.py
index de7128d..f95721d 100644
--- a/setup.py
+++ b/setup.py
@@ -2,45 +2,47 @@
import re
import os.path
+
try:
from setuptools import setup
- extra_kwargs = {'test_suite': 'cssselect.tests'}
+
+ extra_kwargs = {"test_suite": "cssselect.tests"}
except ImportError:
from distutils.core import setup
+
extra_kwargs = {}
ROOT = os.path.dirname(__file__)
-README = open(os.path.join(ROOT, 'README.rst')).read()
-INIT_PY = open(os.path.join(ROOT, 'cssselect', '__init__.py')).read()
-VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1)
+with open(os.path.join(ROOT, "README.rst")) as readme_file:
+ README = readme_file.read()
+with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file:
+ INIT_PY = init_file.read()
+VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1)
setup(
- name='cssselect',
+ name="cssselect",
version=VERSION,
- author='Ian Bicking',
- author_email='ianb@colorstudy.com',
- maintainer='Paul Tremberth',
- maintainer_email='paul.tremberth@gmail.com',
- description=
- 'cssselect parses CSS3 Selectors and translates them to XPath 1.0',
+ author="Ian Bicking",
+ author_email="ianb@colorstudy.com",
+ maintainer="Paul Tremberth",
+ maintainer_email="paul.tremberth@gmail.com",
+ description="cssselect parses CSS3 Selectors and translates them to XPath 1.0",
long_description=README,
- url='https://github.com/scrapy/cssselect',
- license='BSD',
- packages=['cssselect'],
- python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
+ url="https://github.com/scrapy/cssselect",
+ license="BSD",
+ packages=["cssselect"],
+ python_requires=">=3.6",
classifiers=[
- 'Development Status :: 4 - Beta',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: BSD License',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7'
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
],
- **extra_kwargs
+ **extra_kwargs,
)
diff --git a/tests/requirements.txt b/tests/requirements.txt
deleted file mode 100644
index 000d5f2..0000000
--- a/tests/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-codecov
-lxml;python_version!="3.4"
-lxml<=4.3.5;python_version=="3.4"
-pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4
-pytest-cov
\ No newline at end of file
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index e40ca62..9aaede9 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -21,17 +21,23 @@
import unittest
from lxml import etree, html
-from cssselect import (parse, GenericTranslator, HTMLTranslator,
- SelectorSyntaxError, ExpressionError)
-from cssselect.parser import (tokenize, parse_series, _unicode,
- FunctionalPseudoElement)
+from cssselect import (
+ parse,
+ GenericTranslator,
+ HTMLTranslator,
+ SelectorSyntaxError,
+ ExpressionError,
+)
+from cssselect.parser import tokenize, parse_series, _unicode, FunctionalPseudoElement
from cssselect.xpath import _unicode_safe_getattr, XPathExpr
if sys.version_info[0] < 3:
# Python 2
def u(text):
- return text.decode('utf8')
+ return text.decode("utf8")
+
+
else:
# Python 3
def u(text):
@@ -41,8 +47,8 @@ def u(text):
class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
tokens = [
- _unicode(item) for item in tokenize(
- u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
+ _unicode(item) for item in tokenize(u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))
+ ]
assert tokens == [
u(""),
"",
@@ -69,8 +75,7 @@ def repr_parse(css):
selectors = parse(css)
for selector in selectors:
assert selector.pseudo_element is None
- return [repr(selector.parsed_tree).replace("(u'", "('")
- for selector in selectors]
+ return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors]
def parse_many(first, *others):
result = repr_parse(first)
@@ -78,88 +83,91 @@ def parse_many(first, *others):
assert repr_parse(other) == result
return result
- assert parse_many('*') == ['Element[*]']
- assert parse_many('*|*') == ['Element[*]']
- assert parse_many('*|foo') == ['Element[foo]']
- assert parse_many('|foo') == ['Element[foo]']
- assert parse_many('foo|*') == ['Element[foo|*]']
- assert parse_many('foo|bar') == ['Element[foo|bar]']
+ assert parse_many("*") == ["Element[*]"]
+ assert parse_many("*|*") == ["Element[*]"]
+ assert parse_many("*|foo") == ["Element[foo]"]
+ assert parse_many("|foo") == ["Element[foo]"]
+ assert parse_many("foo|*") == ["Element[foo|*]"]
+ assert parse_many("foo|bar") == ["Element[foo|bar]"]
# This will never match, but it is valid:
- assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]']
- assert parse_many(
- 'div>.foo',
- 'div> .foo',
- 'div >.foo',
- 'div > .foo',
- 'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
- ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]']
- assert parse_many('td.foo,.bar',
- 'td.foo, .bar',
- 'td.foo\t\r\n\f ,\t\r\n\f .bar'
- ) == [
- 'Class[Element[td].foo]',
- 'Class[Element[*].bar]'
+ assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"]
+ assert (
+ parse_many(
+ "div>.foo",
+ "div> .foo",
+ "div >.foo",
+ "div > .foo",
+ "div \n> \t \t .foo",
+ "div\r>\n\n\n.foo",
+ "div\f>\f.foo",
+ )
+ == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"]
+ )
+ assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [
+ "Class[Element[td].foo]",
+ "Class[Element[*].bar]",
+ ]
+ assert parse_many("div, td.foo, div.bar span") == [
+ "Element[div]",
+ "Class[Element[td].foo]",
+ "CombinedSelector[Class[Element[div].bar] Element[span]]",
+ ]
+ assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"]
+ assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
+ assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
+ assert parse_many("td :first") == [
+ "CombinedSelector[Element[td] Pseudo[Element[*]:first]]"
+ ]
+ assert parse_many("td :first") == [
+ "CombinedSelector[Element[td] Pseudo[Element[*]:first]]"
+ ]
+ assert parse_many("a[name]", "a[ name\t]") == ["Attrib[Element[a][name]]"]
+ assert parse_many("a [name]") == [
+ "CombinedSelector[Element[a] Attrib[Element[*][name]]]"
+ ]
+ assert parse_many('a[rel="include"]', "a[rel = include]") == [
+ "Attrib[Element[a][rel = 'include']]"
]
- assert parse_many('div, td.foo, div.bar span') == [
- 'Element[div]',
- 'Class[Element[td].foo]',
- 'CombinedSelector[Class[Element[div].bar] '
- ' Element[span]]']
- assert parse_many('div > p') == [
- 'CombinedSelector[Element[div] > Element[p]]']
- assert parse_many('td:first') == [
- 'Pseudo[Element[td]:first]']
- assert parse_many('td:first') == [
- 'Pseudo[Element[td]:first]']
- assert parse_many('td :first') == [
- 'CombinedSelector[Element[td] '
- ' Pseudo[Element[*]:first]]']
- assert parse_many('td :first') == [
- 'CombinedSelector[Element[td] '
- ' Pseudo[Element[*]:first]]']
- assert parse_many('a[name]', 'a[ name\t]') == [
- 'Attrib[Element[a][name]]']
- assert parse_many('a [name]') == [
- 'CombinedSelector[Element[a] Attrib[Element[*][name]]]']
- assert parse_many('a[rel="include"]', 'a[rel = include]') == [
- "Attrib[Element[a][rel = 'include']]"]
assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
- "Attrib[Element[a][hreflang |= 'en']]"]
- assert parse_many('div:nth-child(10)') == [
- "Function[Element[div]:nth-child(['10'])]"]
- assert parse_many(':nth-child(2n+2)') == [
- "Function[Element[*]:nth-child(['2', 'n', '+2'])]"]
- assert parse_many('div:nth-of-type(10)') == [
- "Function[Element[div]:nth-of-type(['10'])]"]
- assert parse_many('div div:nth-of-type(10) .aclass') == [
- 'CombinedSelector[CombinedSelector[Element[div] '
- "Function[Element[div]:nth-of-type(['10'])]] "
- ' Class[Element[*].aclass]]']
- assert parse_many('label:only') == [
- 'Pseudo[Element[label]:only]']
- assert parse_many('a:lang(fr)') == [
- "Function[Element[a]:lang(['fr'])]"]
- assert parse_many('div:contains("foo")') == [
- "Function[Element[div]:contains(['foo'])]"]
- assert parse_many('div#foobar') == [
- 'Hash[Element[div]#foobar]']
- assert parse_many('div:not(div.foo)') == [
- 'Negation[Element[div]:not(Class[Element[div].foo])]']
- assert parse_many('td ~ th') == [
- 'CombinedSelector[Element[td] ~ Element[th]]']
- assert parse_many(':scope > foo') == [
- 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
+ "Attrib[Element[a][hreflang |= 'en']]"
+ ]
+ assert parse_many("div:nth-child(10)") == ["Function[Element[div]:nth-child(['10'])]"]
+ assert parse_many(":nth-child(2n+2)") == [
+ "Function[Element[*]:nth-child(['2', 'n', '+2'])]"
+ ]
+ assert parse_many("div:nth-of-type(10)") == ["Function[Element[div]:nth-of-type(['10'])]"]
+ assert parse_many("div div:nth-of-type(10) .aclass") == [
+ "CombinedSelector[CombinedSelector[Element[div] "
+ "Function[Element[div]:nth-of-type(['10'])]] "
+ " Class[Element[*].aclass]]"
+ ]
+ assert parse_many("label:only") == ["Pseudo[Element[label]:only]"]
+ assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"]
+ assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"]
+ assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"]
+ assert parse_many("div:not(div.foo)") == [
+ "Negation[Element[div]:not(Class[Element[div].foo])]"
+ ]
+ assert parse_many("div:is(.foo, #bar)") == [
+ "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]"
+ ]
+ assert parse_many(":is(:hover, :visited)") == [
+ "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]"
+ ]
+ assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"]
+ assert parse_many(":scope > foo") == [
+ "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
]
- assert parse_many(' :scope > foo') == [
- 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
+ assert parse_many(" :scope > foo") == [
+ "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
]
- assert parse_many(':scope > foo bar > div') == [
- 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
- 'Element[foo]] Element[bar]] > Element[div]]'
+ assert parse_many(":scope > foo bar > div") == [
+ "CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
+ "Element[foo]] Element[bar]] > Element[div]]"
]
- assert parse_many(':scope > #foo #bar') == [
- 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
- 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]'
+ assert parse_many(":scope > #foo #bar") == [
+ "CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
+ "Hash[Element[*]#foo]] Hash[Element[*]#bar]]"
]
def test_pseudo_elements(self):
@@ -185,61 +193,66 @@ def test_pseudo_repr(css):
selector = result[0]
return selector.parsed_tree.__repr__()
- assert parse_one('foo') == ('Element[foo]', None)
- assert parse_one('*') == ('Element[*]', None)
- assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
- assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None)
+ assert parse_one("foo") == ("Element[foo]", None)
+ assert parse_one("*") == ("Element[*]", None)
+ assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None)
+ assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None)
# Special cases for CSS 2.1 pseudo-elements
- assert parse_one(':BEfore') == ('Element[*]', 'before')
- assert parse_one(':aftER') == ('Element[*]', 'after')
- assert parse_one(':First-Line') == ('Element[*]', 'first-line')
- assert parse_one(':First-Letter') == ('Element[*]', 'first-letter')
-
- assert parse_one('::befoRE') == ('Element[*]', 'before')
- assert parse_one('::AFter') == ('Element[*]', 'after')
- assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
- assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
-
- assert parse_one('::text-content') == ('Element[*]', 'text-content')
- assert parse_one('::attr(name)') == (
- "Element[*]", "FunctionalPseudoElement[::attr(['name'])]")
-
- assert parse_one('::Selection') == ('Element[*]', 'selection')
- assert parse_one('foo:after') == ('Element[foo]', 'after')
- assert parse_one('foo::selection') == ('Element[foo]', 'selection')
- assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == (
- 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
- 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
- 'selection')
- assert parse_pseudo(':scope > div, foo bar') == [
- ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
- ('CombinedSelector[Element[foo] Element[bar]]', None)
+ assert parse_one(":BEfore") == ("Element[*]", "before")
+ assert parse_one(":aftER") == ("Element[*]", "after")
+ assert parse_one(":First-Line") == ("Element[*]", "first-line")
+ assert parse_one(":First-Letter") == ("Element[*]", "first-letter")
+
+ assert parse_one("::befoRE") == ("Element[*]", "before")
+ assert parse_one("::AFter") == ("Element[*]", "after")
+ assert parse_one("::firsT-linE") == ("Element[*]", "first-line")
+ assert parse_one("::firsT-letteR") == ("Element[*]", "first-letter")
+
+ assert parse_one("::text-content") == ("Element[*]", "text-content")
+ assert parse_one("::attr(name)") == (
+ "Element[*]",
+ "FunctionalPseudoElement[::attr(['name'])]",
+ )
+
+ assert parse_one("::Selection") == ("Element[*]", "selection")
+ assert parse_one("foo:after") == ("Element[foo]", "after")
+ assert parse_one("foo::selection") == ("Element[foo]", "selection")
+ assert parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection") == (
+ "CombinedSelector[Hash[Element[lorem]#ipsum] ~ "
+ "Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]",
+ "selection",
+ )
+ assert parse_pseudo(":scope > div, foo bar") == [
+ ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
+ ("CombinedSelector[Element[foo] Element[bar]]", None),
]
- assert parse_pseudo('foo:before, bar, baz:after') == [
- ('Element[foo]', 'before'), ('Element[bar]', None),
- ('Element[baz]', 'after')
+ assert parse_pseudo("foo:before, bar, baz:after") == [
+ ("Element[foo]", "before"),
+ ("Element[bar]", None),
+ ("Element[baz]", "after"),
]
# Special cases for CSS 2.1 pseudo-elements are ignored by default
- for pseudo in ('after', 'before', 'first-line', 'first-letter'):
- selector, = parse('e:%s' % pseudo)
+ for pseudo in ("after", "before", "first-line", "first-letter"):
+ (selector,) = parse("e:%s" % pseudo)
assert selector.pseudo_element == pseudo
- assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e"
+ assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e"
# Pseudo Elements are ignored by default, but if allowed they are not
# supported by GenericTranslator
tr = GenericTranslator()
- selector, = parse('e::foo')
- assert selector.pseudo_element == 'foo'
- assert tr.selector_to_xpath(selector, prefix='') == "e"
- self.assertRaises(ExpressionError, tr.selector_to_xpath, selector,
- translate_pseudo_elements=True)
+ (selector,) = parse("e::foo")
+ assert selector.pseudo_element == "foo"
+ assert tr.selector_to_xpath(selector, prefix="") == "e"
+ self.assertRaises(
+ ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True
+ )
# Special test for the unicode symbols and ':scope' element if check
# Errors if use repr() instead of __repr__()
- assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]'
- assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]'
+ assert test_pseudo_repr(u":fİrst-child") == u"Pseudo[Element[*]:fİrst-child]"
+ assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]"
def test_specificity(self):
def specificity(css):
@@ -247,32 +260,34 @@ def specificity(css):
assert len(selectors) == 1
return selectors[0].specificity()
- assert specificity('*') == (0, 0, 0)
- assert specificity(' foo') == (0, 0, 1)
- assert specificity(':empty ') == (0, 1, 0)
- assert specificity(':before') == (0, 0, 1)
- assert specificity('*:before') == (0, 0, 1)
- assert specificity(':nth-child(2)') == (0, 1, 0)
- assert specificity('.bar') == (0, 1, 0)
- assert specificity('[baz]') == (0, 1, 0)
+ assert specificity("*") == (0, 0, 0)
+ assert specificity(" foo") == (0, 0, 1)
+ assert specificity(":empty ") == (0, 1, 0)
+ assert specificity(":before") == (0, 0, 1)
+ assert specificity("*:before") == (0, 0, 1)
+ assert specificity(":nth-child(2)") == (0, 1, 0)
+ assert specificity(".bar") == (0, 1, 0)
+ assert specificity("[baz]") == (0, 1, 0)
assert specificity('[baz="4"]') == (0, 1, 0)
assert specificity('[baz^="4"]') == (0, 1, 0)
- assert specificity('#lipsum') == (1, 0, 0)
+ assert specificity("#lipsum") == (1, 0, 0)
- assert specificity(':not(*)') == (0, 0, 0)
- assert specificity(':not(foo)') == (0, 0, 1)
- assert specificity(':not(.foo)') == (0, 1, 0)
- assert specificity(':not([foo])') == (0, 1, 0)
- assert specificity(':not(:empty)') == (0, 1, 0)
- assert specificity(':not(#foo)') == (1, 0, 0)
+ assert specificity(":not(*)") == (0, 0, 0)
+ assert specificity(":not(foo)") == (0, 0, 1)
+ assert specificity(":not(.foo)") == (0, 1, 0)
+ assert specificity(":not([foo])") == (0, 1, 0)
+ assert specificity(":not(:empty)") == (0, 1, 0)
+ assert specificity(":not(#foo)") == (1, 0, 0)
- assert specificity('foo:empty') == (0, 1, 1)
- assert specificity('foo:before') == (0, 0, 2)
- assert specificity('foo::before') == (0, 0, 2)
- assert specificity('foo:empty::before') == (0, 1, 2)
+ assert specificity(":is(.foo, #bar)") == (1, 0, 0)
+ assert specificity(":is(:hover, :visited)") == (0, 1, 0)
- assert specificity('#lorem + foo#ipsum:first-child > bar:first-line'
- ) == (2, 1, 3)
+ assert specificity("foo:empty") == (0, 1, 1)
+ assert specificity("foo:before") == (0, 0, 2)
+ assert specificity("foo::before") == (0, 0, 2)
+ assert specificity("foo:empty::before") == (0, 1, 2)
+
+ assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3)
def test_css_export(self):
def css2css(css, res=None):
@@ -280,32 +295,34 @@ def css2css(css, res=None):
assert len(selectors) == 1
assert selectors[0].canonical() == (res or css)
- css2css('*')
- css2css(' foo', 'foo')
- css2css('Foo', 'Foo')
- css2css(':empty ', ':empty')
- css2css(':before', '::before')
- css2css(':beFOre', '::before')
- css2css('*:before', '::before')
- css2css(':nth-child(2)')
- css2css('.bar')
- css2css('[baz]')
+ css2css("*")
+ css2css(" foo", "foo")
+ css2css("Foo", "Foo")
+ css2css(":empty ", ":empty")
+ css2css(":before", "::before")
+ css2css(":beFOre", "::before")
+ css2css("*:before", "::before")
+ css2css(":nth-child(2)")
+ css2css(".bar")
+ css2css("[baz]")
css2css('[baz="4"]', "[baz='4']")
css2css('[baz^="4"]', "[baz^='4']")
css2css("[ns|attr='4']")
- css2css('#lipsum')
- css2css(':not(*)')
- css2css(':not(foo)')
- css2css(':not(*.foo)', ':not(.foo)')
- css2css(':not(*[foo])', ':not([foo])')
- css2css(':not(:empty)')
- css2css(':not(#foo)')
- css2css('foo:empty')
- css2css('foo::before')
- css2css('foo:empty::before')
+ css2css("#lipsum")
+ css2css(":not(*)")
+ css2css(":not(foo)")
+ css2css(":not(*.foo)", ":not(.foo)")
+ css2css(":not(*[foo])", ":not([foo])")
+ css2css(":not(:empty)")
+ css2css(":not(#foo)")
+ css2css(":is(#bar, .foo)")
+ css2css(":is(:focused, :visited)")
+ css2css("foo:empty")
+ css2css("foo::before")
+ css2css("foo:empty::before")
css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)")
- css2css('#lorem + foo#ipsum:first-child > bar::first-line')
- css2css('foo > *')
+ css2css("#lorem + foo#ipsum:first-child > bar::first-line")
+ css2css("foo > *")
def test_parse_errors(self):
def get_error(css):
@@ -315,97 +332,78 @@ def get_error(css):
# Py2, Py3, ...
return str(sys.exc_info()[1]).replace("(u'", "('")
- assert get_error('attributes(href)/html/body/a') == (
- "Expected selector, got ")
- assert get_error('attributes(href)') == (
- "Expected selector, got ")
- assert get_error('html/body/a') == (
- "Expected selector, got ")
- assert get_error(' ') == (
- "Expected selector, got ")
- assert get_error('div, ') == (
- "Expected selector, got ")
- assert get_error(' , div') == (
- "Expected selector, got ")
- assert get_error('p, , div') == (
- "Expected selector, got ")
- assert get_error('div > ') == (
- "Expected selector, got ")
- assert get_error(' > div') == (
- "Expected selector, got ' at 2>")
- assert get_error('foo|#bar') == (
- "Expected ident or '*', got ")
- assert get_error('#.foo') == (
- "Expected selector, got ")
- assert get_error('.#foo') == (
- "Expected ident, got ")
- assert get_error(':#foo') == (
- "Expected ident, got ")
- assert get_error('[*]') == (
- "Expected '|', got ")
- assert get_error('[foo|]') == (
- "Expected ident, got ")
- assert get_error('[#]') == (
- "Expected ident or '*', got ")
- assert get_error('[foo=#]') == (
- "Expected string or ident, got ")
- assert get_error('[href]a') == (
- "Expected selector, got ")
- assert get_error('[rel=stylesheet]') is None
- assert get_error('[rel:stylesheet]') == (
- "Operator expected, got ")
- assert get_error('[rel=stylesheet') == (
- "Expected ']', got ")
- assert get_error(':lang(fr)') is None
- assert get_error(':lang(fr') == (
- "Expected an argument, got ")
- assert get_error(':contains("foo') == (
- "Unclosed string at 10")
- assert get_error('foo!') == (
- "Expected selector, got ")
+ assert get_error("attributes(href)/html/body/a") == (
+ "Expected selector, got "
+ )
+ assert get_error("attributes(href)") == ("Expected selector, got ")
+ assert get_error("html/body/a") == ("Expected selector, got ")
+ assert get_error(" ") == ("Expected selector, got ")
+ assert get_error("div, ") == ("Expected selector, got ")
+ assert get_error(" , div") == ("Expected selector, got ")
+ assert get_error("p, , div") == ("Expected selector, got ")
+ assert get_error("div > ") == ("Expected selector, got ")
+ assert get_error(" > div") == ("Expected selector, got ' at 2>")
+ assert get_error("foo|#bar") == ("Expected ident or '*', got ")
+ assert get_error("#.foo") == ("Expected selector, got ")
+ assert get_error(".#foo") == ("Expected ident, got ")
+ assert get_error(":#foo") == ("Expected ident, got ")
+ assert get_error("[*]") == ("Expected '|', got ")
+ assert get_error("[foo|]") == ("Expected ident, got ")
+ assert get_error("[#]") == ("Expected ident or '*', got ")
+ assert get_error("[foo=#]") == ("Expected string or ident, got ")
+ assert get_error("[href]a") == ("Expected selector, got ")
+ assert get_error("[rel=stylesheet]") is None
+ assert get_error("[rel:stylesheet]") == ("Operator expected, got ")
+ assert get_error("[rel=stylesheet") == ("Expected ']', got ")
+ assert get_error(":lang(fr)") is None
+ assert get_error(":lang(fr") == ("Expected an argument, got ")
+ assert get_error(':contains("foo') == ("Unclosed string at 10")
+ assert get_error("foo!") == ("Expected selector, got ")
# Mis-placed pseudo-elements
- assert get_error('a:before:empty') == (
- "Got pseudo-element ::before not at the end of a selector")
- assert get_error('li:before a') == (
- "Got pseudo-element ::before not at the end of a selector")
- assert get_error(':not(:before)') == (
- "Got pseudo-element ::before inside :not() at 12")
- assert get_error(':not(:not(a))') == (
- "Got nested :not()")
- assert get_error(':scope > div :scope header') == (
+ assert get_error("a:before:empty") == (
+ "Got pseudo-element ::before not at the end of a selector"
+ )
+ assert get_error("li:before a") == (
+ "Got pseudo-element ::before not at the end of a selector"
+ )
+ assert get_error(":not(:before)") == ("Got pseudo-element ::before inside :not() at 12")
+ assert get_error(":not(:not(a))") == ("Got nested :not()")
+ assert get_error(":is(:before)") == ("Got pseudo-element ::before inside function")
+ assert get_error(":is(a b)") == ("Expected an argument, got ")
+ assert get_error(":scope > div :scope header") == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
- assert get_error('div :scope header') == (
+ assert get_error("div :scope header") == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
- assert get_error('> div p') == ("Expected selector, got ' at 0>")
+ assert get_error("> div p") == ("Expected selector, got ' at 0>")
def test_translation(self):
def xpath(css):
- return _unicode(GenericTranslator().css_to_xpath(css, prefix=''))
-
- assert xpath('*') == "*"
- assert xpath('e') == "e"
- assert xpath('*|e') == "e"
- assert xpath('e|f') == "e:f"
- assert xpath('e[foo]') == "e[@foo]"
- assert xpath('e[foo|bar]') == "e[@foo:bar]"
+ return _unicode(GenericTranslator().css_to_xpath(css, prefix=""))
+
+ assert xpath("*") == "*"
+ assert xpath("e") == "e"
+ assert xpath("*|e") == "e"
+ assert xpath("e|f") == "e:f"
+ assert xpath("e[foo]") == "e[@foo]"
+ assert xpath("e[foo|bar]") == "e[@foo:bar]"
assert xpath('e[foo="bar"]') == "e[@foo = 'bar']"
assert xpath('e[foo~="bar"]') == (
- "e[@foo and contains("
- "concat(' ', normalize-space(@foo), ' '), ' bar ')]")
- assert xpath('e[foo^="bar"]') == (
- "e[@foo and starts-with(@foo, 'bar')]")
+ "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]"
+ )
+ assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]")
assert xpath('e[foo$="bar"]') == (
- "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']")
- assert xpath('e[foo*="bar"]') == (
- "e[@foo and contains(@foo, 'bar')]")
+ "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']"
+ )
+ assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]")
assert xpath('e[hreflang|="en"]') == (
- "e[@hreflang and ("
- "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
+ "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]"
+ )
# --- nth-* and nth-last-* -------------------------------------
+<<<<<<< HEAD
assert (
xpath("e:nth-child(2n+1 of S)")
== "e[count(preceding-sibling::S) mod 2 = 0]"
@@ -421,186 +419,155 @@ def xpath(css):
assert xpath('e:nth-child(1)') == (
"e[count(preceding-sibling::*) = 0]")
+=======
+ assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]")
+>>>>>>> parent/master
# always true
- assert xpath('e:nth-child(n)') == (
- "e")
- assert xpath('e:nth-child(n+1)') == (
- "e")
+ assert xpath("e:nth-child(n)") == ("e")
+ assert xpath("e:nth-child(n+1)") == ("e")
# always true too
- assert xpath('e:nth-child(n-10)') == (
- "e")
+ assert xpath("e:nth-child(n-10)") == ("e")
# b=2 is the limit...
- assert xpath('e:nth-child(n+2)') == (
- "e[count(preceding-sibling::*) >= 1]")
+ assert xpath("e:nth-child(n+2)") == ("e[count(preceding-sibling::*) >= 1]")
# always false
- assert xpath('e:nth-child(-n)') == (
- "e[0]")
+ assert xpath("e:nth-child(-n)") == ("e[0]")
# equivalent to first child
- assert xpath('e:nth-child(-n+1)') == (
- "e[count(preceding-sibling::*) <= 0]")
+ assert xpath("e:nth-child(-n+1)") == ("e[count(preceding-sibling::*) <= 0]")
- assert xpath('e:nth-child(3n+2)') == (
+ assert xpath("e:nth-child(3n+2)") == (
"e[(count(preceding-sibling::*) >= 1) and "
- "((count(preceding-sibling::*) +2) mod 3 = 0)]")
- assert xpath('e:nth-child(3n-2)') == (
- "e[count(preceding-sibling::*) mod 3 = 0]")
- assert xpath('e:nth-child(-n+6)') == (
- "e[count(preceding-sibling::*) <= 5]")
-
- assert xpath('e:nth-last-child(1)') == (
- "e[count(following-sibling::*) = 0]")
- assert xpath('e:nth-last-child(2n)') == (
- "e[(count(following-sibling::*) +1) mod 2 = 0]")
- assert xpath('e:nth-last-child(2n+1)') == (
- "e[count(following-sibling::*) mod 2 = 0]")
- assert xpath('e:nth-last-child(2n+2)') == (
+ "((count(preceding-sibling::*) +2) mod 3 = 0)]"
+ )
+ assert xpath("e:nth-child(3n-2)") == ("e[count(preceding-sibling::*) mod 3 = 0]")
+ assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]")
+
+ assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]")
+ assert xpath("e:nth-last-child(2n)") == ("e[(count(following-sibling::*) +1) mod 2 = 0]")
+ assert xpath("e:nth-last-child(2n+1)") == ("e[count(following-sibling::*) mod 2 = 0]")
+ assert xpath("e:nth-last-child(2n+2)") == (
"e[(count(following-sibling::*) >= 1) and "
- "((count(following-sibling::*) +1) mod 2 = 0)]")
- assert xpath('e:nth-last-child(3n+1)') == (
- "e[count(following-sibling::*) mod 3 = 0]")
+ "((count(following-sibling::*) +1) mod 2 = 0)]"
+ )
+ assert xpath("e:nth-last-child(3n+1)") == ("e[count(following-sibling::*) mod 3 = 0]")
# represents the two last e elements
- assert xpath('e:nth-last-child(-n+2)') == (
- "e[count(following-sibling::*) <= 1]")
-
- assert xpath('e:nth-of-type(1)') == (
- "e[count(preceding-sibling::e) = 0]")
- assert xpath('e:nth-last-of-type(1)') == (
- "e[count(following-sibling::e) = 0]")
- assert xpath('div e:nth-last-of-type(1) .aclass') == (
+ assert xpath("e:nth-last-child(-n+2)") == ("e[count(following-sibling::*) <= 1]")
+
+ assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]")
+ assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]")
+ assert xpath("div e:nth-last-of-type(1) .aclass") == (
"div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
- "/descendant-or-self::*/*[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' aclass ')]")
+ "/descendant-or-self::*/*[@class and contains("
+ "concat(' ', normalize-space(@class), ' '), ' aclass ')]"
+ )
- assert xpath('e:first-child') == (
- "e[count(preceding-sibling::*) = 0]")
- assert xpath('e:last-child') == (
- "e[count(following-sibling::*) = 0]")
- assert xpath('e:first-of-type') == (
- "e[count(preceding-sibling::e) = 0]")
- assert xpath('e:last-of-type') == (
- "e[count(following-sibling::e) = 0]")
- assert xpath('e:only-child') == (
- "e[count(parent::*/child::*) = 1]")
- assert xpath('e:only-of-type') == (
- "e[count(parent::*/child::e) = 1]")
- assert xpath('e:empty') == (
- "e[not(*) and not(string-length())]")
- assert xpath('e:EmPTY') == (
- "e[not(*) and not(string-length())]")
- assert xpath('e:root') == (
- "e[not(parent::*)]")
- assert xpath('e:hover') == (
- "e[0]") # never matches
- assert xpath('e:contains("foo")') == (
- "e[contains(., 'foo')]")
- assert xpath('e:ConTains(foo)') == (
- "e[contains(., 'foo')]")
- assert xpath('e.warning') == (
- "e[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' warning ')]")
- assert xpath('e#myid') == (
- "e[@id = 'myid']")
- assert xpath('e:not(:nth-child(odd))') == (
- "e[not(count(preceding-sibling::*) mod 2 = 0)]")
- assert xpath('e:nOT(*)') == (
- "e[0]") # never matches
- assert xpath('e f') == (
- "e/descendant-or-self::*/f")
- assert xpath('e > f') == (
- "e/f")
- assert xpath('e + f') == (
- "e/following-sibling::*[(name() = 'f') and (position() = 1)]")
- assert xpath('e ~ f') == (
- "e/following-sibling::f")
- assert xpath('e ~ f:nth-child(3)') == (
- "e/following-sibling::f[count(preceding-sibling::*) = 2]")
- assert xpath('div#container p') == (
- "div[@id = 'container']/descendant-or-self::*/p")
+ assert xpath("e:first-child") == ("e[count(preceding-sibling::*) = 0]")
+ assert xpath("e:last-child") == ("e[count(following-sibling::*) = 0]")
+ assert xpath("e:first-of-type") == ("e[count(preceding-sibling::e) = 0]")
+ assert xpath("e:last-of-type") == ("e[count(following-sibling::e) = 0]")
+ assert xpath("e:only-child") == ("e[count(parent::*/child::*) = 1]")
+ assert xpath("e:only-of-type") == ("e[count(parent::*/child::e) = 1]")
+ assert xpath("e:empty") == ("e[not(*) and not(string-length())]")
+ assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]")
+ assert xpath("e:root") == ("e[not(parent::*)]")
+ assert xpath("e:hover") == ("e[0]") # never matches
+ assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]")
+ assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]")
+ assert xpath("e.warning") == (
+ "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]"
+ )
+ assert xpath("e#myid") == ("e[@id = 'myid']")
+ assert xpath("e:not(:nth-child(odd))") == ("e[not(count(preceding-sibling::*) mod 2 = 0)]")
+ assert xpath("e:nOT(*)") == ("e[0]") # never matches
+ assert xpath("e f") == ("e/descendant-or-self::*/f")
+ assert xpath("e > f") == ("e/f")
+ assert xpath("e + f") == ("e/following-sibling::*[(name() = 'f') and (position() = 1)]")
+ assert xpath("e ~ f") == ("e/following-sibling::f")
+ assert xpath("e ~ f:nth-child(3)") == (
+ "e/following-sibling::f[count(preceding-sibling::*) = 2]"
+ )
+ assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p")
# Invalid characters in XPath element names
- assert xpath(r'di\a0 v') == (
- u("*[name() = 'di v']")) # di\xa0v
- assert xpath(r'di\[v') == (
- "*[name() = 'di[v']")
- assert xpath(r'[h\a0 ref]') == (
- u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref
- assert xpath(r'[h\]ref]') == (
- "*[attribute::*[name() = 'h]ref']]")
-
- self.assertRaises(ExpressionError, xpath, u(':fİrst-child'))
- self.assertRaises(ExpressionError, xpath, ':first-of-type')
- self.assertRaises(ExpressionError, xpath, ':only-of-type')
- self.assertRaises(ExpressionError, xpath, ':last-of-type')
- self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)')
- self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)')
- self.assertRaises(ExpressionError, xpath, ':nth-child(n-)')
- self.assertRaises(ExpressionError, xpath, ':after')
- self.assertRaises(ExpressionError, xpath, ':lorem-ipsum')
- self.assertRaises(ExpressionError, xpath, ':lorem(ipsum)')
- self.assertRaises(ExpressionError, xpath, '::lorem-ipsum')
+ assert xpath(r"di\a0 v") == (u("*[name() = 'di v']")) # di\xa0v
+ assert xpath(r"di\[v") == ("*[name() = 'di[v']")
+ assert xpath(r"[h\a0 ref]") == (u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref
+ assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]")
+
+ self.assertRaises(ExpressionError, xpath, u(":fİrst-child"))
+ self.assertRaises(ExpressionError, xpath, ":first-of-type")
+ self.assertRaises(ExpressionError, xpath, ":only-of-type")
+ self.assertRaises(ExpressionError, xpath, ":last-of-type")
+ self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)")
+ self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)")
+ self.assertRaises(ExpressionError, xpath, ":nth-child(n-)")
+ self.assertRaises(ExpressionError, xpath, ":after")
+ self.assertRaises(ExpressionError, xpath, ":lorem-ipsum")
+ self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)")
+ self.assertRaises(ExpressionError, xpath, "::lorem-ipsum")
self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4)
- self.assertRaises(TypeError, GenericTranslator().selector_to_xpath,
- 'foo')
+ self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo")
def test_unicode(self):
if sys.version_info[0] < 3:
- css = '.a\xc1b'.decode('ISO-8859-1')
+ css = ".a\xc1b".decode("ISO-8859-1")
else:
- css = '.a\xc1b'
+ css = ".a\xc1b"
xpath = GenericTranslator().css_to_xpath(css)
assert css[1:] in xpath
- xpath = xpath.encode('ascii', 'xmlcharrefreplace').decode('ASCII')
+ xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII")
assert xpath == (
"descendant-or-self::*[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' aÁb ')]")
+ "concat(' ', normalize-space(@class), ' '), ' aÁb ')]"
+ )
def test_quoting(self):
css_to_xpath = GenericTranslator().css_to_xpath
- assert css_to_xpath('*[aval="\'"]') == (
- '''descendant-or-self::*[@aval = "'"]''')
- assert css_to_xpath('*[aval="\'\'\'"]') == (
- """descendant-or-self::*[@aval = "'''"]""")
- assert css_to_xpath('*[aval=\'"\']') == (
- '''descendant-or-self::*[@aval = '"']''')
- assert css_to_xpath('*[aval=\'"""\']') == (
- '''descendant-or-self::*[@aval = '"""']''')
+ assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""")
+ assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""")
+ assert css_to_xpath("*[aval='\"']") == ("""descendant-or-self::*[@aval = '"']""")
+ assert css_to_xpath('*[aval=\'"""\']') == ('''descendant-or-self::*[@aval = '"""']''')
assert css_to_xpath(':scope > div[dataimg=""]') == (
- "descendant-or-self::*[1]/div[@dataimg = '']")
+ "descendant-or-self::*[1]/div[@dataimg = '']"
+ )
def test_unicode_escapes(self):
# \22 == '"' \20 == ' '
css_to_xpath = GenericTranslator().css_to_xpath
assert css_to_xpath(r'*[aval="\'\22\'"]') == (
- '''descendant-or-self::*[@aval = concat("'",'"',"'")]''')
+ """descendant-or-self::*[@aval = concat("'",'"',"'")]"""
+ )
assert css_to_xpath(r'*[aval="\'\22 2\'"]') == (
- '''descendant-or-self::*[@aval = concat("'",'"2',"'")]''')
+ """descendant-or-self::*[@aval = concat("'",'"2',"'")]"""
+ )
assert css_to_xpath(r'*[aval="\'\20 \'"]') == (
- '''descendant-or-self::*[@aval = "' '"]''')
- assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == (
- '''descendant-or-self::*[@aval = "' '"]''')
+ """descendant-or-self::*[@aval = "' '"]"""
+ )
+ assert css_to_xpath("*[aval=\"'\\20\r\n '\"]") == (
+ """descendant-or-self::*[@aval = "' '"]"""
+ )
def test_xpath_pseudo_elements(self):
class CustomTranslator(GenericTranslator):
def xpath_pseudo_element(self, xpath, pseudo_element):
if isinstance(pseudo_element, FunctionalPseudoElement):
- method = 'xpath_%s_functional_pseudo_element' % (
- pseudo_element.name.replace('-', '_'))
+ method = "xpath_%s_functional_pseudo_element" % (
+ pseudo_element.name.replace("-", "_")
+ )
method = _unicode_safe_getattr(self, method, None)
if not method:
raise ExpressionError(
- "The functional pseudo-element ::%s() is unknown"
- % pseudo_element.name)
+ "The functional pseudo-element ::%s() is unknown" % pseudo_element.name
+ )
xpath = method(xpath, pseudo_element.arguments)
else:
- method = 'xpath_%s_simple_pseudo_element' % (
- pseudo_element.replace('-', '_'))
+ method = "xpath_%s_simple_pseudo_element" % (pseudo_element.replace("-", "_"))
method = _unicode_safe_getattr(self, method, None)
if not method:
raise ExpressionError(
- "The pseudo-element ::%s is unknown"
- % pseudo_element)
+ "The pseudo-element ::%s is unknown" % pseudo_element
+ )
xpath = method(xpath)
return xpath
@@ -608,8 +575,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element):
# elements that have a certain number of attributes
def xpath_nb_attr_function(self, xpath, function):
nb_attributes = int(function.arguments[0].value)
- return xpath.add_condition(
- "count(@*)=%d" % nb_attributes)
+ return xpath.add_condition("count(@*)=%d" % nb_attributes)
# pseudo-class:
# elements that have 5 attributes
@@ -619,21 +585,36 @@ def xpath_five_attributes_pseudo(self, xpath):
# functional pseudo-element:
# element's attribute by name
def xpath_attr_functional_pseudo_element(self, xpath, arguments):
+<<<<<<< HEAD
attribute_name = arguments[0][0].value
other = XPathExpr('@%s' % attribute_name, '', )
return xpath.join('/', other)
+=======
+ attribute_name = arguments[0].value
+ other = XPathExpr(
+ "@%s" % attribute_name,
+ "",
+ )
+ return xpath.join("/", other)
+>>>>>>> parent/master
# pseudo-element:
# element's text() nodes
def xpath_text_node_simple_pseudo_element(self, xpath):
- other = XPathExpr('text()', '', )
- return xpath.join('/', other)
+ other = XPathExpr(
+ "text()",
+ "",
+ )
+ return xpath.join("/", other)
# pseudo-element:
# element's href attribute
def xpath_attr_href_simple_pseudo_element(self, xpath):
- other = XPathExpr('@href', '', )
- return xpath.join('/', other)
+ other = XPathExpr(
+ "@href",
+ "",
+ )
+ return xpath.join("/", other)
# pseudo-element:
# used to demonstrate operator precedence
@@ -643,91 +624,86 @@ def xpath_first_or_second_pseudo(self, xpath):
def xpath(css):
return _unicode(CustomTranslator().css_to_xpath(css))
- assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]"
- assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]"
- assert xpath('::attr(href)') == "descendant-or-self::*/@href"
- assert xpath('::text-node') == "descendant-or-self::*/text()"
- assert xpath('::attr-href') == "descendant-or-self::*/@href"
- assert xpath('p img::attr(src)') == (
- "descendant-or-self::p/descendant-or-self::*/img/@src")
- assert xpath(':scope') == "descendant-or-self::*[1]"
- assert xpath(':first-or-second[href]') == (
- "descendant-or-self::*[(@id = 'first' or @id = 'second') "
- "and (@href)]")
+ assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]"
+ assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]"
+ assert xpath("::attr(href)") == "descendant-or-self::*/@href"
+ assert xpath("::text-node") == "descendant-or-self::*/text()"
+ assert xpath("::attr-href") == "descendant-or-self::*/@href"
+ assert xpath("p img::attr(src)") == (
+ "descendant-or-self::p/descendant-or-self::*/img/@src"
+ )
+ assert xpath(":scope") == "descendant-or-self::*[1]"
+ assert xpath(":first-or-second[href]") == (
+ "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]"
+ )
- assert str(XPathExpr('', '', condition='@href')) == "[@href]"
+ assert str(XPathExpr("", "", condition="@href")) == "[@href]"
document = etree.fromstring(OPERATOR_PRECEDENCE_IDS)
- sort_key = dict(
- (el, count) for count, el in enumerate(document.getiterator())
- ).__getitem__
+ sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__
+
def operator_id(selector):
xpath = CustomTranslator().css_to_xpath(selector)
items = document.xpath(xpath)
items.sort(key=sort_key)
- return [element.get('id', 'nil') for element in items]
+ return [element.get("id", "nil") for element in items]
- assert operator_id(':first-or-second') == ['first', 'second']
- assert operator_id(':first-or-second[href]') == ['second']
- assert operator_id('[href]:first-or-second') == ['second']
+ assert operator_id(":first-or-second") == ["first", "second"]
+ assert operator_id(":first-or-second[href]") == ["second"]
+ assert operator_id("[href]:first-or-second") == ["second"]
def test_series(self):
def series(css):
- selector, = parse(':nth-child(%s)' % css)
+ (selector,) = parse(":nth-child(%s)" % css)
args = selector.parsed_tree.arguments
try:
return parse_series(args)
except ValueError:
return None
- assert series('1n+3') == (1, 3)
- assert series('1n +3') == (1, 3)
- assert series('1n + 3') == (1, 3)
- assert series('1n+ 3') == (1, 3)
- assert series('1n-3') == (1, -3)
- assert series('1n -3') == (1, -3)
- assert series('1n - 3') == (1, -3)
- assert series('1n- 3') == (1, -3)
- assert series('n-5') == (1, -5)
- assert series('odd') == (2, 1)
- assert series('even') == (2, 0)
- assert series('3n') == (3, 0)
- assert series('n') == (1, 0)
- assert series('+n') == (1, 0)
- assert series('-n') == (-1, 0)
- assert series('5') == (0, 5)
- assert series('foo') is None
- assert series('n+') is None
+ assert series("1n+3") == (1, 3)
+ assert series("1n +3") == (1, 3)
+ assert series("1n + 3") == (1, 3)
+ assert series("1n+ 3") == (1, 3)
+ assert series("1n-3") == (1, -3)
+ assert series("1n -3") == (1, -3)
+ assert series("1n - 3") == (1, -3)
+ assert series("1n- 3") == (1, -3)
+ assert series("n-5") == (1, -5)
+ assert series("odd") == (2, 1)
+ assert series("even") == (2, 0)
+ assert series("3n") == (3, 0)
+ assert series("n") == (1, 0)
+ assert series("+n") == (1, 0)
+ assert series("-n") == (-1, 0)
+ assert series("5") == (0, 5)
+ assert series("foo") is None
+ assert series("n+") is None
def test_lang(self):
document = etree.fromstring(XMLLANG_IDS)
- sort_key = dict(
- (el, count) for count, el in enumerate(document.getiterator())
- ).__getitem__
+ sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__
css_to_xpath = GenericTranslator().css_to_xpath
def langid(selector):
xpath = css_to_xpath(selector)
items = document.xpath(xpath)
items.sort(key=sort_key)
- return [element.get('id', 'nil') for element in items]
-
- assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth']
- assert langid(':lang("en-us")') == ['second', 'fourth']
- assert langid(':lang(en-nz)') == ['third']
- assert langid(':lang(fr)') == ['fifth']
- assert langid(':lang(ru)') == ['sixth']
- assert langid(":lang('ZH')") == ['eighth']
- assert langid(':lang(de) :lang(zh)') == ['eighth']
- assert langid(':lang(en), :lang(zh)') == [
- 'first', 'second', 'third', 'fourth', 'eighth']
- assert langid(':lang(es)') == []
+ return [element.get("id", "nil") for element in items]
+
+ assert langid(':lang("EN")') == ["first", "second", "third", "fourth"]
+ assert langid(':lang("en-us")') == ["second", "fourth"]
+ assert langid(":lang(en-nz)") == ["third"]
+ assert langid(":lang(fr)") == ["fifth"]
+ assert langid(":lang(ru)") == ["sixth"]
+ assert langid(":lang('ZH')") == ["eighth"]
+ assert langid(":lang(de) :lang(zh)") == ["eighth"]
+ assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"]
+ assert langid(":lang(es)") == []
def test_select(self):
document = etree.fromstring(HTML_IDS)
- sort_key = dict(
- (el, count) for count, el in enumerate(document.getiterator())
- ).__getitem__
+ sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__
css_to_xpath = GenericTranslator().css_to_xpath
html_css_to_xpath = HTMLTranslator().css_to_xpath
@@ -739,166 +715,218 @@ def select_ids(selector, html_only):
xpath = html_css_to_xpath(selector)
items = document.xpath(xpath)
items.sort(key=sort_key)
- return [element.get('id', 'nil') for element in items]
+ return [element.get("id", "nil") for element in items]
def pcss(main, *selectors, **kwargs):
- html_only = kwargs.pop('html_only', False)
+ html_only = kwargs.pop("html_only", False)
result = select_ids(main, html_only)
for selector in selectors:
assert select_ids(selector, html_only) == result
return result
- all_ids = pcss('*')
- assert all_ids[:6] == [
- 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div']
- assert all_ids[-1:] == ['foobar-span']
- assert pcss('div') == ['outer-div', 'li-div', 'foobar-div']
- assert pcss('DIV', html_only=True) == [
- 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML
- assert pcss('div div') == ['li-div']
- assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div']
- assert pcss('a[name]') == ['name-anchor']
- assert pcss('a[NAme]', html_only=True) == [
- 'name-anchor'] # case-insensitive in HTML:
- assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor']
- assert pcss('a[rel="tag"]') == ['tag-anchor']
- assert pcss('a[href*="localhost"]') == ['tag-anchor']
+ all_ids = pcss("*")
+ assert all_ids[:6] == ["html", "nil", "link-href", "link-nohref", "nil", "outer-div"]
+ assert all_ids[-1:] == ["foobar-span"]
+ assert pcss("div") == ["outer-div", "li-div", "foobar-div"]
+ assert pcss("DIV", html_only=True) == [
+ "outer-div",
+ "li-div",
+ "foobar-div",
+ ] # case-insensitive in HTML
+ assert pcss("div div") == ["li-div"]
+ assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"]
+ assert pcss("a[name]") == ["name-anchor"]
+ assert pcss("a[NAme]", html_only=True) == ["name-anchor"] # case-insensitive in HTML:
+ assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"]
+ assert pcss('a[rel="tag"]') == ["tag-anchor"]
+ assert pcss('a[href*="localhost"]') == ["tag-anchor"]
assert pcss('a[href*=""]') == []
- assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor']
- assert pcss('a[href^="http:"]') == ['tag-anchor']
+ assert pcss('a[href^="http"]') == ["tag-anchor", "nofollow-anchor"]
+ assert pcss('a[href^="http:"]') == ["tag-anchor"]
assert pcss('a[href^=""]') == []
- assert pcss('a[href$="org"]') == ['nofollow-anchor']
+ assert pcss('a[href$="org"]') == ["nofollow-anchor"]
assert pcss('a[href$=""]') == []
- assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
- 'foobar-div']
- assert pcss('[foobar~="ab bc"]',
- '[foobar~=""]', '[foobar~=" \t"]') == []
+ assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == ["foobar-div"]
+ assert pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]') == []
assert pcss('div[foobar~="cd"]') == []
- assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
+ assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ["second-li"]
# Attribute values are case sensitive
assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
assert pcss('*[lang|="e"]') == []
# ... :lang() is not.
- assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
- 'second-li', 'li-div']
+ assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == ["second-li", "li-div"]
assert pcss(':lang("e")', html_only=True) == []
- assert pcss(':scope > div') == []
- assert pcss(':scope body') == ['nil']
- assert pcss(':scope body > div') == ['outer-div', 'foobar-div']
- assert pcss(':scope head') == ['nil']
- assert pcss(':scope html') == []
+ assert pcss(":scope > div") == []
+ assert pcss(":scope body") == ["nil"]
+ assert pcss(":scope body > div") == ["outer-div", "foobar-div"]
+ assert pcss(":scope head") == ["nil"]
+ assert pcss(":scope html") == []
# --- nth-* and nth-last-* -------------------------------------
# select nothing
- assert pcss('li:nth-child(-n)') == []
+ assert pcss("li:nth-child(-n)") == []
# select all children
- assert pcss('li:nth-child(n)') == [
- 'first-li', 'second-li', 'third-li', 'fourth-li',
- 'fifth-li', 'sixth-li', 'seventh-li']
-
- assert pcss('li:nth-child(3)',
- '#first-li ~ :nth-child(3)') == ['third-li']
- assert pcss('li:nth-child(10)') == []
- assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
- 'li:nth-child(2n+0)') == [
- 'second-li', 'fourth-li', 'sixth-li']
- assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [
- 'first-li', 'third-li', 'fifth-li', 'seventh-li']
- assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li']
- assert pcss('li:nth-child(3n+1)') == [
- 'first-li', 'fourth-li', 'seventh-li']
- assert pcss('li:nth-child(-n+3)') == [
- 'first-li', 'second-li', 'third-li']
- assert pcss('li:nth-child(-2n+4)') == ['second-li', 'fourth-li']
- assert pcss('li:nth-last-child(0)') == []
- assert pcss('li:nth-last-child(1)') == ['seventh-li']
- assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [
- 'second-li', 'fourth-li', 'sixth-li']
- assert pcss('li:nth-last-child(2n+1)') == [
- 'first-li', 'third-li', 'fifth-li', 'seventh-li']
- assert pcss('li:nth-last-child(2n+2)') == [
- 'second-li', 'fourth-li', 'sixth-li']
- assert pcss('li:nth-last-child(3n+1)') == [
- 'first-li', 'fourth-li', 'seventh-li']
- assert pcss('ol:first-of-type') == ['first-ol']
- assert pcss('ol:nth-child(1)') == []
- assert pcss('ol:nth-of-type(2)') == ['second-ol']
- assert pcss('ol:nth-last-of-type(1)') == ['second-ol']
+ assert pcss("li:nth-child(n)") == [
+ "first-li",
+ "second-li",
+ "third-li",
+ "fourth-li",
+ "fifth-li",
+ "sixth-li",
+ "seventh-li",
+ ]
+
+ assert pcss("li:nth-child(3)", "#first-li ~ :nth-child(3)") == ["third-li"]
+ assert pcss("li:nth-child(10)") == []
+ assert pcss("li:nth-child(2n)", "li:nth-child(even)", "li:nth-child(2n+0)") == [
+ "second-li",
+ "fourth-li",
+ "sixth-li",
+ ]
+ assert pcss("li:nth-child(+2n+1)", "li:nth-child(odd)") == [
+ "first-li",
+ "third-li",
+ "fifth-li",
+ "seventh-li",
+ ]
+ assert pcss("li:nth-child(2n+4)") == ["fourth-li", "sixth-li"]
+ assert pcss("li:nth-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"]
+ assert pcss("li:nth-child(-n+3)") == ["first-li", "second-li", "third-li"]
+ assert pcss("li:nth-child(-2n+4)") == ["second-li", "fourth-li"]
+ assert pcss("li:nth-last-child(0)") == []
+ assert pcss("li:nth-last-child(1)") == ["seventh-li"]
+ assert pcss("li:nth-last-child(2n)", "li:nth-last-child(even)") == [
+ "second-li",
+ "fourth-li",
+ "sixth-li",
+ ]
+ assert pcss("li:nth-last-child(2n+1)") == [
+ "first-li",
+ "third-li",
+ "fifth-li",
+ "seventh-li",
+ ]
+ assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"]
+ assert pcss("li:nth-last-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"]
+ assert pcss("ol:first-of-type") == ["first-ol"]
+ assert pcss("ol:nth-child(1)") == []
+ assert pcss("ol:nth-of-type(2)") == ["second-ol"]
+ assert pcss("ol:nth-last-of-type(1)") == ["second-ol"]
# "+" and "~" tests
- assert pcss('ol#first-ol li + li:nth-child(4)') == ['fourth-li']
- assert pcss('li + li:nth-child(1)') == []
- assert pcss('li ~ li:nth-child(2n+1)') == [
- 'third-li', 'fifth-li', 'seventh-li'
- ] # all but the first
- assert pcss('li ~ li:nth-last-child(2n+1)') == [
- 'third-li', 'fifth-li', 'seventh-li'
- ] # all but the first
-
- assert pcss('span:only-child') == ['foobar-span']
- assert pcss('li div:only-child') == ['li-div']
- assert pcss('div *:only-child') == ['li-div', 'foobar-span']
- self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
- assert pcss('p:only-of-type') == ['paragraph']
- assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
- assert pcss('li:empty') == [
- 'third-li', 'fourth-li', 'fifth-li', 'sixth-li']
- assert pcss(':root', 'html:root') == ['html']
- assert pcss('li:root', '* :root') == []
+ assert pcss("ol#first-ol li + li:nth-child(4)") == ["fourth-li"]
+ assert pcss("li + li:nth-child(1)") == []
+ assert pcss("li ~ li:nth-child(2n+1)") == [
+ "third-li",
+ "fifth-li",
+ "seventh-li",
+ ] # all but the first
+ assert pcss("li ~ li:nth-last-child(2n+1)") == [
+ "third-li",
+ "fifth-li",
+ "seventh-li",
+ ] # all but the first
+
+ assert pcss("span:only-child") == ["foobar-span"]
+ assert pcss("li div:only-child") == ["li-div"]
+ assert pcss("div *:only-child") == ["li-div", "foobar-span"]
+ self.assertRaises(ExpressionError, pcss, "p *:only-of-type")
+ assert pcss("p:only-of-type") == ["paragraph"]
+ assert pcss("a:empty", "a:EMpty") == ["name-anchor"]
+ assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"]
+ assert pcss(":root", "html:root") == ["html"]
+ assert pcss("li:root", "* :root") == []
assert pcss('*:contains("link")', ':CONtains("link")') == [
- 'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor']
+ "html",
+ "nil",
+ "outer-div",
+ "tag-anchor",
+ "nofollow-anchor",
+ ]
assert pcss('*:contains("LInk")') == [] # case sensitive
assert pcss('*:contains("e")') == [
- 'html', 'nil', 'outer-div', 'first-ol', 'first-li',
- 'paragraph', 'p-em']
+ "html",
+ "nil",
+ "outer-div",
+ "first-ol",
+ "first-li",
+ "paragraph",
+ "p-em",
+ ]
assert pcss('*:contains("E")') == [] # case-sensitive
- assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol']
- assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li']
- assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [
- 'third-li', 'fourth-li']
- assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li']
- assert pcss('li div', 'li > div', 'div div') == ['li-div']
- assert pcss('div > div') == []
- assert pcss('div>.c', 'div > .c') == ['first-ol']
- assert pcss('div + div') == ['foobar-div']
- assert pcss('a ~ a') == ['tag-anchor', 'nofollow-anchor']
- assert pcss('a[rel="tag"] ~ a') == ['nofollow-anchor']
- assert pcss('ol#first-ol li:last-child') == ['seventh-li']
- assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li']
- assert pcss('#outer-div:first-child') == ['outer-div']
- assert pcss('#outer-div :first-child') == [
- 'name-anchor', 'first-li', 'li-div', 'p-b',
- 'checkbox-fieldset-disabled', 'area-href']
- assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor']
- assert pcss(':not(*)') == []
- assert pcss('a:not([href])') == ['name-anchor']
- assert pcss('ol :Not(li[class])') == [
- 'first-li', 'second-li', 'li-div',
- 'fifth-li', 'sixth-li', 'seventh-li']
- assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li']
+ assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"]
+ assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"]
+ assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == ["third-li", "fourth-li"]
+ assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"]
+ assert pcss("li div", "li > div", "div div") == ["li-div"]
+ assert pcss("div > div") == []
+ assert pcss("div>.c", "div > .c") == ["first-ol"]
+ assert pcss("div + div") == ["foobar-div"]
+ assert pcss("a ~ a") == ["tag-anchor", "nofollow-anchor"]
+ assert pcss('a[rel="tag"] ~ a') == ["nofollow-anchor"]
+ assert pcss("ol#first-ol li:last-child") == ["seventh-li"]
+ assert pcss("ol#first-ol *:last-child") == ["li-div", "seventh-li"]
+ assert pcss("#outer-div:first-child") == ["outer-div"]
+ assert pcss("#outer-div :first-child") == [
+ "name-anchor",
+ "first-li",
+ "li-div",
+ "p-b",
+ "checkbox-fieldset-disabled",
+ "area-href",
+ ]
+ assert pcss("a[href]") == ["tag-anchor", "nofollow-anchor"]
+ assert pcss(":not(*)") == []
+ assert pcss("a:not([href])") == ["name-anchor"]
+ assert pcss("ol :Not(li[class])") == [
+ "first-li",
+ "second-li",
+ "li-div",
+ "fifth-li",
+ "sixth-li",
+ "seventh-li",
+ ]
+ assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"]
+ assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"]
+ assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"]
+ assert pcss("ol.a.b.c > li.c:nth-child(3)") == ["third-li"]
# Invalid characters in XPath element names, should not crash
- assert pcss(r'di\a0 v', r'div\[') == []
- assert pcss(r'[h\a0 ref]', r'[h\]ref]') == []
+ assert pcss(r"di\a0 v", r"div\[") == []
+ assert pcss(r"[h\a0 ref]", r"[h\]ref]") == []
# HTML-specific
- assert pcss(':link', html_only=True) == [
- 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']
- assert pcss(':visited', html_only=True) == []
- assert pcss(':enabled', html_only=True) == [
- 'link-href', 'tag-anchor', 'nofollow-anchor',
- 'checkbox-unchecked', 'text-checked', 'checkbox-checked',
- 'area-href']
- assert pcss(':disabled', html_only=True) == [
- 'checkbox-disabled', 'checkbox-disabled-checked', 'fieldset',
- 'checkbox-fieldset-disabled']
- assert pcss(':checked', html_only=True) == [
- 'checkbox-checked', 'checkbox-disabled-checked']
+ assert pcss(":link", html_only=True) == [
+ "link-href",
+ "tag-anchor",
+ "nofollow-anchor",
+ "area-href",
+ ]
+ assert pcss(":visited", html_only=True) == []
+ assert pcss(":enabled", html_only=True) == [
+ "link-href",
+ "tag-anchor",
+ "nofollow-anchor",
+ "checkbox-unchecked",
+ "text-checked",
+ "checkbox-checked",
+ "area-href",
+ ]
+ assert pcss(":disabled", html_only=True) == [
+ "checkbox-disabled",
+ "checkbox-disabled-checked",
+ "fieldset",
+ "checkbox-fieldset-disabled",
+ ]
+ assert pcss(":checked", html_only=True) == [
+ "checkbox-checked",
+ "checkbox-disabled-checked",
+ ]
def test_select_shakespeare(self):
document = html.document_fromstring(HTML_SHAKESPEARE)
- body = document.xpath('//body')[0]
+ body = document.xpath("//body")[0]
css_to_xpath = GenericTranslator().css_to_xpath
try:
@@ -921,66 +949,67 @@ def count(selector):
## Changed from original; probably because I'm only
## searching the body.
- #assert count('*') == 252
- assert count('*') == 246
- assert count('div:contains(CELIA)') == 26
- assert count('div:only-child') == 22 # ?
- assert count('div:nth-child(even)') == 106
- assert count('div:nth-child(2n)') == 106
- assert count('div:nth-child(odd)') == 137
- assert count('div:nth-child(2n+1)') == 137
- assert count('div:nth-child(n)') == 243
- assert count('div:last-child') == 53
- assert count('div:first-child') == 51
- assert count('div > div') == 242
- assert count('div + div') == 190
- assert count('div ~ div') == 190
- assert count('body') == 1
- assert count('body div') == 243
- assert count('div') == 243
- assert count('div div') == 242
- assert count('div div div') == 241
- assert count('div, div, div') == 243
- assert count('div, a, span') == 243
- assert count('.dialog') == 51
- assert count('div.dialog') == 51
- assert count('div .dialog') == 51
- assert count('div.character, div.dialog') == 99
- assert count('div.direction.dialog') == 0
- assert count('div.dialog.direction') == 0
- assert count('div.dialog.scene') == 1
- assert count('div.scene.scene') == 1
- assert count('div.scene .scene') == 0
- assert count('div.direction .dialog ') == 0
- assert count('div .dialog .direction') == 4
- assert count('div.dialog .dialog .direction') == 4
- assert count('#speech5') == 1
- assert count('div#speech5') == 1
- assert count('div #speech5') == 1
- assert count('div.scene div.dialog') == 49
- assert count('div#scene1 div.dialog div') == 142
- assert count('#scene1 #speech1') == 1
- assert count('div[class]') == 103
- assert count('div[class=dialog]') == 50
- assert count('div[class^=dia]') == 51
- assert count('div[class$=log]') == 50
- assert count('div[class*=sce]') == 1
- assert count('div[class|=dialog]') == 50 # ? Seems right
- assert count('div[class!=madeup]') == 243 # ? Seems right
- assert count('div[class~=dialog]') == 51 # ? Seems right
- assert count(':scope > div') == 1
- assert count(':scope > div > div[class=dialog]') == 1
- assert count(':scope > div div') == 242
-
-OPERATOR_PRECEDENCE_IDS = '''
+ # assert count('*') == 252
+ assert count("*") == 246
+ assert count("div:contains(CELIA)") == 26
+ assert count("div:only-child") == 22 # ?
+ assert count("div:nth-child(even)") == 106
+ assert count("div:nth-child(2n)") == 106
+ assert count("div:nth-child(odd)") == 137
+ assert count("div:nth-child(2n+1)") == 137
+ assert count("div:nth-child(n)") == 243
+ assert count("div:last-child") == 53
+ assert count("div:first-child") == 51
+ assert count("div > div") == 242
+ assert count("div + div") == 190
+ assert count("div ~ div") == 190
+ assert count("body") == 1
+ assert count("body div") == 243
+ assert count("div") == 243
+ assert count("div div") == 242
+ assert count("div div div") == 241
+ assert count("div, div, div") == 243
+ assert count("div, a, span") == 243
+ assert count(".dialog") == 51
+ assert count("div.dialog") == 51
+ assert count("div .dialog") == 51
+ assert count("div.character, div.dialog") == 99
+ assert count("div.direction.dialog") == 0
+ assert count("div.dialog.direction") == 0
+ assert count("div.dialog.scene") == 1
+ assert count("div.scene.scene") == 1
+ assert count("div.scene .scene") == 0
+ assert count("div.direction .dialog ") == 0
+ assert count("div .dialog .direction") == 4
+ assert count("div.dialog .dialog .direction") == 4
+ assert count("#speech5") == 1
+ assert count("div#speech5") == 1
+ assert count("div #speech5") == 1
+ assert count("div.scene div.dialog") == 49
+ assert count("div#scene1 div.dialog div") == 142
+ assert count("#scene1 #speech1") == 1
+ assert count("div[class]") == 103
+ assert count("div[class=dialog]") == 50
+ assert count("div[class^=dia]") == 51
+ assert count("div[class$=log]") == 50
+ assert count("div[class*=sce]") == 1
+ assert count("div[class|=dialog]") == 50 # ? Seems right
+ assert count("div[class!=madeup]") == 243 # ? Seems right
+ assert count("div[class~=dialog]") == 51 # ? Seems right
+ assert count(":scope > div") == 1
+ assert count(":scope > div > div[class=dialog]") == 1
+ assert count(":scope > div div") == 242
+
+
+OPERATOR_PRECEDENCE_IDS = """
-'''
+"""
-XMLLANG_IDS = '''
+XMLLANG_IDS = """
a
b
@@ -992,9 +1021,9 @@ def count(selector):
-'''
+"""
-HTML_IDS = '''
+HTML_IDS = """
@@ -1043,10 +1072,10 @@ def count(selector):
-'''
+"""
-HTML_SHAKESPEARE = '''
+HTML_SHAKESPEARE = """
@@ -1355,8 +1384,8 @@ def count(selector):