diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 0000000..7fcde04 --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,2 @@ +skips: +- B101 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..89e6e07 --- /dev/null +++ b/.flake8 @@ -0,0 +1,15 @@ +[flake8] +max-line-length = 99 +ignore = W503 +exclude = + .git + .tox + venv* + + # pending revision + cssselect/__init__.py + cssselect/parser.py + cssselect/xpath.py + docs/conf.py + setup.py + tests/test_cssselect.py diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 0000000..db380bb --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,36 @@ +name: Checks +on: [push, pull_request] + +jobs: + checks: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - python-version: 3 + env: + TOXENV: black + - python-version: 3 + env: + TOXENV: flake8 + - python-version: 3 + env: + TOXENV: pylint + - python-version: 3 + env: + TOXENV: security + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Run check + env: ${{ matrix.env }} + run: | + pip install -U pip + pip install -U tox + tox diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 817d824..799f52f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,19 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - include: - - python-version: 2.7 - env: - TOXENV: py - - python-version: 3.5 - env: - TOXENV: py - - python-version: 3.6 - env: - TOXENV: py - - python-version: 3.7 - env: - TOXENV: py + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 @@ -29,10 +17,10 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run tests - env: ${{ matrix.env }} run: | + pip install -U pip pip install -U tox - tox + tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) \ No newline at end of file + run: bash <(curl -s https://codecov.io/bash) diff --git a/.gitignore b/.gitignore index b0ab86a..c276bd1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ /dist /docs/_build /.coverage -.idea \ No newline at end of file +.idea +htmlcov/ +coverage.xml diff --git a/README.rst b/README.rst index 9bcd648..9708616 100644 --- a/README.rst +++ b/README.rst @@ -10,32 +10,30 @@ cssselect: CSS Selectors for Python :target: https://pypi.python.org/pypi/cssselect :alt: Supported Python Versions -.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg - :target: https://travis-ci.org/scrapy/cssselect - :alt: Build Status +.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg + :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml + :alt: Tests .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg :target: https://codecov.io/github/scrapy/cssselect?branch=master :alt: Coverage report -*cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_ -expressions. Such expressions can be used in lxml_ or another XPath engine -to find the matching elements in an XML or HTML document. +**cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and +translate them to `XPath 1.0`_ expressions. -This module used to live inside of lxml as ``lxml.cssselect`` before it was -extracted as a stand-alone project. - -.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/ -.. _XPath 1.0: https://www.w3.org/TR/xpath/ -.. _lxml: http://lxml.de/ +`XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find +the matching elements in an XML or HTML document. +Find the cssselect online documentation at https://cssselect.readthedocs.io. Quick facts: -* Free software: BSD licensed -* Compatible with Python 2.7 and 3.4+ -* Latest documentation `on Read the Docs `_ * Source, issues and pull requests `on GitHub `_ -* Releases `on PyPI `_ +* Releases `on PyPI `_ * Install with ``pip install cssselect`` + + +.. _CSS3 selectors: https://www.w3.org/TR/selectors-3/ +.. _XPath 1.0: https://www.w3.org/TR/xpath/all/ +.. _lxml: https://lxml.de/ diff --git a/cssselect/__init__.py b/cssselect/__init__.py index b41cef9..2e4f824 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -13,10 +13,15 @@ """ -from cssselect.parser import (parse, Selector, FunctionalPseudoElement, - SelectorError, SelectorSyntaxError) +from cssselect.parser import ( + parse, + Selector, + FunctionalPseudoElement, + SelectorError, + SelectorSyntaxError, +) from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.1.0' +VERSION = "1.1.0" __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index 74e1501..7493d02 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -27,7 +27,7 @@ def ascii_lower(string): """Lower-case, but only in the ASCII range.""" - return string.encode('utf8').lower().decode('utf8') + return string.encode("utf8").lower().decode("utf8") class SelectorError(Exception): @@ -39,12 +39,14 @@ class SelectorError(Exception): """ + class SelectorSyntaxError(SelectorError, SyntaxError): """Parsing a selector that does not match the grammar.""" #### Parsed objects + class Selector(object): """ Represents a parsed selector. @@ -55,10 +57,10 @@ class Selector(object): or unsupported pseudo-elements. """ + def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree - if pseudo_element is not None and not isinstance( - pseudo_element, FunctionalPseudoElement): + if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement): pseudo_element = ascii_lower(pseudo_element) #: A :class:`FunctionalPseudoElement`, #: or the identifier for the pseudo-element as a string, @@ -86,24 +88,22 @@ def __repr__(self): if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: - pseudo_element = '::%s' % self.pseudo_element + pseudo_element = "::%s" % self.pseudo_element else: - pseudo_element = '' - return '%s[%r%s]' % ( - self.__class__.__name__, self.parsed_tree, pseudo_element) + pseudo_element = "" + return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) def canonical(self): - """Return a CSS representation for this selector (a string) - """ + """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = '::%s' % self.pseudo_element.canonical() + pseudo_element = "::%s" % self.pseudo_element.canonical() elif self.pseudo_element: - pseudo_element = '::%s' % self.pseudo_element + pseudo_element = "::%s" % self.pseudo_element else: - pseudo_element = '' - res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element) + pseudo_element = "" + res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element) if len(res) > 1: - res = res.lstrip('*') + res = res.lstrip("*") return res def specificity(self): @@ -122,16 +122,16 @@ class Class(object): """ Represents selector.class_name """ + def __init__(self, selector, class_name): self.selector = selector self.class_name = class_name def __repr__(self): - return '%s[%r.%s]' % ( - self.__class__.__name__, self.selector, self.class_name) + return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name) def canonical(self): - return '%s.%s' % (self.selector.canonical(), self.class_name) + return "%s.%s" % (self.selector.canonical(), self.class_name) def specificity(self): a, b, c = self.selector.specificity() @@ -156,6 +156,7 @@ class FunctionalPseudoElement(object): Use at your own risks. """ + def __init__(self, name, arguments): self.name = ascii_lower(name) self.arguments = arguments @@ -213,8 +214,8 @@ def argument_types(self): return [token.type for token in self.arguments] def canonical(self): - args = ''.join(token.css() for token in self.arguments) - return '%s:%s(%s)' % (self.selector.canonical(), self.name, args) + args = "".join(token.css() for token in self.arguments) + return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) def specificity(self): a, b, c = self.selector.specificity() @@ -226,16 +227,16 @@ class Pseudo(object): """ Represents selector:ident """ + def __init__(self, selector, ident): self.selector = selector self.ident = ascii_lower(ident) def __repr__(self): - return '%s[%r:%s]' % ( - self.__class__.__name__, self.selector, self.ident) + return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident) def canonical(self): - return '%s:%s' % (self.selector.canonical(), self.ident) + return "%s:%s" % (self.selector.canonical(), self.ident) def specificity(self): a, b, c = self.selector.specificity() @@ -247,19 +248,19 @@ class Negation(object): """ Represents selector:not(subselector) """ + def __init__(self, selector, subselector): self.selector = selector self.subselector = subselector def __repr__(self): - return '%s[%r:not(%r)]' % ( - self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s:not(%s)' % (self.selector.canonical(), subsel) + subsel = subsel.lstrip("*") + return "%s:not(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -267,10 +268,38 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Matching(object): + """ + Represents selector:is(selector_list) + """ + + def __init__(self, selector, selector_list): + self.selector = selector + self.selector_list = selector_list + + def __repr__(self): + return "%s[%r:is(%s)]" % ( + self.__class__.__name__, + self.selector, + ", ".join(map(repr, self.selector_list)), + ) + + def canonical(self): + selector_arguments = [] + for s in self.selector_list: + selarg = s.canonical() + selector_arguments.append(selarg.lstrip("*")) + return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) + + def specificity(self): + return max([x.specificity() for x in self.selector_list]) + + class Attrib(object): """ Represents selector[namespace|attrib operator value] """ + def __init__(self, selector, namespace, attrib, operator, value): self.selector = selector self.namespace = namespace @@ -280,29 +309,32 @@ def __init__(self, selector, namespace, attrib, operator, value): def __repr__(self): if self.namespace: - attrib = '%s|%s' % (self.namespace, self.attrib) + attrib = "%s|%s" % (self.namespace, self.attrib) else: attrib = self.attrib - if self.operator == 'exists': - return '%s[%r[%s]]' % ( - self.__class__.__name__, self.selector, attrib) + if self.operator == "exists": + return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) else: - return '%s[%r[%s %s %r]]' % ( - self.__class__.__name__, self.selector, attrib, - self.operator, self.value.value) + return "%s[%r[%s %s %r]]" % ( + self.__class__.__name__, + self.selector, + attrib, + self.operator, + self.value.value, + ) def canonical(self): if self.namespace: - attrib = '%s|%s' % (self.namespace, self.attrib) + attrib = "%s|%s" % (self.namespace, self.attrib) else: attrib = self.attrib - if self.operator == 'exists': + if self.operator == "exists": op = attrib else: - op = '%s%s%s' % (attrib, self.operator, self.value.css()) + op = "%s%s%s" % (attrib, self.operator, self.value.css()) - return '%s[%s]' % (self.selector.canonical(), op) + return "%s[%s]" % (self.selector.canonical(), op) def specificity(self): a, b, c = self.selector.specificity() @@ -317,17 +349,18 @@ class Element(object): `None` is for the universal selector '*' """ + def __init__(self, namespace=None, element=None): self.namespace = namespace self.element = element def __repr__(self): - return '%s[%s]' % (self.__class__.__name__, self.canonical()) + return "%s[%s]" % (self.__class__.__name__, self.canonical()) def canonical(self): - element = self.element or '*' + element = self.element or "*" if self.namespace: - element = '%s|%s' % (self.namespace, element) + element = "%s|%s" % (self.namespace, element) return element def specificity(self): @@ -341,16 +374,16 @@ class Hash(object): """ Represents selector#id """ + def __init__(self, selector, id): self.selector = selector self.id = id def __repr__(self): - return '%s[%r#%s]' % ( - self.__class__.__name__, self.selector, self.id) + return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id) def canonical(self): - return '%s#%s' % (self.selector.canonical(), self.id) + return "%s#%s" % (self.selector.canonical(), self.id) def specificity(self): a, b, c = self.selector.specificity() @@ -366,19 +399,17 @@ def __init__(self, selector, combinator, subselector): self.subselector = subselector def __repr__(self): - if self.combinator == ' ': - comb = '' + if self.combinator == " ": + comb = "" else: comb = self.combinator - return '%s[%r %s %r]' % ( - self.__class__.__name__, self.selector, comb, self.subselector) + return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s %s %s' % ( - self.selector.canonical(), self.combinator, subsel) + subsel = subsel.lstrip("*") + return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -389,14 +420,13 @@ def specificity(self): #### Parser # foo -_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$') +_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$") # foo#bar or #bar -_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$') +_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$") # foo.bar or .bar -_class_re = re.compile( - r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$') +_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$") def parse(css): @@ -420,16 +450,16 @@ def parse(css): return [Selector(Element(element=match.group(1)))] match = _id_re.match(css) if match is not None: - return [Selector(Hash(Element(element=match.group(1) or None), - match.group(2)))] + return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))] match = _class_re.match(css) if match is not None: - return [Selector(Class(Element(element=match.group(1) or None), - match.group(2)))] + return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))] stream = TokenStream(tokenize(css)) stream.source = css return list(parse_selector_group(stream)) + + # except SelectorSyntaxError: # e = sys.exc_info()[1] # message = "%s at %s -> %r" % ( @@ -443,31 +473,32 @@ def parse_selector_group(stream): stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) - if stream.peek() == ('DELIM', ','): + if stream.peek() == ("DELIM", ","): stream.next() stream.skip_whitespace() else: break + def parse_selector(stream): result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() peek = stream.peek() - if peek in (('EOF', None), ('DELIM', ',')): + if peek in (("EOF", None), ("DELIM", ",")): break if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s not at the end of a selector' - % pseudo_element) - if peek.is_delim('+', '>', '~'): + "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + ) + if peek.is_delim("+", ">", "~"): # A combinator combinator = stream.next().value stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended # at peek == ' ' - combinator = ' ' + combinator = " " next_selector, pseudo_element = parse_simple_selector(stream) result = CombinedSelector(result, combinator, next_selector) return result, pseudo_element @@ -477,13 +508,13 @@ def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() - if peek.type == 'IDENT' or peek == ('DELIM', '*'): - if peek.type == 'IDENT': + if peek.type == "IDENT" or peek == ("DELIM", "*"): + if peek.type == "IDENT": namespace = stream.next().value else: stream.next() namespace = None - if stream.peek() == ('DELIM', '|'): + if stream.peek() == ("DELIM", "|"): stream.next() element = stream.next_ident_or_star() else: @@ -495,75 +526,83 @@ def parse_simple_selector(stream, inside_negation=False): pseudo_element = None while 1: peek = stream.peek() - if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or ( - inside_negation and peek == ('DELIM', ')')): + if ( + peek.type in ("S", "EOF") + or peek.is_delim(",", "+", ">", "~") + or (inside_negation and peek == ("DELIM", ")")) + ): break if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s not at the end of a selector' - % pseudo_element) - if peek.type == 'HASH': + "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + ) + if peek.type == "HASH": result = Hash(result, stream.next().value) - elif peek == ('DELIM', '.'): + elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) - elif peek == ('DELIM', '|'): + elif peek == ("DELIM", "|"): stream.next() result = Element(None, stream.next_ident()) - elif peek == ('DELIM', '['): + elif peek == ("DELIM", "["): stream.next() result = parse_attrib(result, stream) - elif peek == ('DELIM', ':'): + elif peek == ("DELIM", ":"): stream.next() - if stream.peek() == ('DELIM', ':'): + if stream.peek() == ("DELIM", ":"): stream.next() pseudo_element = stream.next_ident() - if stream.peek() == ('DELIM', '('): + if stream.peek() == ("DELIM", "("): stream.next() pseudo_element = FunctionalPseudoElement( - pseudo_element, parse_arguments(stream)) + pseudo_element, parse_arguments(stream) + ) continue ident = stream.next_ident() - if ident.lower() in ('first-line', 'first-letter', - 'before', 'after'): + if ident.lower() in ("first-line", "first-letter", "before", "after"): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. pseudo_element = _unicode(ident) continue - if stream.peek() != ('DELIM', '('): + if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if result.__repr__() == 'Pseudo[Element[*]:scope]': - if not (len(stream.used) == 2 or - (len(stream.used) == 3 - and stream.used[0].type == 'S')): + if result.__repr__() == "Pseudo[Element[*]:scope]": + if not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + ): raise SelectorSyntaxError( 'Got immediate child pseudo-element ":scope" ' - 'not at the start of a selector') + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() - if ident.lower() == 'not': + if ident.lower() == "not": if inside_negation: - raise SelectorSyntaxError('Got nested :not()') + raise SelectorSyntaxError("Got nested :not()") argument, argument_pseudo_element = parse_simple_selector( - stream, inside_negation=True) + stream, inside_negation=True + ) next = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s inside :not() at %s' - % (argument_pseudo_element, next.pos)) - if next != ('DELIM', ')'): + "Got pseudo-element ::%s inside :not() at %s" + % (argument_pseudo_element, next.pos) + ) + if next != ("DELIM", ")"): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) + elif ident.lower() in ("matches", "is"): + selectors = parse_simple_selector_arguments(stream) + result = Matching(result, selectors) else: arguments, of_type = parse_arguments(stream) result = Function(result, ident, arguments, of_type) else: - raise SelectorSyntaxError( - "Expected selector, got %s" % (peek,)) + raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) if len(stream.used) == selector_start: - raise SelectorSyntaxError( - "Expected selector, got %s" % (stream.peek(),)) + raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),)) return result, pseudo_element @@ -583,9 +622,31 @@ def parse_arguments(stream): arguments.append(next) elif next == ('DELIM', ')'): return arguments, None + else: + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + + +def parse_simple_selector_arguments(stream): + arguments = [] + while 1: + result, pseudo_element = parse_simple_selector(stream, True) + if pseudo_element: raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + "Got pseudo-element ::%s inside function" % (pseudo_element,) + ) + stream.skip_whitespace() + next = stream.next() + if next in (("EOF", None), ("DELIM", ",")): + stream.next() + stream.skip_whitespace() + arguments.append(result) + elif next == ("DELIM", ")"): + arguments.append(result) + break + else: + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + return arguments def parse_of_type(stream): @@ -602,15 +663,14 @@ def parse_of_type(stream): def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() - if attrib is None and stream.peek() != ('DELIM', '|'): - raise SelectorSyntaxError( - "Expected '|', got %s" % (stream.peek(),)) - if stream.peek() == ('DELIM', '|'): + if attrib is None and stream.peek() != ("DELIM", "|"): + raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) + if stream.peek() == ("DELIM", "|"): stream.next() - if stream.peek() == ('DELIM', '='): + if stream.peek() == ("DELIM", "="): namespace = None stream.next() - op = '|=' + op = "|=" else: namespace = attrib attrib = stream.next_ident() @@ -620,27 +680,23 @@ def parse_attrib(selector, stream): if op is None: stream.skip_whitespace() next = stream.next() - if next == ('DELIM', ']'): - return Attrib(selector, namespace, attrib, 'exists', None) - elif next == ('DELIM', '='): - op = '=' - elif next.is_delim('^', '$', '*', '~', '|', '!') and ( - stream.peek() == ('DELIM', '=')): - op = next.value + '=' + if next == ("DELIM", "]"): + return Attrib(selector, namespace, attrib, "exists", None) + elif next == ("DELIM", "="): + op = "=" + elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")): + op = next.value + "=" stream.next() else: - raise SelectorSyntaxError( - "Operator expected, got %s" % (next,)) + raise SelectorSyntaxError("Operator expected, got %s" % (next,)) stream.skip_whitespace() value = stream.next() - if value.type not in ('IDENT', 'STRING'): - raise SelectorSyntaxError( - "Expected string or ident, got %s" % (value,)) + if value.type not in ("IDENT", "STRING"): + raise SelectorSyntaxError("Expected string or ident, got %s" % (value,)) stream.skip_whitespace() next = stream.next() - if next != ('DELIM', ']'): - raise SelectorSyntaxError( - "Expected ']', got %s" % (next,)) + if next != ("DELIM", "]"): + raise SelectorSyntaxError("Expected ']', got %s" % (next,)) return Attrib(selector, namespace, attrib, op, value) @@ -659,18 +715,18 @@ def parse_series(tokens): s = ''.join(token.value for token in tokens).strip() if s == 'odd': return 2, 1 - elif s == 'even': + elif s == "even": return 2, 0 - elif s == 'n': + elif s == "n": return 1, 0 - if 'n' not in s: + if "n" not in s: # Just b return 0, int(s) a, b = s.split("n", 1) if not a: a = 1 - elif a == '-' or a == '+': - a = int(a+'1') + elif a == "-" or a == "+": + a = int(a + "1") else: a = int(a) if not b: @@ -683,6 +739,7 @@ def parse_series(tokens): #### Token objects + class Token(tuple): def __new__(cls, type_, value, pos): obj = tuple.__new__(cls, (type_, value)) @@ -693,13 +750,13 @@ def __repr__(self): return "<%s '%s' at %i>" % (self.type, self.value, self.pos) def is_delim(self, *values): - return self.type == 'DELIM' and self.value in values + return self.type == "DELIM" and self.value in values type = property(operator.itemgetter(0)) value = property(operator.itemgetter(1)) def css(self): - if self.type == 'STRING': + if self.type == "STRING": return repr(self.value) else: return self.value @@ -707,41 +764,44 @@ def css(self): class EOFToken(Token): def __new__(cls, pos): - return Token.__new__(cls, 'EOF', None, pos) + return Token.__new__(cls, "EOF", None, pos) def __repr__(self): - return '<%s at %i>' % (self.type, self.pos) + return "<%s at %i>" % (self.type, self.pos) #### Tokenizer class TokenMacros: - unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?' - escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]' - string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape - nonascii = r'[^\0-\177]' - nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii) - nmstart = '[_a-z]|%s|%s' % (escape, nonascii) + unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?" + escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]" + string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape + nonascii = r"[^\0-\177]" + nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii) + nmstart = "[_a-z]|%s|%s" % (escape, nonascii) + def _compile(pattern): return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match -_match_whitespace = _compile(r'[ \t\r\n\f]+') -_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') -_match_hash = _compile('#(?:%(nmchar)s)+') -_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*') + +_match_whitespace = _compile(r"[ \t\r\n\f]+") +_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)") +_match_hash = _compile("#(?:%(nmchar)s)+") +_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*") _match_string_by_quote = { "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"), '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'), } -_sub_simple_escape = re.compile(r'\\(.)').sub +_sub_simple_escape = re.compile(r"\\(.)").sub _sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub -_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub +_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython -_replace_simple = operator.methodcaller('group', 1) +_replace_simple = operator.methodcaller("group", 1) + def _replace_unicode(match): codepoint = int(match.group(1), 16) @@ -762,59 +822,62 @@ def tokenize(s): while pos < len_s: match = _match_whitespace(s, pos=pos) if match: - yield Token('S', ' ', pos) + yield Token("S", " ", pos) pos = match.end() continue match = _match_ident(s, pos=pos) if match: - value = _sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, match.group())) - yield Token('IDENT', value, pos) + value = _sub_simple_escape( + _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()) + ) + yield Token("IDENT", value, pos) pos = match.end() continue match = _match_hash(s, pos=pos) if match: - value = _sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, match.group()[1:])) - yield Token('HASH', value, pos) + value = _sub_simple_escape( + _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:]) + ) + yield Token("HASH", value, pos) pos = match.end() continue quote = s[pos] if quote in _match_string_by_quote: match = _match_string_by_quote[quote](s, pos=pos + 1) - assert match, 'Should have found at least an empty match' + assert match, "Should have found at least an empty match" end_pos = match.end() if end_pos == len_s: - raise SelectorSyntaxError('Unclosed string at %s' % pos) + raise SelectorSyntaxError("Unclosed string at %s" % pos) if s[end_pos] != quote: - raise SelectorSyntaxError('Invalid string at %s' % pos) - value = _sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, - _sub_newline_escape('', match.group()))) - yield Token('STRING', value, pos) + raise SelectorSyntaxError("Invalid string at %s" % pos) + value = _sub_simple_escape( + _replace_simple, + _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())), + ) + yield Token("STRING", value, pos) pos = end_pos + 1 continue match = _match_number(s, pos=pos) if match: value = match.group() - yield Token('NUMBER', value, pos) + yield Token("NUMBER", value, pos) pos = match.end() continue pos2 = pos + 2 - if s[pos:pos2] == '/*': - pos = s.find('*/', pos2) + if s[pos:pos2] == "/*": + pos = s.find("*/", pos2) if pos == -1: pos = len_s else: pos += 2 continue - yield Token('DELIM', s[pos], pos) + yield Token("DELIM", s[pos], pos) pos += 1 assert pos == len_s @@ -852,21 +915,20 @@ def peek(self): def next_ident(self): next = self.next() - if next.type != 'IDENT': - raise SelectorSyntaxError('Expected ident, got %s' % (next,)) + if next.type != "IDENT": + raise SelectorSyntaxError("Expected ident, got %s" % (next,)) return next.value def next_ident_or_star(self): next = self.next() - if next.type == 'IDENT': + if next.type == "IDENT": return next.value - elif next == ('DELIM', '*'): + elif next == ("DELIM", "*"): return None else: - raise SelectorSyntaxError( - "Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) def skip_whitespace(self): peek = self.peek() - if peek.type == 'S': + if peek.type == "S": self.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0feab6b..7c5c2ef 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -28,7 +28,7 @@ def _unicode_safe_getattr(obj, name, default=None): # getattr() with a non-ASCII name fails on Python 2.x - name = name.encode('ascii', 'replace').decode('ascii') + name = name.encode("ascii", "replace").decode("ascii") return getattr(obj, name, default) @@ -38,48 +38,47 @@ class ExpressionError(SelectorError, RuntimeError): #### XPath Helpers -class XPathExpr(object): - def __init__(self, path='', element='*', condition='', star_prefix=False): +class XPathExpr(object): + def __init__(self, path="", element="*", condition="", star_prefix=False): self.path = path self.element = element self.condition = condition def __str__(self): - path = _unicode(self.path) + _unicode(self.element) + path = _unicode(self.path) + _unicode(self.element) if self.condition: - path += '[%s]' % self.condition + path += "[%s]" % self.condition return path def __repr__(self): - return '%s[%s]' % (self.__class__.__name__, self) + return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition): + def add_condition(self, condition, conjuction="and"): if self.condition: - self.condition = '(%s) and (%s)' % (self.condition, condition) + self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: self.condition = condition return self def add_name_test(self): - if self.element == '*': + if self.element == "*": # We weren't doing a test anyway return - self.add_condition( - "name() = %s" % GenericTranslator.xpath_literal(self.element)) - self.element = '*' + self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element)) + self.element = "*" def add_star_prefix(self): """ Append '*/' to the path to keep the context constrained to a single parent. """ - self.path += '*/' + self.path += "*/" def join(self, combiner, other): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. - if other.path != '*/': + if other.path != "*/": path += other.path self.path = path self.element = other.element @@ -92,14 +91,15 @@ def join(self, combiner, other): # The spec is actually more permissive than that, but don’t bother. # This is just for the fast path. # http://www.w3.org/TR/REC-xml/#NT-NameStartChar -is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match +is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match # Test that the string is not empty and does not contain whitespace -is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match +is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match #### Translation + class GenericTranslator(object): """ Translator for "generic" XML documents. @@ -122,30 +122,30 @@ class GenericTranslator(object): #### combinator_mapping = { - ' ': 'descendant', - '>': 'child', - '+': 'direct_adjacent', - '~': 'indirect_adjacent', + " ": "descendant", + ">": "child", + "+": "direct_adjacent", + "~": "indirect_adjacent", } attribute_operator_mapping = { - 'exists': 'exists', - '=': 'equals', - '~=': 'includes', - '|=': 'dashmatch', - '^=': 'prefixmatch', - '$=': 'suffixmatch', - '*=': 'substringmatch', - '!=': 'different', # XXX Not in Level 3 but meh + "exists": "exists", + "=": "equals", + "~=": "includes", + "|=": "dashmatch", + "^=": "prefixmatch", + "$=": "suffixmatch", + "*=": "substringmatch", + "!=": "different", # XXX Not in Level 3 but meh } #: The attribute used for ID selectors depends on the document language: #: http://www.w3.org/TR/selectors/#id-selectors - id_attribute = 'id' + id_attribute = "id" #: The attribute used for ``:lang()`` depends on the document language: #: http://www.w3.org/TR/selectors/#lang-pseudo - lang_attribute = 'xml:lang' + lang_attribute = "xml:lang" #: The case sensitivity of document language element names, #: attribute names, and attribute values in selectors depends @@ -168,7 +168,7 @@ class GenericTranslator(object): # class used to represent and xpath expression xpathexpr_cls = XPathExpr - def css_to_xpath(self, css, prefix='descendant-or-self::'): + def css_to_xpath(self, css, prefix="descendant-or-self::"): """Translate a *group of selectors* to XPath. Pseudo-elements are not supported here since XPath only knows @@ -187,12 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. """ - return ' | '.join(self.selector_to_xpath(selector, prefix, - translate_pseudo_elements=True) - for selector in parse(css)) + return " | ".join( + self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) + for selector in parse(css) + ) - def selector_to_xpath(self, selector, prefix='descendant-or-self::', - translate_pseudo_elements=False): + def selector_to_xpath( + self, selector, prefix="descendant-or-self::", translate_pseudo_elements=False + ): """Translate a parsed selector to XPath. @@ -213,14 +215,14 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::', The equivalent XPath 1.0 expression as an Unicode string. """ - tree = getattr(selector, 'parsed_tree', None) + tree = getattr(selector, "parsed_tree", None) if not tree: - raise TypeError('Expected a parsed selector, got %r' % (selector,)) + raise TypeError("Expected a parsed selector, got %r" % (selector,)) xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) - return (prefix or '') + _unicode(xpath) + return (prefix or "") + _unicode(xpath) def xpath_pseudo_element(self, xpath, pseudo_element): """Translate a pseudo-element. @@ -229,7 +231,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): but can be overridden by sub-classes. """ - raise ExpressionError('Pseudo-elements are not supported.') + raise ExpressionError("Pseudo-elements are not supported.") @staticmethod def xpath_literal(s): @@ -239,75 +241,82 @@ def xpath_literal(s): elif '"' not in s: s = '"%s"' % s else: - s = "concat(%s)" % ','.join([ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) if part - ]) + s = "concat(%s)" % ",".join( + [ + (("'" in part) and '"%s"' or "'%s'") % part + for part in split_at_single_quotes(s) + if part + ] + ) return s def xpath(self, parsed_selector): """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, 'xpath_%s' % type_name.lower(), None) + method = getattr(self, "xpath_%s" % type_name.lower(), None) if method is None: - raise ExpressionError('%s is not supported.' % type_name) + raise ExpressionError("%s is not supported." % type_name) return method(parsed_selector) - # Dispatched by parsed object type def xpath_combinedselector(self, combined): """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, 'xpath_%s_combinator' % combinator) - return method(self.xpath(combined.selector), - self.xpath(combined.subselector)) + method = getattr(self, "xpath_%s_combinator" % combinator) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation): xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() if sub_xpath.condition: - return xpath.add_condition('not(%s)' % sub_xpath.condition) + return xpath.add_condition("not(%s)" % sub_xpath.condition) else: - return xpath.add_condition('0') + return xpath.add_condition("0") + + def xpath_matching(self, matching): + xpath = self.xpath(matching.selector) + exprs = [self.xpath(selector) for selector in matching.selector_list] + for e in exprs: + e.add_name_test() + if e.condition: + xpath.add_condition(e.condition, "or") + return xpath def xpath_function(self, function): """Translate a functional pseudo-class.""" - method = 'xpath_%s_function' % function.name.replace('-', '_') + method = "xpath_%s_function" % function.name.replace("-", "_") method = _unicode_safe_getattr(self, method, None) if not method: - raise ExpressionError( - "The pseudo-class :%s() is unknown" % function.name) + raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" - method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') + method = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") method = _unicode_safe_getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? - raise ExpressionError( - "The pseudo-class :%s is unknown" % pseudo.ident) + raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) return method(self.xpath(pseudo.selector)) - def xpath_attrib(self, selector): """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, 'xpath_attrib_%s' % operator) + method = getattr(self, "xpath_attrib_%s" % operator) if self.lower_case_attribute_names: name = selector.attrib.lower() else: name = selector.attrib safe = is_safe_name(name) if selector.namespace: - name = '%s:%s' % (selector.namespace, name) + name = "%s:%s" % (selector.namespace, name) safe = safe and is_safe_name(selector.namespace) if safe: - attrib = '@' + name + attrib = "@" + name else: - attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name) + attrib = "attribute::*[name() = %s]" % self.xpath_literal(name) if selector.value is None: value = None elif self.lower_case_attribute_values: @@ -320,19 +329,18 @@ def xpath_class(self, class_selector): """Translate a class selector.""" # .foo is defined as [class~=foo] in the spec. xpath = self.xpath(class_selector.selector) - return self.xpath_attrib_includes( - xpath, '@class', class_selector.class_name) + return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name) def xpath_hash(self, id_selector): """Translate an ID selector.""" xpath = self.xpath(id_selector.selector) - return self.xpath_attrib_equals(xpath, '@id', id_selector.id) + return self.xpath_attrib_equals(xpath, "@id", id_selector.id) def xpath_element(self, selector): """Translate a type or universal selector.""" element = selector.element if not element: - element = '*' + element = "*" safe = True else: safe = is_safe_name(element) @@ -341,39 +349,36 @@ def xpath_element(self, selector): if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes - element = '%s:%s' % (selector.namespace, element) + element = "%s:%s" % (selector.namespace, element) safe = safe and is_safe_name(selector.namespace) xpath = self.xpathexpr_cls(element=element) if not safe: xpath.add_name_test() return xpath - # CombinedSelector: dispatch by combinator def xpath_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left""" - return left.join('/descendant-or-self::*/', right) + return left.join("/descendant-or-self::*/", right) def xpath_child_combinator(self, left, right): """right is an immediate child of left""" - return left.join('/', right) + return left.join("/", right) def xpath_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left""" - xpath = left.join('/following-sibling::', right) + xpath = left.join("/following-sibling::", right) xpath.add_name_test() - return xpath.add_condition('position() = 1') + return xpath.add_condition("position() = 1") def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" - return left.join('/following-sibling::', right) - + return left.join("/following-sibling::", right) # Function: dispatch by function/pseudo-class name - def xpath_nth_child_function(self, xpath, function, last=False, - add_name_test=True): + def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): try: a, b = parse_series(function.arguments) except ValueError: @@ -427,14 +432,14 @@ def xpath_nth_child_function(self, xpath, function, last=False, # for a == 1, nth-*(an+b) means n+b-1 siblings before/after, # and since n ∈ {0, 1, 2, ...}, if b-1<=0, # there is always an "n" matching any number of siblings (maybe none) - if a == 1 and b_min_1 <=0: + if a == 1 and b_min_1 <= 0: return xpath # early-exit condition 2: # ~~~~~~~~~~~~~~~~~~~~~~~ # an+b-1 siblings with a<0 and (b-1)<0 is not possible if a < 0 and b_min_1 < 0: - return xpath.add_condition('0') + return xpath.add_condition("0") # `add_name_test` boolean is inverted and somewhat counter-intuitive: # @@ -444,20 +449,20 @@ def xpath_nth_child_function(self, xpath, function, last=False, elif add_name_test: nodetest = '*' else: - nodetest = '%s' % xpath.element + nodetest = "%s" % xpath.element # count siblings before or after the element if not last: - siblings_count = 'count(preceding-sibling::%s)' % nodetest + siblings_count = "count(preceding-sibling::%s)" % nodetest else: - siblings_count = 'count(following-sibling::%s)' % nodetest + siblings_count = "count(following-sibling::%s)" % nodetest # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition('%s = %s' % (siblings_count, b_min_1)) + return xpath.add_condition("%s = %s" % (siblings_count, b_min_1)) expressions = [] @@ -466,12 +471,12 @@ def xpath_nth_child_function(self, xpath, function, last=False, # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expressions.append('%s >= %s' % (siblings_count, b_min_1)) + expressions.append("%s >= %s" % (siblings_count, b_min_1)) else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expressions.append('%s <= %s' % (siblings_count, b_min_1)) + expressions.append("%s <= %s" % (siblings_count, b_min_1)) # operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -494,56 +499,48 @@ def xpath_nth_child_function(self, xpath, function, last=False, b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg = '+%s' % b_neg - left = '(%s %s)' % (left, b_neg) + b_neg = "+%s" % b_neg + left = "(%s %s)" % (left, b_neg) - expressions.append('%s mod %s = 0' % (left, a)) + expressions.append("%s mod %s = 0" % (left, a)) if len(expressions) > 1: - template = '(%s)' + template = "(%s)" else: - template = '%s' - xpath.add_condition(' and '.join(template % expression - for expression in expressions)) + template = "%s" + xpath.add_condition(" and ".join(template % expression for expression in expressions)) return xpath def xpath_nth_last_child_function(self, xpath, function): return self.xpath_nth_child_function(xpath, function, last=True) def xpath_nth_of_type_function(self, xpath, function): - if xpath.element == '*': - raise ExpressionError( - "*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, - add_name_test=False) + if xpath.element == "*": + raise ExpressionError("*:nth-of-type() is not implemented") + return self.xpath_nth_child_function(xpath, function, add_name_test=False) def xpath_nth_last_of_type_function(self, xpath, function): - if xpath.element == '*': - raise ExpressionError( - "*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, last=True, - add_name_test=False) + if xpath.element == "*": + raise ExpressionError("*:nth-of-type() is not implemented") + return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False) def xpath_contains_function(self, xpath, function): # Defined there, removed in later drafts: # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" - % function.arguments) + "Expected a single string or ident for :contains(), got %r" % function.arguments + ) value = function.arguments[0].value - return xpath.add_condition( - 'contains(., %s)' % self.xpath_literal(value)) + return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) def xpath_lang_function(self, xpath, function): - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments) + "Expected a single string or ident for :lang(), got %r" % function.arguments + ) value = function.arguments[0].value - return xpath.add_condition( - "lang(%s)" % (self.xpath_literal(value))) - + return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name @@ -559,31 +556,28 @@ def xpath_scope_pseudo(self, xpath): return xpath.add_condition("1") def xpath_first_child_pseudo(self, xpath): - return xpath.add_condition('count(preceding-sibling::*) = 0') + return xpath.add_condition("count(preceding-sibling::*) = 0") def xpath_last_child_pseudo(self, xpath): - return xpath.add_condition('count(following-sibling::*) = 0') + return xpath.add_condition("count(following-sibling::*) = 0") def xpath_first_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:first-of-type is not implemented") - return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:first-of-type is not implemented") + return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element) def xpath_last_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:last-of-type is not implemented") - return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:last-of-type is not implemented") + return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) def xpath_only_child_pseudo(self, xpath): - return xpath.add_condition('count(parent::*/child::*) = 1') + return xpath.add_condition("count(parent::*/child::*) = 1") def xpath_only_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:only-of-type is not implemented") - return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:only-of-type is not implemented") + return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) def xpath_empty_pseudo(self, xpath): return xpath.add_condition("not(*) and not(string-length())") @@ -610,61 +604,63 @@ def xpath_attrib_exists(self, xpath, name, value): return xpath def xpath_attrib_equals(self, xpath, name, value): - xpath.add_condition('%s = %s' % (name, self.xpath_literal(value))) + xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_different(self, xpath, name, value): # FIXME: this seems like a weird hack... if value: - xpath.add_condition('not(%s) or %s != %s' - % (name, name, self.xpath_literal(value))) + xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) else: - xpath.add_condition('%s != %s' - % (name, self.xpath_literal(value))) + xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_includes(self, xpath, name, value): if is_non_whitespace(value): xpath.add_condition( "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(' '+value+' '))) + % (name, name, self.xpath_literal(" " + value + " ")) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_dashmatch(self, xpath, name, value): # Weird, but true... - xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % ( - name, - name, self.xpath_literal(value), - name, self.xpath_literal(value + '-'))) + xpath.add_condition( + "%s and (%s = %s or starts-with(%s, %s))" + % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-")) + ) return xpath def xpath_attrib_prefixmatch(self, xpath, name, value): if value: - xpath.add_condition('%s and starts-with(%s, %s)' % ( - name, name, self.xpath_literal(value))) + xpath.add_condition( + "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_suffixmatch(self, xpath, name, value): if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( - '%s and substring(%s, string-length(%s)-%s) = %s' - % (name, name, name, len(value)-1, self.xpath_literal(value))) + "%s and substring(%s, string-length(%s)-%s) = %s" + % (name, name, name, len(value) - 1, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_substringmatch(self, xpath, name, value): if value: # Attribute selectors are case sensitive - xpath.add_condition('%s and contains(%s, %s)' % ( - name, name, self.xpath_literal(value))) + xpath.add_condition( + "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath @@ -685,7 +681,7 @@ class HTMLTranslator(GenericTranslator): """ - lang_attribute = 'lang' + lang_attribute = "lang" def __init__(self, xhtml=False): self.xhtml = xhtml # Might be useful for sub-classes? @@ -699,33 +695,36 @@ def xpath_checked_pseudo(self, xpath): return xpath.add_condition( "(@selected and name(.) = 'option') or " "(@checked " - "and (name(.) = 'input' or name(.) = 'command')" - "and (@type = 'checkbox' or @type = 'radio'))") + "and (name(.) = 'input' or name(.) = 'command')" + "and (@type = 'checkbox' or @type = 'radio'))" + ) def xpath_lang_function(self, xpath, function): - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments) + "Expected a single string or ident for :lang(), got %r" % function.arguments + ) value = function.arguments[0].value return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" - # XPath 1.0 has no lower-case function... - "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " - "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + '-'))) + # XPath 1.0 has no lower-case function... + "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + "'abcdefghijklmnopqrstuvwxyz'), " + "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + ) def xpath_link_pseudo(self, xpath): - return xpath.add_condition("@href and " - "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") + return xpath.add_condition( + "@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" + ) # Links are never visited, the implementation for :visited is the same # as in GenericTranslator def xpath_disabled_pseudo(self, xpath): # http://www.w3.org/TR/html5/section-index.html#attributes-1 - return xpath.add_condition(''' + return xpath.add_condition( + """ ( @disabled and ( @@ -747,13 +746,15 @@ def xpath_disabled_pseudo(self, xpath): ) and ancestor::fieldset[@disabled] ) - ''') + """ + ) # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." def xpath_enabled_pseudo(self, xpath): # http://www.w3.org/TR/html5/section-index.html#attributes-1 - return xpath.add_condition(''' + return xpath.add_condition( + """ ( @href and ( name(.) = 'a' or @@ -781,7 +782,8 @@ def xpath_enabled_pseudo(self, xpath): @disabled or ancestor::optgroup[@disabled] ) ) - ''') + """ + ) # FIXME: ... or "li elements that are children of menu elements, # and that have a child element that defines a command, if the first # such element's Disabled State facet is false (not disabled)". diff --git a/docs/conf.py b/docs/conf.py index aa897ef..9dc2575 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,9 +50,9 @@ # built documents. # # The full version, including alpha/beta/rc tags. -init_py = open(os.path.join(os.path.dirname(__file__), - '..', 'cssselect', '__init__.py')).read() -release = re.search("VERSION = '([^']+)'", init_py).group(1) +with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file: + init_py = init_file.read() +release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. version = release.rstrip('dev') diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..7da580b --- /dev/null +++ b/pylintrc @@ -0,0 +1,34 @@ +[MASTER] +persistent=no + +[MESSAGES CONTROL] +disable=assignment-from-no-return, + bad-continuation, + bad-whitespace, + c-extension-no-member, + consider-using-in, + fixme, + inconsistent-return-statements, + invalid-name, + missing-class-docstring, + missing-function-docstring, + missing-module-docstring, + multiple-imports, + no-else-return, + no-member, + no-self-use, + raise-missing-from, + redefined-builtin, + redefined-outer-name, + too-few-public-methods, + too-many-arguments, + too-many-branches, + too-many-function-args, + too-many-lines, + too-many-public-methods, + too-many-statements, + undefined-variable, + unidiomatic-typecheck, + unused-argument, + unused-import, + useless-object-inheritance # Required for Python 2 support diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..57a5583 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 99 diff --git a/setup.py b/setup.py index de7128d..f95721d 100644 --- a/setup.py +++ b/setup.py @@ -2,45 +2,47 @@ import re import os.path + try: from setuptools import setup - extra_kwargs = {'test_suite': 'cssselect.tests'} + + extra_kwargs = {"test_suite": "cssselect.tests"} except ImportError: from distutils.core import setup + extra_kwargs = {} ROOT = os.path.dirname(__file__) -README = open(os.path.join(ROOT, 'README.rst')).read() -INIT_PY = open(os.path.join(ROOT, 'cssselect', '__init__.py')).read() -VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1) +with open(os.path.join(ROOT, "README.rst")) as readme_file: + README = readme_file.read() +with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: + INIT_PY = init_file.read() +VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) setup( - name='cssselect', + name="cssselect", version=VERSION, - author='Ian Bicking', - author_email='ianb@colorstudy.com', - maintainer='Paul Tremberth', - maintainer_email='paul.tremberth@gmail.com', - description= - 'cssselect parses CSS3 Selectors and translates them to XPath 1.0', + author="Ian Bicking", + author_email="ianb@colorstudy.com", + maintainer="Paul Tremberth", + maintainer_email="paul.tremberth@gmail.com", + description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", long_description=README, - url='https://github.com/scrapy/cssselect', - license='BSD', - packages=['cssselect'], - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', + url="https://github.com/scrapy/cssselect", + license="BSD", + packages=["cssselect"], + python_requires=">=3.6", classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7' + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - **extra_kwargs + **extra_kwargs, ) diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 000d5f2..0000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -codecov -lxml;python_version!="3.4" -lxml<=4.3.5;python_version=="3.4" -pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4 -pytest-cov \ No newline at end of file diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index e40ca62..9aaede9 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -21,17 +21,23 @@ import unittest from lxml import etree, html -from cssselect import (parse, GenericTranslator, HTMLTranslator, - SelectorSyntaxError, ExpressionError) -from cssselect.parser import (tokenize, parse_series, _unicode, - FunctionalPseudoElement) +from cssselect import ( + parse, + GenericTranslator, + HTMLTranslator, + SelectorSyntaxError, + ExpressionError, +) +from cssselect.parser import tokenize, parse_series, _unicode, FunctionalPseudoElement from cssselect.xpath import _unicode_safe_getattr, XPathExpr if sys.version_info[0] < 3: # Python 2 def u(text): - return text.decode('utf8') + return text.decode("utf8") + + else: # Python 3 def u(text): @@ -41,8 +47,8 @@ def u(text): class TestCssselect(unittest.TestCase): def test_tokenizer(self): tokens = [ - _unicode(item) for item in tokenize( - u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] + _unicode(item) for item in tokenize(u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')) + ] assert tokens == [ u(""), "", @@ -69,8 +75,7 @@ def repr_parse(css): selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [repr(selector.parsed_tree).replace("(u'", "('") - for selector in selectors] + return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] def parse_many(first, *others): result = repr_parse(first) @@ -78,88 +83,91 @@ def parse_many(first, *others): assert repr_parse(other) == result return result - assert parse_many('*') == ['Element[*]'] - assert parse_many('*|*') == ['Element[*]'] - assert parse_many('*|foo') == ['Element[foo]'] - assert parse_many('|foo') == ['Element[foo]'] - assert parse_many('foo|*') == ['Element[foo|*]'] - assert parse_many('foo|bar') == ['Element[foo|bar]'] + assert parse_many("*") == ["Element[*]"] + assert parse_many("*|*") == ["Element[*]"] + assert parse_many("*|foo") == ["Element[foo]"] + assert parse_many("|foo") == ["Element[foo]"] + assert parse_many("foo|*") == ["Element[foo|*]"] + assert parse_many("foo|bar") == ["Element[foo|bar]"] # This will never match, but it is valid: - assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]'] - assert parse_many( - 'div>.foo', - 'div> .foo', - 'div >.foo', - 'div > .foo', - 'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo' - ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]'] - assert parse_many('td.foo,.bar', - 'td.foo, .bar', - 'td.foo\t\r\n\f ,\t\r\n\f .bar' - ) == [ - 'Class[Element[td].foo]', - 'Class[Element[*].bar]' + assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"] + assert ( + parse_many( + "div>.foo", + "div> .foo", + "div >.foo", + "div > .foo", + "div \n> \t \t .foo", + "div\r>\n\n\n.foo", + "div\f>\f.foo", + ) + == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] + ) + assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [ + "Class[Element[td].foo]", + "Class[Element[*].bar]", + ] + assert parse_many("div, td.foo, div.bar span") == [ + "Element[div]", + "Class[Element[td].foo]", + "CombinedSelector[Class[Element[div].bar] Element[span]]", + ] + assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"] + assert parse_many("td:first") == ["Pseudo[Element[td]:first]"] + assert parse_many("td:first") == ["Pseudo[Element[td]:first]"] + assert parse_many("td :first") == [ + "CombinedSelector[Element[td] Pseudo[Element[*]:first]]" + ] + assert parse_many("td :first") == [ + "CombinedSelector[Element[td] Pseudo[Element[*]:first]]" + ] + assert parse_many("a[name]", "a[ name\t]") == ["Attrib[Element[a][name]]"] + assert parse_many("a [name]") == [ + "CombinedSelector[Element[a] Attrib[Element[*][name]]]" + ] + assert parse_many('a[rel="include"]', "a[rel = include]") == [ + "Attrib[Element[a][rel = 'include']]" ] - assert parse_many('div, td.foo, div.bar span') == [ - 'Element[div]', - 'Class[Element[td].foo]', - 'CombinedSelector[Class[Element[div].bar] ' - ' Element[span]]'] - assert parse_many('div > p') == [ - 'CombinedSelector[Element[div] > Element[p]]'] - assert parse_many('td:first') == [ - 'Pseudo[Element[td]:first]'] - assert parse_many('td:first') == [ - 'Pseudo[Element[td]:first]'] - assert parse_many('td :first') == [ - 'CombinedSelector[Element[td] ' - ' Pseudo[Element[*]:first]]'] - assert parse_many('td :first') == [ - 'CombinedSelector[Element[td] ' - ' Pseudo[Element[*]:first]]'] - assert parse_many('a[name]', 'a[ name\t]') == [ - 'Attrib[Element[a][name]]'] - assert parse_many('a [name]') == [ - 'CombinedSelector[Element[a] Attrib[Element[*][name]]]'] - assert parse_many('a[rel="include"]', 'a[rel = include]') == [ - "Attrib[Element[a][rel = 'include']]"] assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [ - "Attrib[Element[a][hreflang |= 'en']]"] - assert parse_many('div:nth-child(10)') == [ - "Function[Element[div]:nth-child(['10'])]"] - assert parse_many(':nth-child(2n+2)') == [ - "Function[Element[*]:nth-child(['2', 'n', '+2'])]"] - assert parse_many('div:nth-of-type(10)') == [ - "Function[Element[div]:nth-of-type(['10'])]"] - assert parse_many('div div:nth-of-type(10) .aclass') == [ - 'CombinedSelector[CombinedSelector[Element[div] ' - "Function[Element[div]:nth-of-type(['10'])]] " - ' Class[Element[*].aclass]]'] - assert parse_many('label:only') == [ - 'Pseudo[Element[label]:only]'] - assert parse_many('a:lang(fr)') == [ - "Function[Element[a]:lang(['fr'])]"] - assert parse_many('div:contains("foo")') == [ - "Function[Element[div]:contains(['foo'])]"] - assert parse_many('div#foobar') == [ - 'Hash[Element[div]#foobar]'] - assert parse_many('div:not(div.foo)') == [ - 'Negation[Element[div]:not(Class[Element[div].foo])]'] - assert parse_many('td ~ th') == [ - 'CombinedSelector[Element[td] ~ Element[th]]'] - assert parse_many(':scope > foo') == [ - 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + "Attrib[Element[a][hreflang |= 'en']]" + ] + assert parse_many("div:nth-child(10)") == ["Function[Element[div]:nth-child(['10'])]"] + assert parse_many(":nth-child(2n+2)") == [ + "Function[Element[*]:nth-child(['2', 'n', '+2'])]" + ] + assert parse_many("div:nth-of-type(10)") == ["Function[Element[div]:nth-of-type(['10'])]"] + assert parse_many("div div:nth-of-type(10) .aclass") == [ + "CombinedSelector[CombinedSelector[Element[div] " + "Function[Element[div]:nth-of-type(['10'])]] " + " Class[Element[*].aclass]]" + ] + assert parse_many("label:only") == ["Pseudo[Element[label]:only]"] + assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] + assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] + assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] + assert parse_many("div:not(div.foo)") == [ + "Negation[Element[div]:not(Class[Element[div].foo])]" + ] + assert parse_many("div:is(.foo, #bar)") == [ + "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]" + ] + assert parse_many(":is(:hover, :visited)") == [ + "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]" + ] + assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"] + assert parse_many(":scope > foo") == [ + "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]" ] - assert parse_many(' :scope > foo') == [ - 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + assert parse_many(" :scope > foo") == [ + "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]" ] - assert parse_many(':scope > foo bar > div') == [ - 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' - 'Element[foo]] Element[bar]] > Element[div]]' + assert parse_many(":scope > foo bar > div") == [ + "CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > " + "Element[foo]] Element[bar]] > Element[div]]" ] - assert parse_many(':scope > #foo #bar') == [ - 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' - 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]' + assert parse_many(":scope > #foo #bar") == [ + "CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > " + "Hash[Element[*]#foo]] Hash[Element[*]#bar]]" ] def test_pseudo_elements(self): @@ -185,61 +193,66 @@ def test_pseudo_repr(css): selector = result[0] return selector.parsed_tree.__repr__() - assert parse_one('foo') == ('Element[foo]', None) - assert parse_one('*') == ('Element[*]', None) - assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) - assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None) + assert parse_one("foo") == ("Element[foo]", None) + assert parse_one("*") == ("Element[*]", None) + assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None) + assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None) # Special cases for CSS 2.1 pseudo-elements - assert parse_one(':BEfore') == ('Element[*]', 'before') - assert parse_one(':aftER') == ('Element[*]', 'after') - assert parse_one(':First-Line') == ('Element[*]', 'first-line') - assert parse_one(':First-Letter') == ('Element[*]', 'first-letter') - - assert parse_one('::befoRE') == ('Element[*]', 'before') - assert parse_one('::AFter') == ('Element[*]', 'after') - assert parse_one('::firsT-linE') == ('Element[*]', 'first-line') - assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter') - - assert parse_one('::text-content') == ('Element[*]', 'text-content') - assert parse_one('::attr(name)') == ( - "Element[*]", "FunctionalPseudoElement[::attr(['name'])]") - - assert parse_one('::Selection') == ('Element[*]', 'selection') - assert parse_one('foo:after') == ('Element[foo]', 'after') - assert parse_one('foo::selection') == ('Element[foo]', 'selection') - assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == ( - 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ ' - 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]', - 'selection') - assert parse_pseudo(':scope > div, foo bar') == [ - ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), - ('CombinedSelector[Element[foo] Element[bar]]', None) + assert parse_one(":BEfore") == ("Element[*]", "before") + assert parse_one(":aftER") == ("Element[*]", "after") + assert parse_one(":First-Line") == ("Element[*]", "first-line") + assert parse_one(":First-Letter") == ("Element[*]", "first-letter") + + assert parse_one("::befoRE") == ("Element[*]", "before") + assert parse_one("::AFter") == ("Element[*]", "after") + assert parse_one("::firsT-linE") == ("Element[*]", "first-line") + assert parse_one("::firsT-letteR") == ("Element[*]", "first-letter") + + assert parse_one("::text-content") == ("Element[*]", "text-content") + assert parse_one("::attr(name)") == ( + "Element[*]", + "FunctionalPseudoElement[::attr(['name'])]", + ) + + assert parse_one("::Selection") == ("Element[*]", "selection") + assert parse_one("foo:after") == ("Element[foo]", "after") + assert parse_one("foo::selection") == ("Element[foo]", "selection") + assert parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection") == ( + "CombinedSelector[Hash[Element[lorem]#ipsum] ~ " + "Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]", + "selection", + ) + assert parse_pseudo(":scope > div, foo bar") == [ + ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None), + ("CombinedSelector[Element[foo] Element[bar]]", None), ] - assert parse_pseudo('foo:before, bar, baz:after') == [ - ('Element[foo]', 'before'), ('Element[bar]', None), - ('Element[baz]', 'after') + assert parse_pseudo("foo:before, bar, baz:after") == [ + ("Element[foo]", "before"), + ("Element[bar]", None), + ("Element[baz]", "after"), ] # Special cases for CSS 2.1 pseudo-elements are ignored by default - for pseudo in ('after', 'before', 'first-line', 'first-letter'): - selector, = parse('e:%s' % pseudo) + for pseudo in ("after", "before", "first-line", "first-letter"): + (selector,) = parse("e:%s" % pseudo) assert selector.pseudo_element == pseudo - assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e" + assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e" # Pseudo Elements are ignored by default, but if allowed they are not # supported by GenericTranslator tr = GenericTranslator() - selector, = parse('e::foo') - assert selector.pseudo_element == 'foo' - assert tr.selector_to_xpath(selector, prefix='') == "e" - self.assertRaises(ExpressionError, tr.selector_to_xpath, selector, - translate_pseudo_elements=True) + (selector,) = parse("e::foo") + assert selector.pseudo_element == "foo" + assert tr.selector_to_xpath(selector, prefix="") == "e" + self.assertRaises( + ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True + ) # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() - assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]' - assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]' + assert test_pseudo_repr(u":fİrst-child") == u"Pseudo[Element[*]:fİrst-child]" + assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self): def specificity(css): @@ -247,32 +260,34 @@ def specificity(css): assert len(selectors) == 1 return selectors[0].specificity() - assert specificity('*') == (0, 0, 0) - assert specificity(' foo') == (0, 0, 1) - assert specificity(':empty ') == (0, 1, 0) - assert specificity(':before') == (0, 0, 1) - assert specificity('*:before') == (0, 0, 1) - assert specificity(':nth-child(2)') == (0, 1, 0) - assert specificity('.bar') == (0, 1, 0) - assert specificity('[baz]') == (0, 1, 0) + assert specificity("*") == (0, 0, 0) + assert specificity(" foo") == (0, 0, 1) + assert specificity(":empty ") == (0, 1, 0) + assert specificity(":before") == (0, 0, 1) + assert specificity("*:before") == (0, 0, 1) + assert specificity(":nth-child(2)") == (0, 1, 0) + assert specificity(".bar") == (0, 1, 0) + assert specificity("[baz]") == (0, 1, 0) assert specificity('[baz="4"]') == (0, 1, 0) assert specificity('[baz^="4"]') == (0, 1, 0) - assert specificity('#lipsum') == (1, 0, 0) + assert specificity("#lipsum") == (1, 0, 0) - assert specificity(':not(*)') == (0, 0, 0) - assert specificity(':not(foo)') == (0, 0, 1) - assert specificity(':not(.foo)') == (0, 1, 0) - assert specificity(':not([foo])') == (0, 1, 0) - assert specificity(':not(:empty)') == (0, 1, 0) - assert specificity(':not(#foo)') == (1, 0, 0) + assert specificity(":not(*)") == (0, 0, 0) + assert specificity(":not(foo)") == (0, 0, 1) + assert specificity(":not(.foo)") == (0, 1, 0) + assert specificity(":not([foo])") == (0, 1, 0) + assert specificity(":not(:empty)") == (0, 1, 0) + assert specificity(":not(#foo)") == (1, 0, 0) - assert specificity('foo:empty') == (0, 1, 1) - assert specificity('foo:before') == (0, 0, 2) - assert specificity('foo::before') == (0, 0, 2) - assert specificity('foo:empty::before') == (0, 1, 2) + assert specificity(":is(.foo, #bar)") == (1, 0, 0) + assert specificity(":is(:hover, :visited)") == (0, 1, 0) - assert specificity('#lorem + foo#ipsum:first-child > bar:first-line' - ) == (2, 1, 3) + assert specificity("foo:empty") == (0, 1, 1) + assert specificity("foo:before") == (0, 0, 2) + assert specificity("foo::before") == (0, 0, 2) + assert specificity("foo:empty::before") == (0, 1, 2) + + assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3) def test_css_export(self): def css2css(css, res=None): @@ -280,32 +295,34 @@ def css2css(css, res=None): assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) - css2css('*') - css2css(' foo', 'foo') - css2css('Foo', 'Foo') - css2css(':empty ', ':empty') - css2css(':before', '::before') - css2css(':beFOre', '::before') - css2css('*:before', '::before') - css2css(':nth-child(2)') - css2css('.bar') - css2css('[baz]') + css2css("*") + css2css(" foo", "foo") + css2css("Foo", "Foo") + css2css(":empty ", ":empty") + css2css(":before", "::before") + css2css(":beFOre", "::before") + css2css("*:before", "::before") + css2css(":nth-child(2)") + css2css(".bar") + css2css("[baz]") css2css('[baz="4"]', "[baz='4']") css2css('[baz^="4"]', "[baz^='4']") css2css("[ns|attr='4']") - css2css('#lipsum') - css2css(':not(*)') - css2css(':not(foo)') - css2css(':not(*.foo)', ':not(.foo)') - css2css(':not(*[foo])', ':not([foo])') - css2css(':not(:empty)') - css2css(':not(#foo)') - css2css('foo:empty') - css2css('foo::before') - css2css('foo:empty::before') + css2css("#lipsum") + css2css(":not(*)") + css2css(":not(foo)") + css2css(":not(*.foo)", ":not(.foo)") + css2css(":not(*[foo])", ":not([foo])") + css2css(":not(:empty)") + css2css(":not(#foo)") + css2css(":is(#bar, .foo)") + css2css(":is(:focused, :visited)") + css2css("foo:empty") + css2css("foo::before") + css2css("foo:empty::before") css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)") - css2css('#lorem + foo#ipsum:first-child > bar::first-line') - css2css('foo > *') + css2css("#lorem + foo#ipsum:first-child > bar::first-line") + css2css("foo > *") def test_parse_errors(self): def get_error(css): @@ -315,97 +332,78 @@ def get_error(css): # Py2, Py3, ... return str(sys.exc_info()[1]).replace("(u'", "('") - assert get_error('attributes(href)/html/body/a') == ( - "Expected selector, got ") - assert get_error('attributes(href)') == ( - "Expected selector, got ") - assert get_error('html/body/a') == ( - "Expected selector, got ") - assert get_error(' ') == ( - "Expected selector, got ") - assert get_error('div, ') == ( - "Expected selector, got ") - assert get_error(' , div') == ( - "Expected selector, got ") - assert get_error('p, , div') == ( - "Expected selector, got ") - assert get_error('div > ') == ( - "Expected selector, got ") - assert get_error(' > div') == ( - "Expected selector, got ' at 2>") - assert get_error('foo|#bar') == ( - "Expected ident or '*', got ") - assert get_error('#.foo') == ( - "Expected selector, got ") - assert get_error('.#foo') == ( - "Expected ident, got ") - assert get_error(':#foo') == ( - "Expected ident, got ") - assert get_error('[*]') == ( - "Expected '|', got ") - assert get_error('[foo|]') == ( - "Expected ident, got ") - assert get_error('[#]') == ( - "Expected ident or '*', got ") - assert get_error('[foo=#]') == ( - "Expected string or ident, got ") - assert get_error('[href]a') == ( - "Expected selector, got ") - assert get_error('[rel=stylesheet]') is None - assert get_error('[rel:stylesheet]') == ( - "Operator expected, got ") - assert get_error('[rel=stylesheet') == ( - "Expected ']', got ") - assert get_error(':lang(fr)') is None - assert get_error(':lang(fr') == ( - "Expected an argument, got ") - assert get_error(':contains("foo') == ( - "Unclosed string at 10") - assert get_error('foo!') == ( - "Expected selector, got ") + assert get_error("attributes(href)/html/body/a") == ( + "Expected selector, got " + ) + assert get_error("attributes(href)") == ("Expected selector, got ") + assert get_error("html/body/a") == ("Expected selector, got ") + assert get_error(" ") == ("Expected selector, got ") + assert get_error("div, ") == ("Expected selector, got ") + assert get_error(" , div") == ("Expected selector, got ") + assert get_error("p, , div") == ("Expected selector, got ") + assert get_error("div > ") == ("Expected selector, got ") + assert get_error(" > div") == ("Expected selector, got ' at 2>") + assert get_error("foo|#bar") == ("Expected ident or '*', got ") + assert get_error("#.foo") == ("Expected selector, got ") + assert get_error(".#foo") == ("Expected ident, got ") + assert get_error(":#foo") == ("Expected ident, got ") + assert get_error("[*]") == ("Expected '|', got ") + assert get_error("[foo|]") == ("Expected ident, got ") + assert get_error("[#]") == ("Expected ident or '*', got ") + assert get_error("[foo=#]") == ("Expected string or ident, got ") + assert get_error("[href]a") == ("Expected selector, got ") + assert get_error("[rel=stylesheet]") is None + assert get_error("[rel:stylesheet]") == ("Operator expected, got ") + assert get_error("[rel=stylesheet") == ("Expected ']', got ") + assert get_error(":lang(fr)") is None + assert get_error(":lang(fr") == ("Expected an argument, got ") + assert get_error(':contains("foo') == ("Unclosed string at 10") + assert get_error("foo!") == ("Expected selector, got ") # Mis-placed pseudo-elements - assert get_error('a:before:empty') == ( - "Got pseudo-element ::before not at the end of a selector") - assert get_error('li:before a') == ( - "Got pseudo-element ::before not at the end of a selector") - assert get_error(':not(:before)') == ( - "Got pseudo-element ::before inside :not() at 12") - assert get_error(':not(:not(a))') == ( - "Got nested :not()") - assert get_error(':scope > div :scope header') == ( + assert get_error("a:before:empty") == ( + "Got pseudo-element ::before not at the end of a selector" + ) + assert get_error("li:before a") == ( + "Got pseudo-element ::before not at the end of a selector" + ) + assert get_error(":not(:before)") == ("Got pseudo-element ::before inside :not() at 12") + assert get_error(":not(:not(a))") == ("Got nested :not()") + assert get_error(":is(:before)") == ("Got pseudo-element ::before inside function") + assert get_error(":is(a b)") == ("Expected an argument, got ") + assert get_error(":scope > div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) - assert get_error('div :scope header') == ( + assert get_error("div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) - assert get_error('> div p') == ("Expected selector, got ' at 0>") + assert get_error("> div p") == ("Expected selector, got ' at 0>") def test_translation(self): def xpath(css): - return _unicode(GenericTranslator().css_to_xpath(css, prefix='')) - - assert xpath('*') == "*" - assert xpath('e') == "e" - assert xpath('*|e') == "e" - assert xpath('e|f') == "e:f" - assert xpath('e[foo]') == "e[@foo]" - assert xpath('e[foo|bar]') == "e[@foo:bar]" + return _unicode(GenericTranslator().css_to_xpath(css, prefix="")) + + assert xpath("*") == "*" + assert xpath("e") == "e" + assert xpath("*|e") == "e" + assert xpath("e|f") == "e:f" + assert xpath("e[foo]") == "e[@foo]" + assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" - "concat(' ', normalize-space(@foo), ' '), ' bar ')]") - assert xpath('e[foo^="bar"]') == ( - "e[@foo and starts-with(@foo, 'bar')]") + "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + ) + assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( - "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']") - assert xpath('e[foo*="bar"]') == ( - "e[@foo and contains(@foo, 'bar')]") + "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']" + ) + assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" - "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") + "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + ) # --- nth-* and nth-last-* ------------------------------------- +<<<<<<< HEAD assert ( xpath("e:nth-child(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]" @@ -421,186 +419,155 @@ def xpath(css): assert xpath('e:nth-child(1)') == ( "e[count(preceding-sibling::*) = 0]") +======= + assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]") +>>>>>>> parent/master # always true - assert xpath('e:nth-child(n)') == ( - "e") - assert xpath('e:nth-child(n+1)') == ( - "e") + assert xpath("e:nth-child(n)") == ("e") + assert xpath("e:nth-child(n+1)") == ("e") # always true too - assert xpath('e:nth-child(n-10)') == ( - "e") + assert xpath("e:nth-child(n-10)") == ("e") # b=2 is the limit... - assert xpath('e:nth-child(n+2)') == ( - "e[count(preceding-sibling::*) >= 1]") + assert xpath("e:nth-child(n+2)") == ("e[count(preceding-sibling::*) >= 1]") # always false - assert xpath('e:nth-child(-n)') == ( - "e[0]") + assert xpath("e:nth-child(-n)") == ("e[0]") # equivalent to first child - assert xpath('e:nth-child(-n+1)') == ( - "e[count(preceding-sibling::*) <= 0]") + assert xpath("e:nth-child(-n+1)") == ("e[count(preceding-sibling::*) <= 0]") - assert xpath('e:nth-child(3n+2)') == ( + assert xpath("e:nth-child(3n+2)") == ( "e[(count(preceding-sibling::*) >= 1) and " - "((count(preceding-sibling::*) +2) mod 3 = 0)]") - assert xpath('e:nth-child(3n-2)') == ( - "e[count(preceding-sibling::*) mod 3 = 0]") - assert xpath('e:nth-child(-n+6)') == ( - "e[count(preceding-sibling::*) <= 5]") - - assert xpath('e:nth-last-child(1)') == ( - "e[count(following-sibling::*) = 0]") - assert xpath('e:nth-last-child(2n)') == ( - "e[(count(following-sibling::*) +1) mod 2 = 0]") - assert xpath('e:nth-last-child(2n+1)') == ( - "e[count(following-sibling::*) mod 2 = 0]") - assert xpath('e:nth-last-child(2n+2)') == ( + "((count(preceding-sibling::*) +2) mod 3 = 0)]" + ) + assert xpath("e:nth-child(3n-2)") == ("e[count(preceding-sibling::*) mod 3 = 0]") + assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]") + + assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]") + assert xpath("e:nth-last-child(2n)") == ("e[(count(following-sibling::*) +1) mod 2 = 0]") + assert xpath("e:nth-last-child(2n+1)") == ("e[count(following-sibling::*) mod 2 = 0]") + assert xpath("e:nth-last-child(2n+2)") == ( "e[(count(following-sibling::*) >= 1) and " - "((count(following-sibling::*) +1) mod 2 = 0)]") - assert xpath('e:nth-last-child(3n+1)') == ( - "e[count(following-sibling::*) mod 3 = 0]") + "((count(following-sibling::*) +1) mod 2 = 0)]" + ) + assert xpath("e:nth-last-child(3n+1)") == ("e[count(following-sibling::*) mod 3 = 0]") # represents the two last e elements - assert xpath('e:nth-last-child(-n+2)') == ( - "e[count(following-sibling::*) <= 1]") - - assert xpath('e:nth-of-type(1)') == ( - "e[count(preceding-sibling::e) = 0]") - assert xpath('e:nth-last-of-type(1)') == ( - "e[count(following-sibling::e) = 0]") - assert xpath('div e:nth-last-of-type(1) .aclass') == ( + assert xpath("e:nth-last-child(-n+2)") == ("e[count(following-sibling::*) <= 1]") + + assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]") + assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]") + assert xpath("div e:nth-last-of-type(1) .aclass") == ( "div/descendant-or-self::*/e[count(following-sibling::e) = 0]" - "/descendant-or-self::*/*[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' aclass ')]") + "/descendant-or-self::*/*[@class and contains(" + "concat(' ', normalize-space(@class), ' '), ' aclass ')]" + ) - assert xpath('e:first-child') == ( - "e[count(preceding-sibling::*) = 0]") - assert xpath('e:last-child') == ( - "e[count(following-sibling::*) = 0]") - assert xpath('e:first-of-type') == ( - "e[count(preceding-sibling::e) = 0]") - assert xpath('e:last-of-type') == ( - "e[count(following-sibling::e) = 0]") - assert xpath('e:only-child') == ( - "e[count(parent::*/child::*) = 1]") - assert xpath('e:only-of-type') == ( - "e[count(parent::*/child::e) = 1]") - assert xpath('e:empty') == ( - "e[not(*) and not(string-length())]") - assert xpath('e:EmPTY') == ( - "e[not(*) and not(string-length())]") - assert xpath('e:root') == ( - "e[not(parent::*)]") - assert xpath('e:hover') == ( - "e[0]") # never matches - assert xpath('e:contains("foo")') == ( - "e[contains(., 'foo')]") - assert xpath('e:ConTains(foo)') == ( - "e[contains(., 'foo')]") - assert xpath('e.warning') == ( - "e[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' warning ')]") - assert xpath('e#myid') == ( - "e[@id = 'myid']") - assert xpath('e:not(:nth-child(odd))') == ( - "e[not(count(preceding-sibling::*) mod 2 = 0)]") - assert xpath('e:nOT(*)') == ( - "e[0]") # never matches - assert xpath('e f') == ( - "e/descendant-or-self::*/f") - assert xpath('e > f') == ( - "e/f") - assert xpath('e + f') == ( - "e/following-sibling::*[(name() = 'f') and (position() = 1)]") - assert xpath('e ~ f') == ( - "e/following-sibling::f") - assert xpath('e ~ f:nth-child(3)') == ( - "e/following-sibling::f[count(preceding-sibling::*) = 2]") - assert xpath('div#container p') == ( - "div[@id = 'container']/descendant-or-self::*/p") + assert xpath("e:first-child") == ("e[count(preceding-sibling::*) = 0]") + assert xpath("e:last-child") == ("e[count(following-sibling::*) = 0]") + assert xpath("e:first-of-type") == ("e[count(preceding-sibling::e) = 0]") + assert xpath("e:last-of-type") == ("e[count(following-sibling::e) = 0]") + assert xpath("e:only-child") == ("e[count(parent::*/child::*) = 1]") + assert xpath("e:only-of-type") == ("e[count(parent::*/child::e) = 1]") + assert xpath("e:empty") == ("e[not(*) and not(string-length())]") + assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") + assert xpath("e:root") == ("e[not(parent::*)]") + assert xpath("e:hover") == ("e[0]") # never matches + assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") + assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") + assert xpath("e.warning") == ( + "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]" + ) + assert xpath("e#myid") == ("e[@id = 'myid']") + assert xpath("e:not(:nth-child(odd))") == ("e[not(count(preceding-sibling::*) mod 2 = 0)]") + assert xpath("e:nOT(*)") == ("e[0]") # never matches + assert xpath("e f") == ("e/descendant-or-self::*/f") + assert xpath("e > f") == ("e/f") + assert xpath("e + f") == ("e/following-sibling::*[(name() = 'f') and (position() = 1)]") + assert xpath("e ~ f") == ("e/following-sibling::f") + assert xpath("e ~ f:nth-child(3)") == ( + "e/following-sibling::f[count(preceding-sibling::*) = 2]" + ) + assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p") # Invalid characters in XPath element names - assert xpath(r'di\a0 v') == ( - u("*[name() = 'di v']")) # di\xa0v - assert xpath(r'di\[v') == ( - "*[name() = 'di[v']") - assert xpath(r'[h\a0 ref]') == ( - u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref - assert xpath(r'[h\]ref]') == ( - "*[attribute::*[name() = 'h]ref']]") - - self.assertRaises(ExpressionError, xpath, u(':fİrst-child')) - self.assertRaises(ExpressionError, xpath, ':first-of-type') - self.assertRaises(ExpressionError, xpath, ':only-of-type') - self.assertRaises(ExpressionError, xpath, ':last-of-type') - self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)') - self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)') - self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') - self.assertRaises(ExpressionError, xpath, ':after') - self.assertRaises(ExpressionError, xpath, ':lorem-ipsum') - self.assertRaises(ExpressionError, xpath, ':lorem(ipsum)') - self.assertRaises(ExpressionError, xpath, '::lorem-ipsum') + assert xpath(r"di\a0 v") == (u("*[name() = 'di v']")) # di\xa0v + assert xpath(r"di\[v") == ("*[name() = 'di[v']") + assert xpath(r"[h\a0 ref]") == (u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref + assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") + + self.assertRaises(ExpressionError, xpath, u(":fİrst-child")) + self.assertRaises(ExpressionError, xpath, ":first-of-type") + self.assertRaises(ExpressionError, xpath, ":only-of-type") + self.assertRaises(ExpressionError, xpath, ":last-of-type") + self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") + self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") + self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") + self.assertRaises(ExpressionError, xpath, ":after") + self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") + self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") + self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, - 'foo') + self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") def test_unicode(self): if sys.version_info[0] < 3: - css = '.a\xc1b'.decode('ISO-8859-1') + css = ".a\xc1b".decode("ISO-8859-1") else: - css = '.a\xc1b' + css = ".a\xc1b" xpath = GenericTranslator().css_to_xpath(css) assert css[1:] in xpath - xpath = xpath.encode('ascii', 'xmlcharrefreplace').decode('ASCII') + xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII") assert xpath == ( "descendant-or-self::*[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' aÁb ')]") + "concat(' ', normalize-space(@class), ' '), ' aÁb ')]" + ) def test_quoting(self): css_to_xpath = GenericTranslator().css_to_xpath - assert css_to_xpath('*[aval="\'"]') == ( - '''descendant-or-self::*[@aval = "'"]''') - assert css_to_xpath('*[aval="\'\'\'"]') == ( - """descendant-or-self::*[@aval = "'''"]""") - assert css_to_xpath('*[aval=\'"\']') == ( - '''descendant-or-self::*[@aval = '"']''') - assert css_to_xpath('*[aval=\'"""\']') == ( - '''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""") + assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""") + assert css_to_xpath("*[aval='\"']") == ("""descendant-or-self::*[@aval = '"']""") + assert css_to_xpath('*[aval=\'"""\']') == ('''descendant-or-self::*[@aval = '"""']''') assert css_to_xpath(':scope > div[dataimg=""]') == ( - "descendant-or-self::*[1]/div[@dataimg = '']") + "descendant-or-self::*[1]/div[@dataimg = '']" + ) def test_unicode_escapes(self): # \22 == '"' \20 == ' ' css_to_xpath = GenericTranslator().css_to_xpath assert css_to_xpath(r'*[aval="\'\22\'"]') == ( - '''descendant-or-self::*[@aval = concat("'",'"',"'")]''') + """descendant-or-self::*[@aval = concat("'",'"',"'")]""" + ) assert css_to_xpath(r'*[aval="\'\22 2\'"]') == ( - '''descendant-or-self::*[@aval = concat("'",'"2',"'")]''') + """descendant-or-self::*[@aval = concat("'",'"2',"'")]""" + ) assert css_to_xpath(r'*[aval="\'\20 \'"]') == ( - '''descendant-or-self::*[@aval = "' '"]''') - assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == ( - '''descendant-or-self::*[@aval = "' '"]''') + """descendant-or-self::*[@aval = "' '"]""" + ) + assert css_to_xpath("*[aval=\"'\\20\r\n '\"]") == ( + """descendant-or-self::*[@aval = "' '"]""" + ) def test_xpath_pseudo_elements(self): class CustomTranslator(GenericTranslator): def xpath_pseudo_element(self, xpath, pseudo_element): if isinstance(pseudo_element, FunctionalPseudoElement): - method = 'xpath_%s_functional_pseudo_element' % ( - pseudo_element.name.replace('-', '_')) + method = "xpath_%s_functional_pseudo_element" % ( + pseudo_element.name.replace("-", "_") + ) method = _unicode_safe_getattr(self, method, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" - % pseudo_element.name) + "The functional pseudo-element ::%s() is unknown" % pseudo_element.name + ) xpath = method(xpath, pseudo_element.arguments) else: - method = 'xpath_%s_simple_pseudo_element' % ( - pseudo_element.replace('-', '_')) + method = "xpath_%s_simple_pseudo_element" % (pseudo_element.replace("-", "_")) method = _unicode_safe_getattr(self, method, None) if not method: raise ExpressionError( - "The pseudo-element ::%s is unknown" - % pseudo_element) + "The pseudo-element ::%s is unknown" % pseudo_element + ) xpath = method(xpath) return xpath @@ -608,8 +575,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): # elements that have a certain number of attributes def xpath_nb_attr_function(self, xpath, function): nb_attributes = int(function.arguments[0].value) - return xpath.add_condition( - "count(@*)=%d" % nb_attributes) + return xpath.add_condition("count(@*)=%d" % nb_attributes) # pseudo-class: # elements that have 5 attributes @@ -619,21 +585,36 @@ def xpath_five_attributes_pseudo(self, xpath): # functional pseudo-element: # element's attribute by name def xpath_attr_functional_pseudo_element(self, xpath, arguments): +<<<<<<< HEAD attribute_name = arguments[0][0].value other = XPathExpr('@%s' % attribute_name, '', ) return xpath.join('/', other) +======= + attribute_name = arguments[0].value + other = XPathExpr( + "@%s" % attribute_name, + "", + ) + return xpath.join("/", other) +>>>>>>> parent/master # pseudo-element: # element's text() nodes def xpath_text_node_simple_pseudo_element(self, xpath): - other = XPathExpr('text()', '', ) - return xpath.join('/', other) + other = XPathExpr( + "text()", + "", + ) + return xpath.join("/", other) # pseudo-element: # element's href attribute def xpath_attr_href_simple_pseudo_element(self, xpath): - other = XPathExpr('@href', '', ) - return xpath.join('/', other) + other = XPathExpr( + "@href", + "", + ) + return xpath.join("/", other) # pseudo-element: # used to demonstrate operator precedence @@ -643,91 +624,86 @@ def xpath_first_or_second_pseudo(self, xpath): def xpath(css): return _unicode(CustomTranslator().css_to_xpath(css)) - assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]" - assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]" - assert xpath('::attr(href)') == "descendant-or-self::*/@href" - assert xpath('::text-node') == "descendant-or-self::*/text()" - assert xpath('::attr-href') == "descendant-or-self::*/@href" - assert xpath('p img::attr(src)') == ( - "descendant-or-self::p/descendant-or-self::*/img/@src") - assert xpath(':scope') == "descendant-or-self::*[1]" - assert xpath(':first-or-second[href]') == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " - "and (@href)]") + assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]" + assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]" + assert xpath("::attr(href)") == "descendant-or-self::*/@href" + assert xpath("::text-node") == "descendant-or-self::*/text()" + assert xpath("::attr-href") == "descendant-or-self::*/@href" + assert xpath("p img::attr(src)") == ( + "descendant-or-self::p/descendant-or-self::*/img/@src" + ) + assert xpath(":scope") == "descendant-or-self::*[1]" + assert xpath(":first-or-second[href]") == ( + "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + ) - assert str(XPathExpr('', '', condition='@href')) == "[@href]" + assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ + def operator_id(selector): xpath = CustomTranslator().css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] + return [element.get("id", "nil") for element in items] - assert operator_id(':first-or-second') == ['first', 'second'] - assert operator_id(':first-or-second[href]') == ['second'] - assert operator_id('[href]:first-or-second') == ['second'] + assert operator_id(":first-or-second") == ["first", "second"] + assert operator_id(":first-or-second[href]") == ["second"] + assert operator_id("[href]:first-or-second") == ["second"] def test_series(self): def series(css): - selector, = parse(':nth-child(%s)' % css) + (selector,) = parse(":nth-child(%s)" % css) args = selector.parsed_tree.arguments try: return parse_series(args) except ValueError: return None - assert series('1n+3') == (1, 3) - assert series('1n +3') == (1, 3) - assert series('1n + 3') == (1, 3) - assert series('1n+ 3') == (1, 3) - assert series('1n-3') == (1, -3) - assert series('1n -3') == (1, -3) - assert series('1n - 3') == (1, -3) - assert series('1n- 3') == (1, -3) - assert series('n-5') == (1, -5) - assert series('odd') == (2, 1) - assert series('even') == (2, 0) - assert series('3n') == (3, 0) - assert series('n') == (1, 0) - assert series('+n') == (1, 0) - assert series('-n') == (-1, 0) - assert series('5') == (0, 5) - assert series('foo') is None - assert series('n+') is None + assert series("1n+3") == (1, 3) + assert series("1n +3") == (1, 3) + assert series("1n + 3") == (1, 3) + assert series("1n+ 3") == (1, 3) + assert series("1n-3") == (1, -3) + assert series("1n -3") == (1, -3) + assert series("1n - 3") == (1, -3) + assert series("1n- 3") == (1, -3) + assert series("n-5") == (1, -5) + assert series("odd") == (2, 1) + assert series("even") == (2, 0) + assert series("3n") == (3, 0) + assert series("n") == (1, 0) + assert series("+n") == (1, 0) + assert series("-n") == (-1, 0) + assert series("5") == (0, 5) + assert series("foo") is None + assert series("n+") is None def test_lang(self): document = etree.fromstring(XMLLANG_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath def langid(selector): xpath = css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] - - assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth'] - assert langid(':lang("en-us")') == ['second', 'fourth'] - assert langid(':lang(en-nz)') == ['third'] - assert langid(':lang(fr)') == ['fifth'] - assert langid(':lang(ru)') == ['sixth'] - assert langid(":lang('ZH')") == ['eighth'] - assert langid(':lang(de) :lang(zh)') == ['eighth'] - assert langid(':lang(en), :lang(zh)') == [ - 'first', 'second', 'third', 'fourth', 'eighth'] - assert langid(':lang(es)') == [] + return [element.get("id", "nil") for element in items] + + assert langid(':lang("EN")') == ["first", "second", "third", "fourth"] + assert langid(':lang("en-us")') == ["second", "fourth"] + assert langid(":lang(en-nz)") == ["third"] + assert langid(":lang(fr)") == ["fifth"] + assert langid(":lang(ru)") == ["sixth"] + assert langid(":lang('ZH')") == ["eighth"] + assert langid(":lang(de) :lang(zh)") == ["eighth"] + assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"] + assert langid(":lang(es)") == [] def test_select(self): document = etree.fromstring(HTML_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath @@ -739,166 +715,218 @@ def select_ids(selector, html_only): xpath = html_css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] + return [element.get("id", "nil") for element in items] def pcss(main, *selectors, **kwargs): - html_only = kwargs.pop('html_only', False) + html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: assert select_ids(selector, html_only) == result return result - all_ids = pcss('*') - assert all_ids[:6] == [ - 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'] - assert all_ids[-1:] == ['foobar-span'] - assert pcss('div') == ['outer-div', 'li-div', 'foobar-div'] - assert pcss('DIV', html_only=True) == [ - 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML - assert pcss('div div') == ['li-div'] - assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div'] - assert pcss('a[name]') == ['name-anchor'] - assert pcss('a[NAme]', html_only=True) == [ - 'name-anchor'] # case-insensitive in HTML: - assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[rel="tag"]') == ['tag-anchor'] - assert pcss('a[href*="localhost"]') == ['tag-anchor'] + all_ids = pcss("*") + assert all_ids[:6] == ["html", "nil", "link-href", "link-nohref", "nil", "outer-div"] + assert all_ids[-1:] == ["foobar-span"] + assert pcss("div") == ["outer-div", "li-div", "foobar-div"] + assert pcss("DIV", html_only=True) == [ + "outer-div", + "li-div", + "foobar-div", + ] # case-insensitive in HTML + assert pcss("div div") == ["li-div"] + assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"] + assert pcss("a[name]") == ["name-anchor"] + assert pcss("a[NAme]", html_only=True) == ["name-anchor"] # case-insensitive in HTML: + assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[rel="tag"]') == ["tag-anchor"] + assert pcss('a[href*="localhost"]') == ["tag-anchor"] assert pcss('a[href*=""]') == [] - assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[href^="http:"]') == ['tag-anchor'] + assert pcss('a[href^="http"]') == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[href^="http:"]') == ["tag-anchor"] assert pcss('a[href^=""]') == [] - assert pcss('a[href$="org"]') == ['nofollow-anchor'] + assert pcss('a[href$="org"]') == ["nofollow-anchor"] assert pcss('a[href$=""]') == [] - assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ - 'foobar-div'] - assert pcss('[foobar~="ab bc"]', - '[foobar~=""]', '[foobar~=" \t"]') == [] + assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == ["foobar-div"] + assert pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]') == [] assert pcss('div[foobar~="cd"]') == [] - assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li'] + assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ["second-li"] # Attribute values are case sensitive assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] assert pcss('*[lang|="e"]') == [] # ... :lang() is not. - assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ - 'second-li', 'li-div'] + assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == ["second-li", "li-div"] assert pcss(':lang("e")', html_only=True) == [] - assert pcss(':scope > div') == [] - assert pcss(':scope body') == ['nil'] - assert pcss(':scope body > div') == ['outer-div', 'foobar-div'] - assert pcss(':scope head') == ['nil'] - assert pcss(':scope html') == [] + assert pcss(":scope > div") == [] + assert pcss(":scope body") == ["nil"] + assert pcss(":scope body > div") == ["outer-div", "foobar-div"] + assert pcss(":scope head") == ["nil"] + assert pcss(":scope html") == [] # --- nth-* and nth-last-* ------------------------------------- # select nothing - assert pcss('li:nth-child(-n)') == [] + assert pcss("li:nth-child(-n)") == [] # select all children - assert pcss('li:nth-child(n)') == [ - 'first-li', 'second-li', 'third-li', 'fourth-li', - 'fifth-li', 'sixth-li', 'seventh-li'] - - assert pcss('li:nth-child(3)', - '#first-li ~ :nth-child(3)') == ['third-li'] - assert pcss('li:nth-child(10)') == [] - assert pcss('li:nth-child(2n)', 'li:nth-child(even)', - 'li:nth-child(2n+0)') == [ - 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [ - 'first-li', 'third-li', 'fifth-li', 'seventh-li'] - assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li'] - assert pcss('li:nth-child(3n+1)') == [ - 'first-li', 'fourth-li', 'seventh-li'] - assert pcss('li:nth-child(-n+3)') == [ - 'first-li', 'second-li', 'third-li'] - assert pcss('li:nth-child(-2n+4)') == ['second-li', 'fourth-li'] - assert pcss('li:nth-last-child(0)') == [] - assert pcss('li:nth-last-child(1)') == ['seventh-li'] - assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [ - 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-last-child(2n+1)') == [ - 'first-li', 'third-li', 'fifth-li', 'seventh-li'] - assert pcss('li:nth-last-child(2n+2)') == [ - 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-last-child(3n+1)') == [ - 'first-li', 'fourth-li', 'seventh-li'] - assert pcss('ol:first-of-type') == ['first-ol'] - assert pcss('ol:nth-child(1)') == [] - assert pcss('ol:nth-of-type(2)') == ['second-ol'] - assert pcss('ol:nth-last-of-type(1)') == ['second-ol'] + assert pcss("li:nth-child(n)") == [ + "first-li", + "second-li", + "third-li", + "fourth-li", + "fifth-li", + "sixth-li", + "seventh-li", + ] + + assert pcss("li:nth-child(3)", "#first-li ~ :nth-child(3)") == ["third-li"] + assert pcss("li:nth-child(10)") == [] + assert pcss("li:nth-child(2n)", "li:nth-child(even)", "li:nth-child(2n+0)") == [ + "second-li", + "fourth-li", + "sixth-li", + ] + assert pcss("li:nth-child(+2n+1)", "li:nth-child(odd)") == [ + "first-li", + "third-li", + "fifth-li", + "seventh-li", + ] + assert pcss("li:nth-child(2n+4)") == ["fourth-li", "sixth-li"] + assert pcss("li:nth-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("li:nth-child(-n+3)") == ["first-li", "second-li", "third-li"] + assert pcss("li:nth-child(-2n+4)") == ["second-li", "fourth-li"] + assert pcss("li:nth-last-child(0)") == [] + assert pcss("li:nth-last-child(1)") == ["seventh-li"] + assert pcss("li:nth-last-child(2n)", "li:nth-last-child(even)") == [ + "second-li", + "fourth-li", + "sixth-li", + ] + assert pcss("li:nth-last-child(2n+1)") == [ + "first-li", + "third-li", + "fifth-li", + "seventh-li", + ] + assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"] + assert pcss("li:nth-last-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("ol:first-of-type") == ["first-ol"] + assert pcss("ol:nth-child(1)") == [] + assert pcss("ol:nth-of-type(2)") == ["second-ol"] + assert pcss("ol:nth-last-of-type(1)") == ["second-ol"] # "+" and "~" tests - assert pcss('ol#first-ol li + li:nth-child(4)') == ['fourth-li'] - assert pcss('li + li:nth-child(1)') == [] - assert pcss('li ~ li:nth-child(2n+1)') == [ - 'third-li', 'fifth-li', 'seventh-li' - ] # all but the first - assert pcss('li ~ li:nth-last-child(2n+1)') == [ - 'third-li', 'fifth-li', 'seventh-li' - ] # all but the first - - assert pcss('span:only-child') == ['foobar-span'] - assert pcss('li div:only-child') == ['li-div'] - assert pcss('div *:only-child') == ['li-div', 'foobar-span'] - self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') - assert pcss('p:only-of-type') == ['paragraph'] - assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] - assert pcss('li:empty') == [ - 'third-li', 'fourth-li', 'fifth-li', 'sixth-li'] - assert pcss(':root', 'html:root') == ['html'] - assert pcss('li:root', '* :root') == [] + assert pcss("ol#first-ol li + li:nth-child(4)") == ["fourth-li"] + assert pcss("li + li:nth-child(1)") == [] + assert pcss("li ~ li:nth-child(2n+1)") == [ + "third-li", + "fifth-li", + "seventh-li", + ] # all but the first + assert pcss("li ~ li:nth-last-child(2n+1)") == [ + "third-li", + "fifth-li", + "seventh-li", + ] # all but the first + + assert pcss("span:only-child") == ["foobar-span"] + assert pcss("li div:only-child") == ["li-div"] + assert pcss("div *:only-child") == ["li-div", "foobar-span"] + self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + assert pcss("p:only-of-type") == ["paragraph"] + assert pcss("a:empty", "a:EMpty") == ["name-anchor"] + assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] + assert pcss(":root", "html:root") == ["html"] + assert pcss("li:root", "* :root") == [] assert pcss('*:contains("link")', ':CONtains("link")') == [ - 'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor'] + "html", + "nil", + "outer-div", + "tag-anchor", + "nofollow-anchor", + ] assert pcss('*:contains("LInk")') == [] # case sensitive assert pcss('*:contains("e")') == [ - 'html', 'nil', 'outer-div', 'first-ol', 'first-li', - 'paragraph', 'p-em'] + "html", + "nil", + "outer-div", + "first-ol", + "first-li", + "paragraph", + "p-em", + ] assert pcss('*:contains("E")') == [] # case-sensitive - assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol'] - assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li'] - assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [ - 'third-li', 'fourth-li'] - assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li'] - assert pcss('li div', 'li > div', 'div div') == ['li-div'] - assert pcss('div > div') == [] - assert pcss('div>.c', 'div > .c') == ['first-ol'] - assert pcss('div + div') == ['foobar-div'] - assert pcss('a ~ a') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[rel="tag"] ~ a') == ['nofollow-anchor'] - assert pcss('ol#first-ol li:last-child') == ['seventh-li'] - assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li'] - assert pcss('#outer-div:first-child') == ['outer-div'] - assert pcss('#outer-div :first-child') == [ - 'name-anchor', 'first-li', 'li-div', 'p-b', - 'checkbox-fieldset-disabled', 'area-href'] - assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss(':not(*)') == [] - assert pcss('a:not([href])') == ['name-anchor'] - assert pcss('ol :Not(li[class])') == [ - 'first-li', 'second-li', 'li-div', - 'fifth-li', 'sixth-li', 'seventh-li'] - assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] + assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"] + assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"] + assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == ["third-li", "fourth-li"] + assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"] + assert pcss("li div", "li > div", "div div") == ["li-div"] + assert pcss("div > div") == [] + assert pcss("div>.c", "div > .c") == ["first-ol"] + assert pcss("div + div") == ["foobar-div"] + assert pcss("a ~ a") == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[rel="tag"] ~ a') == ["nofollow-anchor"] + assert pcss("ol#first-ol li:last-child") == ["seventh-li"] + assert pcss("ol#first-ol *:last-child") == ["li-div", "seventh-li"] + assert pcss("#outer-div:first-child") == ["outer-div"] + assert pcss("#outer-div :first-child") == [ + "name-anchor", + "first-li", + "li-div", + "p-b", + "checkbox-fieldset-disabled", + "area-href", + ] + assert pcss("a[href]") == ["tag-anchor", "nofollow-anchor"] + assert pcss(":not(*)") == [] + assert pcss("a:not([href])") == ["name-anchor"] + assert pcss("ol :Not(li[class])") == [ + "first-li", + "second-li", + "li-div", + "fifth-li", + "sixth-li", + "seventh-li", + ] + assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"] + assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"] + assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"] + assert pcss("ol.a.b.c > li.c:nth-child(3)") == ["third-li"] # Invalid characters in XPath element names, should not crash - assert pcss(r'di\a0 v', r'div\[') == [] - assert pcss(r'[h\a0 ref]', r'[h\]ref]') == [] + assert pcss(r"di\a0 v", r"div\[") == [] + assert pcss(r"[h\a0 ref]", r"[h\]ref]") == [] # HTML-specific - assert pcss(':link', html_only=True) == [ - 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'] - assert pcss(':visited', html_only=True) == [] - assert pcss(':enabled', html_only=True) == [ - 'link-href', 'tag-anchor', 'nofollow-anchor', - 'checkbox-unchecked', 'text-checked', 'checkbox-checked', - 'area-href'] - assert pcss(':disabled', html_only=True) == [ - 'checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', - 'checkbox-fieldset-disabled'] - assert pcss(':checked', html_only=True) == [ - 'checkbox-checked', 'checkbox-disabled-checked'] + assert pcss(":link", html_only=True) == [ + "link-href", + "tag-anchor", + "nofollow-anchor", + "area-href", + ] + assert pcss(":visited", html_only=True) == [] + assert pcss(":enabled", html_only=True) == [ + "link-href", + "tag-anchor", + "nofollow-anchor", + "checkbox-unchecked", + "text-checked", + "checkbox-checked", + "area-href", + ] + assert pcss(":disabled", html_only=True) == [ + "checkbox-disabled", + "checkbox-disabled-checked", + "fieldset", + "checkbox-fieldset-disabled", + ] + assert pcss(":checked", html_only=True) == [ + "checkbox-checked", + "checkbox-disabled-checked", + ] def test_select_shakespeare(self): document = html.document_fromstring(HTML_SHAKESPEARE) - body = document.xpath('//body')[0] + body = document.xpath("//body")[0] css_to_xpath = GenericTranslator().css_to_xpath try: @@ -921,66 +949,67 @@ def count(selector): ## Changed from original; probably because I'm only ## searching the body. - #assert count('*') == 252 - assert count('*') == 246 - assert count('div:contains(CELIA)') == 26 - assert count('div:only-child') == 22 # ? - assert count('div:nth-child(even)') == 106 - assert count('div:nth-child(2n)') == 106 - assert count('div:nth-child(odd)') == 137 - assert count('div:nth-child(2n+1)') == 137 - assert count('div:nth-child(n)') == 243 - assert count('div:last-child') == 53 - assert count('div:first-child') == 51 - assert count('div > div') == 242 - assert count('div + div') == 190 - assert count('div ~ div') == 190 - assert count('body') == 1 - assert count('body div') == 243 - assert count('div') == 243 - assert count('div div') == 242 - assert count('div div div') == 241 - assert count('div, div, div') == 243 - assert count('div, a, span') == 243 - assert count('.dialog') == 51 - assert count('div.dialog') == 51 - assert count('div .dialog') == 51 - assert count('div.character, div.dialog') == 99 - assert count('div.direction.dialog') == 0 - assert count('div.dialog.direction') == 0 - assert count('div.dialog.scene') == 1 - assert count('div.scene.scene') == 1 - assert count('div.scene .scene') == 0 - assert count('div.direction .dialog ') == 0 - assert count('div .dialog .direction') == 4 - assert count('div.dialog .dialog .direction') == 4 - assert count('#speech5') == 1 - assert count('div#speech5') == 1 - assert count('div #speech5') == 1 - assert count('div.scene div.dialog') == 49 - assert count('div#scene1 div.dialog div') == 142 - assert count('#scene1 #speech1') == 1 - assert count('div[class]') == 103 - assert count('div[class=dialog]') == 50 - assert count('div[class^=dia]') == 51 - assert count('div[class$=log]') == 50 - assert count('div[class*=sce]') == 1 - assert count('div[class|=dialog]') == 50 # ? Seems right - assert count('div[class!=madeup]') == 243 # ? Seems right - assert count('div[class~=dialog]') == 51 # ? Seems right - assert count(':scope > div') == 1 - assert count(':scope > div > div[class=dialog]') == 1 - assert count(':scope > div div') == 242 - -OPERATOR_PRECEDENCE_IDS = ''' + # assert count('*') == 252 + assert count("*") == 246 + assert count("div:contains(CELIA)") == 26 + assert count("div:only-child") == 22 # ? + assert count("div:nth-child(even)") == 106 + assert count("div:nth-child(2n)") == 106 + assert count("div:nth-child(odd)") == 137 + assert count("div:nth-child(2n+1)") == 137 + assert count("div:nth-child(n)") == 243 + assert count("div:last-child") == 53 + assert count("div:first-child") == 51 + assert count("div > div") == 242 + assert count("div + div") == 190 + assert count("div ~ div") == 190 + assert count("body") == 1 + assert count("body div") == 243 + assert count("div") == 243 + assert count("div div") == 242 + assert count("div div div") == 241 + assert count("div, div, div") == 243 + assert count("div, a, span") == 243 + assert count(".dialog") == 51 + assert count("div.dialog") == 51 + assert count("div .dialog") == 51 + assert count("div.character, div.dialog") == 99 + assert count("div.direction.dialog") == 0 + assert count("div.dialog.direction") == 0 + assert count("div.dialog.scene") == 1 + assert count("div.scene.scene") == 1 + assert count("div.scene .scene") == 0 + assert count("div.direction .dialog ") == 0 + assert count("div .dialog .direction") == 4 + assert count("div.dialog .dialog .direction") == 4 + assert count("#speech5") == 1 + assert count("div#speech5") == 1 + assert count("div #speech5") == 1 + assert count("div.scene div.dialog") == 49 + assert count("div#scene1 div.dialog div") == 142 + assert count("#scene1 #speech1") == 1 + assert count("div[class]") == 103 + assert count("div[class=dialog]") == 50 + assert count("div[class^=dia]") == 51 + assert count("div[class$=log]") == 50 + assert count("div[class*=sce]") == 1 + assert count("div[class|=dialog]") == 50 # ? Seems right + assert count("div[class!=madeup]") == 243 # ? Seems right + assert count("div[class~=dialog]") == 51 # ? Seems right + assert count(":scope > div") == 1 + assert count(":scope > div > div[class=dialog]") == 1 + assert count(":scope > div div") == 242 + + +OPERATOR_PRECEDENCE_IDS = """ -''' +""" -XMLLANG_IDS = ''' +XMLLANG_IDS = """ a b @@ -992,9 +1021,9 @@ def count(selector): -''' +""" -HTML_IDS = ''' +HTML_IDS = """ @@ -1043,10 +1072,10 @@ def count(selector):
-''' +""" -HTML_SHAKESPEARE = ''' +HTML_SHAKESPEARE = """ @@ -1355,8 +1384,8 @@ def count(selector): -''' +""" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tox.ini b/tox.ini index 4fb1d7c..372ecb9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,37 @@ [tox] -envlist = py +envlist = black,flake8,pylint,security,py [testenv] -deps= - -r tests/requirements.txt +deps = + lxml>=4.4 + pytest-cov>=2.8 + pytest>=5.4 +commands = + pytest --cov=cssselect \ + --cov-report=term-missing --cov-report=html --cov-report=xml \ + --verbose {posargs: cssselect tests} + +[testenv:black] +deps = + black==21.6b0 +commands = + black --check {posargs: cssselect setup.py tests} + +[testenv:flake8] +deps = + flake8==3.9.2 +commands = + flake8 {posargs: cssselect setup.py tests docs/conf.py} + +[testenv:pylint] +deps = + {[testenv]deps} + pylint==2.9.5 +commands = + pylint {posargs: cssselect setup.py tests docs} +[testenv:security] +deps = + bandit commands = - py.test --cov-report term --cov=cssselect + bandit -r -c .bandit.yml {posargs: cssselect}