Skip to content

Commit

Permalink
Merge pull request #227 from noviluni/update_to_python3.6
Browse files Browse the repository at this point in the history
Upgrade some semantics and references to Python 3.6
  • Loading branch information
wRAR committed Aug 9, 2021
2 parents d20db09 + f6f0331 commit fdba8af
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 34 deletions.
20 changes: 9 additions & 11 deletions docs/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
from sys import version_info

from sybil import Sybil
from sybil.parsers.codeblock import CodeBlockParser
Expand All @@ -20,13 +19,12 @@ def setup(namespace):
namespace['load_selector'] = load_selector


if version_info >= (3,):
pytest_collect_file = Sybil(
parsers=[
DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
CodeBlockParser(future_imports=['print_function']),
skip,
],
pattern='*.rst',
setup=setup,
).pytest()
pytest_collect_file = Sybil(
parsers=[
DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
CodeBlockParser(future_imports=['print_function']),
skip,
],
pattern='*.rst',
setup=setup,
).pytest()
4 changes: 2 additions & 2 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ Using selectors with regular expressions

:class:`~parsel.selector.Selector` also has a ``.re()`` method for extracting
data using regular expressions. However, unlike using ``.xpath()`` or
``.css()`` methods, ``.re()`` returns a list of unicode strings. So you
``.css()`` methods, ``.re()`` returns a list of strings. So you
can't construct nested ``.re()`` calls.

Here's an example used to extract image names from the :ref:`HTML code
Expand Down Expand Up @@ -917,7 +917,7 @@ a :class:`~parsel.selector.Selector` instantiated with an HTML text like this::
sel.xpath("//h1")

2. Extract the text of all ``<h1>`` elements from an HTML text,
returning a list of unicode strings::
returning a list of strings::

sel.xpath("//h1").getall() # this includes the h1 tag
sel.xpath("//h1/text()").getall() # this excludes the h1 tag
Expand Down
24 changes: 12 additions & 12 deletions parsel/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def re(
) -> List[str]:
"""
Call the ``.re()`` method for each element in this list and return
their results flattened, as a list of unicode strings.
their results flattened, as a list of strings.
By default, character entity references are replaced by their
corresponding character (except for ``&amp;`` and ``&lt;``).
Expand Down Expand Up @@ -158,7 +158,7 @@ def re_first(
) -> Optional[str]:
"""
Call the ``.re()`` method for the first element in this list and
return the result in an unicode string. If the list is empty or the
return the result in a string. If the list is empty or the
regex doesn't match anything, return the default value (``None`` if
the argument is not provided).
Expand All @@ -176,7 +176,7 @@ def re_first(
def getall(self) -> List[str]:
"""
Call the ``.get()`` method for each element in this list and return
their results flattened, as a list of unicode strings.
their results flattened, as a list of strings.
"""
return [x.get() for x in self]

Expand Down Expand Up @@ -223,7 +223,7 @@ class Selector:
:class:`Selector` allows you to select parts of an XML or HTML text using CSS
or XPath expressions and extract data from it.
``text`` is a ``unicode`` object in Python 2 or a ``str`` object in Python 3
``text`` is a ``str`` object
``type`` defines the selector type, it can be ``"html"``, ``"xml"`` or ``None`` (default).
If ``type`` is ``None``, the selector defaults to ``"html"``.
Expand Down Expand Up @@ -328,7 +328,7 @@ def xpath(
query, namespaces=nsp, smart_strings=self._lxml_smart_strings, **kwargs
)
except etree.XPathError as exc:
raise ValueError("XPath error: %s in %s" % (exc, query))
raise ValueError(f"XPath error: {exc} in {query}")

if type(result) is not list:
result = [result]
Expand Down Expand Up @@ -361,7 +361,7 @@ def re(
self, regex: Union[str, Pattern[str]], replace_entities: bool = True
) -> List[str]:
"""
Apply the given regex and return a list of unicode strings with the
Apply the given regex and return a list of strings with the
matches.
``regex`` can be either a compiled regular expression or a string which
Expand Down Expand Up @@ -399,9 +399,9 @@ def re_first(
replace_entities: bool = True,
) -> Optional[str]:
"""
Apply the given regex and return the first unicode string which
matches. If there is no match, return the default value (``None`` if
the argument is not provided).
Apply the given regex and return the first string which matches. If
there is no match, return the default value (``None`` if the argument
is not provided).
By default, character entity references are replaced by their
corresponding character (except for ``&amp;`` and ``&lt;``).
Expand All @@ -414,7 +414,7 @@ def re_first(

def get(self) -> str:
"""
Serialize and return the matched nodes in a single unicode string.
Serialize and return the matched nodes in a single string.
Percent encoded content is unquoted.
"""
try:
Expand All @@ -436,7 +436,7 @@ def get(self) -> str:

def getall(self) -> List[str]:
"""
Serialize and return the matched node in a 1-element list of unicode strings.
Serialize and return the matched node in a 1-element list of strings.
"""
return [self.get()]

Expand Down Expand Up @@ -504,6 +504,6 @@ def __bool__(self) -> bool:

def __str__(self) -> str:
data = repr(shorten(self.get(), width=40))
return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data)
return f"<{type(self).__name__} xpath={self._expr!r} data={data}>"

__repr__ = __str__
5 changes: 2 additions & 3 deletions parsel/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ def iflatten(x):
Similar to ``.flatten()``, but returns iterator instead"""
for el in x:
if _is_listlike(el):
for el_ in flatten(el):
yield el_
yield from flatten(el)
else:
yield el

Expand Down Expand Up @@ -59,7 +58,7 @@ def _is_listlike(x: Any) -> bool:
def extract_regex(
regex: Union[str, Pattern[str]], text: str, replace_entities: bool = True
) -> List[str]:
"""Extract a list of unicode strings from the given text/encoding using the following policies:
"""Extract a list of strings from the given text/encoding using the following policies:
* if the regex contains a named group called "extract" that will be returned
* if the regex contains multiple numbered groups, all those will be returned (flattened)
* if the regex doesn't contain any group the entire regex matching is returned
Expand Down
2 changes: 1 addition & 1 deletion parsel/xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from w3lib.html import HTML5_WHITESPACE

regex = "[{}]+".format(HTML5_WHITESPACE)
regex = f"[{HTML5_WHITESPACE}]+"
replace_html5_whitespaces = re.compile(regex).sub


Expand Down
4 changes: 2 additions & 2 deletions tests/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None:
t = 'I say "Yeah!"'
# naive string formatting will give something like:
# ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name
self.assertRaises(ValueError, sel.xpath, '//input[@value="{}"]/@name'.format(t))
self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name')

# with XPath variables, escaping is done for you
self.assertEqual(
Expand All @@ -149,7 +149,7 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None:
# the following gives you something like
# ValueError: XPath error: Invalid predicate in //p[normalize-space()='I'm mixing single and "double quotes" and I don't care :)']//@name
self.assertRaises(
ValueError, sel.xpath, "//p[normalize-space()='{}']//@name".format(lt)
ValueError, sel.xpath, f"//p[normalize-space()='{lt}']//@name"
)

self.assertEqual(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_xml_attacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

def _load(attack):
folder_path = path.dirname(__file__)
file_path = path.join(folder_path, "xml_attacks", "{}.xml".format(attack))
file_path = path.join(folder_path, "xml_attacks", f"{attack}.xml")
with open(file_path, "rb") as attack_file:
return attack_file.read().decode("utf-8")

Expand All @@ -28,6 +28,6 @@ def test_billion_laughs(self):
lolz = selector.css("lolz::text").get()
memory_usage_after = process.memory_info().rss
memory_change = memory_usage_after - memory_usage_before
assert_message = "Memory change: {}B".format(memory_change)
assert_message = f"Memory change: {memory_change}B"
assert memory_change <= MiB_1, assert_message
assert lolz == "&lol9;"
2 changes: 1 addition & 1 deletion tests/test_xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_has_class_error_invalid_unicode(self):
ValueError,
"All strings must be XML compatible",
sel.xpath,
'has-class("héllö")'.encode("utf-8"),
'has-class("héllö")'.encode(),
)

def test_has_class_unicode(self):
Expand Down

0 comments on commit fdba8af

Please sign in to comment.