Merge pull request #227 from noviluni/update_to_python3.6

Upgrade some semantics and references to Python 3.6
scrapy · Aug 9, 2021 · fdba8af · fdba8af
2 parents d20db09 + f6f0331
commit fdba8af
Show file tree

Hide file tree

Showing 8 changed files with 31 additions and 34 deletions.
diff --git a/docs/conftest.py b/docs/conftest.py
@@ -1,6 +1,5 @@
 import os
 from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
-from sys import version_info
 
 from sybil import Sybil
 from sybil.parsers.codeblock import CodeBlockParser
@@ -20,13 +19,12 @@ def setup(namespace):
     namespace['load_selector'] = load_selector
 
 
-if version_info >= (3,):
-    pytest_collect_file = Sybil(
-        parsers=[
-            DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
-            CodeBlockParser(future_imports=['print_function']),
-            skip,
-        ],
-        pattern='*.rst',
-        setup=setup,
-    ).pytest()
+pytest_collect_file = Sybil(
+    parsers=[
+        DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
+        CodeBlockParser(future_imports=['print_function']),
+        skip,
+    ],
+    pattern='*.rst',
+    setup=setup,
+).pytest()
diff --git a/docs/usage.rst b/docs/usage.rst
@@ -339,7 +339,7 @@ Using selectors with regular expressions
 
 :class:`~parsel.selector.Selector` also has a ``.re()`` method for extracting
 data using regular expressions. However, unlike using ``.xpath()`` or
-``.css()`` methods, ``.re()`` returns a list of unicode strings. So you
+``.css()`` methods, ``.re()`` returns a list of strings. So you
 can't construct nested ``.re()`` calls.
 
 Here's an example used to extract image names from the :ref:`HTML code
@@ -917,7 +917,7 @@ a :class:`~parsel.selector.Selector` instantiated with an HTML text like this::
       sel.xpath("//h1")
 
 2. Extract the text of all ``<h1>`` elements from an HTML text,
-   returning a list of unicode strings::
+   returning a list of strings::
 
       sel.xpath("//h1").getall()         # this includes the h1 tag
       sel.xpath("//h1/text()").getall()  # this excludes the h1 tag

diff --git a/parsel/selector.py b/parsel/selector.py
@@ -123,7 +123,7 @@ def re(
     ) -> List[str]:
         """
         Call the ``.re()`` method for each element in this list and return
-        their results flattened, as a list of unicode strings.
+        their results flattened, as a list of strings.
 
         By default, character entity references are replaced by their
         corresponding character (except for ``&amp;`` and ``&lt;``.
@@ -158,7 +158,7 @@ def re_first(
     ) -> Optional[str]:
         """
         Call the ``.re()`` method for the first element in this list and
-        return the result in an unicode string. If the list is empty or the
+        return the result in a string. If the list is empty or the
         regex doesn't match anything, return the default value (``None`` if
         the argument is not provided).
 
@@ -176,7 +176,7 @@ def re_first(
     def getall(self) -> List[str]:
         """
         Call the ``.get()`` method for each element is this list and return
-        their results flattened, as a list of unicode strings.
+        their results flattened, as a list of strings.
         """
         return [x.get() for x in self]
 
@@ -223,7 +223,7 @@ class Selector:
     :class:`Selector` allows you to select parts of an XML or HTML text using CSS
     or XPath expressions and extract data from it.
 
-    ``text`` is a ``unicode`` object in Python 2 or a ``str`` object in Python 3
+    ``text`` is a ``str`` object
 
     ``type`` defines the selector type, it can be ``"html"``, ``"xml"`` or ``None`` (default).
     If ``type`` is ``None``, the selector defaults to ``"html"``.
@@ -328,7 +328,7 @@ def xpath(
                 query, namespaces=nsp, smart_strings=self._lxml_smart_strings, **kwargs
             )
         except etree.XPathError as exc:
-            raise ValueError("XPath error: %s in %s" % (exc, query))
+            raise ValueError(f"XPath error: {exc} in {query}")
 
         if type(result) is not list:
             result = [result]
@@ -361,7 +361,7 @@ def re(
         self, regex: Union[str, Pattern[str]], replace_entities: bool = True
     ) -> List[str]:
         """
-        Apply the given regex and return a list of unicode strings with the
+        Apply the given regex and return a list of strings with the
         matches.
 
         ``regex`` can be either a compiled regular expression or a string which
@@ -399,9 +399,9 @@ def re_first(
         replace_entities: bool = True,
     ) -> Optional[str]:
         """
-        Apply the given regex and return the first unicode string which
-        matches. If there is no match, return the default value (``None`` if
-        the argument is not provided).
+        Apply the given regex and return the first string which matches. If
+        there is no match, return the default value (``None`` if the argument
+        is not provided).
 
         By default, character entity references are replaced by their
         corresponding character (except for ``&amp;`` and ``&lt;``).
@@ -414,7 +414,7 @@ def re_first(
 
     def get(self) -> str:
         """
-        Serialize and return the matched nodes in a single unicode string.
+        Serialize and return the matched nodes in a single string.
         Percent encoded content is unquoted.
         """
         try:
@@ -436,7 +436,7 @@ def get(self) -> str:
 
     def getall(self) -> List[str]:
         """
-        Serialize and return the matched node in a 1-element list of unicode strings.
+        Serialize and return the matched node in a 1-element list of strings.
         """
         return [self.get()]
 
@@ -504,6 +504,6 @@ def __bool__(self) -> bool:
 
     def __str__(self) -> str:
         data = repr(shorten(self.get(), width=40))
-        return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data)
+        return f"<{type(self).__name__} xpath={self._expr!r} data={data}>"
 
     __repr__ = __str__
diff --git a/parsel/utils.py b/parsel/utils.py
@@ -26,8 +26,7 @@ def iflatten(x):
     Similar to ``.flatten()``, but returns iterator instead"""
     for el in x:
         if _is_listlike(el):
-            for el_ in flatten(el):
-                yield el_
+            yield from flatten(el)
         else:
             yield el
 
@@ -59,7 +58,7 @@ def _is_listlike(x: Any) -> bool:
 def extract_regex(
     regex: Union[str, Pattern[str]], text: str, replace_entities: bool = True
 ) -> List[str]:
-    """Extract a list of unicode strings from the given text/encoding using the following policies:
+    """Extract a list of strings from the given text/encoding using the following policies:
     * if the regex contains a named group called "extract" that will be returned
     * if the regex contains multiple numbered groups, all those will be returned (flattened)
     * if the regex doesn't contain any group the entire regex matching is returned

diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
@@ -3,7 +3,7 @@
 
 from w3lib.html import HTML5_WHITESPACE
 
-regex = "[{}]+".format(HTML5_WHITESPACE)
+regex = f"[{HTML5_WHITESPACE}]+"
 replace_html5_whitespaces = re.compile(regex).sub
 
 

diff --git a/tests/test_selector.py b/tests/test_selector.py
@@ -138,7 +138,7 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None:
         t = 'I say "Yeah!"'
         # naive string formatting with give something like:
         # ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name
-        self.assertRaises(ValueError, sel.xpath, '//input[@value="{}"]/@name'.format(t))
+        self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name')
 
         # with XPath variables, escaping is done for you
         self.assertEqual(
@@ -149,7 +149,7 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None:
         # the following gives you something like
         # ValueError: XPath error: Invalid predicate in //p[normalize-space()='I'm mixing single and "double quotes" and I don't care :)']//@name
         self.assertRaises(
-            ValueError, sel.xpath, "//p[normalize-space()='{}']//@name".format(lt)
+            ValueError, sel.xpath, f"//p[normalize-space()='{lt}']//@name"
         )
 
         self.assertEqual(

diff --git a/tests/test_xml_attacks.py b/tests/test_xml_attacks.py
@@ -13,7 +13,7 @@
 
 def _load(attack):
     folder_path = path.dirname(__file__)
-    file_path = path.join(folder_path, "xml_attacks", "{}.xml".format(attack))
+    file_path = path.join(folder_path, "xml_attacks", f"{attack}.xml")
     with open(file_path, "rb") as attack_file:
         return attack_file.read().decode("utf-8")
 
@@ -28,6 +28,6 @@ def test_billion_laughs(self):
         lolz = selector.css("lolz::text").get()
         memory_usage_after = process.memory_info().rss
         memory_change = memory_usage_after - memory_usage_before
-        assert_message = "Memory change: {}B".format(memory_change)
+        assert_message = f"Memory change: {memory_change}B"
         assert memory_change <= MiB_1, assert_message
         assert lolz == "&lol9;"
diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py
@@ -57,7 +57,7 @@ def test_has_class_error_invalid_unicode(self):
             ValueError,
             "All strings must be XML compatible",
             sel.xpath,
-            'has-class("héllö")'.encode("utf-8"),
+            'has-class("héllö")'.encode(),
         )
 
     def test_has_class_unicode(self):