python-hyper · mahmoud · Feb 24, 2018 · Jan 11, 2018 · Jan 11, 2018 · Feb 24, 2018
diff --git a/hyperlink/_url.py b/hyperlink/_url.py
@@ -510,10 +510,11 @@ def _percent_decode(text, normalize_case=False, subencoding='utf-8',
     """Convert percent-encoded text characters to their normal,
     human-readable equivalents.
 
-    All characters in the input text must be valid ASCII. All special
-    characters underlying the values in the percent-encoding must be
-    valid UTF-8. If a non-UTF8-valid string is passed, the original
-    text is returned with no changes applied.
+    All characters in the input text must be encodable by
+    *subencoding*, and the bytes underlying the percent-encoded values
+    must be decodable as *subencoding*. If either condition fails, the
+    original text is returned unchanged (but see
+    *raise_subencoding_exc* for decoding errors).
 
     Only called by field-tailored variants, e.g.,
     :func:`_decode_path_part`, as every percent-encodable part of the
@@ -523,18 +524,22 @@ def _percent_decode(text, normalize_case=False, subencoding='utf-8',
     u'abc def'
 
     Args:
-       text (unicode): The ASCII text with percent-encoding present.
+       text (unicode): Text with percent-encoding present.
        normalize_case (bool): Whether undecoded percent segments, such
           as encoded delimiters, should be uppercased, per RFC 3986
           Section 2.1. See :func:`_decode_path_part` for an example.
+       subencoding (unicode): The name of the encoding underlying the
+          percent-encoding. Pass `False` to get back raw bytes.
+       raise_subencoding_exc (bool): Whether an error in decoding the bytes
+          underlying the percent-decoding should be raised.
 
     Returns:
-       unicode: The percent-decoded version of *text*, with UTF-8
-         decoding applied.
+       unicode: The percent-decoded version of *text*, decoded by
+         *subencoding*; if `subencoding=False`, raw bytes are returned.
 
     """
     try:
-        quoted_bytes = text.encode("ascii")
+        quoted_bytes = text.encode('utf-8' if subencoding is False else subencoding)
     except UnicodeEncodeError:
         return text
 
@@ -1673,8 +1678,7 @@ def path(self):
             return self._path
         except AttributeError:
             pass
-        self._path = tuple([_percent_decode(_encode_path_part(p),
-                                            raise_subencoding_exc=True)
+        self._path = tuple([_percent_decode(p, raise_subencoding_exc=True)
                             for p in self._url.path])
         return self._path
 
@@ -1684,8 +1688,7 @@ def query(self):
             return self._query
         except AttributeError:
             pass
-        _q = [tuple(_percent_decode(_encode_query_part(x),
-                                    raise_subencoding_exc=True)
+        _q = [tuple(_percent_decode(x, raise_subencoding_exc=True)
                     if x is not None else None
                     for x in (k, v))
               for k, v in self._url.query]
@@ -1699,8 +1702,7 @@ def fragment(self):
         except AttributeError:
             pass
         frag = self._url.fragment
-        self._fragment = _percent_decode(_encode_fragment_part(frag),
-                                         raise_subencoding_exc=True)
+        self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
         return self._fragment
 
     @property
@@ -1709,8 +1711,7 @@ def userinfo(self):
             return self._userinfo
         except AttributeError:
             pass
-        self._userinfo = tuple([_percent_decode(_encode_userinfo_part(p),
-                                                raise_subencoding_exc=True)
+        self._userinfo = tuple([_percent_decode(p, raise_subencoding_exc=True)
                                 for p in self._url.userinfo.split(':', 1)])
         return self._userinfo
 

diff --git a/hyperlink/test/test_decoded_url.py b/hyperlink/test/test_decoded_url.py
@@ -145,3 +145,21 @@ def test_twisted_compat(self):
 
     def test_percent_decode_bytes(self):
         assert _percent_decode('%00', subencoding=False) == b'\0'
+
+    def test_percent_decode_mixed(self):
+        # See https://github.com/python-hyper/hyperlink/pull/59 for a
+        # nice discussion of the possibilities
+        assert _percent_decode('abcdé%C3%A9éfg') == 'abcdéééfg'
+
+        # text is returned unchanged when the underlying bytes fail to decode
+        assert _percent_decode('abcdé%C3éfg') == 'abcdé%C3éfg'
+
+        # ...unless explicitly told otherwise
+        with self.assertRaises(UnicodeDecodeError):
+            _percent_decode('abcdé%C3éfg', raise_subencoding_exc=True)
+
+        # check that getting raw bytes works ok
+        assert _percent_decode('a%00b', subencoding=False) == b'a\x00b'
+
+        # text is returned unchanged when not encodable as subencoding
+        assert _percent_decode('é%25é', subencoding='ascii') == 'é%25é'