From 5d4b542c4d8f0b4bc8f77aa809487e6724a487cd Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi <mahmoud@hatnote.com>
Date: Wed, 10 Jan 2018 18:48:33 -0800
Subject: [PATCH 1/3] enable _percent_decode to decode percent encodings within
 unicode text

---
 hyperlink/_url.py                  | 12 ++++++++----
 hyperlink/test/test_decoded_url.py | 10 ++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/hyperlink/_url.py b/hyperlink/_url.py
index 04ac4b71..746a6884 100644
--- a/hyperlink/_url.py
+++ b/hyperlink/_url.py
@@ -523,18 +523,22 @@ def _percent_decode(text, normalize_case=False, subencoding='utf-8',
     u'abc def'
 
     Args:
-       text (unicode): The ASCII text with percent-encoding present.
+       text (unicode): Text with percent-encoding present.
        normalize_case (bool): Whether undecoded percent segments, such
           as encoded delimiters, should be uppercased, per RFC 3986
           Section 2.1. See :func:`_decode_path_part` for an example.
+       subencoding (unicode): The name of the encoding underlying the
+          percent-encoding. Pass `False` to get back bytes.
+       raise_subencoding_exc (bool): Whether an error in decoding the bytes
+          underlying the percent-decoding should be raised.
 
     Returns:
-       unicode: The percent-decoded version of *text*, with UTF-8
-         decoding applied.
+       unicode: The percent-decoded version of *text*, with decoding
+         applied, unless `subencoding=False` which returns bytes.
 
     """
     try:
-        quoted_bytes = text.encode("ascii")
+        quoted_bytes = text.encode(subencoding or 'utf-8')
     except UnicodeEncodeError:
         return text
 
diff --git a/hyperlink/test/test_decoded_url.py b/hyperlink/test/test_decoded_url.py
index 53fef34e..faac1de3 100644
--- a/hyperlink/test/test_decoded_url.py
+++ b/hyperlink/test/test_decoded_url.py
@@ -145,3 +145,13 @@ def test_twisted_compat(self):
 
     def test_percent_decode_bytes(self):
         assert _percent_decode('%00', subencoding=False) == b'\0'
+
+    def test_percent_decode_mixed(self):
+        assert _percent_decode('abcdé%C3%A9éfg') == 'abcdéééfg'
+
+        # still allow percent encoding in the case of an error
+        assert _percent_decode('abcdé%C3éfg') == 'abcdé%C3éfg'
+
+        # ...unless explicitly told otherwise
+        with self.assertRaises(UnicodeDecodeError):
+            _percent_decode('abcdé%C3éfg', raise_subencoding_exc=True)

From a0cf6d5fef39e960f241f6dc41af98c2a3c9283e Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi <mahmoud@hatnote.com>
Date: Wed, 10 Jan 2018 18:51:02 -0800
Subject: [PATCH 2/3] remove excessive _encode_* from DecodedURL now that
 _percent_decode() supports mixed decoding

---
 hyperlink/_url.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/hyperlink/_url.py b/hyperlink/_url.py
index 746a6884..8f1708bd 100644
--- a/hyperlink/_url.py
+++ b/hyperlink/_url.py
@@ -1677,8 +1677,7 @@ def path(self):
             return self._path
         except AttributeError:
             pass
-        self._path = tuple([_percent_decode(_encode_path_part(p),
-                                            raise_subencoding_exc=True)
+        self._path = tuple([_percent_decode(p, raise_subencoding_exc=True)
                             for p in self._url.path])
         return self._path
 
@@ -1688,8 +1687,7 @@ def query(self):
             return self._query
         except AttributeError:
             pass
-        _q = [tuple(_percent_decode(_encode_query_part(x),
-                                    raise_subencoding_exc=True)
+        _q = [tuple(_percent_decode(x, raise_subencoding_exc=True)
                     if x is not None else None
                     for x in (k, v))
               for k, v in self._url.query]
@@ -1703,8 +1701,7 @@ def fragment(self):
         except AttributeError:
             pass
         frag = self._url.fragment
-        self._fragment = _percent_decode(_encode_fragment_part(frag),
-                                         raise_subencoding_exc=True)
+        self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
         return self._fragment
 
     @property
@@ -1713,8 +1710,7 @@ def userinfo(self):
             return self._userinfo
         except AttributeError:
             pass
-        self._userinfo = tuple([_percent_decode(_encode_userinfo_part(p),
-                                                raise_subencoding_exc=True)
+        self._userinfo = tuple([_percent_decode(p, raise_subencoding_exc=True)
                                 for p in self._url.userinfo.split(':', 1)])
         return self._userinfo
 

From 4dd846dea1c75df3ddc2c15d3988a43576627b7f Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi <mahmoud@hatnote.com>
Date: Sat, 24 Feb 2018 12:40:37 -0800
Subject: [PATCH 3/3] add a couple more tests around mixed percent decoding and
 fix docstring for _percent_decode, per @markrwilliams review

---
 hyperlink/_url.py                  | 17 +++++++++--------
 hyperlink/test/test_decoded_url.py |  8 ++++++++
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/hyperlink/_url.py b/hyperlink/_url.py
index 8f1708bd..81c992a0 100644
--- a/hyperlink/_url.py
+++ b/hyperlink/_url.py
@@ -510,10 +510,11 @@ def _percent_decode(text, normalize_case=False, subencoding='utf-8',
     """Convert percent-encoded text characters to their normal,
     human-readable equivalents.
 
-    All characters in the input text must be valid ASCII. All special
-    characters underlying the values in the percent-encoding must be
-    valid UTF-8. If a non-UTF8-valid string is passed, the original
-    text is returned with no changes applied.
+    All characters in the input text must be encodable by
+    *subencoding*. All special characters underlying the values in the
+    percent-encoding must be decodable as *subencoding*. If *text*
+    fails either check, it is returned unchanged (a decoding failure
+    is raised instead when *raise_subencoding_exc* is true).
 
     Only called by field-tailored variants, e.g.,
     :func:`_decode_path_part`, as every percent-encodable part of the
@@ -528,17 +529,17 @@ def _percent_decode(text, normalize_case=False, subencoding='utf-8',
           as encoded delimiters, should be uppercased, per RFC 3986
           Section 2.1. See :func:`_decode_path_part` for an example.
        subencoding (unicode): The name of the encoding underlying the
-          percent-encoding. Pass `False` to get back bytes.
+          percent-encoding. Pass `False` to get back raw bytes.
        raise_subencoding_exc (bool): Whether an error in decoding the bytes
           underlying the percent-decoding should be raised.
 
     Returns:
-       unicode: The percent-decoded version of *text*, with decoding
-         applied, unless `subencoding=False` which returns bytes.
+       unicode: The percent-decoded version of *text*, decoded by
+         *subencoding*, unless `subencoding=False` which returns bytes.
 
     """
     try:
-        quoted_bytes = text.encode(subencoding or 'utf-8')
+        quoted_bytes = text.encode('utf-8' if subencoding is False else subencoding)
     except UnicodeEncodeError:
         return text
 
diff --git a/hyperlink/test/test_decoded_url.py b/hyperlink/test/test_decoded_url.py
index faac1de3..5515fce7 100644
--- a/hyperlink/test/test_decoded_url.py
+++ b/hyperlink/test/test_decoded_url.py
@@ -147,6 +147,8 @@ def test_percent_decode_bytes(self):
         assert _percent_decode('%00', subencoding=False) == b'\0'
 
     def test_percent_decode_mixed(self):
+        # See https://github.com/python-hyper/hyperlink/pull/59 for a
+        # nice discussion of the possibilities
         assert _percent_decode('abcdé%C3%A9éfg') == 'abcdéééfg'
 
         # still allow percent encoding in the case of an error
@@ -155,3 +157,9 @@ def test_percent_decode_mixed(self):
         # ...unless explicitly told otherwise
         with self.assertRaises(UnicodeDecodeError):
             _percent_decode('abcdé%C3éfg', raise_subencoding_exc=True)
+
+        # check that getting raw bytes works ok
+        assert _percent_decode('a%00b', subencoding=False) == b'a\x00b'
+
+        # text not encodable as subencoding is returned unchanged
+        assert _percent_decode('é%25é', subencoding='ascii') == 'é%25é'