Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add hook for urllib3 and requests (#269)
* fix urllib3 issue with parsing URIs
* prevent URL encoding
* bump version to 1.9.0
- Loading branch information
Showing 8 changed files with 331 additions and 5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
from __future__ import absolute_import | ||
from collections import namedtuple | ||
import urllib3 | ||
|
||
|
||
class HTTPError(Exception):
    """Root of the exception hierarchy defined in this module."""
|
||
|
||
class LocationValueError(ValueError, HTTPError):
    """Signals that a supplied URL value is unusable.

    Inherits from both ``ValueError`` and ``HTTPError`` so callers may catch
    either.
    """
|
||
|
||
class LocationParseError(LocationValueError):
    """Signals that a URL string could not be parsed.

    The offending input is kept on the ``location`` attribute and is also
    embedded in the exception message.
    """

    def __init__(self, location):
        # Remember the raw input so handlers can inspect it.
        self.location = location
        HTTPError.__init__(self, "Failed to parse: %s" % location)
|
||
|
||
# Field names (and order) of the Url namedtuple.
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']

# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
NORMALIZABLE_SCHEMES = ('http', 'https', None)


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Normalization applied on construction:

    * a non-empty ``path`` always starts with ``/``;
    * ``scheme`` is lower-cased;
    * ``host`` is lower-cased only when the scheme is one of
      ``NORMALIZABLE_SCHEMES`` (http, https or no scheme at all).

    Every field defaults to ``None``.
    """
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        if path and not path.startswith('/'):
            path = '/' + path
        if scheme:
            scheme = scheme.lower()
        # Compare against the already lower-cased scheme.
        if host and scheme in NORMALIZABLE_SCHEMES:
            host = host.lower()
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string.

        Falls back to ``/`` when there is no path. An empty (but not None)
        query still yields a trailing ``?``.
        """
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port.

        Returns ``None`` when there is no host, rather than formatting the
        literal string ``'None'`` into the result. A falsy port (``None`` or
        ``0``) yields the bare host.
        """
        if self.host is None:
            return None
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`.
        The returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We test with "is not None" (not truthiness) so that empty strings
        # and a port of 0 are still emitted.
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
|
||
|
||
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the result. When several delimiters match at the same
    position, the one that comes first in ``delims`` wins.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('a::b', ['::'])
        ('a', 'b', '::')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in s at all.
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full delimiter, not just one character.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim
|
||
|
||
def patched_parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Raises :class:`LocationParseError` when the port is not a plain integer
    or when a bracketed IPv6 host is missing its closing ``]``.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)

    NOTE(review): ``patch_urllib3_parse_url`` installs this function by
    copying its ``__code__`` onto ``urllib3.util.parse_url``. A transplanted
    code object resolves global names in the *target* function's module, so
    this body should only rely on ``Url``, ``split_first`` and
    ``LocationParseError`` and must not call helpers private to this file.
    Confirm those names exist in the installed urllib3.
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        if ']' not in url:
            # Unterminated IPv6 literal: report it as a parse failure
            # instead of leaking a tuple-unpacking ValueError.
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers. No whitespace, no plus or
            # minus prefixes, no non-integer digits such as ^2 (superscript).
            if not port.isdigit():
                raise LocationParseError(url)
            try:
                port = int(port)
            except ValueError:
                # isdigit() accepts some characters int() rejects.
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
|
||
|
||
def patch_urllib3_parse_url():
    """Replace the body of ``urllib3.util.parse_url`` with ``patched_parse_url``.

    The swap is done on the function's ``__code__`` attribute rather than by
    rebinding the name. The patch is best-effort: any failure is logged at
    debug level and otherwise ignored, so this function never raises.
    """
    import logging

    try:
        urllib3.util.parse_url.__code__ = patched_parse_url.__code__
    except Exception:
        # Deliberately non-fatal, but leave a trace so a missing patch can be
        # diagnosed instead of vanishing silently.
        logging.getLogger(__name__).debug(
            "Could not patch urllib3.util.parse_url", exc_info=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import requests | ||
import urllib3 | ||
from requests.exceptions import InvalidURL | ||
from urllib.parse import quote | ||
|
||
|
||
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)


def unquote_unreserved(uri):
    """Decode percent-escapes that stand for unreserved characters.

    Escapes of reserved, illegal and non-ASCII bytes are left encoded, as are
    ``%`` signs not followed by two alphanumeric characters.

    :raises InvalidURL: if two alphanumeric characters after ``%`` are not
        valid hexadecimal.
    :rtype: str
    """
    segments = uri.split('%')
    # Everything before the first '%' is copied through untouched.
    rebuilt = [segments[0]]

    for segment in segments[1:]:
        hex_pair = segment[:2]

        # Not a candidate escape: put the '%' back and move on.
        if len(hex_pair) != 2 or not hex_pair.isalnum():
            rebuilt.append('%' + segment)
            continue

        try:
            decoded = chr(int(hex_pair, 16))
        except ValueError:
            raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

        if decoded in UNRESERVED_SET:
            rebuilt.append(decoded + segment[2:])
        else:
            rebuilt.append('%' + segment)

    return ''.join(rebuilt)
|
||
|
||
def patched_requote_uri(uri):
    """Re-quote the given URI while leaving ASCII punctuation untouched.

    The URI first has its unreserved percent-escapes decoded, then is quoted
    with every ASCII punctuation character (including ``%``) marked safe. If
    decoding fails with ``InvalidURL``, the raw URI is quoted instead with
    ``%`` no longer safe, so stray percent signs get escaped.

    :rtype: str
    """
    try:
        partially_unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # The URI holds a malformed escape, so there may be bare '%'s in it.
        # Quote them so they do not cause issues elsewhere.
        return quote(uri, safe="!\"#$&\'()*+,-./:;<=>?@[\\]^_`{|}~")

    # '%' is safe here so existing escapes are not double-encoded.
    return quote(partially_unquoted, safe="!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~")
|
||
|
||
def patched_encode_target(target):
    """Return ``target`` unchanged.

    Identity stand-in that ``unquote_request_uri`` installs over
    ``urllib3.util.url._encode_target``.
    """
    return target
|
||
|
||
def unquote_request_uri():
    """Install the quoting patches into ``requests`` and ``urllib3``.

    Two independent ``__code__`` swaps are attempted:

    * ``requests.utils.requote_uri`` receives ``patched_requote_uri``;
    * ``urllib3.util.url._encode_target`` receives ``patched_encode_target``.

    Each swap is best-effort: a failure is logged at debug level and does not
    prevent the other from being attempted. This function never raises.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        requests.utils.requote_uri.__code__ = patched_requote_uri.__code__
    except Exception:
        # Non-fatal by design; record why the hook is missing.
        log.debug("Could not patch requests.utils.requote_uri", exc_info=True)

    try:
        urllib3.util.url._encode_target.__code__ = patched_encode_target.__code__
    except Exception:
        # Non-fatal by design; record why the hook is missing.
        log.debug("Could not patch urllib3.util.url._encode_target",
                  exc_info=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters