feat: add hook for urllib3 and requests (#269)

* fix urllib3 issue with parsing URIs
* prevent URL encoding
* bump version to 1.9.0
knownsec · Mar 5, 2022 · e7d4420 · e7d4420
1 parent 29a90a4
commit e7d4420
Show file tree

Hide file tree

Showing 8 changed files with 331 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -302,3 +302,8 @@ Cross-platform shell code generation
 # version 1.8.12
 -----------------
 * update fofa api url #263
+
+# version 1.9.0
+-----------------
+* Fix urllib3 issue with parsing URIs
+* Prevent URL encoding
diff --git a/COPYING b/COPYING
@@ -1,7 +1,7 @@
 COPYING -- Describes the terms under which pocsuite is distributed. A copy
 of the GNU General Public License (GPL) is appended to this file.
 
-pocsuite3 is (C) 2014-2021 404-team@knownsec.com
+pocsuite3 is (C) 2014-2022 404-team@knownsec.com
 
 This program is free software; you may redistribute and/or modify it under
 the terms of the GNU General Public License as published by the Free

diff --git a/pocsuite3/__init__.py b/pocsuite3/__init__.py
@@ -1,9 +1,9 @@
 __title__ = 'pocsuite3'
-__version__ = '1.8.12'
+__version__ = '1.9.0'
 __author__ = 'Knownsec 404 Team'
 __author_email__ = '404-team@knownsec.com'
 __license__ = 'GPLv2'
-__copyright__ = 'Copyright 2014-2021 Knownsec 404 Team'
+__copyright__ = 'Copyright 2014-2022 Knownsec 404 Team'
 __name__ = 'pocsuite3'
 __package__ = 'pocsuite3'
 

diff --git a/pocsuite3/lib/request/patch/__init__.py b/pocsuite3/lib/request/patch/__init__.py
@@ -6,9 +6,14 @@
 from .hook_request import patch_session
 from .add_httpraw import patch_addraw
 from .hook_request_redirect import patch_redirect
+from .hook_urllib3_parse_url import patch_urllib3_parse_url
+from .unquote_request_uri import unquote_request_uri
 
 
 def patch_all():
+    # fix https://github.com/urllib3/urllib3/issues/1790
+    patch_urllib3_parse_url()
+    unquote_request_uri()
     urllib3.response.HTTPResponse._update_chunk_length = _update_chunk_length
     disable_warnings()
     remove_ssl_verify()

diff --git a/pocsuite3/lib/request/patch/hook_request.py b/pocsuite3/lib/request/patch/hook_request.py
@@ -56,7 +56,7 @@ def _merge_retain_none(request_setting, session_setting, dict_class=OrderedDict)
 
     # proxies = proxies or (conf.proxies if 'proxies' in conf else {})
     if proxies is None:
-       proxies = conf.proxies if 'proxies' in conf else {}
+        proxies = conf.proxies if 'proxies' in conf else {}
 
     settings = self.merge_environment_settings(
         prep.url, proxies, stream, verify, cert

diff --git a/pocsuite3/lib/request/patch/hook_urllib3_parse_url.py b/pocsuite3/lib/request/patch/hook_urllib3_parse_url.py
@@ -0,0 +1,248 @@
+from __future__ import absolute_import
+from collections import namedtuple
+import urllib3
+
+
+class HTTPError(Exception):
+    """Root of the exception hierarchy defined in this module."""
+
+
+class LocationValueError(ValueError, HTTPError):
+    """Signals that a URL value handed to this module is unusable."""
+
+
+class LocationParseError(LocationValueError):
+    """Raised when get_host or a similar helper cannot parse a URL.
+
+    The rejected input is kept on the ``location`` attribute and the
+    exception message reads ``Failed to parse: <location>``.
+    """
+
+    def __init__(self, location):
+        self.location = location
+        super(LocationParseError, self).__init__(
+            "Failed to parse: %s" % location)
+
+
+# Field names of the Url namedtuple, in positional order.
+url_attrs = [
+    'scheme',
+    'auth',
+    'host',
+    'port',
+    'path',
+    'query',
+    'fragment',
+]
+
+# Host normalization is limited to HTTP(S) URLs. None is listed as well
+# because urllib3 treats a URL without a scheme as http.
+NORMALIZABLE_SCHEMES = ('http', 'https', None)
+
+
+class Url(namedtuple('Url', url_attrs)):
+    """
+    Immutable record holding the components of an HTTP URL; this is the
+    return type of :func:`parse_url`. The scheme is always lower-cased and
+    the host is lower-cased when the scheme is http, https or missing.
+    """
+    __slots__ = ()
+
+    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+                query=None, fragment=None):
+        # A non-empty path is always stored with a leading slash.
+        if path and not path.startswith('/'):
+            path = '/' + path
+        # Lower-case the scheme first: the host check below relies on it.
+        scheme = scheme.lower() if scheme else scheme
+        if host and scheme in NORMALIZABLE_SCHEMES:
+            host = host.lower()
+        return super(Url, cls).__new__(
+            cls, scheme, auth, host, port, path, query, fragment)
+
+    @property
+    def hostname(self):
+        """Alias of ``host``, kept for compatibility with urlparse."""
+        return self.host
+
+    @property
+    def request_uri(self):
+        """Path plus query string; an empty path is rendered as '/'."""
+        base = self.path or '/'
+        if self.query is None:
+            return base
+        return base + '?' + self.query
+
+    @property
+    def netloc(self):
+        """Host, followed by ':port' when a truthy port is set."""
+        if not self.port:
+            return self.host
+        return '%s:%d' % (self.host, self.port)
+
+    @property
+    def url(self):
+        """
+        Reassemble the components into a url string.
+
+        The result should more or less round-trip with :func:`.parse_url`:
+        it may differ textually from what was parsed while remaining
+        equivalent by the RFC (e.g., a blank port loses its ':').
+
+        Example: ::
+
+            >>> U = parse_url('http://google.com/mail/')
+            >>> U.url
+            'http://google.com/mail/'
+            >>> Url('http', 'username:password', 'host.com', 80,
+            ... '/path', 'query', 'fragment').url
+            'http://username:password@host.com:80/path?query#fragment'
+        """
+        scheme, auth, host, port, path, query, fragment = self
+        pieces = []
+
+        # Components are tested against None (not truthiness) so that empty
+        # strings and a port of 0 are still emitted.
+        if scheme is not None:
+            pieces.append(scheme + '://')
+        if auth is not None:
+            pieces.append(auth + '@')
+        if host is not None:
+            pieces.append(host)
+        if port is not None:
+            pieces.append(':' + str(port))
+        if path is not None:
+            pieces.append(path)
+        if query is not None:
+            pieces.append('?' + query)
+        if fragment is not None:
+            pieces.append('#' + fragment)
+
+        return ''.join(pieces)
+
+    def __str__(self):
+        return self.url
+
+
+def split_first(s, delims):
+    """
+    Cut ``s`` at whichever delimiter from ``delims`` occurs earliest.
+
+    Returns a 3-tuple ``(before, after, delimiter)``. When none of the
+    delimiters occurs, the result is ``(s, '', None)``. If two delimiters
+    match at the same position, the one listed first in ``delims`` wins.
+
+    Example::
+
+        >>> split_first('foo/bar?baz', '?/=')
+        ('foo', 'bar?baz', '/')
+        >>> split_first('foo/bar?baz', '123')
+        ('foo/bar?baz', '', None)
+
+    One ``str.find`` is issued per delimiter, so the cost grows linearly
+    with the number of delimiters.
+    """
+    positions = [(s.find(d), d) for d in delims]
+    hits = [pair for pair in positions if pair[0] >= 0]
+    if not hits:
+        return s, '', None
+
+    # min() keeps the first of several equally small entries, which matches
+    # a strict "<" comparison while scanning in order.
+    cut, delim = min(hits, key=lambda pair: pair[0])
+
+    # Exactly one character is skipped after the cut position.
+    return s[:cut], s[cut + 1:], delim
+
+
+def patched_parse_url(url):
+    """
+    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+    performed to parse incomplete urls. Fields not provided will be None.
+    This function itself performs no percent-encoding of any component.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    NOTE: ``patch_urllib3_parse_url`` assigns this function's ``__code__``
+    to ``urllib3.util.parse_url``. A function keeps its own globals when its
+    code object is replaced, so after the swap the names ``Url``,
+    ``split_first`` and ``LocationParseError`` are resolved in the patched
+    urllib3 function's globals, not in this module. Keep the body free of
+    closures and of any additional module-level helper.
+
+    :param url: url string to parse; a falsy value yields an empty
+        :class:`.Url`.
+    :returns: a :class:`.Url`; ``port`` is an ``int`` when one was given.
+    :raises LocationParseError: when the port is not made of digits, or
+        when an IPv6 literal lacks its closing ``]``.
+
+    Example::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    """
+
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+
+    if not url:
+        # Empty
+        return Url()
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+
+    if delim:
+        # Reassemble the path (the delimiter was consumed by the split)
+        path = delim + path_
+
+    # Auth
+    if '@' in url:
+        # Last '@' denotes end of auth part
+        auth, url = url.rsplit('@', 1)
+
+    # IPv6
+    if url and url[0] == '[':
+        if ']' not in url:
+            # An unterminated literal used to escape as a bare ValueError
+            # from tuple unpacking; report it like other malformed input.
+            raise LocationParseError(url)
+        host, url = url.split(']', 1)
+        host += ']'
+
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+
+        if not host:
+            host = _host
+
+        if port:
+            # If given, ports must be integers. No whitespace, no plus or
+            # minus prefixes, no non-integer digits such as ^2 (superscript).
+            if not port.isdigit():
+                raise LocationParseError(url)
+            try:
+                port = int(port)
+            except ValueError:
+                raise LocationParseError(url)
+        else:
+            # Blank ports are cool, too. (rfc3986#section-3.2.3)
+            port = None
+
+    elif not host and url:
+        host = url
+
+    if not path:
+        return Url(scheme, auth, host, port, path, query, fragment)
+
+    # Fragment (split off first so a '?' inside it is not taken as a query)
+    if '#' in path:
+        path, fragment = path.split('#', 1)
+
+    # Query
+    if '?' in path:
+        path, query = path.split('?', 1)
+
+    return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def patch_urllib3_parse_url():
+    """
+    Best-effort monkeypatch that makes ``urllib3.util.parse_url`` execute
+    the body of :func:`patched_parse_url`.
+
+    The swap replaces ``__code__`` on the existing function object instead
+    of rebinding the name, so references to that function object obtained
+    before this call run the new code too.
+
+    Never raises: if the swap fails, the stock parser stays in place and the
+    failure is reported at DEBUG level instead of being dropped silently.
+    """
+    import logging
+
+    try:
+        urllib3.util.parse_url.__code__ = patched_parse_url.__code__
+    except Exception as exc:
+        # Deliberately broad: patching is optional and must not break
+        # start-up, but the reason should remain discoverable.
+        logging.getLogger(__name__).debug(
+            'could not patch urllib3.util.parse_url: %r', exc)
diff --git a/pocsuite3/lib/request/patch/unquote_request_uri.py b/pocsuite3/lib/request/patch/unquote_request_uri.py
@@ -0,0 +1,68 @@
+import requests
+import urllib3
+from requests.exceptions import InvalidURL
+from urllib.parse import quote
+
+
+# RFC 3986 "unreserved" characters: ASCII letters, digits and "-._~".
+UNRESERVED_SET = frozenset(
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789"
+    "-._~"
+)
+
+
+def unquote_unreserved(uri):
+    """Decode only those percent-escapes in ``uri`` that stand for an
+    unreserved character; every other escape is left exactly as written.
+
+    Raises InvalidURL when a two-character alphanumeric escape is not
+    valid hexadecimal.
+    :rtype: str
+    """
+    head, *tail = uri.split('%')
+    rebuilt = [head]
+    for chunk in tail:
+        code = chunk[:2]
+        # Too short or non-alphanumeric: not an escape, restore the '%'.
+        if len(code) != 2 or not code.isalnum():
+            rebuilt.append('%' + chunk)
+            continue
+
+        try:
+            char = chr(int(code, 16))
+        except ValueError:
+            raise InvalidURL("Invalid percent-escape sequence: '%s'" % code)
+
+        if char in UNRESERVED_SET:
+            rebuilt.append(char + chunk[2:])
+        else:
+            rebuilt.append('%' + chunk)
+    return ''.join(rebuilt)
+
+
+def patched_requote_uri(uri):
+    """Re-quote ``uri`` while leaving ASCII punctuation untouched.
+
+    Escapes of unreserved characters are decoded first, then the result is
+    quoted with a ``safe`` set that lists the ASCII punctuation characters,
+    '%' included. If decoding fails with InvalidURL, the raw ``uri`` is
+    quoted instead with '%' dropped from the safe set, so stray '%' signs
+    become properly escaped.
+
+    NOTE: ``unquote_request_uri`` assigns this function's ``__code__`` to
+    ``requests.utils.requote_uri``; after that swap the names ``quote``,
+    ``unquote_unreserved`` and ``InvalidURL`` are resolved in the patched
+    function's globals, so the body must stay closure-free.
+    :rtype: str
+    """
+    keep_including_percent = "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~"
+    keep_excluding_percent = "!\"#$&\'()*+,-./:;<=>?@[\\]^_`{|}~"
+    try:
+        partially_decoded = unquote_unreserved(uri)
+    except InvalidURL:
+        # The URI holds a malformed escape, so quote it as-is and let the
+        # unquoted '%' characters be escaped as well.
+        return quote(uri, safe=keep_excluding_percent)
+    # Only characters outside the safe set get quoted; reserved and
+    # unreserved characters and '%' pass through unchanged.
+    return quote(partially_decoded, safe=keep_including_percent)
+
+
+def patched_encode_target(target):
+    # Identity replacement: hands ``target`` back untouched.
+    # unquote_request_uri() assigns this function's __code__ to
+    # urllib3.util.url._encode_target, so the single parameter and the
+    # closure-free body matter for that swap.
+    # NOTE(review): presumably this stops urllib3 from percent-encoding the
+    # request target; confirm against the installed urllib3 version.
+    return target
+
+
+def unquote_request_uri():
+    """
+    Best-effort monkeypatch of the two URL re-encoding hooks.
+
+    * ``requests.utils.requote_uri`` receives the code of
+      :func:`patched_requote_uri`.
+    * ``urllib3.util.url._encode_target`` receives the code of
+      :func:`patched_encode_target`.
+
+    The two swaps are attempted independently, so a failure of the first
+    does not prevent the second. Never raises: a failed swap leaves the
+    stock function in place and is reported at DEBUG level instead of being
+    dropped silently.
+    """
+    import logging
+
+    log = logging.getLogger(__name__)
+
+    try:
+        requests.utils.requote_uri.__code__ = patched_requote_uri.__code__
+    except Exception as exc:
+        # Deliberately broad: patching is optional and must not break
+        # start-up, but the reason should remain discoverable.
+        log.debug('could not patch requests.utils.requote_uri: %r', exc)
+
+    try:
+        urllib3.util.url._encode_target.__code__ = patched_encode_target.__code__
+    except Exception as exc:
+        log.debug('could not patch urllib3.util.url._encode_target: %r', exc)
diff --git a/setup.py b/setup.py
@@ -21,7 +21,7 @@ def find_packages(where='.'):
 
 setup(
     name='pocsuite3',
-    version='1.8.12',
+    version='1.9.0',
     url='https://pocsuite.org',
     description='Open-sourced remote vulnerability testing framework.',
     long_description=long_description,