From 2509db49e9a3f7bf85a56ed697c39124d3fb783c Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Fri, 14 Oct 2022 21:05:45 -0400 Subject: [PATCH 1/7] pythongh-96035: Make urllib.parse.urlparse reject non-numeric port strings --- Lib/urllib/parse.py | 4 ++++ .../Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst | 3 +++ 2 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 3734c73948c695..6339434aa7eb5f 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -168,6 +168,8 @@ def port(self): port = self._hostinfo[1] if port is not None: try: + if not port.isdigit(): + raise ValueError(f"Port {port!r} contains non-numeric character(s)") port = int(port, 10) except ValueError: message = f'Port could not be cast to integer value as {port!r}' @@ -1139,6 +1141,8 @@ def _splitnport(host, defport=-1): host = port elif port: try: + if not port.isdigit(): + raise ValueError(f"Port {port!r} contains non-numeric character(s)") nport = int(port) except ValueError: nport = None diff --git a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst new file mode 100644 index 00000000000000..34b0b733426e8b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst @@ -0,0 +1,3 @@ +Fixes bug in urllib.parse.urlparse that causes certain port numbers +containing whitespace, underscores, or plus and minus signs to be +incorrectly accepted. From 98450a62e6abb0954aa4f160d7fef37343bc4419 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Wed, 19 Oct 2022 22:25:28 -0400 Subject: [PATCH 2/7] Add tests and remove nested errors. --- Lib/test/test_urlparse.py | 3 ++- Lib/urllib/parse.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 81d6018bd1a495..0f3f96080b02b5 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -653,7 +653,7 @@ def test_attributes_bad_port(self): """Check handling of invalid ports.""" for bytes in (False, True): for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for port in ("foo", "1.5", "-1", "0x10"): + for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 "): with self.subTest(bytes=bytes, parse=parse, port=port): netloc = "www.example.net:" + port url = "http://" + netloc @@ -1199,6 +1199,7 @@ def test_splitnport(self): self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) + self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None)) def test_splitquery(self): # Normal cases are exercised by other tests; ensure that we also diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 6339434aa7eb5f..14f886973fb113 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -167,14 +167,14 @@ def hostname(self): def port(self): port = self._hostinfo[1] if port is not None: + if not port.isdigit(): + raise ValueError(f"Port {port!r} contains non-numeric character(s)") try: - if not port.isdigit(): - raise ValueError(f"Port {port!r} contains non-numeric character(s)") port = int(port, 10) except ValueError: message = f'Port could not be cast to integer value as {port!r}' raise ValueError(message) from None - if not ( 0 <= port <= 65535): + if not (0 <= port <= 65535): raise ValueError("Port out of range 0-65535") return port @@ -1134,17 +1134,15 @@ def splitnport(host, defport=-1): def _splitnport(host, defport=-1): """Split host and port, returning numeric port. Return given default port if no ':' found; defaults to -1. - Return numerical port if a valid number are found after ':'. + Return numerical port if a valid number is found after ':'. Return None if ':' but not a valid number.""" host, delim, port = host.rpartition(':') if not delim: host = port elif port: - try: - if not port.isdigit(): - raise ValueError(f"Port {port!r} contains non-numeric character(s)") + if port.isdigit(): nport = int(port) - except ValueError: + else: nport = None return host, nport return host, defport From b4ea289858a081f1682627816c5117206a6e2e7e Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Wed, 19 Oct 2022 22:30:52 -0400 Subject: [PATCH 3/7] Simplify port number validation. A port is valid iff it consists only of digits --- Lib/urllib/parse.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 14f886973fb113..4f33353bbd1485 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -167,13 +167,10 @@ def hostname(self): def port(self): port = self._hostinfo[1] if port is not None: - if not port.isdigit(): - raise ValueError(f"Port {port!r} contains non-numeric character(s)") - try: - port = int(port, 10) - except ValueError: - message = f'Port could not be cast to integer value as {port!r}' - raise ValueError(message) from None + if port.isdigit(): + port = int(port) + else: + raise ValueError(f"Port could not be cast to integer value as {port!r}") if not (0 <= port <= 65535): raise ValueError("Port out of range 0-65535") return port From 58d8e3006d84230b690833fad6122d34dc13f683 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Wed, 19 Oct 2022 22:46:24 -0400 Subject: [PATCH 4/7] add check for ascii --- Lib/urllib/parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 4f33353bbd1485..9a3102afd63b9c 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -167,7 +167,7 @@ def hostname(self): def port(self): port = self._hostinfo[1] if port is not None: - if port.isdigit(): + if port.isdigit() and port.isascii(): port = int(port) else: raise ValueError(f"Port could not be cast to integer value as {port!r}") @@ -1137,7 +1137,7 @@ def _splitnport(host, defport=-1): if not delim: host = port elif port: - if port.isdigit(): + if port.isdigit() and port.isascii(): nport = int(port) else: nport = None From 40f1b37064b0db76e6824cd3fa53a1b963aacf3c Mon Sep 17 00:00:00 2001 From: Ben Kallus <49924171+kenballus@users.noreply.github.com> Date: Thu, 20 Oct 2022 02:47:45 +0000 Subject: [PATCH 5/7] Update Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst Co-authored-by: Jelle Zijlstra --- .../next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst index 34b0b733426e8b..fa347448d2e1b0 100644 --- a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst +++ b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst @@ -1,3 +1,3 @@ -Fixes bug in urllib.parse.urlparse that causes certain port numbers +Fix bug in :func:`urllib.parse.urlparse` that causes certain port numbers containing whitespace, underscores, or plus and minus signs to be incorrectly accepted. From 6a6683becf71e70e603f2283f5434a9192878ba6 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Thu, 20 Oct 2022 10:07:32 -0400 Subject: [PATCH 6/7] add test for non-ascii port number --- Lib/test/test_urlparse.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 0f3f96080b02b5..59a601d9e85b08 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -653,13 +653,16 @@ def test_attributes_bad_port(self): """Check handling of invalid ports.""" for bytes in (False, True): for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 "): + for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "рем"): with self.subTest(bytes=bytes, parse=parse, port=port): netloc = "www.example.net:" + port url = "http://" + netloc if bytes: - netloc = netloc.encode("ascii") - url = url.encode("ascii") + if netloc.isascii() and port.isascii(): + netloc = netloc.encode("ascii") + url = url.encode("ascii") + else: + continue p = parse(url) self.assertEqual(p.netloc, netloc) with self.assertRaises(ValueError): From c4c15542ccb3cd3814edbce5c137ca0fe060880f Mon Sep 17 00:00:00 2001 From: Ben Kallus <49924171+kenballus@users.noreply.github.com> Date: Thu, 20 Oct 2022 10:08:24 -0400 Subject: [PATCH 7/7] Update Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst Co-authored-by: Jelle Zijlstra --- .../next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst index fa347448d2e1b0..f04a0fd0915ec9 100644 --- a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst +++ b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst @@ -1,3 +1,3 @@ Fix bug in :func:`urllib.parse.urlparse` that causes certain port numbers -containing whitespace, underscores, or plus and minus signs to be +containing whitespace, underscores, plus and minus signs, or non-ASCII digits to be incorrectly accepted.