diff --git a/Lib/http/client.py b/Lib/http/client.py
index 1e1a535c4c4eb18..c845fca2c397a94 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -158,6 +158,14 @@
 # to prevent http header injection.
 _contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
 
+# RFC 9112: Content-Length = 1*DIGIT and chunk-size = 1*HEXDIG. int() is more
+# permissive (it accepts a leading sign, underscores, non-ASCII digits and,
+# in base 16, an "0x" prefix), so the body-framing values are matched against
+# the grammar before being passed to int(). Optional whitespace around the
+# value is not part of it, so callers strip that before matching.
+_is_legal_content_length = re.compile(r'[0-9]+').fullmatch
+_is_legal_chunk_size = re.compile(rb'[0-9a-fA-F]+').fullmatch
+
 # We always set the Content-Length header for these methods because some
 # servers will otherwise respond with a 411
 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -376,14 +384,11 @@ def begin(self, *, _max_headers=None):
         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
         self.length = None
         length = self.headers.get("content-length")
         if length and not self.chunked:
-            try:
-                self.length = int(length)
-            except ValueError:
-                self.length = None
-            else:
-                if self.length < 0:  # ignore nonsensical negative lengths
-                    self.length = None
+            # Optional whitespace around a field value is not part of it.
+            length = length.strip(' \t')
+            if _is_legal_content_length(length):
+                self.length = int(length)
         else:
             self.length = None
 
@@ -550,7 +555,10 @@ def _read_next_chunk_size(self):
         i = line.find(b";")
         if i >= 0:
             line = line[:i] # strip chunk-extensions
+        line = line.strip()
         try:
+            if not _is_legal_chunk_size(line):
+                raise ValueError("invalid chunk size")
             return int(line, 16)
         except ValueError:
             # close the connection as protocol synchronisation is
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index f771fc48dada368..ed62e895fd61593 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1319,6 +1319,46 @@ def test_negative_content_length(self):
         self.assertEqual(resp.read(), b'Hello\r\n')
         self.assertTrue(resp.isclosed())
 
+    def test_malformed_content_length(self):
+        # RFC 9112: Content-Length = 1*DIGIT. Values that int() accepts but
+        # the grammar forbids must not be used to frame the body.
+        for value in ('+5', '5_0'):
+            with self.subTest(value=value):
+                sock = FakeSocket(
+                    'HTTP/1.1 200 OK\r\nContent-Length: %s\r\n\r\nHello\r\n' % value)
+                resp = client.HTTPResponse(sock, method="GET")
+                resp.begin()
+                self.assertIsNone(resp.length)
+                self.assertEqual(resp.read(), b'Hello\r\n')
+                resp.close()
+
+    def test_content_length_optional_whitespace(self):
+        # Optional whitespace around a field value is not part of the value,
+        # so it must not stop Content-Length from framing the body.
+        body = '\r\n\r\nHello\r\n'
+        for value in ('5 ', '5\t', ' 5 '):
+            with self.subTest(value=value):
+                sock = FakeSocket(
+                    'HTTP/1.1 200 OK\r\nContent-Length: ' + value + body)
+                resp = client.HTTPResponse(sock, method="GET")
+                resp.begin()
+                self.assertEqual(resp.length, 5)
+                self.assertEqual(resp.read(), b'Hello')
+                resp.close()
+
+    def test_malformed_chunk_size(self):
+        # RFC 9112: chunk-size = 1*HEXDIG. Reject sizes that int(_, 16) accepts
+        # but the grammar forbids (a sign, an "0x" prefix or underscores).
+        start = 'HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n'
+        for size in ('-5', '+5', '0x5', '1_f'):
+            with self.subTest(size=size):
+                sock = FakeSocket(start + '%s\r\nHELLO\r\n0\r\n\r\n' % size)
+                resp = client.HTTPResponse(sock, method="GET")
+                resp.begin()
+                self.assertRaises(client.IncompleteRead, resp.read)
+                self.assertTrue(resp.isclosed())
+                resp.close()
+
     def test_incomplete_read(self):
         sock = FakeSocket('HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n')
         resp = client.HTTPResponse(sock, method="GET")
diff --git a/Misc/NEWS.d/next/Library/2026-06-02-13-10-00.gh-issue-150751.Hk2Vw9.rst b/Misc/NEWS.d/next/Library/2026-06-02-13-10-00.gh-issue-150751.Hk2Vw9.rst
new file mode 100644
index 000000000000000..670d677aff79ff2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-02-13-10-00.gh-issue-150751.Hk2Vw9.rst
@@ -0,0 +1,5 @@
+:mod:`http.client` now validates the ``Content-Length`` header and the
+chunked ``chunk-size`` against the RFC 9112 grammar (``1*DIGIT`` and
+``1*HEXDIG``) before parsing them, rejecting values such as ``+5``, ``5_0``
+or a ``0x``-prefixed or negative chunk size that :func:`int` would otherwise
+accept. This avoids framing a response differently from a strict peer.