From ebde87662a8b4f99ce767e00d21449a09030b14a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Dec 2025 17:26:07 +0200 Subject: [PATCH] gh-119451: Fix a potential denial of service in http.client (GH-119454) Reading the whole body of the HTTP response could cause OOM if the Content-Length value is too large even if the server does not send a large amount of data. Now the HTTP client reads large data by chunks, therefore the amount of consumed memory is proportional to the amount of sent data. (cherry picked from commit 5a4c4a033a4a54481be6870aa1896fad732555b5) Co-authored-by: Serhiy Storchaka --- Lib/http/client.py | 28 ++++++-- Lib/test/test_httplib.py | 66 +++++++++++++++++++ ...-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 5 ++ 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index 8a728438439e39..dd5f4136e9eab5 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -111,6 +111,11 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = 1 << 20 + + # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # # VCHAR = %x21-7E @@ -639,10 +644,25 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) - return data + cursize = min(amt, _MIN_READ_BUF_SIZE) + data = self.fp.read(cursize) + if len(data) >= amt: + return data + if len(data) < cursize: + raise IncompleteRead(data, amt - len(data)) + + data = io.BytesIO(data) + data.seek(0, 2) + while True: + # This is a geometric increase in read size (never more than + # doubling out the current length of data per loop iteration). + delta = min(cursize, amt - cursize) + data.write(self.fp.read(delta)) + if data.tell() >= amt: + return data.getvalue() + cursize += delta + if data.tell() < cursize: + raise IncompleteRead(data.getvalue(), amt - data.tell()) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 84e99a156039a1..5267d2fe011766 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1455,6 +1455,72 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") + def test_large_content_length(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + [conn, address] = serv.accept() + with conn: + while conn.recv(1024): + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" % size) + conn.sendall(b'A' * (size//3)) + conn.sendall(b'B' * (size - size//3)) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(15, 27): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertEqual(len(response.read()), size) + finally: + conn.close() + thread.join(1.0) + + def test_large_content_length_truncated(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + while True: + [conn, address] = serv.accept() + with conn: + conn.recv(1024) + if not size: + break + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" + b"Text" % size) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(18, 65): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertRaises(client.IncompleteRead, response.read) + conn.close() + finally: + conn.close() + size = 0 + conn.request("GET", "/") + conn.close() + thread.join(1.0) + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst new file mode 100644 index 00000000000000..6d6f25cd2f8bf7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -0,0 +1,5 @@ +Fix a potential memory denial of service in the :mod:`http.client` module. +When connecting to a malicious server, it could cause +an arbitrary amount of memory to be allocated. +This could have led to symptoms including a :exc:`MemoryError`, swapping, out +of memory (OOM) killed processes or containers, or even system crashes.