From 03336d7373be3eb6dcdd94a075089c417dda5df1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Dec 2025 17:26:07 +0200 Subject: [PATCH] gh-119451: Fix a potential denial of service in http.client (GH-119454) Reading the whole body of the HTTP response could cause OOM if the Content-Length value is too large even if the server does not send a large amount of data. Now the HTTP client reads large data by chunks, therefore the amount of consumed memory is proportional to the amount of sent data. (cherry picked from commit 5a4c4a033a4a54481be6870aa1896fad732555b5) Co-authored-by: Serhiy Storchaka --- Lib/http/client.py | 28 ++++++-- Lib/test/test_httplib.py | 66 +++++++++++++++++++ ...-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 5 ++ 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index fb29923d94274c..70451d67d4cd48 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -111,6 +111,11 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = 1 << 20 + + # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # # VCHAR = %x21-7E @@ -639,10 +644,25 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) - return data + cursize = min(amt, _MIN_READ_BUF_SIZE) + data = self.fp.read(cursize) + if len(data) >= amt: + return data + if len(data) < cursize: + raise IncompleteRead(data, amt - len(data)) + + data = io.BytesIO(data) + data.seek(0, 2) + while True: + # This is a geometric increase in read size (never more than + # doubling out the current length of data per loop iteration). + delta = min(cursize, amt - cursize) + data.write(self.fp.read(delta)) + if data.tell() >= amt: + return data.getvalue() + cursize += delta + if data.tell() < cursize: + raise IncompleteRead(data.getvalue(), amt - data.tell()) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 01f5a10190194c..e46dac00779313 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1452,6 +1452,72 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") + def test_large_content_length(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + [conn, address] = serv.accept() + with conn: + while conn.recv(1024): + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" % size) + conn.sendall(b'A' * (size//3)) + conn.sendall(b'B' * (size - size//3)) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(15, 27): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertEqual(len(response.read()), size) + finally: + conn.close() + thread.join(1.0) + + def test_large_content_length_truncated(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + while True: + [conn, address] = serv.accept() + with conn: + conn.recv(1024) + if not size: + break + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" + b"Text" % size) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(18, 65): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertRaises(client.IncompleteRead, response.read) + conn.close() + finally: + conn.close() + size = 0 + conn.request("GET", "/") + conn.close() + thread.join(1.0) + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst new file mode 100644 index 00000000000000..6d6f25cd2f8bf7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -0,0 +1,5 @@ +Fix a potential memory denial of service in the :mod:`http.client` module. +When connecting to a malicious server, it could cause +an arbitrary amount of memory to be allocated. +This could have led to symptoms including a :exc:`MemoryError`, swapping, out +of memory (OOM) killed processes or containers, or even system crashes.