From 6c8df75b133afa92d3dd41a9640dfc37f145d7ef Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Jun 2026 19:37:10 +0100 Subject: [PATCH 1/2] [3.14] gh-150599: Prevent bz2 decompressor reuse after errors (GH-150600) (GH-151054) (cherry picked from commit 157a5df8cb5d82b33f918a7489e72ce95ceb12b6) Co-authored-by: Stan Ulbrych (cherry picked from commit 5755d0f083949ff3c5bf3a37e673e24e306b036e) --- Lib/test/test_bz2.py | 15 +++++++++++++++ ...6-05-30-09-36-20.gh-issue-150599.nlHqU-.rst | 3 +++ Modules/_bz2module.c | 18 +++++++++++++++--- 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index b5cd202a613725..442296ab172664 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1032,6 +1032,21 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + def test_decompress_after_data_error(self): + data = bytes.fromhex( + "425a6839314159265359000000000000007fffff000000000000000000000000" + "00000000000000000000000000000000000000e0370000000000000000000000" + "000000000000000000000000000000000000000000000000000083f3" + ) + bzd = BZ2Decompressor() + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(data) + # Previously, a second call could crash due to internal inconsistency + self.assertFalse(bzd.needs_input) + self.assertFalse(bzd.eof) + with self.assertRaisesRegex(ValueError, "previous error"): + bzd.decompress(b'\x00' * 18) + @support.refcount_test def test_refleaks_in___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst new file mode 100644 index 00000000000000..a37d86cf423f82 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst @@ -0,0 +1,3 @@ +Fix a possible stack buffer overflow in :mod:`bz2` when a +:class:`bz2.BZ2Decompressor` is reused after a decompression error. +The decompressor now becomes unusable after libbz2 reports an error. diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index ddf2f1dceeadcf..a60cb38d9df408 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -116,6 +116,7 @@ typedef struct { typedef struct { PyObject_HEAD bz_stream bzs; + int bzerror; char eof; /* Py_T_BOOL expects a char */ PyObject *unused_data; char needs_input; @@ -455,8 +456,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) d->bzs_avail_in_real += bzs->avail_in; - if (catch_bz2_error(bzret)) + if (catch_bz2_error(bzret)) { + d->bzerror = bzret; + _Py_atomic_store_char_relaxed(&d->needs_input, 0); goto error; + } if (bzret == BZ_STREAM_END) { d->eof = 1; break; @@ -625,10 +629,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyObject *result = NULL; ACQUIRE_LOCK(self); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "End of stream already reached"); - else + } + else if (self->bzerror) { + // Re-entering BZ2_bzDecompress() after an error can write out of bounds. + PyErr_SetString(PyExc_ValueError, + "Decompressor is unusable after a previous error"); + } + else { result = decompress(self, data->buf, data->len, max_length); + } RELEASE_LOCK(self); return result; } @@ -662,6 +673,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) return NULL; } + self->bzerror = 0; self->needs_input = 1; self->bzs_avail_in_real = 0; self->input_buffer = NULL; From e1940230bf6fb80c5118da868ff9f27fa3748392 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Jun 2026 19:47:38 +0100 Subject: [PATCH 2/2] We can just make it a plain assignment, 1a9cdaf63af7014dd7bd852b4d8a8c0ab98387ab was 3.15+ anyway. --- Modules/_bz2module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index a60cb38d9df408..dc536835acd01b 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -458,7 +458,7 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) if (catch_bz2_error(bzret)) { d->bzerror = bzret; - _Py_atomic_store_char_relaxed(&d->needs_input, 0); + d->needs_input = 0; goto error; } if (bzret == BZ_STREAM_END) {