diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index b5cd202a613725..442296ab172664 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1032,6 +1032,21 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + def test_decompress_after_data_error(self): + data = bytes.fromhex( + "425a6839314159265359000000000000007fffff000000000000000000000000" + "00000000000000000000000000000000000000e0370000000000000000000000" + "000000000000000000000000000000000000000000000000000083f3" + ) + bzd = BZ2Decompressor() + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(data) + # Previously, a second call could crash due to internal inconsistency + self.assertFalse(bzd.needs_input) + self.assertFalse(bzd.eof) + with self.assertRaisesRegex(ValueError, "previous error"): + bzd.decompress(b'\x00' * 18) + @support.refcount_test def test_refleaks_in___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst new file mode 100644 index 00000000000000..a37d86cf423f82 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst @@ -0,0 +1,3 @@ +Fix a possible stack buffer overflow in :mod:`bz2` when a +:class:`bz2.BZ2Decompressor` is reused after a decompression error. +The decompressor now becomes unusable after libbz2 reports an error. diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index ddf2f1dceeadcf..dc536835acd01b 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -116,6 +116,7 @@ typedef struct { typedef struct { PyObject_HEAD bz_stream bzs; + int bzerror; char eof; /* Py_T_BOOL expects a char */ PyObject *unused_data; char needs_input; @@ -455,8 +456,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) d->bzs_avail_in_real += bzs->avail_in; - if (catch_bz2_error(bzret)) + if (catch_bz2_error(bzret)) { + d->bzerror = bzret; + d->needs_input = 0; goto error; + } if (bzret == BZ_STREAM_END) { d->eof = 1; break; @@ -625,10 +629,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyObject *result = NULL; ACQUIRE_LOCK(self); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "End of stream already reached"); - else + } + else if (self->bzerror) { + // Re-entering BZ2_bzDecompress() after an error can write out of bounds. + PyErr_SetString(PyExc_ValueError, + "Decompressor is unusable after a previous error"); + } + else { result = decompress(self, data->buf, data->len, max_length); + } RELEASE_LOCK(self); return result; } @@ -662,6 +673,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) return NULL; } + self->bzerror = 0; self->needs_input = 1; self->bzs_avail_in_real = 0; self->input_buffer = NULL;