From 955f1fc8a494b82e5122fdda0c565aec7ecb233b Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Sun, 7 Jun 2026 23:49:56 +0800 Subject: [PATCH 1/2] gh-151046: Fix use-after-free in _Unpickler_ReadIntoFromFile When unpickling from a file-like object that provides readinto(), the C Unpickler handed it a temporary memoryview over an internal buffer and never released it. A readinto() implementation that kept a reference to the view could read or write the buffer after it had been freed, a use-after-free reachable from pure Python. Keep an owned reference across the call and release the memoryview as soon as readinto() returns, so a surviving reference raises ValueError instead of dereferencing freed memory. Add a regression test. --- Lib/test/test_pickle.py | 47 +++++++++++++++++++ ...-06-07-23-45-00.gh-issue-151046.S6dZlz.rst | 7 +++ Modules/_pickle.c | 45 +++++++++++++++++- 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-07-23-45-00.gh-issue-151046.S6dZlz.rst diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 48375cf459ea0b1..1c47d5794103edb 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -382,6 +382,53 @@ class CUnpicklerTests(PyUnpicklerTests): truncated_data_error = (pickle.UnpicklingError, 'truncated') size_overflow_error = (OverflowError, 'exceeds') + def test_readinto_does_not_keep_buffer_alive(self): + # The C unpickler hands readinto() a temporary memoryview over an + # internal buffer that does not outlive the unpickling operation. + # A readinto() implementation that keeps a reference to that view + # must not be able to read or write the buffer after readinto() + # returns: the view is released, so using it raises ValueError + # rather than accessing freed memory. + stashed = [] + + class StashingFile: + def __init__(self, data): + self._data = memoryview(data) + self._pos = 0 + + def read(self, n=-1): + if n is None or n < 0: + chunk = self._data[self._pos:] + else: + chunk = self._data[self._pos:self._pos + n] + self._pos += len(chunk) + return bytes(chunk) + + def readline(self): + return self.read(-1) + + def readinto(self, view): + stashed.append(view) + n = min(len(view), len(self._data) - self._pos) + view[:n] = self._data[self._pos:self._pos + n] + self._pos += n + return n + + # A large bytes object forces the file-read (readinto) path. + payload = b'spam' * 100_000 + data = pickle.dumps(payload, protocol=4) + obj = self.unpickler(StashingFile(data)).load() + self.assertEqual(obj, payload) + + self.assertTrue(stashed) + for view in stashed: + with self.assertRaises(ValueError): + view[0] + with self.assertRaises(ValueError): + view[0] = 0 + with self.assertRaises(ValueError): + bytes(view) + class CPicklingErrorTests(PyPicklingErrorTests): pickler = _pickle.Pickler diff --git a/Misc/NEWS.d/next/Library/2026-06-07-23-45-00.gh-issue-151046.S6dZlz.rst b/Misc/NEWS.d/next/Library/2026-06-07-23-45-00.gh-issue-151046.S6dZlz.rst new file mode 100644 index 000000000000000..2acb0d65c572050 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-07-23-45-00.gh-issue-151046.S6dZlz.rst @@ -0,0 +1,7 @@ +Fix a use-after-free in the C implementation of :mod:`pickle`. When +unpickling from a file-like object that provides ``readinto()``, the +:class:`~pickle.Unpickler` passed it a temporary :class:`memoryview` over an +internal buffer. A ``readinto()`` implementation that kept a reference to that +view could use it to read or write the buffer after it had been freed. The +view is now released as soon as ``readinto()`` returns, so a surviving +reference raises :exc:`ValueError` instead of accessing freed memory. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index a5595be251a738d..84e43038e060b79 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1283,6 +1283,32 @@ _Unpickler_SkipConsumed(UnpicklerObject *self) static const Py_ssize_t READ_WHOLE_LINE = -1; +/* Release the temporary memoryview that was handed to a readinto() call and + drop our reference to it. Releasing it severs the link between the view and + the underlying buffer, so a reference retained by readinto() can no longer be + used to read or write the (soon to be freed) buffer. Returns 0 on success, + -1 if the view could not be released (for example because readinto() exported + the buffer and still holds it); in that case an exception is set. Any + exception already pending when this is called is preserved. */ +static int +_Unpickler_ReleaseBufObj(PyObject *buf_obj) +{ + PyObject *exc = PyErr_GetRaisedException(); + PyObject *res = PyObject_CallMethodNoArgs(buf_obj, &_Py_ID(release)); + int err = (res == NULL) ? -1 : 0; + Py_XDECREF(res); + if (exc != NULL) { + /* Keep the original error; ignore any error from release(). */ + if (err < 0) { + PyErr_Clear(); + } + PyErr_SetRaisedException(exc); + err = 0; + } + Py_DECREF(buf_obj); + return err; +} + /* Don't call it directly: use _Unpickler_ReadInto() */ static Py_ssize_t _Unpickler_ReadIntoFromFile(PickleState *state, UnpicklerObject *self, char *buf, @@ -1318,17 +1344,32 @@ _Unpickler_ReadIntoFromFile(PickleState *state, UnpicklerObject *self, char *buf return n; } - /* Call readinto() into user buffer */ + /* Call readinto() into user buffer. + + buf points into a temporary buffer (e.g. a bytes object on the + unpickler stack) that does not outlive this unpickling operation. We + wrap it in a memoryview only so we can hand it to readinto(); a buggy or + hostile readinto() implementation may keep a reference to that view. + Release the view as soon as readinto() returns so that any surviving + reference can no longer dereference buf: using it then raises a clean + Python exception instead of accessing freed memory. */ PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE); if (buf_obj == NULL) { return -1; } - PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj); + /* Keep our own reference across the call (PyObject_CallOneArg does not + steal it) so that we can release the view afterwards regardless of what + readinto() does with the object it receives. */ + PyObject *read_size_obj = PyObject_CallOneArg(self->readinto, buf_obj); if (read_size_obj == NULL) { + _Unpickler_ReleaseBufObj(buf_obj); return -1; } Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj); Py_DECREF(read_size_obj); + if (_Unpickler_ReleaseBufObj(buf_obj) < 0) { + return -1; + } if (read_size < 0) { if (!PyErr_Occurred()) { From 1363a11dffda654d6e16c7578f73d99c48985e0b Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Mon, 8 Jun 2026 08:50:19 +0800 Subject: [PATCH 2/2] Reduce comment verbosity in the readinto fix --- Lib/test/test_pickle.py | 8 ++------ Modules/_pickle.c | 27 +++++++-------------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 1c47d5794103edb..5729bebfc7887b7 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -383,12 +383,8 @@ class CUnpicklerTests(PyUnpicklerTests): size_overflow_error = (OverflowError, 'exceeds') def test_readinto_does_not_keep_buffer_alive(self): - # The C unpickler hands readinto() a temporary memoryview over an - # internal buffer that does not outlive the unpickling operation. - # A readinto() implementation that keeps a reference to that view - # must not be able to read or write the buffer after readinto() - # returns: the view is released, so using it raises ValueError - # rather than accessing freed memory. + # A readinto() that retains the memoryview it is handed must not be + # able to access the buffer after readinto() returns (gh-151046). stashed = [] class StashingFile: diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 84e43038e060b79..74663a3e2a31866 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1283,13 +1283,9 @@ _Unpickler_SkipConsumed(UnpicklerObject *self) static const Py_ssize_t READ_WHOLE_LINE = -1; -/* Release the temporary memoryview that was handed to a readinto() call and - drop our reference to it. Releasing it severs the link between the view and - the underlying buffer, so a reference retained by readinto() can no longer be - used to read or write the (soon to be freed) buffer. Returns 0 on success, - -1 if the view could not be released (for example because readinto() exported - the buffer and still holds it); in that case an exception is set. Any - exception already pending when this is called is preserved. */ +/* Release the temporary memoryview handed to readinto() and drop our reference, + so a view retained by readinto() can no longer reach the underlying buffer. + Returns -1 if release() failed. Any pending exception is preserved. */ static int _Unpickler_ReleaseBufObj(PyObject *buf_obj) { @@ -1298,7 +1294,6 @@ _Unpickler_ReleaseBufObj(PyObject *buf_obj) int err = (res == NULL) ? -1 : 0; Py_XDECREF(res); if (exc != NULL) { - /* Keep the original error; ignore any error from release(). */ if (err < 0) { PyErr_Clear(); } @@ -1344,22 +1339,14 @@ _Unpickler_ReadIntoFromFile(PickleState *state, UnpicklerObject *self, char *buf return n; } - /* Call readinto() into user buffer. - - buf points into a temporary buffer (e.g. a bytes object on the - unpickler stack) that does not outlive this unpickling operation. We - wrap it in a memoryview only so we can hand it to readinto(); a buggy or - hostile readinto() implementation may keep a reference to that view. - Release the view as soon as readinto() returns so that any surviving - reference can no longer dereference buf: using it then raises a clean - Python exception instead of accessing freed memory. */ + /* buf is a temporary buffer; we wrap it in a memoryview only to pass it to + readinto(). Release the view once readinto() returns, so a view it kept + a reference to cannot be used to access buf after it is freed. */ PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE); if (buf_obj == NULL) { return -1; } - /* Keep our own reference across the call (PyObject_CallOneArg does not - steal it) so that we can release the view afterwards regardless of what - readinto() does with the object it receives. */ + /* PyObject_CallOneArg does not steal buf_obj, so we can release it below. */ PyObject *read_size_obj = PyObject_CallOneArg(self->readinto, buf_obj); if (read_size_obj == NULL) { _Unpickler_ReleaseBufObj(buf_obj);