From f904fce197da9010cfc87e89ac97bc0e2b0667d5 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 13:16:04 -0800 Subject: [PATCH 01/11] gh-60107: Remove a copy from RawIOBase.read If the underlying I/O class keeps a reference to the memory raise BufferError. --- Lib/test/test_io/test_general.py | 20 +++++++++++++++++++ ...5-11-13-13-11-02.gh-issue-60107.LZq3QF.rst | 2 ++ Modules/_io/iobase.c | 13 +++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst diff --git a/Lib/test/test_io/test_general.py b/Lib/test/test_io/test_general.py index f0677b01ea5ce1..c13fba68bb7153 100644 --- a/Lib/test/test_io/test_general.py +++ b/Lib/test/test_io/test_general.py @@ -724,6 +724,26 @@ def test_RawIOBase_readall(self): rawio = self.MockRawIOWithoutRead((b"abc", b"d", b"efg")) self.assertEqual(rawio.readall(), b"abcdefg") + # gh-60107: Ensure a "Raw I/O" which keeps a reference to the + # mutable memory doesn't allow making a mutable bytes. + class RawIOKeepsReference(self.MockRawIOWithoutRead): + def __init__(self, *args, **kwargs): + self.buf = None + super().__init__(*args, **kwargs) + + def readinto(self, buf): + # buf is the bytearray so keeping a reference to it doesn't keep + # the memory alive; a memoryview does. + self.buf = memoryview(buf) + buf[0:4] = self._read_stack.pop() + return 3 + + with self.assertRaises(BufferError): + rawio = RawIOKeepsReference([b"1234"]) + rawio.read(4) + + + def test_BufferedIOBase_readinto(self): # Exercise the default BufferedIOBase.readinto() and readinto1() # implementations (which call read() or read1() internally). diff --git a/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst new file mode 100644 index 00000000000000..8fbcc603762b65 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst @@ -0,0 +1,2 @@ +Remove a copy from :meth:`io.RawIOBase.read`. If the underlying I/O class +keeps a reference to the mutable memory raise a :exc:`BufferError`. diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index f1c2fe17801881..d308988f20ddba 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -953,7 +953,18 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) return NULL; } - res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), bytes_filled); + res = PyObject_CallMethod(b, "resize", "i", bytes_filled); + if (res != Py_None) { + Py_DECREF(b); + if (res != NULL) { + PyErr_Format(PyExc_ValueError, + "resize returned unexpected value %R", + res); + Py_DECREF(res); + } + return res; + } + res = PyObject_CallMethod(b, "take_bytes", NULL); Py_DECREF(b); return res; } From 6c9d14e44f6e99958ccfc0a1cf30e90e9960682e Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 13:24:49 -0800 Subject: [PATCH 02/11] Add missing res = NULL --- Modules/_io/iobase.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index d308988f20ddba..48c4697bcdbfed 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -961,6 +961,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) "resize returned unexpected value %R", res); Py_DECREF(res); + res = NULL; } return res; } From 847c7edf860d1d37fdb91f1755f586ecef1a1dda Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 14:30:22 -0800 Subject: [PATCH 03/11] Better place for test. Remove excess newlines. Remove old TODO --- Lib/test/test_io/test_general.py | 39 ++++++++++++++++---------------- Modules/_io/iobase.c | 2 -- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_io/test_general.py b/Lib/test/test_io/test_general.py index c13fba68bb7153..085ed3ea6a95ee 100644 --- a/Lib/test/test_io/test_general.py +++ b/Lib/test/test_io/test_general.py @@ -609,6 +609,25 @@ def readinto(self, b): with self.assertRaises(ValueError): Misbehaved(bad_size).read() + def test_RawIOBase_read_gh60107(self): + # gh-60107: Ensure a "Raw I/O" which keeps a reference to the + # mutable memory doesn't allow making a mutable bytes. + class RawIOKeepsReference(self.MockRawIOWithoutRead): + def __init__(self, *args, **kwargs): + self.buf = None + super().__init__(*args, **kwargs) + + def readinto(self, buf): + # buf is the bytearray so keeping a reference to it doesn't keep + # the memory alive; a memoryview does. + self.buf = memoryview(buf) + buf[0:4] = self._read_stack.pop() + return 3 + + with self.assertRaises(BufferError): + rawio = RawIOKeepsReference([b"1234"]) + rawio.read(4) + def test_types_have_dict(self): test = ( self.IOBase(), @@ -724,26 +743,6 @@ def test_RawIOBase_readall(self): rawio = self.MockRawIOWithoutRead((b"abc", b"d", b"efg")) self.assertEqual(rawio.readall(), b"abcdefg") - # gh-60107: Ensure a "Raw I/O" which keeps a reference to the - # mutable memory doesn't allow making a mutable bytes. - class RawIOKeepsReference(self.MockRawIOWithoutRead): - def __init__(self, *args, **kwargs): - self.buf = None - super().__init__(*args, **kwargs) - - def readinto(self, buf): - # buf is the bytearray so keeping a reference to it doesn't keep - # the memory alive; a memoryview does. - self.buf = memoryview(buf) - buf[0:4] = self._read_stack.pop() - return 3 - - with self.assertRaises(BufferError): - rawio = RawIOKeepsReference([b"1234"]) - rawio.read(4) - - - def test_BufferedIOBase_readinto(self): # Exercise the default BufferedIOBase.readinto() and readinto1() # implementations (which call read() or read1() internally). diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 48c4697bcdbfed..4ba74bde64b3dd 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -927,8 +927,6 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) return PyObject_CallMethodNoArgs(self, &_Py_ID(readall)); } - /* TODO: allocate a bytes object directly instead and manually construct - a writable memoryview pointing to it. */ b = PyByteArray_FromStringAndSize(NULL, n); if (b == NULL) return NULL; From bfb0c712e69acb8da3586857a17ef606a3515cb8 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 14:31:53 -0800 Subject: [PATCH 04/11] Fix indentation --- Modules/_io/iobase.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 4ba74bde64b3dd..c5d508276dcba5 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -957,7 +957,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) if (res != NULL) { PyErr_Format(PyExc_ValueError, "resize returned unexpected value %R", - res); + res); Py_DECREF(res); res = NULL; } From 3709cf92198e8fe8be15e538655e868662f79fb9 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 14:33:55 -0800 Subject: [PATCH 05/11] Simplify with Py_CLEAR --- Modules/_io/iobase.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index c5d508276dcba5..f0ac775009f6f7 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -958,8 +958,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) PyErr_Format(PyExc_ValueError, "resize returned unexpected value %R", res); - Py_DECREF(res); - res = NULL; + Py_CLEAR(res); } return res; } From 63ceed7fae8257af9e5946663537180227c94c79 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 14:38:57 -0800 Subject: [PATCH 06/11] Simplify with gotos --- Modules/_io/iobase.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index f0ac775009f6f7..da3ba3959dc2e7 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -928,41 +928,40 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) } b = PyByteArray_FromStringAndSize(NULL, n); - if (b == NULL) + if (b == NULL) { return NULL; + } res = PyObject_CallMethodObjArgs(self, &_Py_ID(readinto), b, NULL); if (res == NULL || res == Py_None) { - Py_DECREF(b); - return res; + goto cleanup; } Py_ssize_t bytes_filled = PyNumber_AsSsize_t(res, PyExc_ValueError); - Py_DECREF(res); + Py_CLEAR(res); if (bytes_filled == -1 && PyErr_Occurred()) { - Py_DECREF(b); - return NULL; + goto cleanup; } if (bytes_filled < 0 || bytes_filled > n) { - Py_DECREF(b); PyErr_Format(PyExc_ValueError, "readinto returned %zd outside buffer size %zd", bytes_filled, n); - return NULL; + goto cleanup; } res = PyObject_CallMethod(b, "resize", "i", bytes_filled); if (res != Py_None) { - Py_DECREF(b); if (res != NULL) { PyErr_Format(PyExc_ValueError, "resize returned unexpected value %R", res); Py_CLEAR(res); } - return res; + goto cleanup; } res = PyObject_CallMethod(b, "take_bytes", NULL); + +cleanup: Py_DECREF(b); return res; } From a1daaae26094a76e390fa7e09601c1ad905cd853 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 15:34:04 -0800 Subject: [PATCH 07/11] Fix ubsan --- Modules/_io/iobase.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index da3ba3959dc2e7..0284a640467e3f 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -949,7 +949,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) goto cleanup; } - res = PyObject_CallMethod(b, "resize", "i", bytes_filled); + res = PyObject_CallMethod(b, "resize", "n", bytes_filled); if (res != Py_None) { if (res != NULL) { PyErr_Format(PyExc_ValueError, From 13abb41682e00ec66bdd2eba7c9d2a768f899028 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Nov 2025 15:36:21 -0800 Subject: [PATCH 08/11] Just use the C API... Sorry for the onoise... --- Modules/_io/iobase.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 0284a640467e3f..9be9673a84db23 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -948,15 +948,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) bytes_filled, n); goto cleanup; } - - res = PyObject_CallMethod(b, "resize", "n", bytes_filled); - if (res != Py_None) { - if (res != NULL) { - PyErr_Format(PyExc_ValueError, - "resize returned unexpected value %R", - res); - Py_CLEAR(res); - } + if (PyByteArray_Resize(b, bytes_filled) < 0) { goto cleanup; } res = PyObject_CallMethod(b, "take_bytes", NULL); From 82643f8dd25704d3e2eb429cd88f60c92298e9a5 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 18 Nov 2025 14:35:30 -0800 Subject: [PATCH 09/11] Update Modules/_io/iobase.c Co-authored-by: Victor Stinner --- Modules/_io/iobase.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 9be9673a84db23..cf4524d1822cf0 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -951,7 +951,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) if (PyByteArray_Resize(b, bytes_filled) < 0) { goto cleanup; } - res = PyObject_CallMethod(b, "take_bytes", NULL); + res = _PyObject_CallMethod(b, &_Py_ID(takes_bytes), NULL); cleanup: Py_DECREF(b); From a9a1f14ddebd9110a9fbf7e693ceb6c8d5fdc0f8 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 18 Nov 2025 14:45:11 -0800 Subject: [PATCH 10/11] Regen globals and simplify more --- Include/internal/pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_unicodeobject_generated.h | 4 ++++ Modules/_io/iobase.c | 2 +- 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 92ded14891a101..84151565f22b71 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -2066,6 +2066,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(take_bytes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target_is_directory)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index cd21b0847b7cdd..1814f2a983fed8 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -789,6 +789,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(take_bytes) STRUCT_FOR_ID(target) STRUCT_FOR_ID(target_is_directory) STRUCT_FOR_ID(task) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 50d82d0a365037..8958d0a8b21114 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -2064,6 +2064,7 @@ extern "C" { INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ + INIT_ID(take_bytes), \ INIT_ID(target), \ INIT_ID(target_is_directory), \ INIT_ID(task), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b4d920154b6e83..73647ce30d1024 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2944,6 +2944,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(take_bytes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(target); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index cf4524d1822cf0..f036ea503b11e8 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -951,7 +951,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) if (PyByteArray_Resize(b, bytes_filled) < 0) { goto cleanup; } - res = _PyObject_CallMethod(b, &_Py_ID(takes_bytes), NULL); + res = PyObject_CallMethodNoArgs(b, &_Py_ID(take_bytes)); cleanup: Py_DECREF(b); From 8ca1cef2dbbd3ae2de2ec62fd77d61349871f90d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 20 Nov 2025 18:05:28 +0100 Subject: [PATCH 11/11] Update Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst --- .../next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst index 8fbcc603762b65..c5103fb8005ebe 100644 --- a/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst +++ b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst @@ -1,2 +1,2 @@ Remove a copy from :meth:`io.RawIOBase.read`. If the underlying I/O class -keeps a reference to the mutable memory raise a :exc:`BufferError`. +keeps a reference to the mutable memory, raise a :exc:`BufferError`.