From 783717c3a6556d3c4bf72f82b1ed839a5068a372 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 19 Nov 2025 01:30:08 -0800 Subject: [PATCH 1/3] gh-139871: Optimize small takes in bytearray.take_bytes When less than half the buffer is taken just copy that small part out rather than doing a big alloc + memmove + big shrink. --- Lib/test/test_bytes.py | 25 +++++++++++++++++++++++++ Objects/bytearrayobject.c | 14 +++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 86898bfcab9135..8ad4c2050c00a4 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1524,6 +1524,31 @@ def test_take_bytes(self): self.assertRaises(BufferError, ba.take_bytes) self.assertEqual(ba.take_bytes(), b'abc') + def test_take_bytes_optimization(self): + # Validate optimization around taking lots of little chunks out of a + # much bigger buffer. Save work by only copying a little rather than + # moving a lot. + ba = bytearray(b'abcdef' + b'0' * 1000) + start_alloc = ba.__alloc__() + + # Take two bytes at a time, checking alloc doesn't change. + self.assertEqual(ba.take_bytes(2), b'ab') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 4 + 1000) + self.assertEqual(ba.take_bytes(2), b'cd') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 2 + 1000) + self.assertEqual(ba.take_bytes(2), b'ef') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 0 + 1000) + self.assertEqual(ba.__alloc__(), start_alloc) + + # Take over half, alloc shrinks to exact size. + self.assertEqual(ba.take_bytes(501), b'0' * 501) + self.assertEqual(len(ba), 499) + bytes_header_size = sys.getsizeof(b'') + self.assertEqual(ba.__alloc__(), 499 + bytes_header_size) + def test_setitem(self): def setitem_as_mapping(b, i, val): b[i] = val diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 99bfdec89f6c3a..3d1119f23b20d6 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n) return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } - // Copy remaining bytes to a new bytes. Py_ssize_t remaining_length = size - to_take; + // optimization: If taking less than leaving just copy the small to_take + // portion out and move ob_start. + if (to_take < remaining_length) { + PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take); + if (ret == NULL) { + return NULL; + } + self->ob_start += to_take; + Py_SET_SIZE(self, remaining_length); + return ret; + } + + // Copy remaining bytes to a new bytes. PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take, remaining_length); if (remaining == NULL) { From 16a73785b2820bcda9c26606d1c406eb2dcae8ed Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 19 Nov 2025 12:02:39 -0800 Subject: [PATCH 2/3] Update Objects/bytearrayobject.c Co-authored-by: Victor Stinner --- Objects/bytearrayobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 3d1119f23b20d6..99e1c9b13f7879 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1548,7 +1548,7 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n) } Py_ssize_t remaining_length = size - to_take; - // optimization: If taking less than leaving just copy the small to_take + // optimization: If taking less than leaving, just copy the small to_take // portion out and move ob_start. if (to_take < remaining_length) { PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take); From 25a03d9ff0ed5d915ed7b6a33c787ab468577dc6 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 19 Nov 2025 12:02:56 -0800 Subject: [PATCH 3/3] Update Lib/test/test_bytes.py Co-authored-by: Petr Viktorin --- Lib/test/test_bytes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 8ad4c2050c00a4..7ca38bb8c8421e 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1524,6 +1524,7 @@ def test_take_bytes(self): self.assertRaises(BufferError, ba.take_bytes) self.assertEqual(ba.take_bytes(), b'abc') + @support.cpython_only # tests an implementation detail def test_take_bytes_optimization(self): # Validate optimization around taking lots of little chunks out of a # much bigger buffer. Save work by only copying a little rather than