Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3173,6 +3173,92 @@ objects.

.. versionadded:: 3.14

.. method:: take_bytes(n=None, /)

Take the first *n* bytes as an immutable :class:`bytes`. Defaults to all
bytes.

If *n* is negative, it indexes from the end and takes the first :func:`len`
plus *n* bytes. If *n* is out of bounds, :exc:`IndexError` is raised.

Taking less than the full length leaves the remaining bytes in the
:class:`bytearray`, which requires a copy. If the remaining bytes should be
discarded, use :meth:`~bytearray.resize` or :keyword:`del` to truncate,
then call :meth:`~bytearray.take_bytes` without a size.

.. impl-detail::

Taking all bytes is a zero-copy operation.

.. list-table:: Suggested Replacements
:header-rows: 1

* - Description
- Old
- New

* - Return :class:`bytes` after working with :class:`bytearray`
- .. code:: python


def read() -> bytes:
buffer = bytearray(1024)
...
return bytes(buffer)
- .. code:: python

def read() -> bytes:
buffer = bytearray(1024)
...
return buffer.take_bytes()

* - Empty a buffer getting the bytes
- .. code:: python

buffer = bytearray(1024)
...
data = bytes(buffer)
buffer.clear()
- .. code:: python

buffer = bytearray(1024)
...
data = buffer.take_bytes()

* - Split a buffer at a specific separator
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n + 1])
del buffer[:n + 1]
assert buffer == bytearray(b'def')

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = buffer.take_bytes(n + 1)

* - Split a buffer at a specific separator; discard after the separator
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n])
buffer.clear()
assert data == b'abc'
assert len(buffer) == 0

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
buffer.resize(n)
data = buffer.take_bytes()

.. versionadded:: next

Since bytearray objects are sequences of integers (akin to a list), for a
bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be
a bytearray object of length 1. (This contrasts with text strings, where
Expand Down
1 change: 1 addition & 0 deletions Include/cpython/bytearrayobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ typedef struct {
char *ob_bytes; /* Physical backing buffer */
char *ob_start; /* Logical start inside ob_bytes */
Py_ssize_t ob_exports; /* How many buffer exports */
PyObject *ob_bytes_object; /* PyBytes for zero-copy bytes conversion */
} PyByteArrayObject;

PyAPI_DATA(char) _PyByteArray_empty_string[];
Expand Down
52 changes: 52 additions & 0 deletions Lib/test/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,58 @@ def test_resize(self):
self.assertRaises(MemoryError, bytearray().resize, sys.maxsize)
self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize)

def test_take_bytes(self):
# Exercises bytearray.take_bytes(): the default/None form, positive and
# negative counts, invalid arguments, buffers whose storage is offset or
# over-allocated, and a bytearray with an active buffer export.

# With no argument every byte is returned and the bytearray ends up empty.
ba = bytearray(b'ab')
self.assertEqual(ba.take_bytes(), b'ab')
self.assertEqual(len(ba), 0)
self.assertEqual(ba, bytearray(b''))

# Positive and negative slicing.
# Each take removes bytes from the front; the remainder stays in ``ba``.
ba = bytearray(b'abcdef')
self.assertEqual(ba.take_bytes(1), b'a')
self.assertEqual(ba, bytearray(b'bcdef'))
self.assertEqual(len(ba), 5)
# -5 on a 5-byte bytearray resolves to a count of zero: nothing is taken.
self.assertEqual(ba.take_bytes(-5), b'')
self.assertEqual(ba, bytearray(b'bcdef'))
self.assertEqual(len(ba), 5)
# -3 on a 5-byte bytearray resolves to a count of two.
self.assertEqual(ba.take_bytes(-3), b'bc')
self.assertEqual(ba, bytearray(b'def'))
self.assertEqual(len(ba), 3)
# Taking exactly the full length empties the bytearray.
self.assertEqual(ba.take_bytes(3), b'def')
self.assertEqual(ba, bytearray(b''))
self.assertEqual(len(ba), 0)

# Take nothing from emptiness.
# ``ba`` is empty here; 0, the default and an explicit None all give b''.
self.assertEqual(ba.take_bytes(0), b'')
self.assertEqual(ba.take_bytes(), b'')
self.assertEqual(ba.take_bytes(None), b'')

# Out of bounds, bad take value.
# -1 on an empty bytearray and 7 on a 6-byte bytearray are both out of
# range; a float is rejected with TypeError.
self.assertRaises(IndexError, ba.take_bytes, -1)
self.assertRaises(TypeError, ba.take_bytes, 3.14)
ba = bytearray(b'abcdef')
self.assertRaises(IndexError, ba.take_bytes, 7)

# Offset between physical and logical start (ob_bytes != ob_start).
# Deleting from the front is what is expected to create that offset.
ba = bytearray(b'abcde')
del ba[:2]
self.assertEqual(ba, bytearray(b'cde'))
self.assertEqual(ba.take_bytes(), b'cde')

# Overallocation at end.
# Shrinking via ``del`` or ``resize`` must not leak the cut-off bytes
# into the result.
ba = bytearray(b'abcde')
del ba[-2:]
self.assertEqual(ba, bytearray(b'abc'))
self.assertEqual(ba.take_bytes(), b'abc')
ba = bytearray(b'abcde')
ba.resize(4)
self.assertEqual(ba.take_bytes(), b'abcd')

# Taking from a bytearray that has an exported buffer (here a memoryview)
# must raise BufferError.
# NOTE(review): the final assertion presumably runs after the ``with``
# block has released the memoryview -- confirm against the indented source.
ba = bytearray(b'abc')
with memoryview(ba) as mv:
self.assertRaises(BufferError, ba.take_bytes)
self.assertEqual(ba.take_bytes(), b'abc')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to update test_free_threading_bytearray to include resize (probably separate PR) and take_bytes


def test_setitem(self):
def setitem_as_mapping(b, i, val):
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ def test_objecttypes(self):
samples = [b'', b'u'*100000]
for sample in samples:
x = bytearray(sample)
check(x, vsize('n2Pi') + x.__alloc__())
check(x, vsize('n2PiP') + x.__alloc__())
# bytearray_iterator
check(iter(bytearray()), size('nP'))
# bytes
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Update :class:`bytearray` to use a :class:`bytes` under the hood as its buffer
and add :meth:`bytearray.take_bytes` to take that buffer out as :class:`bytes`.
128 changes: 111 additions & 17 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,22 +141,26 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
}

new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
if (new == NULL)
if (new == NULL) {
return NULL;
}

if (size == 0) {
new->ob_bytes_object = NULL;
new->ob_bytes = NULL;
alloc = 0;
}
else {
alloc = size + 1;
new->ob_bytes = PyMem_Malloc(alloc);
new->ob_bytes_object = PyBytes_FromStringAndSize(NULL, alloc);
new->ob_bytes = PyBytes_AsString(new->ob_bytes_object);
if (new->ob_bytes == NULL) {
Py_DECREF(new);
return PyErr_NoMemory();
}
if (bytes != NULL && size > 0)
if (bytes != NULL && size > 0) {
memcpy(new->ob_bytes, bytes, size);
}
new->ob_bytes[size] = '\0'; /* Trailing null byte */
}
Py_SET_SIZE(new, size);
Expand Down Expand Up @@ -189,7 +193,6 @@ static int
bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
{
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
void *sval;
PyByteArrayObject *obj = ((PyByteArrayObject *)self);
/* All computations are done unsigned to avoid integer overflows
(see issue #22335). */
Expand Down Expand Up @@ -244,25 +247,28 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
return -1;
}

/* re-align data to the start of the allocation. */
if (logical_offset > 0) {
sval = PyMem_Malloc(alloc);
if (sval == NULL) {
PyErr_NoMemory();
memmove(obj->ob_bytes, obj->ob_start,
Py_MIN(requested_size, Py_SIZE(self)));
}

if (obj->ob_bytes_object == NULL) {
obj->ob_bytes_object = PyBytes_FromStringAndSize(NULL, alloc);
if (obj->ob_bytes_object == NULL) {
return -1;
}
memcpy(sval, PyByteArray_AS_STRING(self),
Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self)));
PyMem_Free(obj->ob_bytes);
}
else {
sval = PyMem_Realloc(obj->ob_bytes, alloc);
if (sval == NULL) {
PyErr_NoMemory();
if (_PyBytes_Resize(&obj->ob_bytes_object, alloc) == -1) {
Py_SET_SIZE(self, 0);
obj->ob_bytes = obj->ob_start = NULL;
FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, 0);
return -1;
}
}

obj->ob_bytes = obj->ob_start = sval;
obj->ob_bytes = obj->ob_start = PyBytes_AS_STRING(obj->ob_bytes_object);
Py_SET_SIZE(self, size);
FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc);
obj->ob_bytes[size] = '\0'; /* Trailing null byte */
Expand Down Expand Up @@ -1169,9 +1175,7 @@ bytearray_dealloc(PyObject *op)
"deallocated bytearray object has exported buffers");
PyErr_Print();
}
if (self->ob_bytes != 0) {
PyMem_Free(self->ob_bytes);
}
Py_CLEAR(self->ob_bytes_object);
Py_TYPE(self)->tp_free((PyObject *)self);
}

Expand Down Expand Up @@ -1491,6 +1495,95 @@ bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size)
}


/*[clinic input]
@critical_section
bytearray.take_bytes
n: object = None
Bytes to take, negative indexes from end. None indicates all bytes.
/
Take *n* bytes from the bytearray and return them as a bytes object.
[clinic start generated code]*/

static PyObject *
bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
/*[clinic end generated code: output=3147fbc0bbbe8d94 input=b15b5172cdc6deda]*/
{
    /* Detach the first `n` bytes of the bytearray and return them as a new
       reference to a bytes object; return NULL with an exception set on
       error.

       `n` is either None (take everything) or an index-like object.  A
       negative value counts from the end, i.e. len(self) + n bytes are
       taken.  Any other kind of `n` raises TypeError; a resolved count
       outside [0, len(self)] raises IndexError.  NULL is also returned when
       _canresize() refuses (the tests expect BufferError while a memoryview
       is exported).

       The backing bytes object itself becomes the result after being shrunk
       to the taken length.  Bytes left behind are first copied into a fresh
       bytes object, which then becomes the new backing buffer. */
    Py_ssize_t to_take, original;
    Py_ssize_t size = Py_SIZE(self);

    if (Py_IsNone(n)) {
        to_take = original = size;
    }
    // Integer index, from start (zero, positive) or end (negative).
    else if (_PyIndex_Check(n)) {
        to_take = original = PyNumber_AsSsize_t(n, PyExc_IndexError);
        if (to_take == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (to_take < 0) {
            // Cannot overflow: to_take is negative and size is non-negative.
            to_take += size;
        }
    }
    else {
        PyErr_SetString(PyExc_TypeError, "n must be an integer or None");
        return NULL;
    }

    if (to_take < 0 || to_take > size) {
        // Py_ssize_t arguments need the %zd conversion; %d would read them
        // as int.
        PyErr_Format(PyExc_IndexError,
                     "can't take %zd(%zd) outside size %zd",
                     original, to_take, size);
        return NULL;
    }

    // Exports may change the contents.  No mutable bytes allowed.
    if (!_canresize(self)) {
        return NULL;
    }

    // The range check above guarantees to_take <= size, so this also covers
    // an empty bytearray.
    if (to_take == 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    // Copy remaining bytes to a new bytes.
    PyObject *remaining = NULL;
    Py_ssize_t remaining_length = size - to_take;
    if (remaining_length > 0) {
        // +1 to copy across the null which always ends a bytearray.
        remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
                                              remaining_length + 1);
        if (remaining == NULL) {
            return NULL;
        }
    }

    // If the bytes are offset inside the buffer must first align.
    if (self->ob_start != self->ob_bytes) {
        memmove(self->ob_bytes, self->ob_start, to_take);
        self->ob_start = self->ob_bytes;
    }

    if (_PyBytes_Resize(&self->ob_bytes_object, to_take) == -1) {
        // On failure _PyBytes_Resize has released the old buffer and set
        // ob_bytes_object to NULL.  Reset the cached pointers and sizes as
        // well so the bytearray is left empty rather than pointing at freed
        // memory (same recovery as in bytearray_resize_lock_held).
        Py_XDECREF(remaining);
        self->ob_bytes = self->ob_start = NULL;
        Py_SET_SIZE(self, 0);
        FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, 0);
        return NULL;
    }

    // Hand the (shrunk) backing buffer to the caller and point the bytearray
    // towards the buffer with the remaining data, if any.
    PyObject *result = self->ob_bytes_object;
    self->ob_bytes_object = remaining;
    if (remaining != NULL) {
        self->ob_bytes = self->ob_start = PyBytes_AS_STRING(remaining);
        Py_SET_SIZE(self, remaining_length);
        // Allocation includes the trailing null copied above.
        FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, remaining_length + 1);
    }
    else {
        self->ob_bytes = self->ob_start = NULL;
        Py_SET_SIZE(self, 0);
        FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, 0);
    }

    return result;
}


/*[clinic input]
@critical_section
bytearray.translate
Expand Down Expand Up @@ -2686,6 +2779,7 @@ static PyMethodDef bytearray_methods[] = {
BYTEARRAY_STARTSWITH_METHODDEF
BYTEARRAY_STRIP_METHODDEF
{"swapcase", bytearray_swapcase, METH_NOARGS, _Py_swapcase__doc__},
BYTEARRAY_TAKE_BYTES_METHODDEF
{"title", bytearray_title, METH_NOARGS, _Py_title__doc__},
BYTEARRAY_TRANSLATE_METHODDEF
{"upper", bytearray_upper, METH_NOARGS, _Py_upper__doc__},
Expand Down
Loading
Loading