From 783c0b129bf4ddc75ddfad98f93a105ed4cbafee Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Wed, 3 Dec 2025 14:36:47 -0800
Subject: [PATCH] gh-139871: Optimize bytearray construction with encoding

When a `str` is encoded in `bytearray.__init__`, the encoder usually
creates a new, uniquely referenced bytes object. Rather than allocating
new memory and copying the bytes, use the already-created bytes object
as the bytearray's backing storage. The bigger the `str`, the bigger
the saving.

Mean +- std dev: [main_encoding] 497 us +- 9 us -> [encoding] 14.2 us +- 0.3 us: 34.97x faster

```python
# Benchmark for this change: time building a bytearray from a str with an
# explicit encoding, which is the constructor path the patch modifies.
import pyperf

runner = pyperf.Runner()

runner.timeit(
    # Benchmark name shown in the pyperf report.
    name="encode",
    # Input: a 1,000,000-character ASCII string, created in the setup statement.
    setup="a = 'a' * 1_000_000",
    # Statement under test: encode the str while constructing the bytearray.
    stmt="bytearray(a, encoding='utf8')")
```
---
 Objects/bytearrayobject.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 99e1c9b13f7879..25cc0bfcbaba45 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -914,6 +914,10 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
             return -1;
     }
 
+    /* Should be guaranteed by first init or the resize to 0. */
+    assert(self->ob_bytes_object == Py_GetConstantBorrowed(Py_CONSTANT_EMPTY_BYTES));
+    assert(self->ob_exports == 0);
+
     /* Make a quick exit if no first argument */
     if (arg == NULL) {
         if (encoding != NULL || errors != NULL) {
@@ -935,9 +939,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
             return -1;
         }
         encoded = PyUnicode_AsEncodedString(arg, encoding, errors);
-        if (encoded == NULL)
+        if (encoded == NULL) {
             return -1;
+        }
         assert(PyBytes_Check(encoded));
+
+        /* Most encoders return a new unique bytes; use it as the buffer. */
+        if (_PyObject_IsUniquelyReferenced(encoded)
+            && PyBytes_CheckExact(encoded))
+        {
+            Py_ssize_t size = Py_SIZE(encoded);
+            self->ob_bytes_object = encoded;
+            bytearray_reinit_from_bytes(self, size, size);
+            return 0;
+        }
         new = bytearray_iconcat((PyObject*)self, encoded);
         Py_DECREF(encoded);
         if (new == NULL)