From b9def8ab84772ef604734f407fa3b8db26d76d30 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Thu, 24 Jul 2025 14:52:36 +0200 Subject: [PATCH 1/2] Optimize packing of `bytearray` By special-casing `bytearray`, we can avoid an allocation and complete extra copy of the data when packing it. This speeds up packing by roughly 1/3. --- src/codec/packstream/v1/pack.rs | 63 +++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/src/codec/packstream/v1/pack.rs b/src/codec/packstream/v1/pack.rs index 5f89b6a..a0ba339 100644 --- a/src/codec/packstream/v1/pack.rs +++ b/src/codec/packstream/v1/pack.rs @@ -18,8 +18,9 @@ use std::sync::atomic::{AtomicBool, Ordering}; use pyo3::exceptions::{PyImportError, PyOverflowError, PyTypeError, PyValueError}; use pyo3::prelude::*; +use pyo3::sync::with_critical_section; use pyo3::sync::GILOnceCell; -use pyo3::types::{PyBytes, PyDict, PyString, PyType}; +use pyo3::types::{PyByteArray, PyBytes, PyDict, PyString, PyTuple, PyType}; use pyo3::{intern, IntoPyObjectExt}; use super::super::Structure; @@ -43,6 +44,35 @@ struct TypeMappings { impl TypeMappings { fn new(locals: &Bound) -> PyResult { + /// Remove some byte types from an iterable of types. + /// Types removed are `bytes`, `bytearray`, as those are handled specially in `pack`. + /// If the filtering fails for any reason, it returns the original input. + fn filter_bytes_types(types: Bound) -> Bound { + fn inner<'py>(types: &Bound<'py, PyAny>) -> PyResult> { + fn is_of_known_bytes_types(typ: &Bound) -> PyResult { + Ok(typ.is_subclass_of::()? || typ.is_subclass_of::()?) + } + + let py = types.py(); + let types = types + .try_iter()? 
+ .filter(|typ| { + let Ok(typ) = typ else { + return true; + }; + let Ok(typ) = typ.downcast::() else { + return true; + }; + is_of_known_bytes_types(typ).map(|b| !b).unwrap_or(true) + }) + .collect::, _>>()?; + + Ok(PyTuple::new(py, types)?.into_any()) + } + + inner(&types).unwrap_or(types) + } + let py = locals.py(); Ok(Self { none_values: locals @@ -87,12 +117,15 @@ impl TypeMappings { PyErr::new::("Type mappings are missing MAPPING_TYPES.") })? .into_py_any(py)?, - bytes_types: locals - .get_item("BYTES_TYPES")? - .ok_or_else(|| { - PyErr::new::("Type mappings are missing BYTES_TYPES.") - })? - .into_py_any(py)?, + bytes_types: filter_bytes_types( + locals + .get_item("BYTES_TYPES")? + .ok_or_else(|| { + PyErr::new::("Type mappings are missing BYTES_TYPES.") + })? + .into_bound_py_any(py)?, + ) + .unbind(), }) } } @@ -180,8 +213,18 @@ impl<'a> PackStreamEncoder<'a> { return self.write_string(value.extract::<&str>()?); } - if value.is_instance(self.type_mappings.bytes_types.bind(py))? { - return self.write_bytes(value.extract::>()?); + if let Ok(value) = value.downcast::() { + return self.write_bytes(value.as_bytes()); + } else if let Ok(value) = value.downcast::() { + return with_critical_section(value, || { + // SAFETY: + // * we're holding the GIL/are attached to the Python interpreter + // * we're using a critical section to ensure exclusive access to the byte array + // * we don't interact with the interpreter/PyO3 APIs while reading the bytes + unsafe { self.write_bytes(value.as_bytes()) } + }); + } else if value.is_instance(self.type_mappings.bytes_types.bind(py))? { + return self.write_bytes(&value.extract::>()?); } if value.is_instance(self.type_mappings.sequence_types.bind(py))? 
{ @@ -278,7 +321,7 @@ impl<'a> PackStreamEncoder<'a> { Ok(()) } - fn write_bytes(&mut self, b: Cow<[u8]>) -> PyResult<()> { + fn write_bytes(&mut self, b: &[u8]) -> PyResult<()> { let size = Self::usize_to_u64(b.len())?; if size <= 255 { self.buffer.extend(&[BYTES_8]); From ec93d272d50b9dc6bc0bd92da67a603a35747b0e Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Fri, 25 Jul 2025 09:18:02 +0200 Subject: [PATCH 2/2] Add changelog entry --- changelog.d/51.improve.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog.d/51.improve.md diff --git a/changelog.d/51.improve.md b/changelog.d/51.improve.md new file mode 100644 index 0000000..d45b238 --- /dev/null +++ b/changelog.d/51.improve.md @@ -0,0 +1,3 @@ +Optimize packing of `bytearray`. +By special-casing `bytearray`, we can avoid an allocation and a complete extra copy of the data when packing it. +This speeds up packing of `bytearray`s by roughly 1/3.