From a0dcf0595e569a7927b6e6c7ce1717d24ba774ba Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 24 Sep 2025 17:35:57 +0200 Subject: [PATCH 1/2] gh-139156: Optimize _PyUnicode_EncodeCharmap() Specialize _PyUnicode_EncodeCharmap() for EncodingMapType which is used by Python codecs such as iso8859_15. --- Objects/unicodeobject.c | 74 +++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9c00e22ea24bd0..a9c9affcecd784 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6440,6 +6440,8 @@ _PyUnicode_EncodeUTF16(PyObject *str, #endif if (kind == PyUnicode_1BYTE_KIND) { + // gh-139156: Don't use PyBytesWriter API here since it has an overhead + // on short strings PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2); if (v == NULL) { return NULL; @@ -8857,11 +8859,15 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping, if (Py_IS_TYPE(mapping, &EncodingMapType)) { int res = encoding_map_lookup(c, mapping); Py_ssize_t requiredsize = *outpos+1; - if (res == -1) + if (res == -1) { return enc_FAILED; - if (outsize adjust input position */ + ++inpos; + continue; + +enc_FAILED: if (charmap_encoding_error(unicode, &inpos, mapping, &exc, &error_handler, &error_handler_obj, errors, writer, &respos)) { goto onError; } + outstart = _PyBytesWriter_GetData(writer); + outsize = _PyBytesWriter_GetSize(writer); } - else { - /* done with this character => adjust input position */ - ++inpos; + } + else { + while (inpos adjust input position */ + ++inpos; + } } } From 48e9293531d4884d8edfc4c0e6d99c5233352262 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 24 Sep 2025 18:06:33 +0200 Subject: [PATCH 2/2] Update Objects/unicodeobject.c Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Objects/unicodeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a9c9affcecd784..c1e1dd34745fa3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9129,8 +9129,9 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, Py_UCS4 ch = PyUnicode_READ(kind, data, inpos); /* try to encode it */ charmapencode_result x = charmapencode_output(ch, mapping, writer, &respos); - if (x==enc_EXCEPTION) /* error */ + if (x==enc_EXCEPTION) { /* error */ goto onError; + } if (x==enc_FAILED) { /* unencodable character */ if (charmap_encoding_error(unicode, &inpos, mapping, &exc,