@@ -6435,6 +6435,8 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6435
6435
#endif
6436
6436
6437
6437
if (kind == PyUnicode_1BYTE_KIND ) {
6438
+ // gh-139156: Don't use PyBytesWriter API here since it has an overhead
6439
+ // on short strings
6438
6440
PyObject * v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6439
6441
if (v == NULL ) {
6440
6442
return NULL ;
@@ -8852,11 +8854,15 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
8852
8854
if (Py_IS_TYPE (mapping , & EncodingMapType )) {
8853
8855
int res = encoding_map_lookup (c , mapping );
8854
8856
Py_ssize_t requiredsize = * outpos + 1 ;
8855
- if (res == -1 )
8857
+ if (res == -1 ) {
8856
8858
return enc_FAILED ;
8857
- if (outsize < requiredsize )
8858
- if (charmapencode_resize (writer , outpos , requiredsize ))
8859
+ }
8860
+
8861
+ if (outsize < requiredsize ) {
8862
+ if (charmapencode_resize (writer , outpos , requiredsize )) {
8859
8863
return enc_EXCEPTION ;
8864
+ }
8865
+ }
8860
8866
outstart = _PyBytesWriter_GetData (writer );
8861
8867
outstart [(* outpos )++ ] = (char )res ;
8862
8868
return enc_SUCCESS ;
@@ -8897,7 +8903,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
8897
8903
return enc_SUCCESS ;
8898
8904
}
8899
8905
8900
- /* handle an error in PyUnicode_EncodeCharmap
8906
+ /* Handle an error in _PyUnicode_EncodeCharmap().
8901
8907
Return 0 on success, -1 on error */
8902
8908
static int
8903
8909
charmap_encoding_error (
@@ -9075,23 +9081,64 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
9075
9081
Py_ssize_t respos = 0 ;
9076
9082
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN ;
9077
9083
9078
- while (inpos < size ) {
9079
- Py_UCS4 ch = PyUnicode_READ (kind , data , inpos );
9080
- /* try to encode it */
9081
- charmapencode_result x = charmapencode_output (ch , mapping , writer , & respos );
9082
- if (x == enc_EXCEPTION ) /* error */
9083
- goto onError ;
9084
- if (x == enc_FAILED ) { /* unencodable character */
9084
+ if (Py_IS_TYPE (mapping , & EncodingMapType )) {
9085
+ char * outstart = _PyBytesWriter_GetData (writer );
9086
+ Py_ssize_t outsize = _PyBytesWriter_GetSize (writer );
9087
+
9088
+ while (inpos < size ) {
9089
+ Py_UCS4 ch = PyUnicode_READ (kind , data , inpos );
9090
+
9091
+ /* try to encode it */
9092
+ int res = encoding_map_lookup (ch , mapping );
9093
+ Py_ssize_t requiredsize = respos + 1 ;
9094
+ if (res == -1 ) {
9095
+ goto enc_FAILED ;
9096
+ }
9097
+
9098
+ if (outsize < requiredsize ) {
9099
+ if (charmapencode_resize (writer , & respos , requiredsize )) {
9100
+ goto onError ;
9101
+ }
9102
+ outstart = _PyBytesWriter_GetData (writer );
9103
+ outsize = _PyBytesWriter_GetSize (writer );
9104
+ }
9105
+ outstart [respos ++ ] = (char )res ;
9106
+
9107
+ /* done with this character => adjust input position */
9108
+ ++ inpos ;
9109
+ continue ;
9110
+
9111
+ enc_FAILED :
9085
9112
if (charmap_encoding_error (unicode , & inpos , mapping ,
9086
9113
& exc ,
9087
9114
& error_handler , & error_handler_obj , errors ,
9088
9115
writer , & respos )) {
9089
9116
goto onError ;
9090
9117
}
9118
+ outstart = _PyBytesWriter_GetData (writer );
9119
+ outsize = _PyBytesWriter_GetSize (writer );
9091
9120
}
9092
- else {
9093
- /* done with this character => adjust input position */
9094
- ++ inpos ;
9121
+ }
9122
+ else {
9123
+ while (inpos < size ) {
9124
+ Py_UCS4 ch = PyUnicode_READ (kind , data , inpos );
9125
+ /* try to encode it */
9126
+ charmapencode_result x = charmapencode_output (ch , mapping , writer , & respos );
9127
+ if (x == enc_EXCEPTION ) { /* error */
9128
+ goto onError ;
9129
+ }
9130
+ if (x == enc_FAILED ) { /* unencodable character */
9131
+ if (charmap_encoding_error (unicode , & inpos , mapping ,
9132
+ & exc ,
9133
+ & error_handler , & error_handler_obj , errors ,
9134
+ writer , & respos )) {
9135
+ goto onError ;
9136
+ }
9137
+ }
9138
+ else {
9139
+ /* done with this character => adjust input position */
9140
+ ++ inpos ;
9141
+ }
9095
9142
}
9096
9143
}
9097
9144
0 commit comments