diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index b83039c1869f23..ebb9f82d26aec7 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -17,6 +17,46 @@ extern "C" { extern int _PyUnicode_IsModifiable(PyObject *unicode); +extern void _PyUnicodeWriter_InitWithBuffer( + _PyUnicodeWriter *writer, + PyObject *buffer); +extern PyObject* _PyUnicode_Result(PyObject *unicode); +extern int _PyUnicode_DecodeUTF8Writer( + _PyUnicodeWriter *writer, + const char *s, + Py_ssize_t size, + _Py_error_handler error_handler, + const char *errors, + Py_ssize_t *consumed); +extern PyObject* _PyUnicode_ResizeCompact( + PyObject *unicode, + Py_ssize_t length); +extern PyObject* _PyUnicode_GetEmpty(void); + + +/* Generic helper macro to convert characters of different types. + from_type and to_type have to be valid type names, begin and end + are pointers to the source characters which should be of type + "from_type *". to is a pointer of type "to_type *" and points to the + buffer where the result characters are written to. 
*/ +#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ + do { \ + to_type *_to = (to_type *)(to); \ + const from_type *_iter = (const from_type *)(begin);\ + const from_type *_end = (const from_type *)(end);\ + Py_ssize_t n = (_end) - (_iter); \ + const from_type *_unrolled_end = \ + _iter + _Py_SIZE_ROUND_DOWN(n, 4); \ + while (_iter < (_unrolled_end)) { \ + _to[0] = (to_type) _iter[0]; \ + _to[1] = (to_type) _iter[1]; \ + _to[2] = (to_type) _iter[2]; \ + _to[3] = (to_type) _iter[3]; \ + _iter += 4; _to += 4; \ + } \ + while (_iter < (_end)) \ + *_to++ = (to_type) *_iter++; \ + } while (0) static inline void diff --git a/Makefile.pre.in b/Makefile.pre.in index 19423c11545c19..92ecd9fb9b60d9 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -559,6 +559,7 @@ OBJECT_OBJS= \ Objects/typevarobject.o \ Objects/unicode_format.o \ Objects/unicode_formatter.o \ + Objects/unicode_writer.o \ Objects/unicodectype.o \ Objects/unicodeobject.o \ Objects/unionobject.o \ diff --git a/Objects/unicode_writer.c b/Objects/unicode_writer.c new file mode 100644 index 00000000000000..2b944bf1ea8cde --- /dev/null +++ b/Objects/unicode_writer.c @@ -0,0 +1,639 @@ +/* + +Unicode implementation based on original code by Fredrik Lundh, +modified by Marc-Andre Lemburg . + +Major speed upgrades to the method implementations at the Reykjavik +NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. + +Copyright (c) Corporation for National Research Initiatives. 
+ +-------------------------------------------------------------------- +The original string type implementation is: + + Copyright (c) 1999 by Secret Labs AB + Copyright (c) 1999 by Fredrik Lundh + +By obtaining, using, and/or copying this software and/or its +associated documentation, you agree that you have read, understood, +and will comply with the following terms and conditions: + +Permission to use, copy, modify, and distribute this software and its +associated documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appears in all +copies, and that both that copyright notice and this permission notice +appear in supporting documentation, and that the name of Secret Labs +AB or the author not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR +ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+-------------------------------------------------------------------- + +*/ + +#include "Python.h" +#include "pycore_freelist.h" // _Py_FREELIST_FREE() +#include "pycore_long.h" // _PyLong_FormatWriter() +#include "pycore_unicodeobject.h" // _PyUnicode_Result() + + +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + + +/* Compilation of templated routines */ + +#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty() + +#include "stringlib/ucs1lib.h" +#include "stringlib/find_max_char.h" +#include "stringlib/undef.h" + + +/* Copy len bytes of an ASCII or latin1 char* string into unicode at index. + Bytes >= 0x80 are widened through unsigned char so they never sign-extend. + WARNING: The function doesn't copy the terminating null character and + doesn't check the maximum character (may write a latin1 character in an + ASCII string). */ +static void +unicode_write_cstr(PyObject *unicode, Py_ssize_t index, + const char *str, Py_ssize_t len) +{ + int kind = PyUnicode_KIND(unicode); + const void *data = PyUnicode_DATA(unicode); + const char *end = str + len; + + assert(index + len <= PyUnicode_GET_LENGTH(unicode)); + switch (kind) { + case PyUnicode_1BYTE_KIND: { +#ifdef Py_DEBUG + if (PyUnicode_IS_ASCII(unicode)) { + Py_UCS4 maxchar = ucs1lib_find_max_char( + (const Py_UCS1*)str, + (const Py_UCS1*)str + len); + assert(maxchar < 128); + } +#endif + memcpy((char *) data + index, str, len); + break; + } + case PyUnicode_2BYTE_KIND: { + Py_UCS2 *start = (Py_UCS2 *)data + index; + Py_UCS2 *ucs2 = start; + + for (; str < end; ++ucs2, ++str) + *ucs2 = (Py_UCS2)(unsigned char)*str; + + assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode)); + break; + } + case PyUnicode_4BYTE_KIND: { + Py_UCS4 *start = (Py_UCS4 *)data + index; + Py_UCS4 *ucs4 = start; + + for (; str < end; ++ucs4, ++str) + *ucs4 = (Py_UCS4)(unsigned char)*str; + + assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode)); + break; + } + default: + 
Py_UNREACHABLE(); + } +} + + +static inline void +_PyUnicodeWriter_Update(_PyUnicodeWriter *writer) +{ + writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); + writer->data = PyUnicode_DATA(writer->buffer); + + if (!writer->readonly) { + writer->kind = PyUnicode_KIND(writer->buffer); + writer->size = PyUnicode_GET_LENGTH(writer->buffer); + } + else { + /* use a value smaller than PyUnicode_1BYTE_KIND() so + _PyUnicodeWriter_PrepareKind() will copy the buffer. */ + writer->kind = 0; + assert(writer->kind <= PyUnicode_1BYTE_KIND); + + /* Copy-on-write mode: set buffer size to 0 so + * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on + * next write. */ + writer->size = 0; + } +} + + +void +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer) +{ + memset(writer, 0, sizeof(*writer)); + + /* ASCII is the bare minimum */ + writer->min_char = 127; + + /* use a kind value smaller than PyUnicode_1BYTE_KIND so + _PyUnicodeWriter_PrepareKind() will copy the buffer. */ + assert(writer->kind == 0); + assert(writer->kind < PyUnicode_1BYTE_KIND); +} + + +PyUnicodeWriter* +PyUnicodeWriter_Create(Py_ssize_t length) +{ + if (length < 0) { + PyErr_SetString(PyExc_ValueError, + "length must be positive"); + return NULL; + } + + const size_t size = sizeof(_PyUnicodeWriter); + PyUnicodeWriter *pub_writer; + pub_writer = _Py_FREELIST_POP_MEM(unicode_writers); + if (pub_writer == NULL) { + pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + if (pub_writer == NULL) { + return (PyUnicodeWriter *)PyErr_NoMemory(); + } + } + _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; + + _PyUnicodeWriter_Init(writer); + if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) { + PyUnicodeWriter_Discard(pub_writer); + return NULL; + } + writer->overallocate = 1; + + return pub_writer; +} + + +void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) +{ + if (writer == NULL) { + return; + } + _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); + _Py_FREELIST_FREE(unicode_writers, 
writer, PyMem_Free); +} + + +// Initialize _PyUnicodeWriter with initial buffer +void +_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer) +{ + memset(writer, 0, sizeof(*writer)); + writer->buffer = buffer; + _PyUnicodeWriter_Update(writer); + writer->min_length = writer->size; +} + + +int +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) +{ + Py_ssize_t newlen; + PyObject *newbuffer; + + assert(length >= 0); + assert(maxchar <= _Py_MAX_UNICODE); + + /* ensure that the _PyUnicodeWriter_Prepare macro was used */ + assert((maxchar > writer->maxchar && length >= 0) + || length > 0); + + if (length > PY_SSIZE_T_MAX - writer->pos) { + PyErr_NoMemory(); + return -1; + } + newlen = writer->pos + length; + + maxchar = Py_MAX(maxchar, writer->min_char); + + if (writer->buffer == NULL) { + assert(!writer->readonly); + if (writer->overallocate + && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) { + /* overallocate to limit the number of realloc() */ + newlen += newlen / OVERALLOCATE_FACTOR; + } + if (newlen < writer->min_length) + newlen = writer->min_length; + + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + } + else if (newlen > writer->size) { + if (writer->overallocate + && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) { + /* overallocate to limit the number of realloc() */ + newlen += newlen / OVERALLOCATE_FACTOR; + } + if (newlen < writer->min_length) + newlen = writer->min_length; + + if (maxchar > writer->maxchar || writer->readonly) { + /* resize + widen */ + maxchar = Py_MAX(maxchar, writer->maxchar); + newbuffer = PyUnicode_New(newlen, maxchar); + if (newbuffer == NULL) + return -1; + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); + Py_DECREF(writer->buffer); + writer->readonly = 0; + } + else { + newbuffer = _PyUnicode_ResizeCompact(writer->buffer, newlen); + if (newbuffer == NULL) + return 
-1; + } + writer->buffer = newbuffer; + } + else if (maxchar > writer->maxchar) { + assert(!writer->readonly); + newbuffer = PyUnicode_New(writer->size, maxchar); + if (newbuffer == NULL) + return -1; + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); + Py_SETREF(writer->buffer, newbuffer); + } + _PyUnicodeWriter_Update(writer); + return 0; + +#undef OVERALLOCATE_FACTOR +} + +int +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + int kind) +{ + Py_UCS4 maxchar; + + /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */ + assert(writer->kind < kind); + + switch (kind) + { + case PyUnicode_1BYTE_KIND: maxchar = 0xff; break; + case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break; + case PyUnicode_4BYTE_KIND: maxchar = _Py_MAX_UNICODE; break; + default: + Py_UNREACHABLE(); + } + + return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar); +} + + +int +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +{ + return _PyUnicodeWriter_WriteCharInline(writer, ch); +} + + +int +PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) +{ + if (ch > _Py_MAX_UNICODE) { + PyErr_SetString(PyExc_ValueError, + "character must be in range(0x110000)"); + return -1; + } + + return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch); +} + + +int +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) +{ + assert(PyUnicode_Check(str)); + + Py_UCS4 maxchar; + Py_ssize_t len; + + len = PyUnicode_GET_LENGTH(str); + if (len == 0) + return 0; + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (maxchar > writer->maxchar || len > writer->size - writer->pos) { + if (writer->buffer == NULL && !writer->overallocate) { + assert(_PyUnicode_CheckConsistency(str, 1)); + writer->readonly = 1; + writer->buffer = Py_NewRef(str); + _PyUnicodeWriter_Update(writer); + writer->pos += len; + return 0; + } + if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1) + return -1; + } + 
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, 0, len); + writer->pos += len; + return 0; +} + + +int +PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) +{ + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + + PyObject *str = PyObject_Str(obj); + if (str == NULL) { + return -1; + } + + int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str); + Py_DECREF(str); + return res; +} + + +int +PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) +{ + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + + PyObject *repr = PyObject_Repr(obj); + if (repr == NULL) { + return -1; + } + + int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr); + Py_DECREF(repr); + return res; +} + + +int +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, + Py_ssize_t start, Py_ssize_t end) +{ + assert(0 <= start); + assert(end <= PyUnicode_GET_LENGTH(str)); + assert(start <= end); + + if (start == 0 && end == PyUnicode_GET_LENGTH(str)) + return _PyUnicodeWriter_WriteStr(writer, str); + + Py_ssize_t len = end - start; + if (len == 0) { + return 0; + } + + Py_UCS4 maxchar; + if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) { + maxchar = _PyUnicode_FindMaxChar(str, start, end); + } + else { + maxchar = writer->maxchar; + } + if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) { + return -1; + } + + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, start, len); + writer->pos += len; + return 0; +} + + +int +PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, + Py_ssize_t start, Py_ssize_t end) +{ + if (!PyUnicode_Check(str)) { + PyErr_Format(PyExc_TypeError, "expect str, not %T", str); + return -1; + } + if 
(start < 0 || start > end) { + PyErr_Format(PyExc_ValueError, "invalid start argument"); + return -1; + } + if (end > PyUnicode_GET_LENGTH(str)) { + PyErr_Format(PyExc_ValueError, "invalid end argument"); + return -1; + } + + return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str, + start, end); +} + + +int +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *ascii, Py_ssize_t len) +{ + if (len == -1) + len = strlen(ascii); + + assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128); + + if (writer->buffer == NULL && !writer->overallocate) { + PyObject *str; + + str = _PyUnicode_FromASCII(ascii, len); + if (str == NULL) + return -1; + + writer->readonly = 1; + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + writer->pos += len; + return 0; + } + + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + + switch (writer->kind) + { + case PyUnicode_1BYTE_KIND: + { + const Py_UCS1 *str = (const Py_UCS1 *)ascii; + Py_UCS1 *data = writer->data; + + memcpy(data + writer->pos, str, len); + break; + } + case PyUnicode_2BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + ascii, ascii + len, + (Py_UCS2 *)writer->data + writer->pos); + break; + } + case PyUnicode_4BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + ascii, ascii + len, + (Py_UCS4 *)writer->data + writer->pos); + break; + } + default: + Py_UNREACHABLE(); + } + + writer->pos += len; + return 0; +} + + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + +int +PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + if (size < 0) { + size = strlen(str); + } + + _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer; + Py_ssize_t 
old_pos = _writer->pos; + int res = _PyUnicode_DecodeUTF8Writer(_writer, str, size, + _Py_ERROR_STRICT, NULL, NULL); + if (res < 0) { + _writer->pos = old_pos; + } + return res; +} + + +int +PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer, + const char *string, + Py_ssize_t length, + const char *errors, + Py_ssize_t *consumed) +{ + if (length < 0) { + length = strlen(string); + } + + _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer; + Py_ssize_t old_pos = _writer->pos; + int res = _PyUnicode_DecodeUTF8Writer(_writer, string, length, + _Py_ERROR_UNKNOWN, errors, + consumed); + if (res < 0) { + _writer->pos = old_pos; + if (consumed) { + *consumed = 0; + } + } + return res; +} + + +int +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, Py_ssize_t len) +{ + Py_UCS4 maxchar; + + maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len); + if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1) + return -1; + unicode_write_cstr(writer->buffer, writer->pos, str, len); + writer->pos += len; + return 0; +} + + +PyObject * +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) +{ + PyObject *str; + + if (writer->pos == 0) { + Py_CLEAR(writer->buffer); + return _PyUnicode_GetEmpty(); + } + + str = writer->buffer; + writer->buffer = NULL; + + if (writer->readonly) { + assert(PyUnicode_GET_LENGTH(str) == writer->pos); + return str; + } + + if (PyUnicode_GET_LENGTH(str) != writer->pos) { + PyObject *str2; + str2 = _PyUnicode_ResizeCompact(str, writer->pos); + if (str2 == NULL) { + Py_DECREF(str); + return NULL; + } + str = str2; + } + + assert(_PyUnicode_CheckConsistency(str, 1)); + return _PyUnicode_Result(str); +} + + +PyObject* +PyUnicodeWriter_Finish(PyUnicodeWriter *writer) +{ + PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); + assert(((_PyUnicodeWriter*)writer)->buffer == NULL); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); + return str; +} + + +void 
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) +{ + Py_CLEAR(writer->buffer); +} diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a67bf9b1c5337b..c76d8ba76e2d35 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -46,7 +46,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_codecs.h" // _PyCodec_Lookup() #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST -#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_FormatWriter() @@ -184,45 +183,9 @@ static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op) } -/* Generic helper macro to convert characters of different types. - from_type and to_type have to be valid type names, begin and end - are pointers to the source characters which should be of type - "from_type *". to is a pointer of type "to_type *" and points to the - buffer where the result characters are written to. 
*/ -#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ - do { \ - to_type *_to = (to_type *)(to); \ - const from_type *_iter = (const from_type *)(begin);\ - const from_type *_end = (const from_type *)(end);\ - Py_ssize_t n = (_end) - (_iter); \ - const from_type *_unrolled_end = \ - _iter + _Py_SIZE_ROUND_DOWN(n, 4); \ - while (_iter < (_unrolled_end)) { \ - _to[0] = (to_type) _iter[0]; \ - _to[1] = (to_type) _iter[1]; \ - _to[2] = (to_type) _iter[2]; \ - _to[3] = (to_type) _iter[3]; \ - _iter += 4; _to += 4; \ - } \ - while (_iter < (_end)) \ - *_to++ = (to_type) *_iter++; \ - } while (0) - #define LATIN1 _Py_LATIN1_CHR -#ifdef MS_WINDOWS - /* On Windows, overallocate by 50% is the best factor */ -# define OVERALLOCATE_FACTOR 2 -#else - /* On Linux, overallocate by 25% is the best factor */ -# define OVERALLOCATE_FACTOR 4 -#endif - /* Forward declaration */ -static inline int -_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); -static inline void -_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer); static PyObject * unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, const char *errors); @@ -230,11 +193,6 @@ static PyObject * unicode_decode_utf8(const char *s, Py_ssize_t size, _Py_error_handler error_handler, const char *errors, Py_ssize_t *consumed); -static int -unicode_decode_utf8_writer(_PyUnicodeWriter *writer, - const char *s, Py_ssize_t size, - _Py_error_handler error_handler, const char *errors, - Py_ssize_t *consumed); #ifdef Py_DEBUG static inline int unicode_is_finalizing(void); static int unicode_is_singleton(PyObject *unicode); @@ -242,7 +200,8 @@ static int unicode_is_singleton(PyObject *unicode); // Return a reference to the immortal empty string singleton. 
-static inline PyObject* unicode_get_empty(void) +PyObject* +_PyUnicode_GetEmpty(void) { _Py_DECLARE_STR(empty, ""); return &_Py_STR(empty); @@ -416,7 +375,7 @@ static void clear_global_interned_strings(void) #define _Py_RETURN_UNICODE_EMPTY() \ do { \ - return unicode_get_empty(); \ + return _PyUnicode_GetEmpty();\ } while (0) @@ -748,14 +707,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) #undef CHECK } -static PyObject* -unicode_result(PyObject *unicode) +PyObject* +_PyUnicode_Result(PyObject *unicode) { assert(_PyUnicode_CHECK(unicode)); Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); if (length == 0) { - PyObject *empty = unicode_get_empty(); + PyObject *empty = _PyUnicode_GetEmpty(); if (unicode != empty) { Py_DECREF(unicode); } @@ -778,6 +737,7 @@ unicode_result(PyObject *unicode) assert(_PyUnicode_CheckConsistency(unicode, 1)); return unicode; } +#define unicode_result _PyUnicode_Result static PyObject* unicode_result_unchanged(PyObject *unicode) @@ -985,7 +945,7 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) /* Compilation of templated routines */ -#define STRINGLIB_GET_EMPTY() unicode_get_empty() +#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty() #include "stringlib/asciilib.h" #include "stringlib/fastsearch.h" @@ -1097,8 +1057,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length) return copy; } -static PyObject* -resize_compact(PyObject *unicode, Py_ssize_t length) +PyObject* +_PyUnicode_ResizeCompact(PyObject *unicode, Py_ssize_t length) { Py_ssize_t char_size; Py_ssize_t struct_size; @@ -1306,7 +1266,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { /* Optimization for empty strings */ if (size == 0) { - return unicode_get_empty(); + return _PyUnicode_GetEmpty(); } PyObject *obj; @@ -1799,7 +1759,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) return 0; if (length == 0) { - PyObject *empty = unicode_get_empty(); + PyObject *empty = _PyUnicode_GetEmpty(); Py_SETREF(*p_unicode, empty); return 0; } 
@@ -1813,7 +1773,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) } if (PyUnicode_IS_COMPACT(unicode)) { - PyObject *new_unicode = resize_compact(unicode, length); + PyObject *new_unicode = _PyUnicode_ResizeCompact(unicode, length); if (new_unicode == NULL) return -1; *p_unicode = new_unicode; @@ -1839,58 +1799,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length) return unicode_resize(p_unicode, length); } -/* Copy an ASCII or latin1 char* string into a Python Unicode string. - - WARNING: The function doesn't copy the terminating null character and - doesn't check the maximum character (may write a latin1 character in an - ASCII string). */ -static void -unicode_write_cstr(PyObject *unicode, Py_ssize_t index, - const char *str, Py_ssize_t len) -{ - int kind = PyUnicode_KIND(unicode); - const void *data = PyUnicode_DATA(unicode); - const char *end = str + len; - - assert(index + len <= PyUnicode_GET_LENGTH(unicode)); - switch (kind) { - case PyUnicode_1BYTE_KIND: { -#ifdef Py_DEBUG - if (PyUnicode_IS_ASCII(unicode)) { - Py_UCS4 maxchar = ucs1lib_find_max_char( - (const Py_UCS1*)str, - (const Py_UCS1*)str + len); - assert(maxchar < 128); - } -#endif - memcpy((char *) data + index, str, len); - break; - } - case PyUnicode_2BYTE_KIND: { - Py_UCS2 *start = (Py_UCS2 *)data + index; - Py_UCS2 *ucs2 = start; - - for (; str < end; ++ucs2, ++str) - *ucs2 = (Py_UCS2)*str; - - assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode)); - break; - } - case PyUnicode_4BYTE_KIND: { - Py_UCS4 *start = (Py_UCS4 *)data + index; - Py_UCS4 *ucs4 = start; - - for (; str < end; ++ucs4, ++str) - *ucs4 = (Py_UCS4)*str; - - assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode)); - break; - } - default: - Py_UNREACHABLE(); - } -} - static PyObject* get_latin1_char(Py_UCS1 ch) { @@ -2105,7 +2013,7 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) "NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize"); return NULL; } - return 
unicode_get_empty(); + return _PyUnicode_GetEmpty(); } PyObject * @@ -2672,8 +2580,8 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str, } if (width < 0) { - return unicode_decode_utf8_writer(writer, str, length, - _Py_ERROR_REPLACE, "replace", pconsumed); + return _PyUnicode_DecodeUTF8Writer(writer, str, length, + _Py_ERROR_REPLACE, "replace", pconsumed); } PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length, @@ -5425,11 +5333,11 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, // Used by PyUnicodeWriter_WriteUTF8() implementation -static int -unicode_decode_utf8_writer(_PyUnicodeWriter *writer, - const char *s, Py_ssize_t size, - _Py_error_handler error_handler, const char *errors, - Py_ssize_t *consumed) +int +_PyUnicode_DecodeUTF8Writer(_PyUnicodeWriter *writer, + const char *s, Py_ssize_t size, + _Py_error_handler error_handler, const char *errors, + Py_ssize_t *consumed) { if (size == 0) { if (consumed) { @@ -10768,7 +10676,7 @@ replace(PyObject *self, PyObject *str1, } new_size = slen + n * (len2 - len1); if (new_size == 0) { - u = unicode_get_empty(); + u = _PyUnicode_GetEmpty(); goto done; } if (new_size > (PY_SSIZE_T_MAX / rkind)) { @@ -11441,7 +11349,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right) } /* Shortcuts */ - PyObject *empty = unicode_get_empty(); // Borrowed reference + PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference if (left == empty) { return PyUnicode_FromObject(right); } @@ -11493,7 +11401,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) } /* Shortcuts */ - PyObject *empty = unicode_get_empty(); // Borrowed reference + PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference if (left == empty) { Py_DECREF(left); *p_left = Py_NewRef(right); @@ -12989,7 +12897,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj) len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); if (kind1 < kind2 || len1 < len2) { - PyObject *empty = 
unicode_get_empty(); // Borrowed reference + PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference return PyTuple_Pack(3, str_obj, empty, empty); } buf1 = PyUnicode_DATA(str_obj); @@ -13041,7 +12949,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj) len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); if (kind1 < kind2 || len1 < len2) { - PyObject *empty = unicode_get_empty(); // Borrowed reference + PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference return PyTuple_Pack(3, empty, empty, str_obj); } buf1 = PyUnicode_DATA(str_obj); @@ -13520,523 +13428,6 @@ unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start, } -static inline void -_PyUnicodeWriter_Update(_PyUnicodeWriter *writer) -{ - writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); - writer->data = PyUnicode_DATA(writer->buffer); - - if (!writer->readonly) { - writer->kind = PyUnicode_KIND(writer->buffer); - writer->size = PyUnicode_GET_LENGTH(writer->buffer); - } - else { - /* use a value smaller than PyUnicode_1BYTE_KIND() so - _PyUnicodeWriter_PrepareKind() will copy the buffer. */ - writer->kind = 0; - assert(writer->kind <= PyUnicode_1BYTE_KIND); - - /* Copy-on-write mode: set buffer size to 0 so - * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on - * next write. */ - writer->size = 0; - } -} - - -void -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer) -{ - memset(writer, 0, sizeof(*writer)); - - /* ASCII is the bare minimum */ - writer->min_char = 127; - - /* use a kind value smaller than PyUnicode_1BYTE_KIND so - _PyUnicodeWriter_PrepareKind() will copy the buffer. 
*/ - assert(writer->kind == 0); - assert(writer->kind < PyUnicode_1BYTE_KIND); -} - - -PyUnicodeWriter* -PyUnicodeWriter_Create(Py_ssize_t length) -{ - if (length < 0) { - PyErr_SetString(PyExc_ValueError, - "length must be positive"); - return NULL; - } - - const size_t size = sizeof(_PyUnicodeWriter); - PyUnicodeWriter *pub_writer; - pub_writer = _Py_FREELIST_POP_MEM(unicode_writers); - if (pub_writer == NULL) { - pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); - if (pub_writer == NULL) { - return (PyUnicodeWriter *)PyErr_NoMemory(); - } - } - _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; - - _PyUnicodeWriter_Init(writer); - if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) { - PyUnicodeWriter_Discard(pub_writer); - return NULL; - } - writer->overallocate = 1; - - return pub_writer; -} - - -void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) -{ - if (writer == NULL) { - return; - } - _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); - _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); -} - - -// Initialize _PyUnicodeWriter with initial buffer -static inline void -_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer) -{ - memset(writer, 0, sizeof(*writer)); - writer->buffer = buffer; - _PyUnicodeWriter_Update(writer); - writer->min_length = writer->size; -} - - -int -_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) -{ - Py_ssize_t newlen; - PyObject *newbuffer; - - assert(length >= 0); - assert(maxchar <= MAX_UNICODE); - - /* ensure that the _PyUnicodeWriter_Prepare macro was used */ - assert((maxchar > writer->maxchar && length >= 0) - || length > 0); - - if (length > PY_SSIZE_T_MAX - writer->pos) { - PyErr_NoMemory(); - return -1; - } - newlen = writer->pos + length; - - maxchar = Py_MAX(maxchar, writer->min_char); - - if (writer->buffer == NULL) { - assert(!writer->readonly); - if (writer->overallocate - && newlen <= (PY_SSIZE_T_MAX - newlen / 
OVERALLOCATE_FACTOR)) { - /* overallocate to limit the number of realloc() */ - newlen += newlen / OVERALLOCATE_FACTOR; - } - if (newlen < writer->min_length) - newlen = writer->min_length; - - writer->buffer = PyUnicode_New(newlen, maxchar); - if (writer->buffer == NULL) - return -1; - } - else if (newlen > writer->size) { - if (writer->overallocate - && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) { - /* overallocate to limit the number of realloc() */ - newlen += newlen / OVERALLOCATE_FACTOR; - } - if (newlen < writer->min_length) - newlen = writer->min_length; - - if (maxchar > writer->maxchar || writer->readonly) { - /* resize + widen */ - maxchar = Py_MAX(maxchar, writer->maxchar); - newbuffer = PyUnicode_New(newlen, maxchar); - if (newbuffer == NULL) - return -1; - _PyUnicode_FastCopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); - Py_DECREF(writer->buffer); - writer->readonly = 0; - } - else { - newbuffer = resize_compact(writer->buffer, newlen); - if (newbuffer == NULL) - return -1; - } - writer->buffer = newbuffer; - } - else if (maxchar > writer->maxchar) { - assert(!writer->readonly); - newbuffer = PyUnicode_New(writer->size, maxchar); - if (newbuffer == NULL) - return -1; - _PyUnicode_FastCopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); - Py_SETREF(writer->buffer, newbuffer); - } - _PyUnicodeWriter_Update(writer); - return 0; - -#undef OVERALLOCATE_FACTOR -} - -int -_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, - int kind) -{ - Py_UCS4 maxchar; - - /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */ - assert(writer->kind < kind); - - switch (kind) - { - case PyUnicode_1BYTE_KIND: maxchar = 0xff; break; - case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break; - case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break; - default: - Py_UNREACHABLE(); - } - - return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar); -} - -int -_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, 
Py_UCS4 ch) -{ - return _PyUnicodeWriter_WriteCharInline(writer, ch); -} - -int -PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) -{ - if (ch > MAX_UNICODE) { - PyErr_SetString(PyExc_ValueError, - "character must be in range(0x110000)"); - return -1; - } - - return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch); -} - -int -_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) -{ - assert(PyUnicode_Check(str)); - - Py_UCS4 maxchar; - Py_ssize_t len; - - len = PyUnicode_GET_LENGTH(str); - if (len == 0) - return 0; - maxchar = PyUnicode_MAX_CHAR_VALUE(str); - if (maxchar > writer->maxchar || len > writer->size - writer->pos) { - if (writer->buffer == NULL && !writer->overallocate) { - assert(_PyUnicode_CheckConsistency(str, 1)); - writer->readonly = 1; - writer->buffer = Py_NewRef(str); - _PyUnicodeWriter_Update(writer); - writer->pos += len; - return 0; - } - if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1) - return -1; - } - _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, - str, 0, len); - writer->pos += len; - return 0; -} - -int -PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) -{ - PyTypeObject *type = Py_TYPE(obj); - if (type == &PyUnicode_Type) { - return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); - } - - if (type == &PyLong_Type) { - return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); - } - - PyObject *str = PyObject_Str(obj); - if (str == NULL) { - return -1; - } - - int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str); - Py_DECREF(str); - return res; -} - - -int -PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) -{ - if (Py_TYPE(obj) == &PyLong_Type) { - return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); - } - - PyObject *repr = PyObject_Repr(obj); - if (repr == NULL) { - return -1; - } - - int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr); - Py_DECREF(repr); - return res; -} - - 
-int -_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, - Py_ssize_t start, Py_ssize_t end) -{ - assert(0 <= start); - assert(end <= PyUnicode_GET_LENGTH(str)); - assert(start <= end); - - if (start == 0 && end == PyUnicode_GET_LENGTH(str)) - return _PyUnicodeWriter_WriteStr(writer, str); - - Py_ssize_t len = end - start; - if (len == 0) { - return 0; - } - - Py_UCS4 maxchar; - if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) { - maxchar = _PyUnicode_FindMaxChar(str, start, end); - } - else { - maxchar = writer->maxchar; - } - if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) { - return -1; - } - - _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, - str, start, len); - writer->pos += len; - return 0; -} - - -int -PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, - Py_ssize_t start, Py_ssize_t end) -{ - if (!PyUnicode_Check(str)) { - PyErr_Format(PyExc_TypeError, "expect str, not %T", str); - return -1; - } - if (start < 0 || start > end) { - PyErr_Format(PyExc_ValueError, "invalid start argument"); - return -1; - } - if (end > PyUnicode_GET_LENGTH(str)) { - PyErr_Format(PyExc_ValueError, "invalid end argument"); - return -1; - } - - return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str, - start, end); -} - - -int -_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, - const char *ascii, Py_ssize_t len) -{ - if (len == -1) - len = strlen(ascii); - - assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128); - - if (writer->buffer == NULL && !writer->overallocate) { - PyObject *str; - - str = _PyUnicode_FromASCII(ascii, len); - if (str == NULL) - return -1; - - writer->readonly = 1; - writer->buffer = str; - _PyUnicodeWriter_Update(writer); - writer->pos += len; - return 0; - } - - if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) - return -1; - - switch (writer->kind) - { - case PyUnicode_1BYTE_KIND: - { - const Py_UCS1 *str = (const Py_UCS1 
*)ascii; - Py_UCS1 *data = writer->data; - - memcpy(data + writer->pos, str, len); - break; - } - case PyUnicode_2BYTE_KIND: - { - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS2, - ascii, ascii + len, - (Py_UCS2 *)writer->data + writer->pos); - break; - } - case PyUnicode_4BYTE_KIND: - { - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS4, - ascii, ascii + len, - (Py_UCS4 *)writer->data + writer->pos); - break; - } - default: - Py_UNREACHABLE(); - } - - writer->pos += len; - return 0; -} - - -int -PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, - const char *str, - Py_ssize_t size) -{ - assert(writer != NULL); - _Py_AssertHoldsTstate(); - - _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; - return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); -} - - -int -PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, - const char *str, - Py_ssize_t size) -{ - if (size < 0) { - size = strlen(str); - } - - _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer; - Py_ssize_t old_pos = _writer->pos; - int res = unicode_decode_utf8_writer(_writer, str, size, - _Py_ERROR_STRICT, NULL, NULL); - if (res < 0) { - _writer->pos = old_pos; - } - return res; -} - - -int -PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer, - const char *string, - Py_ssize_t length, - const char *errors, - Py_ssize_t *consumed) -{ - if (length < 0) { - length = strlen(string); - } - - _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer; - Py_ssize_t old_pos = _writer->pos; - int res = unicode_decode_utf8_writer(_writer, string, length, - _Py_ERROR_UNKNOWN, errors, consumed); - if (res < 0) { - _writer->pos = old_pos; - if (consumed) { - *consumed = 0; - } - } - return res; -} - - -int -_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, - const char *str, Py_ssize_t len) -{ - Py_UCS4 maxchar; - - maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len); - if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1) - return -1; - 
unicode_write_cstr(writer->buffer, writer->pos, str, len); - writer->pos += len; - return 0; -} - -PyObject * -_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) -{ - PyObject *str; - - if (writer->pos == 0) { - Py_CLEAR(writer->buffer); - _Py_RETURN_UNICODE_EMPTY(); - } - - str = writer->buffer; - writer->buffer = NULL; - - if (writer->readonly) { - assert(PyUnicode_GET_LENGTH(str) == writer->pos); - return str; - } - - if (PyUnicode_GET_LENGTH(str) != writer->pos) { - PyObject *str2; - str2 = resize_compact(str, writer->pos); - if (str2 == NULL) { - Py_DECREF(str); - return NULL; - } - str = str2; - } - - assert(_PyUnicode_CheckConsistency(str, 1)); - return unicode_result(str); -} - - -PyObject* -PyUnicodeWriter_Finish(PyUnicodeWriter *writer) -{ - PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); - assert(((_PyUnicodeWriter*)writer)->buffer == NULL); - _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); - return str; -} - - -void -_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) -{ - Py_CLEAR(writer->buffer); -} - #include "stringlib/unicode_format.h" PyDoc_STRVAR(format__doc__, @@ -14458,7 +13849,7 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, { PyObject *unicode; if (x == NULL) { - unicode = unicode_get_empty(); + unicode = _PyUnicode_GetEmpty(); } else if (encoding == NULL && errors == NULL) { unicode = PyObject_Str(x); @@ -14512,7 +13903,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return NULL; } if (nargs == 0) { - return unicode_get_empty(); + return _PyUnicode_GetEmpty(); } PyObject *object = args[0]; if (nargs == 1) { @@ -15188,7 +14579,7 @@ unicodeiter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored)) if (it->it_seq != NULL) { return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { - PyObject *u = unicode_get_empty(); + PyObject *u = _PyUnicode_GetEmpty(); if (u == NULL) { Py_XDECREF(iter); return NULL; diff --git a/PCbuild/_freeze_module.vcxproj 
b/PCbuild/_freeze_module.vcxproj index 02b6f35798f845..e65f201623fbbe 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -167,6 +167,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 39462a6380cd21..a9fb6f2328ad95 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -490,6 +490,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 2657ee5c444e60..55b0f65ceeab75 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -561,6 +561,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 9c12be6e9356a6..348c6f1c271369 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1280,6 +1280,9 @@ Objects + + Objects + Objects