diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c85c01da89a2ff..f1c9bcd47888b1 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -11,6 +11,44 @@ extern "C" { #include "pycore_fileutils.h" // _Py_error_handler #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111). +#define _Py_MAX_UNICODE 0x10ffff + +/* Fill `length` code units of `data`, starting at index `start`, with `value`. `kind` selects the unit width (1, 2 or 4 bytes) and `value` is asserted to fit it. NOTE(review): `length` is not checked here and the 1-byte case hands it straight to memset(); presumably callers guarantee length >= 0 - confirm. */ +static inline void +_PyUnicode_Fill(int kind, void *data, Py_UCS4 value, + Py_ssize_t start, Py_ssize_t length) +{ + assert(0 <= start); + switch (kind) { + case PyUnicode_1BYTE_KIND: { + assert(value <= 0xff); + Py_UCS1 ch = (unsigned char)value; + Py_UCS1 *to = (Py_UCS1 *)data + start; + memset(to, ch, length); + break; + } + case PyUnicode_2BYTE_KIND: { + assert(value <= 0xffff); + Py_UCS2 ch = (Py_UCS2)value; + Py_UCS2 *to = (Py_UCS2 *)data + start; + const Py_UCS2 *end = to + length; + for (; to < end; ++to) *to = ch; + break; + } + case PyUnicode_4BYTE_KIND: { + assert(value <= _Py_MAX_UNICODE); + Py_UCS4 ch = value; + Py_UCS4 * to = (Py_UCS4 *)data + start; + const Py_UCS4 *end = to + length; + for (; to < end; ++to) *to = ch; + break; + } + default: Py_UNREACHABLE(); + } +} + + /* --- Characters Type APIs ----------------------------------------------- */ extern int _PyUnicode_IsXidStart(Py_UCS4 ch); @@ -240,21 +278,6 @@ extern PyObject* _PyUnicode_XStrip( ); -/* Using explicit passed-in values, insert the thousands grouping - into the string pointed to by buffer. For the argument descriptions, - see Objects/stringlib/localeutil.h */ -extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( - _PyUnicodeWriter *writer, - Py_ssize_t n_buffer, - PyObject *digits, - Py_ssize_t d_pos, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - PyObject *thousands_sep, - Py_UCS4 *maxchar, - int forward); - /* Dedent a string. Behaviour is expected to be an exact match of `textwrap.dedent`.
Return a new reference on success, NULL with exception set on error. diff --git a/Makefile.pre.in b/Makefile.pre.in index 244e25c348f5a4..d0d0ec80f537c8 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -501,7 +501,6 @@ PYTHON_OBJS= \ Python/pystrtod.o \ Python/pystrhex.o \ Python/dtoa.o \ - Python/formatter_unicode.o \ Python/fileutils.o \ Python/suggestions.o \ Python/perf_trampoline.o \ @@ -558,6 +557,7 @@ OBJECT_OBJS= \ Objects/tupleobject.o \ Objects/typeobject.o \ Objects/typevarobject.o \ + Objects/unicode_formatter.o \ Objects/unicodeobject.o \ Objects/unicodectype.o \ Objects/unionobject.o \ @@ -2090,7 +2090,6 @@ UNICODE_DEPS = \ $(srcdir)/Objects/stringlib/fastsearch.h \ $(srcdir)/Objects/stringlib/find.h \ $(srcdir)/Objects/stringlib/find_max_char.h \ - $(srcdir)/Objects/stringlib/localeutil.h \ $(srcdir)/Objects/stringlib/partition.h \ $(srcdir)/Objects/stringlib/replace.h \ $(srcdir)/Objects/stringlib/repr.h \ diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h deleted file mode 100644 index a4ab701de004c8..00000000000000 --- a/Objects/stringlib/localeutil.h +++ /dev/null @@ -1,97 +0,0 @@ -/* _PyUnicode_InsertThousandsGrouping() helper functions */ - -typedef struct { - const char *grouping; - char previous; - Py_ssize_t i; /* Where we're currently pointing in grouping. */ -} GroupGenerator; - - -static void -GroupGenerator_init(GroupGenerator *self, const char *grouping) -{ - self->grouping = grouping; - self->i = 0; - self->previous = 0; -} - - -/* Returns the next grouping, or 0 to signify end. */ -static Py_ssize_t -GroupGenerator_next(GroupGenerator *self) -{ - /* Note that we don't really do much error checking here. If a - grouping string contains just CHAR_MAX, for example, then just - terminate the generator. That shouldn't happen, but at least we - fail gracefully. */ - switch (self->grouping[self->i]) { - case 0: - return self->previous; - case CHAR_MAX: - /* Stop the generator. 
*/ - return 0; - default: { - char ch = self->grouping[self->i]; - self->previous = ch; - self->i++; - return (Py_ssize_t)ch; - } - } -} - - -/* Fill in some digits, leading zeros, and thousands separator. All - are optional, depending on when we're called. */ -static void -InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, - PyObject *digits, Py_ssize_t *digits_pos, - Py_ssize_t n_chars, Py_ssize_t n_zeros, - PyObject *thousands_sep, Py_ssize_t thousands_sep_len, - Py_UCS4 *maxchar, int forward) -{ - if (!writer) { - /* if maxchar > 127, maxchar is already set */ - if (*maxchar == 127 && thousands_sep) { - Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep); - *maxchar = Py_MAX(*maxchar, maxchar2); - } - return; - } - - if (thousands_sep) { - if (!forward) { - *buffer_pos -= thousands_sep_len; - } - /* Copy the thousands_sep chars into the buffer. */ - _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, - thousands_sep, 0, - thousands_sep_len); - if (forward) { - *buffer_pos += thousands_sep_len; - } - } - - if (!forward) { - *buffer_pos -= n_chars; - *digits_pos -= n_chars; - } - _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, - digits, *digits_pos, - n_chars); - if (forward) { - *buffer_pos += n_chars; - *digits_pos += n_chars; - } - - if (n_zeros) { - if (!forward) { - *buffer_pos -= n_zeros; - } - int kind = PyUnicode_KIND(writer->buffer); - void *data = PyUnicode_DATA(writer->buffer); - unicode_fill(kind, data, '0', *buffer_pos, n_zeros); - if (forward) { - *buffer_pos += n_zeros; - } - } -} diff --git a/Python/formatter_unicode.c b/Objects/unicode_formatter.c similarity index 88% rename from Python/formatter_unicode.c rename to Objects/unicode_formatter.c index 30807f428c7d71..b8604d1355940a 100644 --- a/Python/formatter_unicode.c +++ b/Objects/unicode_formatter.c @@ -8,6 +8,241 @@ #include "pycore_unicodeobject.h" // PyUnicode_MAX_CHAR_VALUE() #include + +/* _PyUnicode_InsertThousandsGrouping() helper 
functions */ + +typedef struct { + const char *grouping; + char previous; + Py_ssize_t i; /* Where we're currently pointing in grouping. */ +} GroupGenerator; + + +static void +GroupGenerator_init(GroupGenerator *self, const char *grouping) +{ + self->grouping = grouping; + self->i = 0; + self->previous = 0; +} + + +/* Returns the next group width, or 0 to signify end. Once the NUL terminator of the grouping string is reached, the last width returned (0 if none) is repeated on every call. */ +static Py_ssize_t +GroupGenerator_next(GroupGenerator *self) +{ + /* Note that we don't really do much error checking here. If a + grouping string contains just CHAR_MAX, for example, then just + terminate the generator. That shouldn't happen, but at least we + fail gracefully. */ + switch (self->grouping[self->i]) { + case 0: + return self->previous; + case CHAR_MAX: + /* Stop the generator. */ + return 0; + default: { + char ch = self->grouping[self->i]; + self->previous = ch; + self->i++; + return (Py_ssize_t)ch; + } + } +} + + +/* Fill in some digits, leading zeros, and thousands separator. All + are optional, depending on when we're called. In counting mode (writer is NULL) nothing is copied; only *maxchar may be raised to cover thousands_sep. */ +static void +InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, + PyObject *digits, Py_ssize_t *digits_pos, + Py_ssize_t n_chars, Py_ssize_t n_zeros, + PyObject *thousands_sep, Py_ssize_t thousands_sep_len, + Py_UCS4 *maxchar, int forward) +{ + if (!writer) { + /* if maxchar > 127, maxchar is already set */ + if (*maxchar == 127 && thousands_sep) { + Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep); + *maxchar = Py_MAX(*maxchar, maxchar2); + } + return; + } + + if (thousands_sep) { + if (!forward) { + *buffer_pos -= thousands_sep_len; + } + /* Copy the thousands_sep chars into the buffer.
*/ + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + thousands_sep, 0, + thousands_sep_len); + if (forward) { + *buffer_pos += thousands_sep_len; + } + } + + if (!forward) { + *buffer_pos -= n_chars; + *digits_pos -= n_chars; + } + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + digits, *digits_pos, + n_chars); + if (forward) { + *buffer_pos += n_chars; + *digits_pos += n_chars; + } + + if (n_zeros) { + if (!forward) { + *buffer_pos -= n_zeros; + } + int kind = PyUnicode_KIND(writer->buffer); + void *data = PyUnicode_DATA(writer->buffer); + _PyUnicode_Fill(kind, data, '0', *buffer_pos, n_zeros); + if (forward) { + *buffer_pos += n_zeros; + } + } +} + + +/** + * InsertThousandsGrouping: + * @writer: Unicode writer to fill, or NULL to only count (see modes below). + * @n_buffer: Size of the output region at @writer->pos; when not @forward, filling starts at its end. + * @digits: Digits we're reading from. Must be NULL in counting mode. + * @d_pos: Start of digits string. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. + * @grouping: see definition in localeconv(). + * @thousands_sep: see definition in localeconv(). Must not be NULL (its length is read unconditionally). + * @maxchar: Counting mode only (must be NULL when filling): set to 127, then raised to the maximum character of @thousands_sep if a separator is used. + * @forward: If non-zero, digits and output are walked left to right; otherwise right to left. + * + * There are 2 modes: counting and filling. If @writer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. If filling, the buffer is assumed to be large enough (only assertions check it). + * Inserts thousand grouping characters (as defined by grouping and + * thousands_sep) into @writer. + * + * Return value: number of characters; no error path is visible in this function.
+ **/ +static Py_ssize_t +_PyUnicode_InsertThousandsGrouping( + _PyUnicodeWriter *writer, + Py_ssize_t n_buffer, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + PyObject *thousands_sep, + Py_UCS4 *maxchar, + int forward) +{ + min_width = Py_MAX(0, min_width); + if (writer) { + assert(digits != NULL); + assert(maxchar == NULL); + } + else { + assert(digits == NULL); + assert(maxchar != NULL); + } + assert(0 <= d_pos); + assert(0 <= n_digits); + assert(grouping != NULL); + + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. */ + Py_ssize_t buffer_pos; + Py_ssize_t digits_pos; + Py_ssize_t len; + Py_ssize_t n_chars; + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + GroupGenerator_init(&groupgen, grouping); + const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); + + /* if digits are not grouped, thousands separator + should be an empty string */ + assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); + + digits_pos = d_pos + (forward ? 0 : n_digits); + if (writer) { + buffer_pos = writer->pos + (forward ? 0 : n_buffer); + assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); + assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); + } + else { + buffer_pos = forward ? 0 : n_buffer; + } + + if (!writer) { + *maxchar = 127; + } + + while ((len = GroupGenerator_next(&groupgen)) > 0) { + len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1)); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ + + /* Count only, don't do anything. */ + count += (use_separator ?
thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? thousands_sep : NULL, + thousands_sep_len, maxchar, forward); + + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= len; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; + } + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement: the generator stopped first, so everything still remaining (digits and padding) is emitted as one final group. */ + + len = Py_MAX(Py_MAX(remaining, min_width), 1); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? thousands_sep : NULL, + thousands_sep_len, maxchar, forward); + } + return count; +} + + /* Raises an exception about an unknown presentation type for this * type. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 11ba147a744692..c71f9d3f71dea5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -104,9 +104,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently */ -// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111). -// The value must be the same in fileutils.c.
-#define MAX_UNICODE 0x10ffff +#define MAX_UNICODE _Py_MAX_UNICODE #ifdef Py_DEBUG # define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0) @@ -420,39 +418,6 @@ static void clear_global_interned_strings(void) return unicode_get_empty(); \ } while (0) -static inline void -unicode_fill(int kind, void *data, Py_UCS4 value, - Py_ssize_t start, Py_ssize_t length) -{ - assert(0 <= start); - switch (kind) { - case PyUnicode_1BYTE_KIND: { - assert(value <= 0xff); - Py_UCS1 ch = (unsigned char)value; - Py_UCS1 *to = (Py_UCS1 *)data + start; - memset(to, ch, length); - break; - } - case PyUnicode_2BYTE_KIND: { - assert(value <= 0xffff); - Py_UCS2 ch = (Py_UCS2)value; - Py_UCS2 *to = (Py_UCS2 *)data + start; - const Py_UCS2 *end = to + length; - for (; to < end; ++to) *to = ch; - break; - } - case PyUnicode_4BYTE_KIND: { - assert(value <= MAX_UNICODE); - Py_UCS4 ch = value; - Py_UCS4 * to = (Py_UCS4 *)data + start; - const Py_UCS4 *end = to + length; - for (; to < end; ++to) *to = ch; - break; - } - default: Py_UNREACHABLE(); - } -} - /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { @@ -9735,142 +9700,6 @@ any_find_slice(PyObject* s1, PyObject* s2, return result; } -/* _PyUnicode_InsertThousandsGrouping() helper functions */ -#include "stringlib/localeutil.h" - -/** - * InsertThousandsGrouping: - * @writer: Unicode writer. - * @n_buffer: Number of characters in @buffer. - * @digits: Digits we're reading from. If count is non-NULL, this is unused. - * @d_pos: Start of digits string. - * @n_digits: The number of digits in the string, in which we want - * to put the grouping chars. - * @min_width: The minimum width of the digits in the output string. - * Output will be zero-padded on the left to fill. - * @grouping: see definition in localeconv(). - * @thousands_sep: see definition in localeconv(). - * - * There are 2 modes: counting and filling. 
If @writer is NULL, - * we are in counting mode, else filling mode. - * If counting, the required buffer size is returned. - * If filling, we know the buffer will be large enough, so we don't - * need to pass in the buffer size. - * Inserts thousand grouping characters (as defined by grouping and - * thousands_sep) into @writer. - * - * Return value: -1 on error, number of characters otherwise. - **/ -Py_ssize_t -_PyUnicode_InsertThousandsGrouping( - _PyUnicodeWriter *writer, - Py_ssize_t n_buffer, - PyObject *digits, - Py_ssize_t d_pos, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - PyObject *thousands_sep, - Py_UCS4 *maxchar, - int forward) -{ - min_width = Py_MAX(0, min_width); - if (writer) { - assert(digits != NULL); - assert(maxchar == NULL); - } - else { - assert(digits == NULL); - assert(maxchar != NULL); - } - assert(0 <= d_pos); - assert(0 <= n_digits); - assert(grouping != NULL); - - Py_ssize_t count = 0; - Py_ssize_t n_zeros; - int loop_broken = 0; - int use_separator = 0; /* First time through, don't append the - separator. They only go between - groups. */ - Py_ssize_t buffer_pos; - Py_ssize_t digits_pos; - Py_ssize_t len; - Py_ssize_t n_chars; - Py_ssize_t remaining = n_digits; /* Number of chars remaining to - be looked at */ - /* A generator that returns all of the grouping widths, until it - returns 0. */ - GroupGenerator groupgen; - GroupGenerator_init(&groupgen, grouping); - const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); - - /* if digits are not grouped, thousands separator - should be an empty string */ - assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); - - digits_pos = d_pos + (forward ? 0 : n_digits); - if (writer) { - buffer_pos = writer->pos + (forward ? 0 : n_buffer); - assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); - assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); - } - else { - buffer_pos = forward ? 
0 : n_buffer; - } - - if (!writer) { - *maxchar = 127; - } - - while ((len = GroupGenerator_next(&groupgen)) > 0) { - len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1)); - n_zeros = Py_MAX(0, len - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, len)); - - /* Use n_zero zero's and n_chars chars */ - - /* Count only, don't do anything. */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - - /* Copy into the writer. */ - InsertThousandsGrouping_fill(writer, &buffer_pos, - digits, &digits_pos, - n_chars, n_zeros, - use_separator ? thousands_sep : NULL, - thousands_sep_len, maxchar, forward); - - /* Use a separator next time. */ - use_separator = 1; - - remaining -= n_chars; - min_width -= len; - - if (remaining <= 0 && min_width <= 0) { - loop_broken = 1; - break; - } - min_width -= thousands_sep_len; - } - if (!loop_broken) { - /* We left the loop without using a break statement. */ - - len = Py_MAX(Py_MAX(remaining, min_width), 1); - n_zeros = Py_MAX(0, len - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, len)); - - /* Use n_zero zero's and n_chars chars */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - - /* Copy into the writer. */ - InsertThousandsGrouping_fill(writer, &buffer_pos, - digits, &digits_pos, - n_chars, n_zeros, - use_separator ? 
thousands_sep : NULL, - thousands_sep_len, maxchar, forward); - } - return count; -} Py_ssize_t PyUnicode_Count(PyObject *str, @@ -10427,7 +10256,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); assert(start >= 0); assert(start + length <= PyUnicode_GET_LENGTH(unicode)); - unicode_fill(kind, data, fill_char, start, length); + _PyUnicode_Fill(kind, data, fill_char, start, length); } Py_ssize_t @@ -10496,9 +10325,10 @@ pad(PyObject *self, kind = PyUnicode_KIND(u); data = PyUnicode_DATA(u); if (left) - unicode_fill(kind, data, fill, 0, left); + _PyUnicode_Fill(kind, data, fill, 0, left); if (right) - unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right); + _PyUnicode_Fill(kind, data, fill, + left + _PyUnicode_LENGTH(self), right); _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self)); assert(_PyUnicode_CheckConsistency(u, 1)); return u; @@ -11910,7 +11740,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) if (tabsize > 0) { incr = tabsize - (line_pos % tabsize); line_pos += incr; - unicode_fill(kind, dest_data, ' ', j, incr); + _PyUnicode_Fill(kind, dest_data, ' ', j, incr); j += incr; } } @@ -15405,7 +15235,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, /* Pad left with the fill character if needed */ if (arg->width > len && !(arg->flags & F_LJUST)) { sublen = arg->width - len; - unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen); + _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen); writer->pos += sublen; arg->width = len; } @@ -15437,7 +15267,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, /* Pad right with the fill character if needed */ if (arg->width > len) { sublen = arg->width - len; - unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen); + _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen); writer->pos += sublen; } return 0; diff 
--git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 5ceddf759b8f3b..c4a11fa9b242bd 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -165,6 +165,7 @@ + @@ -209,7 +210,6 @@ - diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 332d466b1f7409..7bbbec2c9887bf 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -160,9 +160,6 @@ Source Files - - Source Files - Source Files @@ -487,6 +484,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index d7544d3a9fb122..266c8e14730ee7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -556,6 +556,7 @@ + @@ -602,7 +603,6 @@ - $(GeneratedFrozenModulesDir)Python;%(AdditionalIncludeDirectories) diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 1868b222f18534..beed99d980e70d 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1271,6 +1271,9 @@ Objects + + Objects + Objects @@ -1379,9 +1382,6 @@ Python - - Python - Python diff --git a/Python/fileutils.c b/Python/fileutils.c index 2a3f12d4e872f8..b808229716fd9c 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2,6 +2,7 @@ #include "pycore_fileutils.h" // fileutils definitions #include "pycore_runtime.h" // _PyRuntime #include "pycore_pystate.h" // _Py_AssertHoldsTstate() +#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE #include "osdefs.h" // SEP #include // mbstowcs() @@ -50,9 +51,6 @@ extern int winerror_to_errno(int); int _Py_open_cloexec_works = -1; #endif -// The value must be the same in unicodeobject.c. 
-#define MAX_UNICODE 0x10ffff - // mbstowcs() and mbrtowc() errors static const size_t DECODE_ERROR = ((size_t)-1); #ifdef HAVE_MBRTOWC @@ -123,7 +121,7 @@ is_valid_wide_char(wchar_t ch) { #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding - for non-Unicode locales, which makes values higher than MAX_UNICODE + for non-Unicode locales, which makes values higher than _Py_MAX_UNICODE possibly valid. */ return 1; #endif @@ -132,7 +130,7 @@ is_valid_wide_char(wchar_t ch) return 0; } #if SIZEOF_WCHAR_T > 2 - if (ch > MAX_UNICODE) { + if (ch > _Py_MAX_UNICODE) { // bpo-35883: Reject characters outside [U+0000; U+10ffff] range. // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629, // it creates characters outside the [U+0000; U+10ffff] range: