From d39945ec55aaa14d62c90fac3f7541034c5597be Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 3 Oct 2023 18:24:05 +0300 Subject: [PATCH] gh-110289: C API: Add PyUnicode_EqualToString() function --- Doc/c-api/unicode.rst | 10 +++ Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.13.rst | 7 +++ Include/unicodeobject.h | 8 +++ Lib/test/test_stable_abi_ctypes.py | 1 + ...-10-03-19-01-20.gh-issue-110289.YBIHEz.rst | 1 + Misc/stable_abi.toml | 2 + Objects/unicodeobject.c | 61 +++++++++++++++++++ PC/python3dll.c | 1 + 9 files changed, 92 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 2a2cb1b8c458e7..f552380124bb37 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1396,6 +1396,16 @@ They all return ``NULL`` or ``-1`` if an exception occurs. :c:func:`PyErr_Occurred` to check for errors. +.. c:function:: int PyUnicode_EqualToString(PyObject *unicode, const char *string) + + Compare a Unicode object with a UTF-8 encoded C string and return true + if they are equal and false otherwise. + + This function does not raise exceptions. + + .. versionadded:: 3.13 + + .. 
c:function:: int PyUnicode_CompareWithASCIIString(PyObject *uni, const char *string) Compare a Unicode object, *uni*, with *string* and return ``-1``, ``0``, ``1`` for less diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index c189c78238f40f..abfc186cdc460d 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -755,6 +755,7 @@ function,PyUnicode_DecodeUnicodeEscape,3.2,, function,PyUnicode_EncodeCodePage,3.7,on Windows, function,PyUnicode_EncodeFSDefault,3.2,, function,PyUnicode_EncodeLocale,3.7,, +function,PyUnicode_EqualToString,3.13,, function,PyUnicode_FSConverter,3.2,, function,PyUnicode_FSDecoder,3.2,, function,PyUnicode_Find,3.2,, diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 1ef04fa7ae6adc..7f05a0275f4664 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1001,6 +1001,13 @@ New Features :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag. (Contributed by Victor Stinner in :gh:`107073`.) +* Add :c:func:`PyUnicode_EqualToString` function: compare Unicode object with + a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they + are equal or false otherwise. + This function does not raise exceptions. + (Contributed by Serhiy Storchaka in :gh:`110289`.) + + Porting to Python 3.13 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index f00277787122aa..a7fad22e606b28 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -957,6 +957,14 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( const char *right /* ASCII-encoded string */ ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +/* Compare a Unicode object with UTF-8 encoded C string and return 1 for equal + and 0 otherwise. + This function does not raise exceptions. 
*/ + +PyAPI_FUNC(int) PyUnicode_EqualToString(PyObject *, const char *); +#endif + /* Rich compare two strings and return one of the following: - NULL in case an exception was raised diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 94f817f8e1d159..f224d67e6416d6 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -770,6 +770,7 @@ def test_windows_feature_macros(self): "PyUnicode_DecodeUnicodeEscape", "PyUnicode_EncodeFSDefault", "PyUnicode_EncodeLocale", + "PyUnicode_EqualToString", "PyUnicode_FSConverter", "PyUnicode_FSDecoder", "PyUnicode_Find", diff --git a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst new file mode 100644 index 00000000000000..ada5072071a476 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst @@ -0,0 +1 @@ +Add :c:func:`PyUnicode_EqualToString` function. 
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 8df3f85e61eec6..20f6ea560b4316 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2460,3 +2460,5 @@
     added = '3.13'
 [function.PyMapping_HasKeyStringWithError]
     added = '3.13'
+[function.PyUnicode_EqualToString]
+    added = '3.13'
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 49981a1f881c21..8c71990a011849 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10673,6 +10673,67 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
     }
 }
 
+int
+PyUnicode_EqualToString(PyObject *unicode, const char *str)
+{
+    assert(_PyUnicode_CHECK(unicode));
+    assert(str);
+    if (PyUnicode_IS_ASCII(unicode)) {  /* ASCII data is valid UTF-8 */
+        size_t len = (size_t)PyUnicode_GET_LENGTH(unicode);
+        return strlen(str) == len &&
+               memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
+    }
+    if (PyUnicode_UTF8(unicode) != NULL) {  /* UTF-8 form is available */
+        size_t len = (size_t)PyUnicode_UTF8_LENGTH(unicode);
+        return strlen(str) == len &&
+               memcmp(PyUnicode_UTF8(unicode), str, len) == 0;
+    }
+
+    Py_UCS4 ch;
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+    int kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    /* Encode each code point as UTF-8 on the fly and compare with str. */
+    for (Py_ssize_t i = 0; i < len; i++) {
+        ch = PyUnicode_READ(kind, data, i);
+        if (ch == 0) {
+            return 0;  /* embedded null: must not match str's terminator */
+        }
+        else if (ch < 0x80) {
+            if (ch != (unsigned char)*str++) {
+                return 0;
+            }
+        }
+        else if (ch < 0x800) {
+            if ((0xc0 | (ch >> 6)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+        else if (ch < 0x10000) {
+            if (Py_UNICODE_IS_SURROGATE(ch) ||
+                (0xe0 | (ch >> 12)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+        else {
+            assert(ch <= MAX_UNICODE);
+            if ((0xf0 | (ch >> 18)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 12) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+    }
+    return *str == 0;  /* equal only if str is fully consumed as well */
+}
+
 int
 _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
 {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 2c1cc8098ce856..5f629ccf99d28a 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -688,6 +688,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful)
 EXPORT_FUNC(PyUnicode_EncodeCodePage)
 EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
+EXPORT_FUNC(PyUnicode_EqualToString)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)