Skip to content

Commit

Permalink
pythongh-110289: C API: Add PyUnicode_EqualToString() function
Browse files Browse the repository at this point in the history
  • Loading branch information
serhiy-storchaka committed Oct 3, 2023
1 parent 8c07137 commit d39945e
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Doc/c-api/unicode.rst
Expand Up @@ -1396,6 +1396,16 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
:c:func:`PyErr_Occurred` to check for errors.
.. c:function:: int PyUnicode_EqualToString(PyObject *unicode, const char *string)
Compare a Unicode object with a UTF-8 encoded C string and return true
if they are equal and false otherwise.
This function does not raise exceptions.
.. versionadded:: 3.13
.. c:function:: int PyUnicode_CompareWithASCIIString(PyObject *uni, const char *string)
Compare a Unicode object, *uni*, with *string* and return ``-1``, ``0``, ``1`` for less
Expand Down
1 change: 1 addition & 0 deletions Doc/data/stable_abi.dat

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Doc/whatsnew/3.13.rst
Expand Up @@ -1001,6 +1001,13 @@ New Features
:c:macro:`Py_TPFLAGS_MANAGED_DICT` flag.
(Contributed by Victor Stinner in :gh:`107073`.)

* Add :c:func:`PyUnicode_EqualToString` function: compare a Unicode object with
a UTF-8 encoded :c:expr:`const char*` C string and return true if they
are equal and false otherwise.
This function does not raise exceptions.
(Contributed by Serhiy Storchaka in :gh:`110289`.)


Porting to Python 3.13
----------------------

Expand Down
8 changes: 8 additions & 0 deletions Include/unicodeobject.h
Expand Up @@ -957,6 +957,14 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
const char *right /* ASCII-encoded string */
);

#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000
/* Compare a Unicode object with a NUL-terminated, UTF-8 encoded C string.
   Return 1 if they are equal and 0 otherwise.
   This function does not raise exceptions.

   Declared for the full API, and for the limited API when the requested
   version is 3.13 (0x030D0000) or newer. */

PyAPI_FUNC(int) PyUnicode_EqualToString(PyObject *, const char *);
#endif

/* Rich compare two strings and return one of the following:
- NULL in case an exception was raised
Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_stable_abi_ctypes.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

@@ -0,0 +1 @@
Add :c:func:`PyUnicode_EqualToString` function.
2 changes: 2 additions & 0 deletions Misc/stable_abi.toml
Expand Up @@ -2460,3 +2460,5 @@
added = '3.13'
[function.PyMapping_HasKeyStringWithError]
added = '3.13'
[function.PyUnicode_EqualToString]
added = '3.13'
61 changes: 61 additions & 0 deletions Objects/unicodeobject.c
Expand Up @@ -10673,6 +10673,67 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
}
}

/* Compare a Unicode object with a NUL-terminated, UTF-8 encoded C string.

   Return 1 if the UTF-8 encoding of *unicode* is byte-for-byte equal to
   *str*, and 0 otherwise.  This function does not raise exceptions.

   A Unicode object containing an embedded null character or a surrogate
   code point never compares equal: the former cannot appear inside a
   NUL-terminated C string and the latter has no valid UTF-8 encoding. */
int
PyUnicode_EqualToString(PyObject *unicode, const char *str)
{
    assert(_PyUnicode_CHECK(unicode));
    assert(str);

    /* Fast path: an ASCII string is its own UTF-8 encoding. */
    if (PyUnicode_IS_ASCII(unicode)) {
        size_t len = (size_t)PyUnicode_GET_LENGTH(unicode);
        return strlen(str) == len &&
               memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
    }
    /* Fast path: reuse the UTF-8 representation if one is already stored. */
    if (PyUnicode_UTF8(unicode) != NULL) {
        size_t len = (size_t)PyUnicode_UTF8_LENGTH(unicode);
        return strlen(str) == len &&
               memcmp(PyUnicode_UTF8(unicode), str, len) == 0;
    }

    /* Slow path: encode each code point to UTF-8 on the fly and compare it
       with the next bytes of the C string, without allocating anything. */
    const unsigned char *s = (const unsigned char *)str;
    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
    int kind = PyUnicode_KIND(unicode);
    const void *data = PyUnicode_DATA(unicode);
    for (Py_ssize_t i = 0; i < len; i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        if (ch == 0) {
            /* An embedded null character can never be part of a
               NUL-terminated C string.  Rejecting it here also keeps the
               pointer from stepping over the terminator of str. */
            return 0;
        }
        else if (ch < 0x80) {
            /* 1 byte: 0xxxxxxx */
            if (ch != *s++) {
                return 0;
            }
        }
        else if (ch < 0x800) {
            /* 2 bytes: 110xxxxx 10xxxxxx.  The short-circuit evaluation
               stops at the first mismatch (including the terminating NUL),
               so s never moves past the end of the C string. */
            if ((0xc0 | (ch >> 6)) != *s++ ||
                (0x80 | (ch & 0x3f)) != *s++)
            {
                return 0;
            }
        }
        else if (ch < 0x10000) {
            /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx.
               Surrogates cannot be encoded to UTF-8. */
            if (Py_UNICODE_IS_SURROGATE(ch) ||
                (0xe0 | (ch >> 12)) != *s++ ||
                (0x80 | ((ch >> 6) & 0x3f)) != *s++ ||
                (0x80 | (ch & 0x3f)) != *s++)
            {
                return 0;
            }
        }
        else {
            /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            assert(ch <= MAX_UNICODE);
            if ((0xf0 | (ch >> 18)) != *s++ ||
                (0x80 | ((ch >> 12) & 0x3f)) != *s++ ||
                (0x80 | ((ch >> 6) & 0x3f)) != *s++ ||
                (0x80 | (ch & 0x3f)) != *s++)
            {
                return 0;
            }
        }
    }
    /* Equal only if the C string ends exactly where the Unicode string does. */
    return *s == 0;
}

int
_PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
{
Expand Down
1 change: 1 addition & 0 deletions PC/python3dll.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d39945e

Please sign in to comment.