From 5153eb83c127ede5158a30779def4910c1e9697a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 13 Oct 2023 14:04:18 +0300 Subject: [PATCH] gh-110815: Support non-ASCII keyword names in PyArg_ParseTupleAndKeywords() It already mostly worked, except in the case when invalid keyword argument with non-ASCII name was passed to function with non-ASCII parameter names. Then it crashed in the debug mode. --- Doc/c-api/arg.rst | 9 ++- Doc/whatsnew/3.13.rst | 4 ++ Lib/test/test_capi/test_getargs.py | 66 +++++++++++++++++-- ...-10-13-14-18-06.gh-issue-110815.tEFLVl.rst | 1 + Modules/_testcapi/getargs.c | 52 +++++++++++---- Python/getargs.c | 2 +- 6 files changed, 113 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index c43dd0f4303cd4..62d87d898e682c 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -416,8 +416,10 @@ API Functions .. c:function:: int PyArg_ParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], ...) Parse the parameters of a function that takes both positional and keyword - parameters into local variables. The *keywords* argument is a - ``NULL``-terminated array of keyword parameter names. Empty names denote + parameters into local variables. + The *keywords* argument is a ``NULL``-terminated array of keyword parameter + names specified as null-terminated ASCII or UTF-8 encoded C strings. + Empty names denote :ref:`positional-only parameters `. Returns true on success; on failure, it returns false and raises the appropriate exception. @@ -426,6 +428,9 @@ API Functions Added support for :ref:`positional-only parameters `. + .. versionchanged:: 3.13 + Added support for non-ASCII keyword parameter names. + .. c:function:: int PyArg_VaParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], va_list vargs) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index dfce976fbb50ee..eb49e015fd0dcc 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1045,6 +1045,10 @@ New Features but pass event arguments as a Python :class:`tuple` object. (Contributed by Victor Stinner in :gh:`85283`.) +* :c:func:`PyArg_ParseTupleAndKeywords` now supports non-ASCII keyword + parameter names. + (Contributed by Serhiy Storchaka in :gh:`110815`.) + Porting to Python 3.13 ---------------------- diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index e10f679eeb71c8..d83989457046d9 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -55,6 +55,8 @@ LLONG_MIN = -2**63 ULLONG_MAX = 2**64-1 +NULL = None + class Index: def __index__(self): return 99 @@ -1187,20 +1189,23 @@ def test_bad_use(self): def test_positional_only(self): parse = _testcapi.parse_tuple_and_keywords - parse((1, 2, 3), {}, 'OOO', ['', '', 'a']) - parse((1, 2), {'a': 3}, 'OOO', ['', '', 'a']) + self.assertEqual(parse((1, 2, 3), {}, 'OOO', ['', '', 'a']), (1, 2, 3)) + self.assertEqual(parse((1, 2), {'a': 3}, 'OOO', ['', '', 'a']), (1, 2, 3)) with self.assertRaisesRegex(TypeError, r'function takes at least 2 positional arguments \(1 given\)'): parse((1,), {'a': 3}, 'OOO', ['', '', 'a']) - parse((1,), {}, 'O|OO', ['', '', 'a']) + self.assertEqual(parse((1,), {}, 'O|OO', ['', '', 'a']), + (1, NULL, NULL)) with self.assertRaisesRegex(TypeError, r'function takes at least 1 positional argument \(0 given\)'): parse((), {}, 'O|OO', ['', '', 'a']) - parse((1, 2), {'a': 3}, 'OO$O', ['', '', 'a']) + self.assertEqual(parse((1, 2), {'a': 3}, 'OO$O', ['', '', 'a']), + (1, 2, 3)) with self.assertRaisesRegex(TypeError, r'function takes exactly 2 positional arguments \(1 given\)'): parse((1,), {'a': 3}, 'OO$O', ['', '', 'a']) - parse((1,), {}, 'O|O$O', ['', '', 'a']) + self.assertEqual(parse((1,), {}, 'O|O$O', ['', '', 'a']), + (1, NULL, NULL)) with self.assertRaisesRegex(TypeError, r'function takes at least 1 positional argument \(0 given\)'): parse((), {}, 'O|O$O', ['', '', 'a']) @@ -1209,6 +1214,57 @@ def test_positional_only(self): with self.assertRaisesRegex(SystemError, 'Empty keyword'): parse((1,), {}, 'O|OO', ['', 'a', '']) + def test_nonascii_keywords(self): + parse = _testcapi.parse_tuple_and_keywords + + for name in ('a', 'ä', 'ŷ', '㷷', '𐀀'): + with self.subTest(name=name): + self.assertEqual(parse((), {name: 1}, 'O', [name]), (1,)) + self.assertEqual(parse((), {}, '|O', [name]), (NULL,)) + with self.assertRaisesRegex(TypeError, + f"function missing required argument '{name}'"): + parse((), {}, 'O', [name]) + with self.assertRaisesRegex(TypeError, + fr"argument for function given by name \('{name}'\) " + fr"and position \(1\)"): + parse((1,), {name: 2}, 'O|O', [name, 'b']) + with self.assertRaisesRegex(TypeError, + f"'{name}' is an invalid keyword argument"): + parse((), {name: 1}, '|O', ['b']) + with self.assertRaisesRegex(TypeError, + "'b' is an invalid keyword argument"): + parse((), {'b': 1}, '|O', [name]) + + invalid = name.encode() + (name.encode()[:-1] or b'\x80') + self.assertEqual(parse((), {}, '|O', [invalid]), (NULL,)) + self.assertEqual(parse((1,), {'b': 2}, 'O|O', [invalid, 'b']), + (1, 2)) + with self.assertRaisesRegex(TypeError, + f"function missing required argument '{name}\ufffd'"): + parse((), {}, 'O', [invalid]) + with self.assertRaisesRegex(UnicodeDecodeError, + f"'utf-8' codec can't decode bytes? "): + parse((), {'b': 1}, '|OO', [invalid, 'b']) + with self.assertRaisesRegex(UnicodeDecodeError, + f"'utf-8' codec can't decode bytes? "): + parse((), {'b': 1}, '|O', [invalid]) + + for name2 in ('b', 'ë', 'ĉ', 'Ɐ', '𐀁'): + with self.subTest(name2=name2): + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1}, '|O', [name]) + + name2 = name.encode().decode('latin1') + if name2 != name: + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1}, '|O', [name]) + name3 = name + '3' + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1, name3: 2}, '|OO', [name, name3]) + class Test_testcapi(unittest.TestCase): locals().update((name, getattr(_testcapi, name)) diff --git a/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst b/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst new file mode 100644 index 00000000000000..216d2d211644a8 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst @@ -0,0 +1 @@ +Support non-ASCII keyword names in :c:func:`PyArg_ParseTupleAndKeywords`. diff --git a/Modules/_testcapi/getargs.c b/Modules/_testcapi/getargs.c index 5f4a6dc8ca7672..86d8f5984dd3e5 100644 --- a/Modules/_testcapi/getargs.c +++ b/Modules/_testcapi/getargs.c @@ -13,9 +13,9 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) const char *sub_format; PyObject *sub_keywords; - double buffers[8][4]; /* double ensures alignment where necessary */ - PyObject *converted[8]; - char *keywords[8 + 1]; /* space for NULL at end */ +#define MAX_PARAMS 8 + double buffers[MAX_PARAMS][4]; /* double ensures alignment where necessary */ + char *keywords[MAX_PARAMS + 1]; /* space for NULL at end */ PyObject *return_value = NULL; @@ -35,11 +35,10 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) } memset(buffers, 0, sizeof(buffers)); - memset(converted, 0, sizeof(converted)); memset(keywords, 0, sizeof(keywords)); Py_ssize_t size = PySequence_Fast_GET_SIZE(sub_keywords); - if (size > 8) { + if (size > MAX_PARAMS) { PyErr_SetString(PyExc_ValueError, "parse_tuple_and_keywords: too many keywords in sub_keywords"); goto exit; @@ -47,29 +46,56 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) for (Py_ssize_t i = 0; i < size; i++) { PyObject *o = PySequence_Fast_GET_ITEM(sub_keywords, i); - if (!PyUnicode_FSConverter(o, (void *)(converted + i))) { + if (PyUnicode_Check(o)) { + keywords[i] = (char *)PyUnicode_AsUTF8(o); + if (keywords[i] == NULL) { + goto exit; + } + } + else if (PyBytes_Check(o)) { + keywords[i] = PyBytes_AS_STRING(o); + } + else { PyErr_Format(PyExc_ValueError, "parse_tuple_and_keywords: " - "could not convert keywords[%zd] to narrow string", i); + "keywords must be str or bytes", i); goto exit; } - keywords[i] = PyBytes_AS_STRING(converted[i]); } + assert(MAX_PARAMS == 8); int result = PyArg_ParseTupleAndKeywords(sub_args, sub_kwargs, sub_format, keywords, buffers + 0, buffers + 1, buffers + 2, buffers + 3, buffers + 4, buffers + 5, buffers + 6, buffers + 7); if (result) { - return_value = Py_NewRef(Py_None); + int objects_only = 1; + for (const char *f = sub_format; *f; f++) { + if (Py_ISALNUM(*f) && strchr("OSUY", *f) == NULL) { + objects_only = 0; + break; + } + } + if (objects_only) { + return_value = PyTuple_New(size); + if (return_value == NULL) { + goto exit; + } + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *arg = *(PyObject **)(buffers + i); + if (arg == NULL) { + arg = Py_None; + } + PyTuple_SET_ITEM(return_value, i, Py_NewRef(arg)); + } + } + else { + return_value = Py_NewRef(Py_None); + } } exit: - size = sizeof(converted) / sizeof(converted[0]); - for (Py_ssize_t i = 0; i < size; i++) { - Py_XDECREF(converted[i]); - } return return_value; } diff --git a/Python/getargs.c b/Python/getargs.c index d590e2e153389e..a0eef2ccee5c37 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1729,7 +1729,7 @@ vgetargskeywords(PyObject *args, PyObject *kwargs, const char *format, return cleanreturn(0, &freelist); } for (i = pos; i < len; i++) { - if (_PyUnicode_EqualToASCIIString(key, kwlist[i])) { + if (PyUnicode_EqualToUTF8(key, kwlist[i])) { match = 1; break; }