From 1dc504dc38adc29a6ef38e076af827dcff82ce29 Mon Sep 17 00:00:00 2001 From: Uno Date: Sat, 13 Sep 2025 22:17:34 +0900 Subject: [PATCH 1/2] Convert _json --- Modules/_json.c | 299 +++++++++++++-------------------------- Modules/clinic/_json.c.h | 79 +++++++++++ 2 files changed, 174 insertions(+), 204 deletions(-) create mode 100644 Modules/clinic/_json.c.h diff --git a/Modules/_json.c b/Modules/_json.c index 9a1fc3aba36116..bafd0c74382350 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -10,7 +10,6 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() -#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST() #include "pycore_global_strings.h" // _Py_ID() #include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_runtime.h" // _PyRuntime @@ -18,6 +17,12 @@ #include <stdbool.h> // bool +#include "clinic/_json.c.h" + +/*[clinic input] +module _json +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=549fa53592c925b2]*/ typedef struct _PyScannerObject { PyObject_HEAD @@ -73,8 +78,6 @@ static PyMemberDef encoder_members[] = { static PyObject * ascii_escape_unicode(PyObject *pystr); -static PyObject * -py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr); static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); @@ -637,34 +640,30 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next return NULL; } -PyDoc_STRVAR(pydoc_scanstring, - "scanstring(string, end, strict=True) -> (string, end)\n" - "\n" - "Scan the string s for a JSON string. End is the index of the\n" - "character in s after the quote that started the JSON string.\n" - "Unescapes all valid JSON string escape sequences and raises ValueError\n" - "on attempt to decode an invalid string. 
If strict is False then literal\n" - "control characters are allowed in the string.\n" - "\n" - "Returns a tuple of the decoded string and the index of the character in s\n" - "after the end quote." -); +/*[clinic input] +_json.scanstring + pystr: object + end: Py_ssize_t + strict: bool = True + / + +Scan the string s for a JSON string. + +Return a tuple of the decoded string and the index of the character in s +after the end quote. +[clinic start generated code]*/ static PyObject * -py_scanstring(PyObject* Py_UNUSED(self), PyObject *args) +_json_scanstring_impl(PyObject *module, PyObject *pystr, Py_ssize_t end, + int strict) +/*[clinic end generated code: output=9f20d620966f1454 input=0afa14325d15360e]*/ { - PyObject *pystr; PyObject *rval; - Py_ssize_t end; Py_ssize_t next_end = -1; - int strict = 1; - if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) { - return NULL; - } + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); - } - else { + } else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); @@ -673,53 +672,46 @@ py_scanstring(PyObject* Py_UNUSED(self), PyObject *args) return _build_rval_index_tuple(rval, next_end); } -PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(string) -> string\n" - "\n" - "Return an ASCII-only JSON representation of a Python string" -); +/*[clinic input] +_json.encode_basestring_ascii + pystr: object + / + +Return an ASCII-only JSON representation of a Python string +[clinic start generated code]*/ static PyObject * -py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr) +_json_encode_basestring_ascii(PyObject *module, PyObject *pystr) +/*[clinic end generated code: output=e5ab8fbc5f216536 input=c49d7098d0d29952]*/ { - PyObject *rval; - /* Return an ASCII-only JSON representation of a Python string */ - /* METH_O */ if (PyUnicode_Check(pystr)) { - rval = ascii_escape_unicode(pystr); - } - 
else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; + return ascii_escape_unicode(pystr); } - return rval; + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; } +/*[clinic input] +_json.encode_basestring + pystr: object + / -PyDoc_STRVAR(pydoc_encode_basestring, - "encode_basestring(string) -> string\n" - "\n" - "Return a JSON representation of a Python string" -); +Return a JSON representation of a Python string +[clinic start generated code]*/ static PyObject * -py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr) +_json_encode_basestring(PyObject *module, PyObject *pystr) +/*[clinic end generated code: output=6178d2f18cdb540e input=222eb0810dbbd3a3]*/ { - PyObject *rval; - /* Return a JSON representation of a Python string */ - /* METH_O */ if (PyUnicode_Check(pystr)) { - rval = escape_unicode(pystr); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; + return escape_unicode(pystr); } - return rval; + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; } static void @@ -1357,10 +1349,10 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); - if (f == py_encode_basestring_ascii) { + if (f == _json_encode_basestring_ascii) { s->fast_encode = write_escaped_ascii; } - else if (f == py_encode_basestring) { + else if (f == _json_encode_basestring) { s->fast_encode = write_escaped_unicode; } } @@ -1744,84 +1736,15 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs return 0; } -static inline int -_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, - bool *first, PyObject *dct, PyObject *items, - 
Py_ssize_t indent_level, PyObject *indent_cache, - PyObject *separator) -{ - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(items); - PyObject *key, *value; - for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { - PyObject *item = PyList_GET_ITEM(items, i); -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on items can get suspended - Py_INCREF(item); -#endif - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); -#ifdef Py_GIL_DISABLED - Py_DECREF(item); -#endif - return -1; - } - - key = PyTuple_GET_ITEM(item, 0); - value = PyTuple_GET_ITEM(item, 1); - if (encoder_encode_key_value(s, writer, first, dct, key, value, - indent_level, indent_cache, - separator) < 0) { -#ifdef Py_GIL_DISABLED - Py_DECREF(item); -#endif - return -1; - } -#ifdef Py_GIL_DISABLED - Py_DECREF(item); -#endif - } - - return 0; -} - -static inline int -_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, - bool *first, PyObject *dct, Py_ssize_t indent_level, - PyObject *indent_cache, PyObject *separator) -{ - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(dct); - PyObject *key, *value; - Py_ssize_t pos = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on dct can get suspended - Py_INCREF(key); - Py_INCREF(value); -#endif - if (encoder_encode_key_value(s, writer, first, dct, key, value, - indent_level, indent_cache, - separator) < 0) { -#ifdef Py_GIL_DISABLED - Py_DECREF(key); - Py_DECREF(value); -#endif - return -1; - } -#ifdef Py_GIL_DISABLED - Py_DECREF(key); - Py_DECREF(value); -#endif - } - return 0; -} - static int encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, - PyObject *dct, + PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache) { /* Encode Python dict dct a JSON term */ PyObject *ident = NULL; + PyObject *items = NULL; + PyObject 
*key, *value; bool first = true; if (PyDict_GET_SIZE(dct) == 0) { @@ -1858,30 +1781,34 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, } if (s->sort_keys || !PyDict_CheckExact(dct)) { - PyObject *items = PyMapping_Items(dct); - if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) { - Py_XDECREF(items); + items = PyMapping_Items(dct); + if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) goto bail; - } - int result; - Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items); - result = _encoder_iterate_mapping_lock_held(s, writer, &first, dct, - items, indent_level, indent_cache, separator); - Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); - Py_DECREF(items); - if (result < 0) { - goto bail; + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { + PyObject *item = PyList_GET_ITEM(items, i); + + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + + key = PyTuple_GET_ITEM(item, 0); + value = PyTuple_GET_ITEM(item, 1); + if (encoder_encode_key_value(s, writer, &first, dct, key, value, + indent_level, indent_cache, + separator) < 0) + goto bail; } + Py_CLEAR(items); } else { - int result; - Py_BEGIN_CRITICAL_SECTION(dct); - result = _encoder_iterate_dict_lock_held(s, writer, &first, dct, - indent_level, indent_cache, separator); - Py_END_CRITICAL_SECTION(); - if (result < 0) { - goto bail; + Py_ssize_t pos = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + if (encoder_encode_key_value(s, writer, &first, dct, key, value, + indent_level, indent_cache, + separator) < 0) + goto bail; } } @@ -1903,43 +1830,11 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, return 0; bail: + Py_XDECREF(items); Py_XDECREF(ident); return -1; } -static inline int -_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, - PyObject *seq, PyObject *s_fast, - Py_ssize_t indent_level, PyObject *indent_cache, PyObject 
*separator) -{ - for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { - PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on s_fast can get suspended - Py_INCREF(obj); -#endif - if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { -#ifdef Py_GIL_DISABLED - Py_DECREF(obj); -#endif - return -1; - } - } - if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { - _PyErr_FormatNote("when serializing %T item %zd", seq, i); -#ifdef Py_GIL_DISABLED - Py_DECREF(obj); -#endif - return -1; - } -#ifdef Py_GIL_DISABLED - Py_DECREF(obj); -#endif - } - return 0; -} - static int encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, @@ -1947,8 +1842,10 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, { PyObject *ident = NULL; PyObject *s_fast = NULL; + Py_ssize_t i; - s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence"); + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); if (s_fast == NULL) return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { @@ -1986,13 +1883,16 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, goto bail; } } - int result; - Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq); - result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast, - indent_level, indent_cache, separator); - Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); - if (result < 0) { - goto bail; + for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { + PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + if (i) { + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) + goto bail; + } + if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { + _PyErr_FormatNote("when serializing %T item %zd", seq, i); + goto bail; + } } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) @@ -2080,18 +1980,9 @@ static 
PyType_Spec PyEncoderType_spec = { }; static PyMethodDef speedups_methods[] = { - {"encode_basestring_ascii", - py_encode_basestring_ascii, - METH_O, - pydoc_encode_basestring_ascii}, - {"encode_basestring", - py_encode_basestring, - METH_O, - pydoc_encode_basestring}, - {"scanstring", - py_scanstring, - METH_VARARGS, - pydoc_scanstring}, + _JSON_ENCODE_BASESTRING_ASCII_METHODDEF + _JSON_ENCODE_BASESTRING_METHODDEF + _JSON_SCANSTRING_METHODDEF {NULL, NULL, 0, NULL} }; diff --git a/Modules/clinic/_json.c.h b/Modules/clinic/_json.c.h new file mode 100644 index 00000000000000..b3912ee37f7c49 --- /dev/null +++ b/Modules/clinic/_json.c.h @@ -0,0 +1,79 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + +PyDoc_STRVAR(_json_scanstring__doc__, +"scanstring($module, pystr, end, strict=True, /)\n" +"--\n" +"\n" +"Scan the string s for a JSON string.\n" +"\n" +"Return a tuple of the decoded string and the index of the character in s\n" +"after the end quote."); + +#define _JSON_SCANSTRING_METHODDEF \ + {"scanstring", _PyCFunction_CAST(_json_scanstring), METH_FASTCALL, _json_scanstring__doc__}, + +static PyObject * +_json_scanstring_impl(PyObject *module, PyObject *pystr, Py_ssize_t end, + int strict); + +static PyObject * +_json_scanstring(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *pystr; + Py_ssize_t end; + int strict = 1; + + if (!_PyArg_CheckPositional("scanstring", nargs, 2, 3)) { + goto exit; + } + pystr = args[0]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + end = ival; + } + if (nargs < 3) { + goto skip_optional; + } + strict = PyObject_IsTrue(args[2]); + if (strict < 0) { + goto exit; + } +skip_optional: + return_value 
= _json_scanstring_impl(module, pystr, end, strict); + +exit: + return return_value; +} + +PyDoc_STRVAR(_json_encode_basestring_ascii__doc__, +"encode_basestring_ascii($module, pystr, /)\n" +"--\n" +"\n" +"Return an ASCII-only JSON representation of a Python string"); + +#define _JSON_ENCODE_BASESTRING_ASCII_METHODDEF \ + {"encode_basestring_ascii", (PyCFunction)_json_encode_basestring_ascii, METH_O, _json_encode_basestring_ascii__doc__}, + +PyDoc_STRVAR(_json_encode_basestring__doc__, +"encode_basestring($module, pystr, /)\n" +"--\n" +"\n" +"Return a JSON representation of a Python string"); + +#define _JSON_ENCODE_BASESTRING_METHODDEF \ + {"encode_basestring", (PyCFunction)_json_encode_basestring, METH_O, _json_encode_basestring__doc__}, +/*[clinic end generated code: output=7f57cc1b016aad17 input=a9049054013a1b77]*/ From 84d9e5e6e363b4b4400f73ff1cf68ae0139f24d3 Mon Sep 17 00:00:00 2001 From: Uno Date: Sun, 14 Sep 2025 23:14:16 +0900 Subject: [PATCH 2/2] fix: pep7 code structure --- Modules/_json.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index bafd0c74382350..7cb88e79f49770 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -663,7 +663,8 @@ _json_scanstring_impl(PyObject *module, PyObject *pystr, Py_ssize_t end, if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); - } else { + } + else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name);