diff --git a/Doc/library/json.rst b/Doc/library/json.rst index c82ff9dc325b4c..66e379d3ef2802 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -148,15 +148,19 @@ Basic Usage .. function:: dump(obj, fp, *, skipkeys=False, ensure_ascii=True, \ check_circular=True, allow_nan=True, cls=None, \ indent=None, separators=None, default=None, \ - sort_keys=False, **kw) + sort_keys=False, convert_keys=False, **kw) Serialize *obj* as a JSON formatted stream to *fp* (a ``.write()``-supporting :term:`file-like object`) using this :ref:`conversion table <py-to-json-table>`. - If *skipkeys* is true (default: ``False``), then dict keys that are not - of a basic type (:class:`str`, :class:`int`, :class:`float`, :class:`bool`, - ``None``) will be skipped instead of raising a :exc:`TypeError`. + *skipkeys* and *convert_keys* control how keys that are not of a basic type + (:class:`str`, :class:`int`, :class:`float`, :class:`bool`, ``None``) are + handled. If *skipkeys* is true (default: ``False``), then such keys will be + skipped. Otherwise, if *convert_keys* is true (default: ``False``), such + keys are converted to a supported type by calling the encoder's + :meth:`JSONEncoder.default` method. If :meth:`JSONEncoder.default` returns an + unsupported type, or *convert_keys* is false, :exc:`TypeError` is raised. The :mod:`json` module always produces :class:`str` objects, not :class:`bytes` objects. Therefore, ``fp.write()`` must support :class:`str` diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index ed2c74771ea87d..329f4cd746045b 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -119,13 +119,16 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + default=None, sort_keys=False, convert_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). 
- If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. + Dict keys in JSON must be str, int, float, bool, or None. ``skipkeys`` and + ``convert_keys`` control how keys that are not one of these types are + handled. If ``skipkeys`` is True, then those items are simply skipped. + Otherwise, if ``convert_keys`` is True, the keys will be passed to + ``.default()`` to be converted. If ``convert_keys`` is False, or + ``.default()`` returns an unsupported type, ``TypeError`` is raised. If ``ensure_ascii`` is false, then the strings written to ``fp`` can contain non-ASCII characters if they appear in strings contained in @@ -165,7 +168,7 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + default is None and not sort_keys and not convert_keys and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -173,7 +176,8 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, - default=default, sort_keys=sort_keys, **kw).iterencode(obj) + default=default, sort_keys=sort_keys, convert_keys=convert_keys, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -182,12 +186,15 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + default=None, sort_keys=False, convert_keys=False, **kw): 
"""Serialize ``obj`` to a JSON formatted ``str``. - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. + Dict keys in JSON must be str, int, float, bool, or None. ``skipkeys`` and + ``convert_keys`` control how keys that are not one of these types are + handled. If ``skipkeys`` is True, then those items are simply skipped. + Otherwise, if ``convert_keys`` is True, the keys will be passed to + ``.default()`` to be converted. If ``convert_keys`` is False, or + ``.default()`` returns an unsupported type, ``TypeError`` is raised. If ``ensure_ascii`` is false, then the return value can contain non-ASCII characters if they appear in strings contained in ``obj``. Otherwise, all @@ -227,7 +234,7 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + default is None and not sort_keys and not convert_keys and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -235,7 +242,7 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, default=default, sort_keys=sort_keys, - **kw).encode(obj) + convert_keys=convert_keys, **kw).encode(obj) _default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index 597849eca0524a..820dafc105b433 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -104,12 +104,15 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, *, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, 
default=None): + indent=None, separators=None, default=None, convert_keys=False): """Constructor for JSONEncoder, with sensible defaults. - If skipkeys is false, then it is a TypeError to attempt - encoding of keys that are not str, int, float or None. If - skipkeys is True, such items are simply skipped. + Dict keys in JSON must be str, int, float, bool, or None. skipkeys and + convert_keys control how keys that are not one of these types are + handled. If skipkeys is True, then those items are simply skipped. + Otherwise, if convert_keys is True, the keys will be passed to + ``.default()`` to be converted. If convert_keys is False, or + ``.default()`` returns an unsupported type, TypeError is raised. If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If @@ -157,6 +160,7 @@ def __init__(self, *, skipkeys=False, ensure_ascii=True, self.item_separator = ',' if default is not None: self.default = default + self.convert_keys = convert_keys def default(self, o): """Implement this method in a subclass such that it returns @@ -249,16 +253,17 @@ def floatstr(o, allow_nan=self.allow_nan, _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, self.convert_keys) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) + self.skipkeys, self.convert_keys, _one_shot) return _iterencode(o, 0) def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, - _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + _key_separator, _item_separator, _sort_keys, _skipkeys, _convert_keys, + _one_shot, ## HACK: hand-optimized bytecode; turn globals into locals ValueError=ValueError, dict=dict, @@ -371,6 +376,27 @@ def _iterencode_dict(dct, 
_current_indent_level): elif isinstance(key, int): # see comment for int/float in _make_iterencode key = _intstr(key) elif _skipkeys: continue + elif _convert_keys: + key = _default(key) + if isinstance(key, str): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + # see comment for int/float in _make_iterencode + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, int): + # see comment for int/float in _make_iterencode + key = _intstr(key) + else: + raise TypeError(f'keys must be str, int, float, bool ' + f'or None, not {key.__class__.__name__}') else: diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 13b40020781bae..06a0f21b1b3936 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -22,6 +22,27 @@ def test_dump_skipkeys(self): self.assertIn('valid_key', o) self.assertNotIn(b'invalid_key', o) + def test_dump_convert_keys(self): + v = {b'bytes_key': False, 'valid_key': True} + with self.assertRaises(TypeError): + self.json.dumps(v) + + def default(val): + if isinstance(val, bytes): + return val.hex(':') + return str(val) + + s = self.json.dumps(v, convert_keys=True, default=default) + o = self.json.loads(s) + self.assertIn('valid_key', o) + self.assertIn('62:79:74:65:73:5f:6b:65:79', o) + + # skipkeys takes precedence over convert_keys: the key is dropped, + # not converted. + s = self.json.dumps(v, skipkeys=True, convert_keys=True, + default=default) + self.assertEqual(self.json.loads(s), {'valid_key': True}) + def test_encode_truefalse(self): self.assertEqual(self.dumps( {True: False, False: True}, sort_keys=True), diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 682014cfd5b344..097a0d449a093f 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -45,7 +45,7 @@ def bad_encoder1(*args): return None enc = self.json.encoder.c_make_encoder(None, lambda obj: str(obj), bad_encoder1, None, ': ', ', ', - False, False, False) + 
False, False, False, False) with self.assertRaises(TypeError): enc('spam', 4) with self.assertRaises(TypeError): @@ -55,7 +55,7 @@ def bad_encoder2(*args): 1/0 enc = self.json.encoder.c_make_encoder(None, lambda obj: str(obj), bad_encoder2, None, ': ', ', ', - False, False, False) + False, False, False, False) with self.assertRaises(ZeroDivisionError): enc('spam', 4) @@ -66,7 +66,7 @@ def test_bad_markers_argument_to_encoder(self): r'make_encoder\(\) argument 1 must be dict or None, not int', ): self.json.encoder.c_make_encoder(1, None, None, None, ': ', ', ', - False, False, False) + False, False, False, False) def test_bad_bool_args(self): def test(name): diff --git a/Misc/NEWS.d/next/Library/2024-03-30-11-42-13.gh-issue-117391.u7KmA9.rst b/Misc/NEWS.d/next/Library/2024-03-30-11-42-13.gh-issue-117391.u7KmA9.rst new file mode 100644 index 00000000000000..fd5982d5df3648 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-30-11-42-13.gh-issue-117391.u7KmA9.rst @@ -0,0 +1,4 @@ +Add ``convert_keys`` parameter to JSON encoding, to allow passing dict keys that +are not supported types in JSON through the encoder's +:meth:`json.JSONEncoder.default()` method. +Patch by Charles Cazabon. 
diff --git a/Modules/_json.c b/Modules/_json.c index c55299899e77fe..be66f04c7ae7ba 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -46,6 +46,7 @@ typedef struct _PyEncoderObject { PyObject *item_separator; char sort_keys; char skipkeys; + char convert_keys; int allow_nan; PyCFunction fast_encode; } PyEncoderObject; @@ -59,6 +60,7 @@ static PyMemberDef encoder_members[] = { {"item_separator", _Py_T_OBJECT, offsetof(PyEncoderObject, item_separator), Py_READONLY, "item_separator"}, {"sort_keys", Py_T_BOOL, offsetof(PyEncoderObject, sort_keys), Py_READONLY, "sort_keys"}, {"skipkeys", Py_T_BOOL, offsetof(PyEncoderObject, skipkeys), Py_READONLY, "skipkeys"}, + {"convert_keys", Py_T_BOOL, offsetof(PyEncoderObject, convert_keys), Py_READONLY, "convert_keys"}, {NULL} }; @@ -1205,17 +1207,17 @@ static PyType_Spec PyScannerType_spec = { static PyObject * encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "convert_keys", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator; - int sort_keys, skipkeys, allow_nan; + int sort_keys, skipkeys, allow_nan, convert_keys; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUpppp:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, - &sort_keys, &skipkeys, &allow_nan)) + &sort_keys, &skipkeys, &allow_nan, &convert_keys)) return NULL; if (markers != Py_None && !PyDict_Check(markers)) { @@ -1238,6 +1240,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->allow_nan = 
allow_nan; + s->convert_keys = convert_keys; s->fast_encode = NULL; if (PyCFunction_Check(s->encoder)) { @@ -1478,6 +1481,40 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir else if (s->skipkeys) { return 0; } + else if (s->convert_keys) { + /* Let default() map the unsupported key to a supported type. */ + PyObject *newobj = PyObject_CallOneArg(s->defaultfn, key); + if (newobj == NULL) { + return -1; + } + if (newobj == Py_None || newobj == Py_True || newobj == Py_False) { + /* Checked before PyLong_Check because True and False are also ints. */ + keystr = _encoded_const(newobj); + } + else if (PyUnicode_Check(newobj)) { + keystr = Py_NewRef(newobj); + } + else if (PyLong_Check(newobj)) { + keystr = PyLong_Type.tp_repr(newobj); + } + else if (PyFloat_Check(newobj)) { + keystr = encoder_encode_float(s, newobj); + } + else { + /* Build the message before dropping the reference to newobj. */ + PyErr_Format(PyExc_TypeError, + "keys must be str, int, float, bool or None, " + "not %.100s", + Py_TYPE(newobj)->tp_name); + Py_DECREF(newobj); + return -1; + } + /* Release default()'s result on every path (the unicode branch took its own reference). */ + Py_DECREF(newobj); + if (keystr == NULL) { + return -1; + } + } else { PyErr_Format(PyExc_TypeError, "keys must be str, int, float, bool or None, "