python · ccazabon · Mar 26, 2024 · Mar 28, 2024 · Mar 30, 2024 · Mar 30, 2024
diff --git a/Doc/library/json.rst b/Doc/library/json.rst
@@ -148,15 +148,19 @@ Basic Usage
 .. function:: dump(obj, fp, *, skipkeys=False, ensure_ascii=True, \
                    check_circular=True, allow_nan=True, cls=None, \
                    indent=None, separators=None, default=None, \
-                   sort_keys=False, **kw)
+                   sort_keys=False, convert_keys=False, **kw)
 
    Serialize *obj* as a JSON formatted stream to *fp* (a ``.write()``-supporting
    :term:`file-like object`) using this :ref:`conversion table
    <py-to-json-table>`.
 
-   If *skipkeys* is true (default: ``False``), then dict keys that are not
-   of a basic type (:class:`str`, :class:`int`, :class:`float`, :class:`bool`,
-   ``None``) will be skipped instead of raising a :exc:`TypeError`.
+   *skipkeys* and *convert_keys* control how keys that are not of a basic type
+   (:class:`str`, :class:`int`, :class:`float`, :class:`bool`, ``None``) are
+   handled.  If *skipkeys* is true (default: ``False``), then such keys will be
+   skipped.  Otherwise, if *convert_keys* is true (default: ``False``), such
+   keys are converted to a supported type by calling the encoder's
+   :meth:`JSONEncoder.default` method.  :exc:`TypeError` is raised if neither
+   is true, or if :meth:`JSONEncoder.default` returns an unsupported type.
 
    The :mod:`json` module always produces :class:`str` objects, not
    :class:`bytes` objects. Therefore, ``fp.write()`` must support :class:`str`

diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
@@ -119,13 +119,16 @@
 
 def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, sort_keys=False, **kw):
+        default=None, sort_keys=False, convert_keys=False, **kw):
     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
     ``.write()``-supporting file-like object).
 
-    If ``skipkeys`` is true then ``dict`` keys that are not basic types
-    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
-    instead of raising a ``TypeError``.
+    Dict keys must be str, int, float, bool, or None to be serialized.
+    ``skipkeys`` and ``convert_keys`` control how keys of any other type are
+    handled.  If ``skipkeys`` is True, then those items are simply skipped.
+    Otherwise, if ``convert_keys`` is True, the keys will be passed to
+    ``.default()`` to be converted.  If neither option is True, or
+    ``.default()`` returns an unsupported type, ``TypeError`` is raised.
 
     If ``ensure_ascii`` is false, then the strings written to ``fp`` can
     contain non-ASCII characters if they appear in strings contained in
@@ -165,15 +168,16 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not sort_keys and not kw):
+        default is None and not sort_keys and not convert_keys and not kw):
         iterable = _default_encoder.iterencode(obj)
     else:
         if cls is None:
             cls = JSONEncoder
         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
             separators=separators,
-            default=default, sort_keys=sort_keys, **kw).iterencode(obj)
+            default=default, sort_keys=sort_keys, convert_keys=convert_keys,
+            **kw).iterencode(obj)
     # could accelerate with writelines in some versions of Python, at
     # a debuggability cost
     for chunk in iterable:
@@ -182,12 +186,15 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
 
 def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, sort_keys=False, **kw):
+        default=None, sort_keys=False, convert_keys=False, **kw):
     """Serialize ``obj`` to a JSON formatted ``str``.
 
-    If ``skipkeys`` is true then ``dict`` keys that are not basic types
-    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
-    instead of raising a ``TypeError``.
+    Dict keys must be str, int, float, bool, or None to be serialized.
+    ``skipkeys`` and ``convert_keys`` control how keys of any other type are
+    handled.  If ``skipkeys`` is True, then those items are simply skipped.
+    Otherwise, if ``convert_keys`` is True, the keys will be passed to
+    ``.default()`` to be converted.  If neither option is True, or
+    ``.default()`` returns an unsupported type, ``TypeError`` is raised.
 
     If ``ensure_ascii`` is false, then the return value can contain non-ASCII
     characters if they appear in strings contained in ``obj``. Otherwise, all
@@ -227,15 +234,15 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not sort_keys and not kw):
+        default is None and not sort_keys and not convert_keys and not kw):
         return _default_encoder.encode(obj)
     if cls is None:
         cls = JSONEncoder
     return cls(
         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
         separators=separators, default=default, sort_keys=sort_keys,
-        **kw).encode(obj)
+        convert_keys=convert_keys, **kw).encode(obj)
 
 
 _default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)

diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
@@ -104,12 +104,15 @@ class JSONEncoder(object):
     key_separator = ': '
     def __init__(self, *, skipkeys=False, ensure_ascii=True,
             check_circular=True, allow_nan=True, sort_keys=False,
-            indent=None, separators=None, default=None):
+            indent=None, separators=None, default=None, convert_keys=False):
         """Constructor for JSONEncoder, with sensible defaults.
 
-        If skipkeys is false, then it is a TypeError to attempt
-        encoding of keys that are not str, int, float or None.  If
-        skipkeys is True, such items are simply skipped.
+        Dict keys must be str, int, float, bool, or None to be serialized.
+        skipkeys and convert_keys control how keys of any other type are
+        handled.  If skipkeys is True, then those items are simply skipped.
+        Otherwise, if convert_keys is True, the keys will be passed to
+        ``.default()`` to be converted.  If neither option is True, or
+        ``.default()`` returns an unsupported type, TypeError is raised.
 
         If ensure_ascii is true, the output is guaranteed to be str
         objects with all incoming non-ASCII characters escaped.  If
@@ -157,6 +160,7 @@ def __init__(self, *, skipkeys=False, ensure_ascii=True,
             self.item_separator = ','
         if default is not None:
             self.default = default
+        self.convert_keys = convert_keys
 
     def default(self, o):
         """Implement this method in a subclass such that it returns
@@ -249,16 +253,17 @@ def floatstr(o, allow_nan=self.allow_nan,
             _iterencode = c_make_encoder(
                 markers, self.default, _encoder, self.indent,
                 self.key_separator, self.item_separator, self.sort_keys,
-                self.skipkeys, self.allow_nan)
+                self.skipkeys, self.allow_nan, self.convert_keys)
         else:
             _iterencode = _make_iterencode(
                 markers, self.default, _encoder, self.indent, floatstr,
                 self.key_separator, self.item_separator, self.sort_keys,
-                self.skipkeys, _one_shot)
+                self.skipkeys, self.convert_keys, _one_shot)
         return _iterencode(o, 0)
 
 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
-        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        _key_separator, _item_separator, _sort_keys, _skipkeys, _convert_keys,
+        _one_shot,
         ## HACK: hand-optimized bytecode; turn globals into locals
         ValueError=ValueError,
         dict=dict,
@@ -371,6 +376,27 @@ def _iterencode_dict(dct, _current_indent_level):
             elif isinstance(key, int):
                 # see comment for int/float in _make_iterencode
                 key = _intstr(key)
+            elif _convert_keys:
+                key = _default(key)
+                if isinstance(key, str):
+                    pass
+                # JavaScript is weakly typed for these, so it makes sense to
+                # also allow them.  Many encoders seem to do something like this.
+                elif isinstance(key, float):
+                    # see comment for int/float in _make_iterencode
+                    key = _floatstr(key)
+                elif key is True:
+                    key = 'true'
+                elif key is False:
+                    key = 'false'
+                elif key is None:
+                    key = 'null'
+                elif isinstance(key, int):
+                    # see comment for int/float in _make_iterencode
+                    key = _intstr(key)
+                else:
+                    raise TypeError(f'keys must be str, int, float, bool '
+                                    f'or None, not {key.__class__.__name__}')
             elif _skipkeys:
                 continue
             else:

diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py
@@ -22,6 +22,21 @@ def test_dump_skipkeys(self):
         self.assertIn('valid_key', o)
         self.assertNotIn(b'invalid_key', o)
 
+    def test_dump_convert_keys(self):
+        # Without convert_keys, a bytes key is rejected outright.
+        v = {b'bytes_key': False, 'valid_key': True}
+        with self.assertRaises(TypeError):
+            self.json.dumps(v)
+
+        def default(val):
+            if isinstance(val, bytes):
+                return val.hex(':')
+            return str(val)
+
+        s = self.json.dumps(v, convert_keys=True, default=default)
+        expected = {'62:79:74:65:73:5f:6b:65:79': False, 'valid_key': True}
+        self.assertEqual(self.json.loads(s), expected)
+
     def test_encode_truefalse(self):
         self.assertEqual(self.dumps(
                  {True: False, False: True}, sort_keys=True),

diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py
@@ -45,7 +45,7 @@ def bad_encoder1(*args):
             return None
         enc = self.json.encoder.c_make_encoder(None, lambda obj: str(obj),
                                                bad_encoder1, None, ': ', ', ',
-                                               False, False, False)
+                                               False, False, False, False)
         with self.assertRaises(TypeError):
             enc('spam', 4)
         with self.assertRaises(TypeError):
@@ -55,7 +55,7 @@ def bad_encoder2(*args):
             1/0
         enc = self.json.encoder.c_make_encoder(None, lambda obj: str(obj),
                                                bad_encoder2, None, ': ', ', ',
-                                               False, False, False)
+                                               False, False, False, False)
         with self.assertRaises(ZeroDivisionError):
             enc('spam', 4)
 
@@ -66,7 +66,7 @@ def test_bad_markers_argument_to_encoder(self):
             r'make_encoder\(\) argument 1 must be dict or None, not int',
         ):
             self.json.encoder.c_make_encoder(1, None, None, None, ': ', ', ',
-                                             False, False, False)
+                                             False, False, False, False)
 
     def test_bad_bool_args(self):
         def test(name):

diff --git a/Misc/NEWS.d/next/Library/2024-03-30-11-42-13.gh-issue-117391.u7KmA9.rst b/Misc/NEWS.d/next/Library/2024-03-30-11-42-13.gh-issue-117391.u7KmA9.rst
@@ -0,0 +1,4 @@
+Add ``convert_keys`` parameter to JSON encoding, to allow passing dict keys that
+are not supported types in JSON through the encoder's
+:meth:`json.JSONEncoder.default()` method.
+Patch by Charles Cazabon.
diff --git a/Modules/_json.c b/Modules/_json.c
@@ -46,6 +46,7 @@ typedef struct _PyEncoderObject {
     PyObject *item_separator;
     char sort_keys;
     char skipkeys;
+    char convert_keys;
     int allow_nan;
     PyCFunction fast_encode;
 } PyEncoderObject;
@@ -59,6 +60,7 @@ static PyMemberDef encoder_members[] = {
     {"item_separator", _Py_T_OBJECT, offsetof(PyEncoderObject, item_separator), Py_READONLY, "item_separator"},
     {"sort_keys", Py_T_BOOL, offsetof(PyEncoderObject, sort_keys), Py_READONLY, "sort_keys"},
     {"skipkeys", Py_T_BOOL, offsetof(PyEncoderObject, skipkeys), Py_READONLY, "skipkeys"},
+    {"convert_keys", Py_T_BOOL, offsetof(PyEncoderObject, convert_keys), Py_READONLY, "convert_keys"},
     {NULL}
 };
 
@@ -1205,17 +1207,17 @@ static PyType_Spec PyScannerType_spec = {
 static PyObject *
 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
+    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "convert_keys", NULL};
 
     PyEncoderObject *s;
     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
     PyObject *item_separator;
-    int sort_keys, skipkeys, allow_nan;
+    int sort_keys, skipkeys, allow_nan, convert_keys;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUpppp:make_encoder", kwlist,
         &markers, &defaultfn, &encoder, &indent,
         &key_separator, &item_separator,
-        &sort_keys, &skipkeys, &allow_nan))
+        &sort_keys, &skipkeys, &allow_nan, &convert_keys))
         return NULL;
 
     if (markers != Py_None && !PyDict_Check(markers)) {
@@ -1238,6 +1240,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     s->sort_keys = sort_keys;
     s->skipkeys = skipkeys;
     s->allow_nan = allow_nan;
+    s->convert_keys = convert_keys;
     s->fast_encode = NULL;
 
     if (PyCFunction_Check(s->encoder)) {
@@ -1478,6 +1481,39 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
     else if (s->skipkeys) {
         return 0;
     }
+    else if (s->convert_keys) {
+        /* default() hands back a new reference; it is released below once
+           keystr has been derived from it, on success and failure alike. */
+        PyObject *newobj = PyObject_CallOneArg(s->defaultfn, key);
+        if (newobj == NULL) {
+            return -1;
+        }
+        if (PyUnicode_Check(newobj)) {
+            keystr = Py_NewRef(newobj);
+        }
+        else if (newobj == Py_None || newobj == Py_True || newobj == Py_False) {
+            /* Must precede PyLong_Check: True and False are also ints. */
+            keystr = _encoded_const(newobj);
+        }
+        else if (PyLong_Check(newobj)) {
+            keystr = PyLong_Type.tp_repr(newobj);
+        }
+        else if (PyFloat_Check(newobj)) {
+            keystr = encoder_encode_float(s, newobj);
+        }
+        else {
+            /* Format the message while newobj is still alive. */
+            PyErr_Format(PyExc_TypeError,
+                         "keys must be str, int, float, bool or None, "
+                         "not %.100s",
+                         Py_TYPE(newobj)->tp_name);
+            keystr = NULL;
+        }
+        Py_DECREF(newobj);
+        if (keystr == NULL) {
+            return -1;
+        }
+    }
     else {
         PyErr_Format(PyExc_TypeError,
                      "keys must be str, int, float, bool or None, "