From f33ed21a0f86a24aeaf87950c00c0a8289a06505 Mon Sep 17 00:00:00 2001 From: Shamil Date: Sun, 12 Apr 2026 03:14:50 +0300 Subject: [PATCH] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. Co-authored-by: Kumar Aditya Co-authored-by: Gregory P. Smith (cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047) --- Lib/test/test_json/test_speedups.py | 61 +++++++++++++++++++ ...-12-17-04-10-35.gh-issue-142831.ee3t4L.rst | 2 + Modules/_json.c | 32 ++++++++-- 3 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 682014cfd5b344e..7c2789f8d1f235b 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -1,4 +1,5 @@ from test.test_json import CTest +from test.support import gc_collect class BadBool: @@ -80,3 +81,63 @@ def test(name): def test_unsortable_keys(self): with self.assertRaises(TypeError): self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'}) + + def test_mutate_dict_items_during_encode(self): + # gh-142831: Clearing the items list via a re-entrant key encoder + # must not cause a use-after-free. BadDict.items() returns a + # mutable list; encode_str clears it while iterating. + items = None + + class BadDict(dict): + def items(self): + nonlocal items + items = [("boom", object())] + return items + + cleared = False + def encode_str(obj): + nonlocal items, cleared + if items is not None: + items.clear() + items = None + cleared = True + gc_collect() + return '"x"' + + encoder = self.json.encoder.c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(BadDict(real=1), 0) + self.assertTrue(cleared) + + def test_mutate_list_during_encode(self): + # gh-142831: Clearing a list mid-iteration via the default + # callback must not cause a use-after-free. + call_count = 0 + lst = [object() for _ in range(10)] + + def default(obj): + nonlocal call_count + call_count += 1 + if call_count == 3: + lst.clear() + gc_collect() + return None + + encoder = self.json.encoder.c_make_encoder( + None, default, + self.json.encoder.c_encode_basestring, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(lst, 0) + # Verify the mutation path was actually hit and the loop + # stopped iterating after the list was cleared. + self.assertEqual(call_count, 3) diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 000000000000000..5fa3cd2727a9e57 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index afefc71bfbdd9a1..25cbd9b9bb89b47 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1602,9 +1602,13 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the items list, invalidating this borrowed ref. + Py_INCREF(item); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_DECREF(item); goto bail; } @@ -1612,18 +1616,30 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_DECREF(item); goto bail; + } + Py_DECREF(item); } Py_CLEAR(items); } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the dict, invalidating these borrowed refs. + Py_INCREF(key); + Py_INCREF(value); if (encoder_encode_key_value(s, writer, &first, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_DECREF(key); + Py_DECREF(value); goto bail; + } + Py_DECREF(key); + Py_DECREF(value); } } @@ -1712,12 +1728,20 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + // gh-142831: encoder_listencode_obj() can invoke user code + // that mutates the sequence, invalidating this borrowed ref. + Py_INCREF(obj); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) { + Py_DECREF(obj); goto bail; + } } - if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) + if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { + Py_DECREF(obj); goto bail; + } + Py_DECREF(obj); } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident))