diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index a6adc2c98514ba..3e3092dcae1119 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1157,6 +1157,24 @@ def test_something(self):
         assert_python_ok("-c", source)
 
+    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_tuple_untrack_counts(self):
+        # This ensures that the free-threaded GC is counting untracked tuples
+        # in the "long_lived_total" count. This is required to avoid
+        # performance issues from running the GC too frequently. See
+        # GH-142531 as an example.
+        gc.collect()
+        count = _testinternalcapi.get_long_lived_total()
+        n = 20_000
+        tuples = [(x,) for x in range(n)]
+        gc.collect()
+        new_count = _testinternalcapi.get_long_lived_total()
+        self.assertFalse(gc.is_tracked(tuples[0]))
+        # Use n // 2 just in case some other objects were collected.
+        self.assertTrue(new_count - count > (n // 2))
+
+
 class IncrementalGCTests(unittest.TestCase):
     @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
     @requires_gil_enabled("Free threading does not support incremental GC")
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
new file mode 100644
index 00000000000000..15e03c1b9dd03f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
@@ -0,0 +1,5 @@
+Fix a free-threaded GC performance regression. If there are many untracked
+tuples, the GC will run too often, resulting in poor performance. The fix
+is to include untracked tuples in the "long lived" object count. The number
+of frozen objects is also now included since the free-threaded GC must
+scan those too.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index ce11a81211e7e6..27f2d70e832c0f 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
     }
     return PyLong_FromVoidPtr(bc);
 }
+
+static PyObject *
+get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
+}
+
 #endif
 
 static PyObject *
@@ -2552,6 +2559,7 @@ static PyMethodDef module_functions[] = {
     {"py_thread_id", get_py_thread_id, METH_NOARGS},
     {"get_tlbc", get_tlbc, METH_O, NULL},
     {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
+    {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
 #endif
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index d096accb4371c1..079b6b78dcd96f 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -374,6 +374,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
     return op;
 }
 
+// As above but returns untracked and frozen objects as well.
+static PyObject *
+op_from_block_all_gc(void *block, void *arg)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char *)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    return op;
+}
+
 static int
 gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
                          struct visitor_args *arg)
@@ -1175,12 +1188,20 @@ static bool
 scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
                   void *block, size_t block_size, void *args)
 {
-    PyObject *op = op_from_block(block, args, false);
+    PyObject *op = op_from_block_all_gc(block, args);
     if (op == NULL) {
         return true;
     }
 
     struct collection_state *state = (struct collection_state *)args;
+
+    // The free-threaded GC cost is proportional to the number of objects in
+    // the mimalloc GC heap and so we should include the counts for untracked
+    // and frozen objects as well. This is especially important if many
+    // tuples have been untracked.
+    state->long_lived_total++;
+
+    if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
+        return true;
+    }
+
     if (gc_is_unreachable(op)) {
         // Disable deferred refcounting for unreachable objects so that they
         // are collected immediately after finalization.
@@ -1198,6 +1219,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
         else {
             worklist_push(&state->unreachable, op);
         }
+        // It is possible this object will be resurrected but
+        // for now we assume it will be deallocated.
+        state->long_lived_total--;
         return true;
     }
@@ -1211,7 +1235,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     // object is reachable, restore `ob_tid`; we're done with these objects
     gc_restore_tid(op);
     gc_clear_alive(op);
-    state->long_lived_total++;
     return true;
 }
@@ -1818,6 +1841,7 @@ handle_resurrected_objects(struct collection_state *state)
             _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
             worklist_remove(&iter);
             merge_refcount(op, -1);  // remove worklist reference
+            state->long_lived_total++;
         }
     }
 }
@@ -2220,9 +2244,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
         }
     }
 
-    // Record the number of live GC objects
-    interp->gc.long_lived_total = state->long_lived_total;
-
     // Clear weakrefs and enqueue callbacks (but do not call them).
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
@@ -2240,6 +2261,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     err = handle_resurrected_objects(state);
     // Clear free lists in all threads
     _PyGC_ClearAllFreeLists(interp);
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
     _PyEval_StartTheWorld(interp);
 
     if (err < 0) {
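
---
Reviewer note, not part of the patch: the new test leans on CPython's
tuple-untracking optimization. A tuple whose items can never be part of a
reference cycle (here, small ints) starts out GC-tracked but is untracked by
the first collection that scans it, which is why the test calls gc.collect()
before asserting on gc.is_tracked(tuples[0]). A minimal standalone sketch of
that behavior, runnable on any recent CPython:

    import gc

    x = 1
    t = (x,)                 # built at runtime, so not a compile-time constant
    print(gc.is_tracked(t))  # True: freshly created tuples start out tracked
    gc.collect()             # the collector untracks tuples of atomic values
    print(gc.is_tracked(t))  # False: no reference cycle is possible through t

Such untracked tuples stay in the mimalloc GC heap, so the mark-and-sweep pass
still visits them even though they no longer appear in the tracked set; before
this patch they were invisible to long_lived_total.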
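Why the count feeds scheduling: the free-threaded collector scales its trigger
by long_lived_total, so undercounting live objects makes every allocation burst
look large relative to the heap. The following is a simplified Python model of
that heuristic; the real logic lives in gc_should_collect() in
Python/gc_free_threading.c, and the exact divisor and extra conditions shown
here are assumptions borrowed from the stock GC's long-lived heuristic:

    def should_collect(young_count: int, threshold: int,
                       long_lived_total: int) -> bool:
        # Collect only when enough new objects exist both in absolute terms
        # and *relative to* the live long-lived population (assumed // 4).
        if young_count <= threshold:
            return False
        return young_count > long_lived_total // 4

    # Hypothetical numbers: if a large pool of untracked tuples contributes
    # nothing to long_lived_total, a small allocation burst passes the
    # proportional check and forces a full scan of the mimalloc GC heap.
    print(should_collect(10_000, 2_000, 0))          # True: collects
    print(should_collect(10_000, 2_000, 2_000_000))  # False: tuples counted

Under this model, counting untracked and frozen objects keeps the proportional
term honest, so collections stay rare on tuple-heavy heaps, which is the
performance issue GH-142531 reports.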