diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 8e6f2342a2869a..79a8c38626f002 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -110,13 +110,16 @@ The :mod:`gc` module provides the following functions: to be uncollectable (and were therefore moved to the :data:`garbage` list) inside this generation; + * ``candidates`` is the total number of objects in this generation which were + considered for collection and traversed; + * ``duration`` is the total time in seconds spent in collections for this generation. .. versionadded:: 3.4 .. versionchanged:: next - Add ``duration``. + Add ``duration`` and ``candidates``. .. function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -319,6 +322,9 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. + "candidates": When *phase* is "stop", the total number of objects in this + generation which were considered for collection and traversed. + "duration": When *phase* is "stop", the time in seconds spent in the collection. @@ -335,7 +341,7 @@ values but should not rebind them): .. versionadded:: 3.3 .. versionchanged:: next - Add "duration". + Add "duration" and "candidates". The following constants are provided for use with :func:`set_debug`: diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index d9f5d444a2dc07..6b3d5711b92971 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,6 +179,8 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; // Duration of the collection in seconds: double duration; }; @@ -191,6 +193,8 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; // Duration of the collection in seconds: double duration; }; diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index e65da0f61d944f..ec5df4d20e7085 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -846,11 +846,14 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable", "duration"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): @@ -865,7 +868,7 @@ def test_get_stats(self): self.assertGreater(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertEqual(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable"]: + for stat in ["collected", "uncollectable", "candidates"]: self.assertGreaterEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertEqual(new[2][stat], old[2][stat]) @@ -877,7 +880,7 @@ def test_get_stats(self): self.assertEqual(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertGreater(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable"]: + for stat in ["collected", "uncollectable", "candidates"]: self.assertEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertGreaterEqual(new[2][stat], old[2][stat]) @@ -1316,6 +1319,7 @@ def test_collect(self): self.assertIn("generation", info) self.assertIn("collected", info) self.assertIn("uncollectable", info) + self.assertIn("candidates", info) self.assertIn("duration", info) def test_collect_generation(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst new file mode 100644 index 00000000000000..e3af941523cb75 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst @@ -0,0 +1,2 @@ +Expose a ``"candidates"`` stat in :func:`gc.get_stats` and +:data:`gc.callbacks`. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 6a44d8a9d17aea..4c286f5c12cc7d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -358,10 +358,11 @@ gc_get_stats_impl(PyObject *module) for (i = 0; i < NUM_GENERATIONS; i++) { PyObject *dict; st = &stats[i]; - dict = Py_BuildValue("{snsnsnsd}", + dict = Py_BuildValue("{snsnsnsnsd}", "collections", st->collections, "collected", st->collected, "uncollectable", st->uncollectable, + "candidates", st->candidates, "duration", st->duration ); if (dict == NULL) diff --git a/Python/gc.c b/Python/gc.c index 7e3e93e6e01be2..d067a6144b0763 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head) /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and * PREV_MASK_COLLECTING bit is set for all objects in containers. */ -static void +static Py_ssize_t update_refs(PyGC_Head *containers) { PyGC_Head *next; PyGC_Head *gc = GC_NEXT(containers); + Py_ssize_t candidates = 0; while (gc != containers) { next = GC_NEXT(gc); @@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers) */ _PyObject_ASSERT(op, gc_get_refs(gc) != 0); gc = next; + candidates++; } + return candidates; } /* A traversal callback for subtract_refs. */ @@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ -static inline void +static inline Py_ssize_t deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the @@ -1248,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * refcount greater than 0 when all the references within the * set are taken into account). */ - update_refs(base); // gc_prev is used for gc_refs + Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs subtract_refs(base); /* Leave everything reachable from outside base in base, and move @@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); + return candidates; } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1366,6 +1370,7 @@ add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) gcstate->generation_stats[gen].duration += stats->duration; gcstate->generation_stats[gen].collected += stats->collected; gcstate->generation_stats[gen].uncollectable += stats->uncollectable; + gcstate->generation_stats[gen].candidates += stats->candidates; gcstate->generation_stats[gen].collections += 1; } @@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; + stats->candidates += objects_marked; validate_spaces(gcstate); return; } @@ -1754,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); + stats->candidates = deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); @@ -1844,10 +1850,11 @@ do_gc_callback(GCState *gcstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsnsd}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", stats->collected, "uncollectable", stats->uncollectable, + "candidates", stats->candidates, "duration", stats->duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks"); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 9f424db8894524..1717603b947f90 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -100,6 +100,7 @@ struct collection_state { int skip_deferred_objects; Py_ssize_t collected; Py_ssize_t uncollectable; + Py_ssize_t candidates; Py_ssize_t long_lived_total; struct worklist unreachable; struct worklist legacy_finalizers; @@ -975,15 +976,12 @@ static bool update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *args) { + struct collection_state *state = (struct collection_state *)args; PyObject *op = op_from_block(block, args, false); if (op == NULL) { return true; } - if (gc_is_alive(op)) { - return true; - } - // Exclude immortal objects from garbage collection if (_Py_IsImmortal(op)) { op->ob_tid = 0; @@ -991,6 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, gc_clear_unreachable(op); return true; } + // Marked objects count as candidates, immortals don't: + state->candidates++; + if (gc_is_alive(op)) { + return true; + } Py_ssize_t refcount = Py_REFCNT(op); if (_PyObject_HasDeferredRefcount(op)) { @@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state) static void invoke_gc_callback(PyThreadState *tstate, const char *phase, int generation, Py_ssize_t collected, - Py_ssize_t uncollectable, double duration) + Py_ssize_t uncollectable, Py_ssize_t candidates, + double duration) { assert(!_PyErr_Occurred(tstate)); @@ -1925,10 +1929,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsnsd}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", collected, "uncollectable", uncollectable, + "candidates", candidates, "duration", duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while " @@ -2372,7 +2377,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) GC_STAT_ADD(generation, collections, 1); if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "start", generation, 0, 0, 0); + invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0); } if (gcstate->debug & _PyGC_DEBUG_STATS) { @@ -2427,6 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->collected += m; stats->uncollectable += n; stats->duration += duration; + stats->candidates += state.candidates; GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS @@ -2445,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n, duration); + invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration); } assert(!_PyErr_Occurred(tstate));