From bb3fc941930de5e5ae862bb12506da5be5719381 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 19 Nov 2025 11:35:17 -0800 Subject: [PATCH 01/12] Add a "visited" stat. --- Include/internal/pycore_interp_structs.h | 4 ++++ Modules/gcmodule.c | 3 ++- Python/gc.c | 18 ++++++++++++------ Python/gc_free_threading.c | 17 +++++++++++------ 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index d9f5d444a2dc07..2910d311ef5d95 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,6 +179,8 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects visited: + Py_ssize_t visited; // Duration of the collection in seconds: double duration; }; @@ -191,6 +193,8 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects visited: + Py_ssize_t visited; // Duration of the collection in seconds: double duration; }; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 6a44d8a9d17aea..ec04d0f2392406 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -358,10 +358,11 @@ gc_get_stats_impl(PyObject *module) for (i = 0; i < NUM_GENERATIONS; i++) { PyObject *dict; st = &stats[i]; - dict = Py_BuildValue("{snsnsnsd}", + dict = Py_BuildValue("{snsnsnsnsd}", "collections", st->collections, "collected", st->collected, "uncollectable", st->uncollectable, + "visited", st->visited, "duration", st->duration ); if (dict == NULL) diff --git a/Python/gc.c b/Python/gc.c index 7e3e93e6e01be2..1ecc9b375ba7e0 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -663,12 +663,13 @@ visit_reachable(PyObject *op, void *arg) * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag. * So we can not gc_list_* functions for unreachable until we remove the flag. */ -static void +static Py_ssize_t move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young); + Py_ssize_t visited = 0; /* Invariants: all objects "to the left" of us in young are reachable * (directly or indirectly) from outside the young list as it was at entry. @@ -683,6 +684,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */ uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1); while (gc != young) { + visited++; if (gc_get_refs(gc)) { /* gc is definitely reachable from outside the * original 'young'. Mark it as such, and traverse @@ -739,6 +741,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= _PyGC_PREV_MASK; + return visited; } /* In theory, all tuples should be younger than the @@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ -static inline void +static inline Py_ssize_t deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the @@ -1286,9 +1289,10 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. */ - move_unreachable(base, unreachable); // gc_prev is pointer again + Py_ssize_t visited = move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); + return visited; } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1316,7 +1320,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; - deduce_unreachable(resurrected, still_unreachable); + (void)deduce_unreachable(resurrected, still_unreachable); clear_unreachable_mask(still_unreachable); // Move the resurrected objects to the old generation for future collection. @@ -1364,6 +1368,7 @@ static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) { gcstate->generation_stats[gen].duration += stats->duration; + gcstate->generation_stats[gen].visited += stats->visited; gcstate->generation_stats[gen].collected += stats->collected; gcstate->generation_stats[gen].uncollectable += stats->uncollectable; gcstate->generation_stats[gen].collections += 1; @@ -1754,7 +1759,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); + stats->visited += deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); @@ -1844,10 +1849,11 @@ do_gc_callback(GCState *gcstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsnsd}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", stats->collected, "uncollectable", stats->uncollectable, + "visited", stats->visited, "duration", stats->duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks"); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 9f424db8894524..a2d8bb123c081a 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -98,6 +98,7 @@ struct collection_state { // we can't collect objects with deferred references because we may not // see all references. int skip_deferred_objects; + Py_ssize_t visited; Py_ssize_t collected; Py_ssize_t uncollectable; Py_ssize_t long_lived_total; @@ -975,7 +976,9 @@ static bool update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *args) { - PyObject *op = op_from_block(block, args, false); + struct collection_state *state = (struct collection_state *)args; + state->visited++; + PyObject *op = op_from_block(block, &state->base, false); if (op == NULL) { return true; } @@ -1434,7 +1437,7 @@ deduce_unreachable_heap(PyInterpreterState *interp, // Identify objects that are directly reachable from outside the GC heap // by computing the difference between the refcount and the number of // incoming references. - gc_visit_heaps(interp, &update_refs, &state->base); + gc_visit_heaps(interp, &update_refs, &state); #ifdef GC_DEBUG // Check that all objects are marked as unreachable and that the computed @@ -1911,7 +1914,7 @@ handle_resurrected_objects(struct collection_state *state) static void invoke_gc_callback(PyThreadState *tstate, const char *phase, int generation, Py_ssize_t collected, - Py_ssize_t uncollectable, double duration) + Py_ssize_t uncollectable, Py_ssize_t visited, double duration) { assert(!_PyErr_Occurred(tstate)); @@ -1925,10 +1928,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsnsd}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", collected, "uncollectable", uncollectable, + "visited", visited, "duration", duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while " @@ -2372,7 +2376,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) GC_STAT_ADD(generation, collections, 1); if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "start", generation, 0, 0, 0); + invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0); } if (gcstate->debug & _PyGC_DEBUG_STATS) { @@ -2427,6 +2431,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->collected += m; stats->uncollectable += n; stats->duration += duration; + stats->visited += state.visited; GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS @@ -2445,7 +2450,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n, duration); + invoke_gc_callback(tstate, "stop", generation, m, n, state->visited, duration); } assert(!_PyErr_Occurred(tstate)); From d89ecdcf3c3fc9172eba5b49f36bf8c9078accd2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 21:58:23 -0800 Subject: [PATCH 02/12] Count visited in update_refs --- Python/gc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 1ecc9b375ba7e0..ea136f2af209f6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head) /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and * PREV_MASK_COLLECTING bit is set for all objects in containers. */ -static void +static Py_ssize_t update_refs(PyGC_Head *containers) { PyGC_Head *next; PyGC_Head *gc = GC_NEXT(containers); + Py_ssize_t visited = 0; while (gc != containers) { next = GC_NEXT(gc); @@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers) */ _PyObject_ASSERT(op, gc_get_refs(gc) != 0); gc = next; + visited++; } + return visited; } /* A traversal callback for subtract_refs. */ @@ -663,13 +666,12 @@ visit_reachable(PyObject *op, void *arg) * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag. * So we can not gc_list_* functions for unreachable until we remove the flag. */ -static Py_ssize_t +static void move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young); - Py_ssize_t visited = 0; /* Invariants: all objects "to the left" of us in young are reachable * (directly or indirectly) from outside the young list as it was at entry. @@ -684,7 +686,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */ uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1); while (gc != young) { - visited++; if (gc_get_refs(gc)) { /* gc is definitely reachable from outside the * original 'young'. Mark it as such, and traverse @@ -741,7 +742,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= _PyGC_PREV_MASK; - return visited; } /* In theory, all tuples should be younger than the @@ -1251,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * refcount greater than 0 when all the references within the * set are taken into account). */ - update_refs(base); // gc_prev is used for gc_refs + Py_ssize_t visited = update_refs(base); // gc_prev is used for gc_refs subtract_refs(base); /* Leave everything reachable from outside base in base, and move @@ -1289,7 +1289,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. */ - Py_ssize_t visited = move_unreachable(base, unreachable); // gc_prev is pointer again + move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); return visited; From 3ee8db91527945531e085e61d0b3238c320b69ce Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 21:59:30 -0800 Subject: [PATCH 03/12] Remove old void cast --- Python/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index ea136f2af209f6..33627d5f95f766 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1320,7 +1320,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; - (void)deduce_unreachable(resurrected, still_unreachable); + deduce_unreachable(resurrected, still_unreachable); clear_unreachable_mask(still_unreachable); // Move the resurrected objects to the old generation for future collection. From 41814f3ca3c90df80e153034934f0d01a462fbc4 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 22:07:57 -0800 Subject: [PATCH 04/12] Update docs and tests --- Doc/library/gc.rst | 10 ++++++++-- Lib/test/test_gc.py | 12 ++++++++---- Python/gc.c | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 8e6f2342a2869a..272d0bfa0f0c48 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -110,13 +110,16 @@ The :mod:`gc` module provides the following functions: to be uncollectable (and were therefore moved to the :data:`garbage` list) inside this generation; + * ``visited`` is the total number of unique objects visited during each + collection of this generation; + * ``duration`` is the total time in seconds spent in collections for this generation. .. versionadded:: 3.4 .. versionchanged:: next - Add ``duration``. + Add ``duration`` and ``visited``. .. function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -319,6 +322,9 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. + "visited": When *phase* is "stop", the number of unique objects visited + during the collection. + "duration": When *phase* is "stop", the time in seconds spent in the collection. @@ -335,7 +341,7 @@ values but should not rebind them): .. versionadded:: 3.3 .. versionchanged:: next - Add "duration". + Add "duration" and "visited". The following constants are provided for use with :func:`set_debug`: diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index e65da0f61d944f..4bab09cb02bb4f 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -846,11 +846,14 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable", "duration"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "visited", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["visited"], 0) self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): @@ -865,7 +868,7 @@ def test_get_stats(self): self.assertGreater(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertEqual(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable"]: + for stat in ["collected", "uncollectable", "visited"]: self.assertGreaterEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertEqual(new[2][stat], old[2][stat]) @@ -877,7 +880,7 @@ def test_get_stats(self): self.assertEqual(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertGreater(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable"]: + for stat in ["collected", "uncollectable", "visited"]: self.assertEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertGreaterEqual(new[2][stat], old[2][stat]) @@ -1316,6 +1319,7 @@ def test_collect(self): self.assertIn("generation", info) self.assertIn("collected", info) self.assertIn("uncollectable", info) + self.assertIn("visited", info) self.assertIn("duration", info) def test_collect_generation(self): diff --git a/Python/gc.c b/Python/gc.c index 33627d5f95f766..75408b03516db0 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1759,7 +1759,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - stats->visited += deduce_unreachable(from, &unreachable); + stats->visited = deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); From 8f4ce45c878f53492a7136f353b51f6eb61b20d6 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 22:09:32 -0800 Subject: [PATCH 05/12] blurb add --- .../2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst new file mode 100644 index 00000000000000..2a04fbf07d644d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst @@ -0,0 +1,2 @@ +Expose a ``"visited"`` stat in :func:`gc.get_stats` and +:data:`gc.callbacks`. From 13a97f3a16f024379485b62389cbea5610522971 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 22:19:43 -0800 Subject: [PATCH 06/12] Omit immortal and live objects --- Python/gc_free_threading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index a2d8bb123c081a..339f57c5bc111c 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -977,7 +977,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *args) { struct collection_state *state = (struct collection_state *)args; - state->visited++; PyObject *op = op_from_block(block, &state->base, false); if (op == NULL) { return true; @@ -994,6 +993,7 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, gc_clear_unreachable(op); return true; } + state->visited++; Py_ssize_t refcount = Py_REFCNT(op); if (_PyObject_HasDeferredRefcount(op)) { From bdfe660af3c9d9485e999b9077fef54d2da44662 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 20 Nov 2025 22:32:16 -0800 Subject: [PATCH 07/12] Fix free-threading errors (still need to get to warnings) --- Python/gc_free_threading.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 339f57c5bc111c..dc2eac3b83c873 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2376,7 +2376,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) GC_STAT_ADD(generation, collections, 1); if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0); + invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0); } if (gcstate->debug & _PyGC_DEBUG_STATS) { @@ -2450,7 +2450,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n, state->visited, duration); + invoke_gc_callback(tstate, "stop", generation, m, n, state.visited, duration); } assert(!_PyErr_Occurred(tstate)); From 2d251676ff5af51450a2245485506ab03851664f Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 21 Nov 2025 07:28:26 -0800 Subject: [PATCH 08/12] Fix bad address --- Python/gc_free_threading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index dc2eac3b83c873..e0d3a667d16d35 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1437,7 +1437,7 @@ deduce_unreachable_heap(PyInterpreterState *interp, // Identify objects that are directly reachable from outside the GC heap // by computing the difference between the refcount and the number of // incoming references. - gc_visit_heaps(interp, &update_refs, &state); + gc_visit_heaps(interp, &update_refs, &state->base); #ifdef GC_DEBUG // Check that all objects are marked as unreachable and that the computed From 9be8100ab4eec1a9c22991c2f5db7844aad776dc Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 21 Nov 2025 07:31:07 -0800 Subject: [PATCH 09/12] Clean up the diff --- Python/gc_free_threading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index e0d3a667d16d35..5a3312eaeaedd9 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -977,7 +977,7 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *args) { struct collection_state *state = (struct collection_state *)args; - PyObject *op = op_from_block(block, &state->base, false); + PyObject *op = op_from_block(block, args, false); if (op == NULL) { return true; } From 7f46627ba69c52adb4c9eaa674a5c3ffbc0e9b28 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 22 Nov 2025 10:53:16 -0800 Subject: [PATCH 10/12] visited -> candidates --- Doc/library/gc.rst | 12 ++++++------ Include/internal/pycore_interp_structs.h | 8 ++++---- Lib/test/test_gc.py | 10 +++++----- ...025-11-20-22-09-22.gh-issue-140638.f6btj0.rst | 2 +- Modules/gcmodule.c | 2 +- Python/gc.c | 16 ++++++++-------- Python/gc_free_threading.c | 13 +++++++------ 7 files changed, 32 insertions(+), 31 deletions(-) diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 272d0bfa0f0c48..d75765e6b09dfc 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -110,8 +110,8 @@ The :mod:`gc` module provides the following functions: to be uncollectable (and were therefore moved to the :data:`garbage` list) inside this generation; - * ``visited`` is the total number of unique objects visited during each - collection of this generation; + * ``candidates`` is the total number of objects in this generation which were + traversed and considered for collection; * ``duration`` is the total time in seconds spent in collections for this generation. @@ -119,7 +119,7 @@ The :mod:`gc` module provides the following functions: .. versionadded:: 3.4 .. versionchanged:: next - Add ``duration`` and ``visited``. + Add ``duration`` and ``candidates``. .. function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -322,8 +322,8 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. - "visited": When *phase* is "stop", the number of unique objects visited - during the collection. + "candidates": When *phase* is "stop", the total number of objects in this + generation which were traversed and considered for collection. "duration": When *phase* is "stop", the time in seconds spent in the collection. @@ -341,7 +341,7 @@ values but should not rebind them): .. versionadded:: 3.3 .. versionchanged:: next - Add "duration" and "visited". + Add "duration" and "candidates". The following constants are provided for use with :func:`set_debug`: diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 2910d311ef5d95..afe88b0bb80dcf 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,8 +179,8 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; - // Total number of objects visited: - Py_ssize_t visited; + // Total number of objects traversed and considered for collection: + Py_ssize_t candidates; // Duration of the collection in seconds: double duration; }; @@ -193,8 +193,8 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; - // Total number of objects visited: - Py_ssize_t visited; + // Total number of objects traversed and considered for collection: + Py_ssize_t candidates; // Duration of the collection in seconds: double duration; }; diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 4bab09cb02bb4f..ec5df4d20e7085 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -848,12 +848,12 @@ def test_get_stats(self): self.assertIsInstance(st, dict) self.assertEqual( set(st), - {"collected", "collections", "uncollectable", "visited", "duration"} + {"collected", "collections", "uncollectable", "candidates", "duration"} ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) - self.assertGreaterEqual(st["visited"], 0) + self.assertGreaterEqual(st["candidates"], 0) self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): @@ -868,7 +868,7 @@ def test_get_stats(self): self.assertGreater(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertEqual(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable", "visited"]: + for stat in ["collected", "uncollectable", "candidates"]: self.assertGreaterEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertEqual(new[2][stat], old[2][stat]) @@ -880,7 +880,7 @@ def test_get_stats(self): self.assertEqual(new[0]["duration"], old[0]["duration"]) self.assertEqual(new[1]["duration"], old[1]["duration"]) self.assertGreater(new[2]["duration"], old[2]["duration"]) - for stat in ["collected", "uncollectable", "visited"]: + for stat in ["collected", "uncollectable", "candidates"]: self.assertEqual(new[0][stat], old[0][stat]) self.assertEqual(new[1][stat], old[1][stat]) self.assertGreaterEqual(new[2][stat], old[2][stat]) @@ -1319,7 +1319,7 @@ def test_collect(self): self.assertIn("generation", info) self.assertIn("collected", info) self.assertIn("uncollectable", info) - self.assertIn("visited", info) + self.assertIn("candidates", info) self.assertIn("duration", info) def test_collect_generation(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst index 2a04fbf07d644d..e3af941523cb75 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst @@ -1,2 +1,2 @@ -Expose a ``"visited"`` stat in :func:`gc.get_stats` and +Expose a ``"candidates"`` stat in :func:`gc.get_stats` and :data:`gc.callbacks`. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index ec04d0f2392406..4c286f5c12cc7d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -362,7 +362,7 @@ gc_get_stats_impl(PyObject *module) "collections", st->collections, "collected", st->collected, "uncollectable", st->uncollectable, - "visited", st->visited, + "candidates", st->candidates, "duration", st->duration ); if (dict == NULL) diff --git a/Python/gc.c b/Python/gc.c index 75408b03516db0..d71046be9d82bd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -488,7 +488,7 @@ update_refs(PyGC_Head *containers) { PyGC_Head *next; PyGC_Head *gc = GC_NEXT(containers); - Py_ssize_t visited = 0; + Py_ssize_t candidates = 0; while (gc != containers) { next = GC_NEXT(gc); @@ -520,9 +520,9 @@ update_refs(PyGC_Head *containers) */ _PyObject_ASSERT(op, gc_get_refs(gc) != 0); gc = next; - visited++; + candidates++; } - return visited; + return candidates; } /* A traversal callback for subtract_refs. */ @@ -1251,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * refcount greater than 0 when all the references within the * set are taken into account). */ - Py_ssize_t visited = update_refs(base); // gc_prev is used for gc_refs + Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs subtract_refs(base); /* Leave everything reachable from outside base in base, and move @@ -1292,7 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); - return visited; + return candidates; } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1368,9 +1368,9 @@ static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) { gcstate->generation_stats[gen].duration += stats->duration; - gcstate->generation_stats[gen].visited += stats->visited; gcstate->generation_stats[gen].collected += stats->collected; gcstate->generation_stats[gen].uncollectable += stats->uncollectable; + gcstate->generation_stats[gen].candidates += stats->candidates; gcstate->generation_stats[gen].collections += 1; } @@ -1759,7 +1759,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - stats->visited = deduce_unreachable(from, &unreachable); + stats->candidates = deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); @@ -1853,7 +1853,7 @@ do_gc_callback(GCState *gcstate, const char *phase, "generation", generation, "collected", stats->collected, "uncollectable", stats->uncollectable, - "visited", stats->visited, + "candidates", stats->candidates, "duration", stats->duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks"); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5a3312eaeaedd9..5f1d748d28eb56 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -98,9 +98,9 @@ struct collection_state { // we can't collect objects with deferred references because we may not // see all references. int skip_deferred_objects; - Py_ssize_t visited; Py_ssize_t collected; Py_ssize_t uncollectable; + Py_ssize_t candidates; Py_ssize_t long_lived_total; struct worklist unreachable; struct worklist legacy_finalizers; @@ -993,7 +993,7 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, gc_clear_unreachable(op); return true; } - state->visited++; + state->candidates++; Py_ssize_t refcount = Py_REFCNT(op); if (_PyObject_HasDeferredRefcount(op)) { @@ -1914,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state) static void invoke_gc_callback(PyThreadState *tstate, const char *phase, int generation, Py_ssize_t collected, - Py_ssize_t uncollectable, Py_ssize_t visited, double duration) + Py_ssize_t uncollectable, Py_ssize_t candidates, + double duration) { assert(!_PyErr_Occurred(tstate)); @@ -1932,7 +1933,7 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, "generation", generation, "collected", collected, "uncollectable", uncollectable, - "visited", visited, + "candidates", candidates, "duration", duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while " @@ -2431,7 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->collected += m; stats->uncollectable += n; stats->duration += duration; - stats->visited += state.visited; + stats->candidates += state.candidates; GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS @@ -2450,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n, state.visited, duration); + invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration); } assert(!_PyErr_Occurred(tstate)); From 866bd56cc6c9ae8056f49f8087ba09c4529e64be Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 22 Nov 2025 10:58:10 -0800 Subject: [PATCH 11/12] Account for marked objects in candidates. --- Python/gc.c | 1 + Python/gc_free_threading.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index d71046be9d82bd..d067a6144b0763 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1667,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; + stats->candidates += objects_marked; validate_spaces(gcstate); return; } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5f1d748d28eb56..1717603b947f90 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -982,10 +982,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } - if (gc_is_alive(op)) { - return true; - } - // Exclude immortal objects from garbage collection if (_Py_IsImmortal(op)) { op->ob_tid = 0; @@ -993,7 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, gc_clear_unreachable(op); return true; } + // Marked objects count as candidates, immortals don't: state->candidates++; + if (gc_is_alive(op)) { + return true; + } Py_ssize_t refcount = Py_REFCNT(op); if (_PyObject_HasDeferredRefcount(op)) { From 7e65738713953ddbd666c913a3ce18078662851c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 22 Nov 2025 13:31:17 -0800 Subject: [PATCH 12/12] Wording --- Doc/library/gc.rst | 4 ++-- Include/internal/pycore_interp_structs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index d75765e6b09dfc..79a8c38626f002 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -111,7 +111,7 @@ The :mod:`gc` module provides the following functions: list) inside this generation; * ``candidates`` is the total number of objects in this generation which were - traversed and considered for collection; + considered for collection and traversed; * ``duration`` is the total time in seconds spent in collections for this generation. @@ -323,7 +323,7 @@ values but should not rebind them): that could not be collected and were put in :data:`garbage`. "candidates": When *phase* is "stop", the total number of objects in this - generation which were traversed and considered for collection. + generation which were considered for collection and traversed. "duration": When *phase* is "stop", the time in seconds spent in the collection. diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index afe88b0bb80dcf..6b3d5711b92971 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,7 +179,7 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; - // Total number of objects traversed and considered for collection: + // Total number of objects considered for collection and traversed: Py_ssize_t candidates; // Duration of the collection in seconds: double duration; @@ -193,7 +193,7 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; - // Total number of objects traversed and considered for collection: + // Total number of objects considered for collection and traversed: Py_ssize_t candidates; // Duration of the collection in seconds: double duration;