Skip to content

Commit 227b9d3

Browse files
authored
GH-140638: Add a GC "candidates" stat (GH-141814)
1 parent 425fd85 commit 227b9d3

File tree

7 files changed

+50
-20
lines changed

7 files changed

+50
-20
lines changed

Doc/library/gc.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,16 @@ The :mod:`gc` module provides the following functions:
110110
to be uncollectable (and were therefore moved to the :data:`garbage`
111111
list) inside this generation;
112112

113+
* ``candidates`` is the total number of objects in this generation which were
114+
considered for collection and traversed;
115+
113116
* ``duration`` is the total time in seconds spent in collections for this
114117
generation.
115118

116119
.. versionadded:: 3.4
117120

118121
.. versionchanged:: next
119-
Add ``duration``.
122+
Add ``duration`` and ``candidates``.
120123

121124

122125
.. function:: set_threshold(threshold0, [threshold1, [threshold2]])
@@ -319,6 +322,9 @@ values but should not rebind them):
319322
"uncollectable": When *phase* is "stop", the number of objects
320323
that could not be collected and were put in :data:`garbage`.
321324

325+
"candidates": When *phase* is "stop", the number of objects in this
326+
generation that were considered for collection and traversed.
327+
322328
"duration": When *phase* is "stop", the time in seconds spent in the
323329
collection.
324330

@@ -335,7 +341,7 @@ values but should not rebind them):
335341
.. versionadded:: 3.3
336342

337343
.. versionchanged:: next
338-
Add "duration".
344+
Add "duration" and "candidates".
339345

340346

341347
The following constants are provided for use with :func:`set_debug`:

Include/internal/pycore_interp_structs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ struct gc_collection_stats {
179179
Py_ssize_t collected;
180180
/* total number of uncollectable objects (put into gc.garbage) */
181181
Py_ssize_t uncollectable;
182+
// Total number of objects considered for collection and traversed:
183+
Py_ssize_t candidates;
182184
// Duration of the collection in seconds:
183185
double duration;
184186
};
@@ -191,6 +193,8 @@ struct gc_generation_stats {
191193
Py_ssize_t collected;
192194
/* total number of uncollectable objects (put into gc.garbage) */
193195
Py_ssize_t uncollectable;
196+
// Total number of objects considered for collection and traversed:
197+
Py_ssize_t candidates;
194198
// Duration of the collection in seconds:
195199
double duration;
196200
};

Lib/test/test_gc.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -846,11 +846,14 @@ def test_get_stats(self):
846846
self.assertEqual(len(stats), 3)
847847
for st in stats:
848848
self.assertIsInstance(st, dict)
849-
self.assertEqual(set(st),
850-
{"collected", "collections", "uncollectable", "duration"})
849+
self.assertEqual(
850+
set(st),
851+
{"collected", "collections", "uncollectable", "candidates", "duration"}
852+
)
851853
self.assertGreaterEqual(st["collected"], 0)
852854
self.assertGreaterEqual(st["collections"], 0)
853855
self.assertGreaterEqual(st["uncollectable"], 0)
856+
self.assertGreaterEqual(st["candidates"], 0)
854857
self.assertGreaterEqual(st["duration"], 0)
855858
# Check that collection counts are incremented correctly
856859
if gc.isenabled():
@@ -865,7 +868,7 @@ def test_get_stats(self):
865868
self.assertGreater(new[0]["duration"], old[0]["duration"])
866869
self.assertEqual(new[1]["duration"], old[1]["duration"])
867870
self.assertEqual(new[2]["duration"], old[2]["duration"])
868-
for stat in ["collected", "uncollectable"]:
871+
for stat in ["collected", "uncollectable", "candidates"]:
869872
self.assertGreaterEqual(new[0][stat], old[0][stat])
870873
self.assertEqual(new[1][stat], old[1][stat])
871874
self.assertEqual(new[2][stat], old[2][stat])
@@ -877,7 +880,7 @@ def test_get_stats(self):
877880
self.assertEqual(new[0]["duration"], old[0]["duration"])
878881
self.assertEqual(new[1]["duration"], old[1]["duration"])
879882
self.assertGreater(new[2]["duration"], old[2]["duration"])
880-
for stat in ["collected", "uncollectable"]:
883+
for stat in ["collected", "uncollectable", "candidates"]:
881884
self.assertEqual(new[0][stat], old[0][stat])
882885
self.assertEqual(new[1][stat], old[1][stat])
883886
self.assertGreaterEqual(new[2][stat], old[2][stat])
@@ -1316,6 +1319,7 @@ def test_collect(self):
13161319
self.assertIn("generation", info)
13171320
self.assertIn("collected", info)
13181321
self.assertIn("uncollectable", info)
1322+
self.assertIn("candidates", info)
13191323
self.assertIn("duration", info)
13201324

13211325
def test_collect_generation(self):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Expose a ``"candidates"`` stat in :func:`gc.get_stats` and
2+
:data:`gc.callbacks`.

Modules/gcmodule.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,10 +358,11 @@ gc_get_stats_impl(PyObject *module)
358358
for (i = 0; i < NUM_GENERATIONS; i++) {
359359
PyObject *dict;
360360
st = &stats[i];
361-
dict = Py_BuildValue("{snsnsnsd}",
361+
dict = Py_BuildValue("{snsnsnsnsd}",
362362
"collections", st->collections,
363363
"collected", st->collected,
364364
"uncollectable", st->uncollectable,
365+
"candidates", st->candidates,
365366
"duration", st->duration
366367
);
367368
if (dict == NULL)

Python/gc.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head)
483483
/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and
484484
* PREV_MASK_COLLECTING bit is set for all objects in containers.
485485
*/
486-
static void
486+
static Py_ssize_t
487487
update_refs(PyGC_Head *containers)
488488
{
489489
PyGC_Head *next;
490490
PyGC_Head *gc = GC_NEXT(containers);
491+
Py_ssize_t candidates = 0;
491492

492493
while (gc != containers) {
493494
next = GC_NEXT(gc);
@@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers)
519520
*/
520521
_PyObject_ASSERT(op, gc_get_refs(gc) != 0);
521522
gc = next;
523+
candidates++;
522524
}
525+
return candidates;
523526
}
524527

525528
/* A traversal callback for subtract_refs. */
@@ -1240,15 +1243,15 @@ flag set but it does not clear it to skip unnecessary iteration. Before the
12401243
flag is cleared (for example, by using 'clear_unreachable_mask' function or
12411244
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
12421245
list and we can not use most gc_list_* functions for it. */
1243-
static inline void
1246+
static inline Py_ssize_t
12441247
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
12451248
validate_list(base, collecting_clear_unreachable_clear);
12461249
/* Using ob_refcnt and gc_refs, calculate which objects in the
12471250
* container set are reachable from outside the set (i.e., have a
12481251
* refcount greater than 0 when all the references within the
12491252
* set are taken into account).
12501253
*/
1251-
update_refs(base); // gc_prev is used for gc_refs
1254+
Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs
12521255
subtract_refs(base);
12531256

12541257
/* Leave everything reachable from outside base in base, and move
@@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
12891292
move_unreachable(base, unreachable); // gc_prev is pointer again
12901293
validate_list(base, collecting_clear_unreachable_clear);
12911294
validate_list(unreachable, collecting_set_unreachable_set);
1295+
return candidates;
12921296
}
12931297

12941298
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
@@ -1366,6 +1370,7 @@ add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
13661370
gcstate->generation_stats[gen].duration += stats->duration;
13671371
gcstate->generation_stats[gen].collected += stats->collected;
13681372
gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
1373+
gcstate->generation_stats[gen].candidates += stats->candidates;
13691374
gcstate->generation_stats[gen].collections += 1;
13701375
}
13711376

@@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
16621667
Py_ssize_t objects_marked = mark_at_start(tstate);
16631668
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
16641669
gcstate->work_to_do -= objects_marked;
1670+
stats->candidates += objects_marked;
16651671
validate_spaces(gcstate);
16661672
return;
16671673
}
@@ -1754,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate,
17541760
assert(!_PyErr_Occurred(tstate));
17551761

17561762
gc_list_init(&unreachable);
1757-
deduce_unreachable(from, &unreachable);
1763+
stats->candidates = deduce_unreachable(from, &unreachable);
17581764
validate_consistent_old_space(from);
17591765
untrack_tuples(from);
17601766

@@ -1844,10 +1850,11 @@ do_gc_callback(GCState *gcstate, const char *phase,
18441850
assert(PyList_CheckExact(gcstate->callbacks));
18451851
PyObject *info = NULL;
18461852
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1847-
info = Py_BuildValue("{sisnsnsd}",
1853+
info = Py_BuildValue("{sisnsnsnsd}",
18481854
"generation", generation,
18491855
"collected", stats->collected,
18501856
"uncollectable", stats->uncollectable,
1857+
"candidates", stats->candidates,
18511858
"duration", stats->duration);
18521859
if (info == NULL) {
18531860
PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");

Python/gc_free_threading.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ struct collection_state {
100100
int skip_deferred_objects;
101101
Py_ssize_t collected;
102102
Py_ssize_t uncollectable;
103+
Py_ssize_t candidates;
103104
Py_ssize_t long_lived_total;
104105
struct worklist unreachable;
105106
struct worklist legacy_finalizers;
@@ -975,22 +976,24 @@ static bool
975976
update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
976977
void *block, size_t block_size, void *args)
977978
{
979+
struct collection_state *state = (struct collection_state *)args;
978980
PyObject *op = op_from_block(block, args, false);
979981
if (op == NULL) {
980982
return true;
981983
}
982984

983-
if (gc_is_alive(op)) {
984-
return true;
985-
}
986-
987985
// Exclude immortal objects from garbage collection
988986
if (_Py_IsImmortal(op)) {
989987
op->ob_tid = 0;
990988
_PyObject_GC_UNTRACK(op);
991989
gc_clear_unreachable(op);
992990
return true;
993991
}
992+
// Marked objects count as candidates, immortals don't:
993+
state->candidates++;
994+
if (gc_is_alive(op)) {
995+
return true;
996+
}
994997

995998
Py_ssize_t refcount = Py_REFCNT(op);
996999
if (_PyObject_HasDeferredRefcount(op)) {
@@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state)
19111914
static void
19121915
invoke_gc_callback(PyThreadState *tstate, const char *phase,
19131916
int generation, Py_ssize_t collected,
1914-
Py_ssize_t uncollectable, double duration)
1917+
Py_ssize_t uncollectable, Py_ssize_t candidates,
1918+
double duration)
19151919
{
19161920
assert(!_PyErr_Occurred(tstate));
19171921

@@ -1925,10 +1929,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
19251929
assert(PyList_CheckExact(gcstate->callbacks));
19261930
PyObject *info = NULL;
19271931
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1928-
info = Py_BuildValue("{sisnsnsd}",
1932+
info = Py_BuildValue("{sisnsnsnsd}",
19291933
"generation", generation,
19301934
"collected", collected,
19311935
"uncollectable", uncollectable,
1936+
"candidates", candidates,
19321937
"duration", duration);
19331938
if (info == NULL) {
19341939
PyErr_FormatUnraisable("Exception ignored while "
@@ -2372,7 +2377,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
23722377
GC_STAT_ADD(generation, collections, 1);
23732378

23742379
if (reason != _Py_GC_REASON_SHUTDOWN) {
2375-
invoke_gc_callback(tstate, "start", generation, 0, 0, 0);
2380+
invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0);
23762381
}
23772382

23782383
if (gcstate->debug & _PyGC_DEBUG_STATS) {
@@ -2427,6 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
24272432
stats->collected += m;
24282433
stats->uncollectable += n;
24292434
stats->duration += duration;
2435+
stats->candidates += state.candidates;
24302436

24312437
GC_STAT_ADD(generation, objects_collected, m);
24322438
#ifdef Py_STATS
@@ -2445,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
24452451
}
24462452

24472453
if (reason != _Py_GC_REASON_SHUTDOWN) {
2448-
invoke_gc_callback(tstate, "stop", generation, m, n, duration);
2454+
invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration);
24492455
}
24502456

24512457
assert(!_PyErr_Occurred(tstate));

0 commit comments

Comments
 (0)