From d70e27f25886e3ac1aa9fcc2d44dd38b4001d8bb Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 11 Dec 2023 13:33:21 -0500 Subject: [PATCH] gh-112529: Use atomic operations for `gcstate->collecting` (#112533) * gh-112529: Use atomic operations for `gcstate->collecting` The `collecting` field in `GCState` is used to prevent overlapping garbage collections within the same interpreter. This is updated to use atomic operations in order to be thread-safe in `--disable-gil` builds. The GC code is refactored a bit to support this. More of the logic is pushed down to `gc_collect_main()` so that we can safely order the logic setting `collecting`, the selection of the generation, and the invocation of callbacks with respect to the atomic operations and the (future) stop-the-world pauses. The change uses atomic operations for both `--disable-gil` and the default build (with the GIL) to avoid extra `#ifdef` guards and ease the maintenance burden. --- Modules/gcmodule.c | 352 ++++++++++++++++++++++----------------------- 1 file changed, 168 insertions(+), 184 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 8233fc56159b60..2d1f381e622226 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -74,6 +74,20 @@ module gc #define AS_GC(op) _Py_AS_GC(op) #define FROM_GC(gc) _Py_FROM_GC(gc) +// Automatically choose the generation that needs collecting. +#define GENERATION_AUTO (-1) + +typedef enum { + // GC was triggered by heap allocation + _Py_GC_REASON_HEAP, + + // GC was called during shutdown + _Py_GC_REASON_SHUTDOWN, + + // GC was called by gc.collect() or PyGC_Collect() + _Py_GC_REASON_MANUAL +} _PyGC_Reason; + static inline int gc_is_collecting(PyGC_Head *g) @@ -1194,19 +1208,122 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, gc_list_merge(resurrected, old_generation); } + +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ +static void +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int generation, Py_ssize_t collected, + Py_ssize_t uncollectable) +{ + assert(!_PyErr_Occurred(tstate)); + + /* we may get called very early */ + GCState *gcstate = &tstate->interp->gc; + if (gcstate->callbacks == NULL) { + return; + } + + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + } + + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; icallbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go away */ + r = PyObject_Vectorcall(cb, stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); + } + else { + Py_DECREF(r); + } + Py_DECREF(cb); + } + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); +} + + +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in the that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + continue; + return i; + } + } + return -1; +} + + /* This is the main function. Read this to understand how the * collection process works. */ static Py_ssize_t -gc_collect_main(PyThreadState *tstate, int generation, - Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, - int nofail) +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) { - GC_STAT_ADD(generation, collections, 1); -#ifdef Py_STATS - if (_Py_stats) { - _Py_stats->object_stats.object_visits = 0; - } -#endif int i; Py_ssize_t m = 0; /* # objects collected */ Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ @@ -1223,6 +1340,36 @@ gc_collect_main(PyThreadState *tstate, int generation, assert(gcstate->garbage != NULL); assert(!_PyErr_Occurred(tstate)); + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; + } + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. + _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; + } + } + + assert(generation >= 0 && generation < NUM_GENERATIONS); + +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; + } +#endif + GC_STAT_ADD(generation, collections, 1); + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } + if (gcstate->debug & DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); show_stats_each_generations(gcstate); @@ -1342,7 +1489,7 @@ gc_collect_main(PyThreadState *tstate, int generation, } if (_PyErr_Occurred(tstate)) { - if (nofail) { + if (reason == _Py_GC_REASON_SHUTDOWN) { _PyErr_Clear(tstate); } else { @@ -1351,13 +1498,6 @@ gc_collect_main(PyThreadState *tstate, int generation, } /* Update stats */ - if (n_collected) { - *n_collected = m; - } - if (n_uncollectable) { - *n_uncollectable = n; - } - struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; stats->collections++; stats->collected += m; @@ -1376,134 +1516,13 @@ gc_collect_main(PyThreadState *tstate, int generation, PyDTrace_GC_DONE(n + m); } - assert(!_PyErr_Occurred(tstate)); - return n + m; -} - -/* Invoke progress callbacks to notify clients that garbage collection - * is starting or stopping - */ -static void -invoke_gc_callback(PyThreadState *tstate, const char *phase, - int generation, Py_ssize_t collected, - Py_ssize_t uncollectable) -{ - assert(!_PyErr_Occurred(tstate)); - - /* we may get called very early */ - GCState *gcstate = &tstate->interp->gc; - if (gcstate->callbacks == NULL) { - return; - } - - /* The local variable cannot be rebound, check it for sanity */ - assert(PyList_CheckExact(gcstate->callbacks)); - PyObject *info = NULL; - if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", - "generation", generation, - "collected", collected, - "uncollectable", uncollectable); - if (info == NULL) { - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; - } + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); } - PyObject *phase_obj = PyUnicode_FromString(phase); - if (phase_obj == NULL) { - Py_XDECREF(info); - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; - } - - PyObject *stack[] = {phase_obj, info}; - for (Py_ssize_t i=0; icallbacks); i++) { - PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); - Py_INCREF(cb); /* make sure cb doesn't go away */ - r = PyObject_Vectorcall(cb, stack, 2, NULL); - if (r == NULL) { - PyErr_WriteUnraisable(cb); - } - else { - Py_DECREF(r); - } - Py_DECREF(cb); - } - Py_DECREF(phase_obj); - Py_XDECREF(info); - assert(!_PyErr_Occurred(tstate)); -} - -/* Perform garbage collection of a generation and invoke - * progress callbacks. - */ -static Py_ssize_t -gc_collect_with_callback(PyThreadState *tstate, int generation) -{ assert(!_PyErr_Occurred(tstate)); - Py_ssize_t result, collected, uncollectable; - invoke_gc_callback(tstate, "start", generation, 0, 0); - result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0); - invoke_gc_callback(tstate, "stop", generation, collected, uncollectable); - assert(!_PyErr_Occurred(tstate)); - return result; -} - -static Py_ssize_t -gc_collect_generations(PyThreadState *tstate) -{ - GCState *gcstate = &tstate->interp->gc; - /* Find the oldest generation (highest numbered) where the count - * exceeds the threshold. Objects in the that generation and - * generations younger than it will be collected. */ - Py_ssize_t n = 0; - for (int i = NUM_GENERATIONS-1; i >= 0; i--) { - if (gcstate->generations[i].count > gcstate->generations[i].threshold) { - /* Avoid quadratic performance degradation in number - of tracked objects (see also issue #4074): - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - - long_lived_pending / long_lived_total - - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html - */ - if (i == NUM_GENERATIONS - 1 - && gcstate->long_lived_pending < gcstate->long_lived_total / 4) - continue; - n = gc_collect_with_callback(tstate, i); - break; - } - } - return n; + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; } #include "clinic/gcmodule.c.h" @@ -1574,18 +1593,7 @@ gc_collect_impl(PyObject *module, int generation) return -1; } - GCState *gcstate = &tstate->interp->gc; - Py_ssize_t n; - if (gcstate->collecting) { - /* already collecting, don't do anything */ - n = 0; - } - else { - gcstate->collecting = 1; - n = gc_collect_with_callback(tstate, generation); - gcstate->collecting = 0; - } - return n; + return gc_collect_main(tstate, generation, _Py_GC_REASON_MANUAL); } /*[clinic input] @@ -2124,17 +2132,9 @@ PyGC_Collect(void) } Py_ssize_t n; - if (gcstate->collecting) { - /* already collecting, don't do anything */ - n = 0; - } - else { - gcstate->collecting = 1; - PyObject *exc = _PyErr_GetRaisedException(tstate); - n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1); - _PyErr_SetRaisedException(tstate, exc); - gcstate->collecting = 0; - } + PyObject *exc = _PyErr_GetRaisedException(tstate); + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); + _PyErr_SetRaisedException(tstate, exc); return n; } @@ -2148,16 +2148,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - GCState *gcstate = &tstate->interp->gc; - if (gcstate->collecting) { - return 0; - } - - Py_ssize_t n; - gcstate->collecting = 1; - n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1); - gcstate->collecting = 0; - return n; + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); } void @@ -2275,10 +2266,6 @@ PyObject_IS_GC(PyObject *obj) void _Py_ScheduleGC(PyInterpreterState *interp) { - GCState *gcstate = &interp->gc; - if (gcstate->collecting == 1) { - return; - } _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); } @@ -2296,7 +2283,7 @@ _PyObject_GC_Link(PyObject *op) if (gcstate->generations[0].count > gcstate->generations[0].threshold && gcstate->enabled && gcstate->generations[0].threshold && - !gcstate->collecting && + !_Py_atomic_load_int_relaxed(&gcstate->collecting) && !_PyErr_Occurred(tstate)) { _Py_ScheduleGC(tstate->interp); @@ -2306,10 +2293,7 @@ _PyObject_GC_Link(PyObject *op) void _Py_RunGC(PyThreadState *tstate) { - GCState *gcstate = &tstate->interp->gc; - gcstate->collecting = 1; - gc_collect_generations(tstate); - gcstate->collecting = 0; + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); } static PyObject *