diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 084cd6ac257ebe..cbddc14f928cbd 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -263,6 +263,39 @@ values but should not rebind them): .. versionadded:: 3.3 +The :mod:`gc` module provides an "object debugger" which periodically checks +that all Python objects tracked by the garbage collector look valid: + +* check that the reference counter is greater than or equal to 1; +* check that the pointer to the type is not NULL; +* if debug hooks on memory allocators (:c:func:`PyMem_SetupDebugHooks`) are + enabled (:envvar:`PYTHONMALLOC` environment variable set to ``"debug"`` or + :option:`-X` ``dev`` command line option), detect freed memory. + +This debugger is intended to help find bugs in C extensions. + +.. function:: enable_object_debugger(threshold0, threshold1=-1, threshold2=-1) + + Enable the object debugger. + + Check that all Python objects tracked by the garbage collector look valid + every *threshold0* memory allocations or deallocations made by the garbage + collector. + + A low *threshold0* can have a significant negative impact on Python + performance, but should detect invalid-looking Python objects earlier. + + *threshold0* must be greater than or equal to 1. A negative *threshold1* or *threshold2* (the default) reuses the garbage collector's own threshold for that generation. + + .. versionadded:: 3.8 + +.. function:: disable_object_debugger() + + Disable the object debugger. + + .. versionadded:: 3.8 + + The following constants are provided for use with :func:`set_debug`: diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 39a0da5e61e9ff..a55a9fa4e3e928 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -206,6 +206,11 @@ gc indicating a generation to get objects from. Contributed in :issue:`36016` by Pablo Galindo. +New "object debugger" which periodically checks that all Python objects tracked by +the garbage collector look valid: :func:`gc.enable_object_debugger` and +:func:`gc.disable_object_debugger`. This debugger is intended to help find bugs in C +extensions. Contributed in :issue:`36389` by Victor Stinner. 
+ gzip ---- diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c95595358a9e84..925c0e9082ccab 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -76,6 +76,8 @@ static inline void _PyObject_GC_UNTRACK_impl(const char *filename, int lineno, #define _PyObject_GC_UNTRACK(op) \ _PyObject_GC_UNTRACK_impl(__FILE__, __LINE__, _PyObject_CAST(op)) +PyAPI_FUNC(void) _PyGC_DisableObjectDebugger(void); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 8da1bd9e304ac6..e0f69940eb4548 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -111,6 +111,15 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +struct _gc_object_debugger { + int enabled; /* non-zero while the object debugger is active */ + struct gc_obj_dbg_gen { + int threshold; /* check this generation once count reaches it */ + int count; /* allocations (generation 0) or debugger passes over + the younger generation since the last check */ + } generations[NUM_GENERATIONS]; +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. */ @@ -143,6 +152,8 @@ struct _gc_runtime_state { collections, and are awaiting to undergo a full collection for the first time. 
*/ Py_ssize_t long_lived_pending; + + struct _gc_object_debugger object_debugger; }; PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *); diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 2dab53004452ae..a5b3603b9c269d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -823,6 +823,35 @@ def test_get_objects_arguments(self): self.assertRaises(TypeError, gc.get_objects, 1.234) +@unittest.skipUnless(hasattr(gc, 'enable_object_debugger'), + 'need gc.enable_object_debugger()') +class ObjectDebuggerTests(unittest.TestCase): + def test_basic(self): + try: + # Call the object debugger around 2 times + # (the test only needs that it's called at least once) + gc.enable_object_debugger(10) + objs = [{} for _ in range(20)] + finally: + gc.disable_object_debugger() + + def test_thresholds(self): + try: + big = 10 ** 7 + gc.enable_object_debugger(big) + gc.enable_object_debugger(big, big) + gc.enable_object_debugger(big, big, big) + finally: + gc.disable_object_debugger() + + def test_invalid_thresholds(self): + self.assertRaises(ValueError, gc.enable_object_debugger, -1) + self.assertRaises(ValueError, gc.enable_object_debugger, 0) + self.assertRaises(OverflowError, gc.enable_object_debugger, 2 ** 100) + self.assertRaises(OverflowError, gc.enable_object_debugger, 1, 2 ** 100) + self.assertRaises(OverflowError, gc.enable_object_debugger, 1, 1, 2 ** 100) + + class GCCallbackTests(unittest.TestCase): def setUp(self): # Save gc state and disable it. 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-21-12-09-05.bpo-36389.EkpMZP.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-21-12-09-05.bpo-36389.EkpMZP.rst new file mode 100644 index 00000000000000..66b4a59cb3c13c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-21-12-09-05.bpo-36389.EkpMZP.rst @@ -0,0 +1,4 @@ +New "object debugger" which checks frequently if all Python objects tracked +by the garbage collector are consistent: :func:`gc.enable_object_debugger` +and :func:`gc.disable_object_debugger`. Contributed in :issue:`36389` by Victor +Stinner. diff --git a/Modules/clinic/gcmodule.c.h b/Modules/clinic/gcmodule.c.h index 22d2aa4a87bcd0..681156a98fb7b0 100644 --- a/Modules/clinic/gcmodule.c.h +++ b/Modules/clinic/gcmodule.c.h @@ -373,4 +373,103 @@ gc_get_freeze_count(PyObject *module, PyObject *Py_UNUSED(ignored)) exit: return return_value; } -/*[clinic end generated code: output=e40d384b1f0d513c input=a9049054013a1b77]*/ + +PyDoc_STRVAR(gc_py_enable_object_debugger__doc__, +"enable_object_debugger($module, /, threshold0, threshold1=-1,\n" +" threshold2=-1)\n" +"--\n" +"\n" +"Enable the object debugger.\n" +"\n" +"Check all Python objects tracked by the garbage collector every \'threshold\'\n" +"memory allocation or deallocation made by the garbage collector."); + +#define GC_PY_ENABLE_OBJECT_DEBUGGER_METHODDEF \ + {"enable_object_debugger", (PyCFunction)(void(*)(void))gc_py_enable_object_debugger, METH_FASTCALL|METH_KEYWORDS, gc_py_enable_object_debugger__doc__}, + +static PyObject * +gc_py_enable_object_debugger_impl(PyObject *module, int threshold0, + int threshold1, int threshold2); + +static PyObject * +gc_py_enable_object_debugger(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"threshold0", "threshold1", "threshold2", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "enable_object_debugger", 0}; + PyObject 
*argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + int threshold0; + int threshold1 = -1; + int threshold2 = -1; + PyObject *_return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_Check(args[0])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + threshold0 = _PyLong_AsInt(args[0]); + if (threshold0 == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + threshold1 = _PyLong_AsInt(args[1]); + if (threshold1 == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[2])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + threshold2 = _PyLong_AsInt(args[2]); + if (threshold2 == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + _return_value = gc_py_enable_object_debugger_impl(module, threshold0, threshold1, threshold2); + if (_return_value != Py_None) { + goto exit; + } + return_value = Py_None; + Py_INCREF(Py_None); + +exit: + return return_value; +} + +PyDoc_STRVAR(gc_py_disable_object_debugger__doc__, +"disable_object_debugger($module, /)\n" +"--\n" +"\n" +"Disable the object debugger."); + +#define GC_PY_DISABLE_OBJECT_DEBUGGER_METHODDEF \ + {"disable_object_debugger", (PyCFunction)gc_py_disable_object_debugger, METH_NOARGS, gc_py_disable_object_debugger__doc__}, + +static PyObject * +gc_py_disable_object_debugger_impl(PyObject *module); + +static PyObject * +gc_py_disable_object_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return gc_py_disable_object_debugger_impl(module); +} +/*[clinic end generated code: output=408f1549d404380c 
input=a9049054013a1b77]*/ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index fad1356d6b443d..7724e809007027 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -111,6 +111,7 @@ gc_decref(PyGC_Head *g) g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; } + /* Python string to use if unhandled exception occurs */ static PyObject *gc_str = NULL; @@ -146,6 +147,8 @@ _PyGC_Initialize(struct _gc_runtime_state *state) (uintptr_t)&state->permanent_generation.head}, 0, 0 }; state->permanent_generation = permanent_generation; + + state->object_debugger.enabled = 0; } /* @@ -402,7 +405,7 @@ subtract_refs(PyGC_Head *containers) for (; gc != containers; gc = GC_NEXT(gc)) { traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; (void) traverse(FROM_GC(gc), - (visitproc)visit_decref, + visit_decref, NULL); } } @@ -1240,6 +1243,107 @@ collect_generations(void) return n; } + +/* Object Debugger: sanity checks on objects tracked by the GC */ + +void +_PyGC_DisableObjectDebugger(void) +{ + struct _gc_object_debugger *debugger = &_PyRuntime.gc.object_debugger; + debugger->enabled = 0; + for (int i=0; i < NUM_GENERATIONS; i++) { + debugger->generations[i].count = 0; + debugger->generations[i].threshold = 0; + } +} + + +#define GC_OBJECT_ASSERT(OP, EXPR) \ + if (!(EXPR)) { \ + _PyObject_AssertFailed((OP), #EXPR, NULL, \ + __FILE__, __LINE__, __func__); \ + } + +static void +gc_check_object_consistency(PyObject *op) +{ + GC_OBJECT_ASSERT(op, op != NULL); + GC_OBJECT_ASSERT(op, !_PyObject_IsFreed(op)); + GC_OBJECT_ASSERT(op, Py_REFCNT(op) >= 1); + + PyTypeObject *type = op->ob_type; + GC_OBJECT_ASSERT(op, type != NULL); + GC_OBJECT_ASSERT(op, !_PyObject_IsFreed((PyObject *)type)); + + /* GC_OBJECT_ASSERT stays defined: gc_check_object() below uses it too. */ +} + + +static int +visit_check_consistency(PyObject *op, void *data) +{ + gc_check_object_consistency(op); + return 0; +} + + +static void +gc_check_object(PyObject *op) +{ + gc_check_object_consistency(op); + GC_OBJECT_ASSERT(op, _PyObject_GC_IS_TRACKED(op)); + + traverseproc traverse = Py_TYPE(op)->tp_traverse; + traverse(op,
visit_check_consistency, NULL); +} + +#undef GC_OBJECT_ASSERT + + +static void +_PyGC_ObjectDebuggerGeneration(int gen) +{ + PyGC_Head *gc_list = GEN_HEAD(gen); + for (PyGC_Head *gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + gc_check_object(op); + } +} + + +static void +_PyGC_ObjectDebugger(void) +{ + struct _gc_object_debugger *debugger = &_PyRuntime.gc.object_debugger; + + int gen = 0; + while (1) { + debugger->generations[gen].count = 0; + + _PyGC_ObjectDebuggerGeneration(gen); + + if ((gen + 1) >= NUM_GENERATIONS) { + break; + } + gen++; + + debugger->generations[gen].count++; + if (debugger->generations[gen].count < debugger->generations[gen].threshold) { + break; + } + + /* Avoid quadratic performance degradation in number + of tracked objects. See comments at the beginning + of this file, and issue #4074. */ + if (gen == NUM_GENERATIONS - 1 + && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4) + { + break; + } + } +} + + #include "clinic/gcmodule.c.h" /*[clinic input] @@ -1524,7 +1628,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) } /* If generation is passed, we extract only that generation */ - if (generation != -1) { + if (generation != -1) { if (generation >= NUM_GENERATIONS) { PyErr_Format(PyExc_ValueError, "generation parameter must be less than the number of " @@ -1683,6 +1787,70 @@ gc_get_freeze_count_impl(PyObject *module) } +/*[clinic input] +gc.enable_object_debugger as gc_py_enable_object_debugger -> NoneType + + threshold0: int + threshold1: int = -1 + threshold2: int = -1 + +Enable the object debugger. + +Check all Python objects tracked by the garbage collector every 'threshold' +memory allocation or deallocation made by the garbage collector. 
+[clinic start generated code]*/ + +static PyObject * +gc_py_enable_object_debugger_impl(PyObject *module, int threshold0, + int threshold1, int threshold2) +/*[clinic end generated code: output=5eb8b74e7df0e725 input=706b9e90db364bae]*/ +{ + if (threshold0 < 1) { + PyErr_SetString(PyExc_ValueError, + "threshold0 must be greater than or equal to 1"); + return NULL; + } + + if (threshold1 < 0) { + threshold1 = _PyRuntime.gc.generations[1].threshold; + } + if (threshold2 < 0) { + threshold2 = _PyRuntime.gc.generations[2].threshold; + } + + struct _gc_object_debugger *debugger = &_PyRuntime.gc.object_debugger; + + /* Clear some caches to run the debugger in next allocations */ + clear_freelists(); + PyType_ClearCache(); + + debugger->enabled = 1; + debugger->generations[0].threshold = threshold0; + debugger->generations[0].count = 0; + debugger->generations[1].threshold = threshold1; + debugger->generations[1].count = 0; + debugger->generations[2].threshold = threshold2; + debugger->generations[2].count = 0; + + return Py_None; +} + + +/*[clinic input] +gc.disable_object_debugger as gc_py_disable_object_debugger -> NoneType + +Disable the object debugger. 
+[clinic start generated code]*/ + +static PyObject * +gc_py_disable_object_debugger_impl(PyObject *module) +/*[clinic end generated code: output=48666b981226d740 input=37522f5b3e475600]*/ +{ + _PyGC_DisableObjectDebugger(); + return Py_None; +} + + PyDoc_STRVAR(gc__doc__, "This module provides access to the garbage collector for reference cycles.\n" "\n" @@ -1724,6 +1892,8 @@ static PyMethodDef GcMethods[] = { GC_FREEZE_METHODDEF GC_UNFREEZE_METHODDEF GC_GET_FREEZE_COUNT_METHODDEF + GC_PY_ENABLE_OBJECT_DEBUGGER_METHODDEF + GC_PY_DISABLE_OBJECT_DEBUGGER_METHODDEF {NULL, NULL} /* Sentinel */ }; @@ -1864,9 +2034,11 @@ _PyGC_DumpShutdownStats(void) } } + void _PyGC_Fini(void) { + _PyGC_DisableObjectDebugger(); Py_CLEAR(_PyRuntime.gc.callbacks); } @@ -1877,6 +2049,7 @@ _PyGC_Dump(PyGC_Head *g) _PyObject_Dump(FROM_GC(g)); } + /* extension modules might be compiled with GC support so these functions must always be available */ @@ -1904,12 +2077,27 @@ PyObject_GC_UnTrack(void *op_raw) } } + +static inline void +gc_check_object_debugger(void) +{ + struct _gc_object_debugger *debugger = &_PyRuntime.gc.object_debugger; + if (debugger->enabled) { + debugger->generations[0].count++; + if (debugger->generations[0].count >= debugger->generations[0].threshold) { + _PyGC_ObjectDebugger(); + } + } +} + + static PyObject * _PyObject_GC_Alloc(int use_calloc, size_t basicsize) { PyObject *op; PyGC_Head *g; size_t size; + if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) return PyErr_NoMemory(); size = sizeof(PyGC_Head) + basicsize; @@ -1932,6 +2120,9 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) collect_generations(); _PyRuntime.gc.collecting = 0; } + + gc_check_object_debugger(); + op = FROM_GC(g); return op; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ad1447256cc697..038fb7a8d6b8ac 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -6,6 +6,7 @@ #undef Yield /* undefine macro conflicting with */ #include "pycore_coreconfig.h" #include 
"pycore_context.h" +#include "pycore_object.h" /* _PyGC_DisableObjectDebugger() */ #include "pycore_fileutils.h" #include "pycore_hamt.h" #include "pycore_pathconfig.h" @@ -2063,6 +2064,9 @@ fatal_error(const char *prefix, const char *msg, int status) } reentrant = 1; + /* Prevent reentrant call if called by the GC object debugger */ + _PyGC_DisableObjectDebugger(); + fprintf(stderr, "Fatal Python error: "); if (prefix) { fputs(prefix, stderr);