Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add cyclic garbage collection to NumPy #15065

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions doc/release/upcoming_changes/15065.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Cyclic garbage collection support
---------------------------------
NumPy arrays now support cyclic garbage collection.
Most importantly this means that situations such as::

arr = np.array([None], dtype=object)
arr[0] = arr
del arr

can be cleaned up by the garbage collector. While support has
been added, it is generally advisable to avoid such constructs.
2 changes: 1 addition & 1 deletion numpy/core/code_generators/genapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
join('multiarray', 'nditer_pywrap.c'),
join('multiarray', 'nditer_templ.c.src'),
join('multiarray', 'number.c'),
join('multiarray', 'refcount.c'),
join('multiarray', 'refcount.c.src'),
join('multiarray', 'scalartypes.c.src'),
join('multiarray', 'scalarapi.c'),
join('multiarray', 'sequence.c'),
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def get_mathlib_info(*args):
join('src', 'multiarray', 'nditer_constr.c'),
join('src', 'multiarray', 'nditer_pywrap.c'),
join('src', 'multiarray', 'number.c'),
join('src', 'multiarray', 'refcount.c'),
join('src', 'multiarray', 'refcount.c.src'),
join('src', 'multiarray', 'sequence.c'),
join('src', 'multiarray', 'shape.c'),
join('src', 'multiarray', 'scalarapi.c'),
Expand Down
114 changes: 96 additions & 18 deletions numpy/core/src/multiarray/arrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ maintainer email: oliphant.travis@ieee.org
#include "alloc.h"
#include "mem_overlap.h"
#include "numpyos.h"
#include "refcount.h"
#include "strfuncs.h"

#include "binop_override.h"
Expand Down Expand Up @@ -471,6 +472,20 @@ array_dealloc(PyArrayObject *self)
{
PyArrayObject_fields *fa = (PyArrayObject_fields *)self;

PyObject_GC_UnTrack(self);
/*
* Prevent deeply-nested object arrays from causing stack overflows
* on deallocation. On newer versions of Python the Py_TRASHCAN_BEGIN
* variant of the macros is used, to prevent potential problems with
* subclasses.
* See: https://bugs.python.org/issue35983
* The new macros were added in Python v3.8.0b1
*/
#ifdef Py_TRASHCAN_BEGIN /* Available in Python >= v3.8.0b1 */
Py_TRASHCAN_BEGIN(self, array_dealloc);
#else
Py_TRASHCAN_SAFE_BEGIN(self);
#endif

_dealloc_cached_buffer_info((PyObject*)self);

if (fa->weakreflist != NULL) {
Expand Down Expand Up @@ -534,8 +549,72 @@ array_dealloc(PyArrayObject *self)
npy_free_cache_dim(fa->dimensions, 2 * fa->nd);
Py_DECREF(fa->descr);
Py_TYPE(self)->tp_free((PyObject *)self);

/* Use new macro when available, these are identical */
#ifdef Py_TRASHCAN_BEGIN
Py_TRASHCAN_END;
#else
Py_TRASHCAN_SAFE_END(self);
#endif
}


/*
 * tp_traverse slot: report the objects referenced by an array to the
 * cyclic garbage collector.
 *
 * The base object is always visited. The items themselves are only visited
 * when the dtype holds references (PyDataType_REFCHK) and the array owns
 * its data; an array that does not own its data relies on the visit of its
 * base instead.
 *
 * Returns 0 on success. A non-zero result from `visit` is returned
 * immediately by the Py_VISIT macro.
 */
static int
array_traverse(PyArrayObject *self, visitproc visit, void *arg)
{
    PyArrayObject_fields *fields = (PyArrayObject_fields *)self;
    PyArray_Descr *dtype;

    /*
     * Traversal is expected to happen only on fully initialized arrays;
     * the GC is assumed not to run between creation of the array and the
     * end of its initialization.
     */
    assert(PyArray_DATA(self) != NULL);

    /* Note: if dtypes ever gain cyclic gc support, visit the descr too. */
    dtype = PyArray_DESCR(self);
    Py_VISIT(fields->base);

    /*
     * Nothing more to do when the dtype holds no references, or when the
     * data belongs to someone else (the base visit above covers that).
     * NOTE: user dtypes doing their own reference counting would need
     * their own VISIT handling here.
     */
    if (!PyDataType_REFCHK(dtype) ||
            !PyArray_CHKFLAGS(self, NPY_ARRAY_OWNDATA)) {
        return 0;
    }

    /*
     * Anything that is not a single, aligned segment of plain objects
     * (e.g. structured dtypes) goes through the generic helper.
     */
    if (dtype->type_num != NPY_OBJECT ||
            !PyArray_ISONESEGMENT(self) || !PyArray_ISALIGNED(self)) {
        /*
         * NOTE(review): any result of _PyArray_VISIT is discarded here;
         * confirm against its declaration whether it should be returned.
         */
        _PyArray_VISIT(self, visit, arg);
        return 0;
    }

    /* Fast path: the buffer is a flat run of PyObject pointers. */
    assert(PyArray_ISNOTSWAPPED(self));  /* objects are always NBO */
    {
        PyObject **items = PyArray_DATA(self);
        npy_intp count = PyArray_SIZE(self);
        npy_intp k;

        for (k = 0; k < count; k++) {
            Py_VISIT(items[k]);  /* Py_VISIT skips NULL entries */
        }
    }
    return 0;
}


/*NUMPY_API
* Prints the raw data of the ndarray in a form useful for debugging
* low-level C issues.
Expand Down Expand Up @@ -1764,23 +1843,21 @@ array_iter(PyArrayObject *arr)
return PySeqIter_New((PyObject *)arr);
}

/*
 * tp_alloc slot for ndarray: allocate `type->tp_basicsize` bytes with
 * PyObject_Malloc and initialize the object header with PyObject_Init.
 *
 * `nitems` is ignored. Returns the new object, or NULL with MemoryError
 * set when the allocation fails.
 */
static PyObject *
array_alloc(PyTypeObject *type, Py_ssize_t NPY_UNUSED(nitems))
{
    /* nitems will always be 0 */
    PyObject *obj = PyObject_Malloc(type->tp_basicsize);

    /* Do not rely on PyObject_Init to cope with a failed allocation. */
    if (obj == NULL) {
        return PyErr_NoMemory();
    }
    PyObject_Init(obj, type);
    return obj;
}

static void
array_free(PyObject * v)
{
/* avoid same deallocator as PyBaseObject, see gentype_free */
PyObject_Free(v);
/*
 * tp_clear slot for ndarray: break reference cycles by delegating to
 * _PyArray_CLEAR and returning its result.
 *
 * Design notes carried over from the original comment (which refers to the
 * implementation as PyArray_ClearAndFillNone -- NOTE(review): confirm this
 * is the same routine as _PyArray_CLEAR):
 *   - clearing sets all items to None, which is sufficient to break any
 *     cycle;
 *   - the base attribute of arrays not owning their data does not need to
 *     be overwritten;
 *   - no special fast paths are implemented, since clearing is expected to
 *     occur very rarely.
 */
static int
array_clear(PyArrayObject *self)
{
    int status = _PyArray_CLEAR(self);

    return status;
}



NPY_NO_EXPORT PyTypeObject PyArray_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"numpy.ndarray", /* tp_name */
Expand All @@ -1806,11 +1883,12 @@ NPY_NO_EXPORT PyTypeObject PyArray_Type = {
(getattrofunc)0, /* tp_getattro */
(setattrofunc)0, /* tp_setattro */
&array_as_buffer, /* tp_as_buffer */
(Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE), /* tp_flags */
(Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
Py_TPFLAGS_HAVE_GC), /* tp_flags */
0, /* tp_doc */

(traverseproc)0, /* tp_traverse */
(inquiry)0, /* tp_clear */
(traverseproc)array_traverse, /* tp_traverse */
(inquiry)array_clear, /* tp_clear */
(richcmpfunc)array_richcompare, /* tp_richcompare */
offsetof(PyArrayObject_fields, weakreflist), /* tp_weaklistoffset */
(getiterfunc)array_iter, /* tp_iter */
Expand All @@ -1824,9 +1902,9 @@ NPY_NO_EXPORT PyTypeObject PyArray_Type = {
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)0, /* tp_init */
(allocfunc)array_alloc, /* tp_alloc */
0, /* tp_alloc */
(newfunc)array_new, /* tp_new */
(freefunc)array_free, /* tp_free */
0, /* tp_free */
0, /* tp_is_gc */
0, /* tp_bases */
0, /* tp_mro */
Expand Down
Loading