Skip to content

Commit

Permalink
Merge pull request #277 from python-greenlet/issue264
Browse files Browse the repository at this point in the history
Add a way to get how long it takes to cleanup using gc, and a way to disable it
  • Loading branch information
jamadden committed Nov 16, 2021
2 parents 0145f05 + 34724e1 commit 5d60fd8
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 15 deletions.
5 changes: 5 additions & 0 deletions src/greenlet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,8 @@
from ._greenlet import GREENLET_USE_CONTEXT_VARS # pylint:disable=unused-import
from ._greenlet import GREENLET_USE_GC # pylint:disable=unused-import
from ._greenlet import GREENLET_USE_TRACING # pylint:disable=unused-import

# Controlling the use of the gc module.
from ._greenlet import CLOCKS_PER_SEC # pylint:disable=unused-import
from ._greenlet import enable_optional_cleanup # pylint:disable=unused-import
from ._greenlet import get_clocks_used_doing_optional_cleanup # pylint:disable=unused-import
67 changes: 67 additions & 0 deletions src/greenlet/greenlet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2739,6 +2739,68 @@ mod_get_total_main_greenlets(PyObject* UNUSED(module))
return PyLong_FromSize_t(total_main_greenlets);
}

PyDoc_STRVAR(mod_get_clocks_used_doing_optional_cleanup_doc,
"get_clocks_used_doing_optional_cleanup() -> Integer\n"
"\n"
"Get the number of clock ticks the program has used doing optional "
"greenlet cleanup.\n"
"Beginning in greenlet 2.0, greenlet tries to find and dispose of greenlets\n"
"that leaked after a thread exited. This requires invoking Python's garbage collector,\n"
"which may have a performance cost proportional to the number of live objects.\n"
"This function returns the amount of processor time\n"
"greenlet has used to do this. In programs that run with very large amounts of live\n"
"objects, this metric can be used to decide whether the cost of doing this cleanup\n"
"is worth the memory leak being corrected. If not, you can disable the cleanup\n"
"using ``enable_optional_cleanup(False)``.\n"
"The units are arbitrary and can only be compared to themselves (similarly to ``time.clock()``);\n"
"for example, to see how it scales with your heap. You can attempt to convert them into seconds\n"
"by dividing by the value of CLOCKS_PER_SEC."
"If cleanup has been disabled, returns None."
"\n"
"This is an implementation specific, provisional API. It may be changed or removed\n"
"in the future.\n"
".. versionadded:: 2.0"
);
static PyObject*
mod_get_clocks_used_doing_optional_cleanup(PyObject* UNUSED(module))
{
std::clock_t& clocks = ThreadState::clocks_used_doing_gc();

if (clocks == std::clock_t(-1)) {
Py_RETURN_NONE;
}
// This might not actually work on some implementations; clock_t
// is an opaque type.
return PyLong_FromSsize_t(clocks);
}

PyDoc_STRVAR(mod_enable_optional_cleanup_doc,
"mod_enable_optional_cleanup(bool) -> None\n"
"\n"
"Enable or disable optional cleanup operations.\n"
"See ``get_clocks_used_doing_optional_cleanup()`` for details.\n"
);
static PyObject*
mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag)
{
int is_true = PyObject_IsTrue(flag);
if (is_true == -1) {
return nullptr;
}

std::clock_t& clocks = ThreadState::clocks_used_doing_gc();
if (is_true) {
// If we already have a value, we don't want to lose it.
if (clocks == std::clock_t(-1)) {
clocks = 0;
}
}
else {
clocks = std::clock_t(-1);
}
Py_RETURN_NONE;
}

static PyMethodDef GreenMethods[] = {
{"getcurrent",
(PyCFunction)mod_getcurrent,
Expand All @@ -2749,6 +2811,8 @@ static PyMethodDef GreenMethods[] = {
{"set_thread_local", (PyCFunction)mod_set_thread_local, METH_VARARGS, mod_set_thread_local_doc},
{"get_pending_cleanup_count", (PyCFunction)mod_get_pending_cleanup_count, METH_NOARGS, mod_get_pending_cleanup_count_doc},
{"get_total_main_greenlets", (PyCFunction)mod_get_total_main_greenlets, METH_NOARGS, mod_get_total_main_greenlets_doc},
{"get_clocks_used_doing_optional_cleanup", (PyCFunction)mod_get_clocks_used_doing_optional_cleanup, METH_NOARGS, mod_get_clocks_used_doing_optional_cleanup_doc},
{"enable_optional_cleanup", (PyCFunction)mod_enable_optional_cleanup, METH_O, mod_enable_optional_cleanup_doc},
{NULL, NULL} /* Sentinel */
};

Expand Down Expand Up @@ -2800,6 +2864,9 @@ greenlet_internal_mod_init() G_NOEXCEPT
m.PyAddObject("GREENLET_USE_CONTEXT_VARS", (long)GREENLET_PY37);
m.PyAddObject("GREENLET_USE_STANDARD_THREADING", (long)G_USE_STANDARD_THREADING);

OwnedObject clocks_per_sec = OwnedObject::consuming(PyLong_FromSsize_t(CLOCKS_PER_SEC));
m.PyAddObject("CLOCKS_PER_SEC", clocks_per_sec);

/* also publish module-level data as attributes of the greentype. */
// XXX: This is weird, and enables a strange pattern of
// confusing the class greenlet with the module greenlet; with
Expand Down
2 changes: 0 additions & 2 deletions src/greenlet/greenlet_internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ namespace greenlet {

class ThreadState;

typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > g_deleteme_t;

};


Expand Down
38 changes: 26 additions & 12 deletions src/greenlet/greenlet_thread_state.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef GREENLET_THREAD_STATE_HPP
#define GREENLET_THREAD_STATE_HPP

#include <ctime>
#include <stdexcept>

#include "greenlet_internal.hpp"
Expand Down Expand Up @@ -103,17 +104,21 @@ class ThreadState {
/* Strong reference to the trace function, if any. */
OwnedObject tracefunc;

/* A vector of PyGreenlet pointers representing things that need
typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > deleteme_t;
/* A vector of raw PyGreenlet pointers representing things that need
deleted when this thread is running. The vector owns the
references, but you need to manually INCREF/DECREF as you use
them. */
greenlet::g_deleteme_t deleteme;
them. We don't use a vector<refs::OwnedGreenlet> because we
make copy of this vector, and that would become O(n) as all the
refcounts are incremented in the copy.
*/
deleteme_t deleteme;

#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED
void* exception_state;
#endif


static std::clock_t _clocks_used_doing_gc;
static ImmortalString get_referrers_name;
static PythonAllocator<ThreadState> allocator;

Expand All @@ -134,6 +139,7 @@ class ThreadState {
static void init()
{
ThreadState::get_referrers_name = "get_referrers";
ThreadState::_clocks_used_doing_gc = 0;
}

ThreadState()
Expand Down Expand Up @@ -256,9 +262,9 @@ class ThreadState {
// It's possible we could add items to this list while
// running Python code if there's a thread switch, so we
// need to defensively copy it before that can happen.
g_deleteme_t copy = this->deleteme;
deleteme_t copy = this->deleteme;
this->deleteme.clear(); // in case things come back on the list
for(greenlet::g_deleteme_t::iterator it = copy.begin(), end = copy.end();
for(deleteme_t::iterator it = copy.begin(), end = copy.end();
it != end;
++it ) {
PyGreenlet* to_del = *it;
Expand Down Expand Up @@ -320,6 +326,14 @@ class ThreadState {
this->deleteme.push_back(to_del);
}

/**
* Set to std::clock_t(-1) to disable.
*/
inline static std::clock_t& clocks_used_doing_gc()
{
return ThreadState::_clocks_used_doing_gc;
}

~ThreadState()
{
if (!PyInterpreterState_Head()) {
Expand Down Expand Up @@ -348,11 +362,6 @@ class ThreadState {
// on the stack, somewhere uncollectable. Try to detect that.
if (this->current_greenlet == this->main_greenlet && this->current_greenlet) {
assert(this->current_greenlet->is_currently_running_in_some_thread());

// // Break a cycle we know about, the self reference
// // the main greenlet keeps.
// this->main_greenlet->main_greenlet.CLEAR();

// Drop one reference we hold.
this->current_greenlet.CLEAR();
assert(!this->current_greenlet);
Expand All @@ -361,12 +370,14 @@ class ThreadState {
PyGreenlet* old_main_greenlet = this->main_greenlet.borrow();
Py_ssize_t cnt = this->main_greenlet.REFCNT();
this->main_greenlet.CLEAR();
if (cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) {
if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1)
&& cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) {
// Highly likely that the reference is somewhere on
// the stack, not reachable by GC. Verify.
// XXX: This is O(n) in the total number of objects.
// TODO: Add a way to disable this at runtime, and
// another way to report on it.
std::clock_t begin = std::clock();
NewReference gc(PyImport_ImportModule("gc"));
if (gc) {
OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name);
Expand Down Expand Up @@ -412,6 +423,8 @@ class ThreadState {
}
}
}
std::clock_t end = std::clock();
ThreadState::_clocks_used_doing_gc += (end - begin);
}
}
}
Expand Down Expand Up @@ -453,6 +466,7 @@ class ThreadState {

ImmortalString ThreadState::get_referrers_name(nullptr);
PythonAllocator<ThreadState> ThreadState::allocator;
std::clock_t ThreadState::_clocks_used_doing_gc(0);

template<typename Destructor>
class ThreadStateCreator
Expand Down
31 changes: 30 additions & 1 deletion src/greenlet/tests/test_leaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ def additional():
for g in gg:
self.assertIsNone(g())

def assertClocksUsed(self):
used = greenlet._greenlet.get_clocks_used_doing_optional_cleanup()
self.assertGreaterEqual(used, 0)
# we don't lose the value
greenlet._greenlet.enable_optional_cleanup(True)
used2 = greenlet._greenlet.get_clocks_used_doing_optional_cleanup()
self.assertEqual(used, used2)
self.assertGreater(greenlet._greenlet.CLOCKS_PER_SEC, 1)

def _check_issue251(self,
manually_collect_background=True,
explicit_reference_to_switch=False):
Expand Down Expand Up @@ -216,7 +225,8 @@ def background_thread():
# the ``greenlet.switch`` method still on the stack that we
# can't reach to clean up. The C code goes through terrific
# lengths to clean that up.
if not explicit_reference_to_switch:
if not explicit_reference_to_switch and greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None:
# If cleanup was disabled, though, we may not find it.
self.assertEqual(greenlets_after, greenlets_before)
if manually_collect_background:
# TODO: Figure out how to make this work!
Expand All @@ -237,9 +247,19 @@ def background_thread():
# done by leakcheck will find it.
pass

if greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None:
self.assertClocksUsed()

def test_issue251_killing_cross_thread_leaks_list(self):
self._check_issue251()

def test_issue251_with_cleanup_disabled(self):
greenlet._greenlet.enable_optional_cleanup(False)
try:
self._check_issue251()
finally:
greenlet._greenlet.enable_optional_cleanup(True)

@fails_leakcheck
def test_issue251_issue252_need_to_collect_in_background(self):
# Between greenlet 1.1.2 and the next version, this was still
Expand All @@ -261,6 +281,15 @@ def test_issue251_issue252_need_to_collect_in_background(self):
# for some reason, but I've never seen it pass on macOS.
self._check_issue251(manually_collect_background=False)

@fails_leakcheck
def test_issue251_issue252_need_to_collect_in_background_cleanup_disabled(self):
self.expect_greenlet_leak = True
greenlet._greenlet.enable_optional_cleanup(False)
try:
self._check_issue251(manually_collect_background=False)
finally:
greenlet._greenlet.enable_optional_cleanup(True)

@fails_leakcheck
def test_issue251_issue252_explicit_reference_not_collectable(self):
self._check_issue251(
Expand Down

0 comments on commit 5d60fd8

Please sign in to comment.