Skip to content

Commit

Permalink
Merge pull request #318 from python-greenlet/gevent-1909
Browse files Browse the repository at this point in the history
Save/restore the trash_delete_nesting; add tests for this.
  • Loading branch information
jamadden committed Oct 27, 2022
2 parents 1c69fa4 + b61b650 commit 3f67490
Show file tree
Hide file tree
Showing 8 changed files with 255 additions and 15 deletions.
14 changes: 13 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,19 @@
2.0.0a3 (unreleased)
====================

- Nothing changed yet.
- Deal gracefully with greenlet switches that occur while deferred
deallocation of objects is happening using CPython's "trash can"
mechanism. Previously, if a large nested container held items that
switched greenlets during delayed deallocation, and that second
greenlet also invoked the trash can, CPython's internal state could
become corrupt. This was visible as an assertion error in debug
builds. Now, the relevant internal state is saved and restored
during greenlet switches. See also `gevent issue 1909
<https://github.com/gevent/gevent/issues/1909>`_.
- Rename the C API function ``PyGreenlet_GET_PARENT`` to
``PyGreenlet_GetParent`` for consistency. The old name remains
available as a deprecated alias.



2.0.0a2 (2022-03-24)
Expand Down
7 changes: 5 additions & 2 deletions docs/c_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,12 @@ Functions
Macro that returns true if the greenlet *g* has started and has not died.
.. c:function:: PyGreenlet* PyGreenlet_GET_PARENT(PyGreenlet* g)
.. c:function:: PyGreenlet* PyGreenlet_GetParent(PyGreenlet* g)
Macro that returns the parent greenlet of *g*.
Macro that returns the parent greenlet of *g*. Returns a non-null
pointer if there is a parent, or a null pointer on an error or if
there is no parent. If this returns a non-null pointer, you must
decrement its reference.
.. c:function:: int PyGreenlet_SetParent(PyGreenlet* g, PyGreenlet* nparent)
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@

extlinks = {
'issue': ('https://github.com/python-greenlet/greenlet/issues/%s',
'issue #'),
'issue #%s'),
'pr': ('https://github.com/python-greenlet/greenlet/pull/%s',
'pull request #')
'pull request #%s')
}
5 changes: 4 additions & 1 deletion src/greenlet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@
from ._greenlet import GREENLET_USE_GC # pylint:disable=unused-import
from ._greenlet import GREENLET_USE_TRACING # pylint:disable=unused-import

# Controlling the use of the gc module.
# Controlling the use of the gc module. Provisional API for this greenlet
# implementation in 2.0.
from ._greenlet import CLOCKS_PER_SEC # pylint:disable=unused-import
from ._greenlet import enable_optional_cleanup # pylint:disable=unused-import
from ._greenlet import get_clocks_used_doing_optional_cleanup # pylint:disable=unused-import

# Other APIS in the _greenlet module are for test support.
12 changes: 12 additions & 0 deletions src/greenlet/greenlet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2825,6 +2825,17 @@ mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag)
Py_RETURN_NONE;
}

PyDoc_STRVAR(mod_get_tstate_trash_delete_nesting_doc,
"get_tstate_trash_delete_nesting() -> Integer\n"
"\n"
"Return the 'trash can' nesting level. Testing only.\n");
static PyObject*
mod_get_tstate_trash_delete_nesting(PyObject* UNUSED(module))
{
PyThreadState* tstate = PyThreadState_GET();
return PyLong_FromLong(tstate->trash_delete_nesting);
}

static PyMethodDef GreenMethods[] = {
{"getcurrent",
(PyCFunction)mod_getcurrent,
Expand All @@ -2837,6 +2848,7 @@ static PyMethodDef GreenMethods[] = {
{"get_total_main_greenlets", (PyCFunction)mod_get_total_main_greenlets, METH_NOARGS, mod_get_total_main_greenlets_doc},
{"get_clocks_used_doing_optional_cleanup", (PyCFunction)mod_get_clocks_used_doing_optional_cleanup, METH_NOARGS, mod_get_clocks_used_doing_optional_cleanup_doc},
{"enable_optional_cleanup", (PyCFunction)mod_enable_optional_cleanup, METH_O, mod_enable_optional_cleanup_doc},
{"get_tstate_trash_delete_nesting", (PyCFunction)mod_get_tstate_trash_delete_nesting, METH_NOARGS, mod_get_tstate_trash_delete_nesting_doc},
{NULL, NULL} /* Sentinel */
};

Expand Down
34 changes: 25 additions & 9 deletions src/greenlet/greenlet.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,21 +113,37 @@ static void** _PyGreenlet_API = NULL;
(*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \
_PyGreenlet_API[PyGreenlet_SetParent_NUM])

# define PyGreenlet_MAIN \
(*(int (*)(PyGreenlet*)) \
/*
* PyGreenlet_GetParent(PyObject* greenlet)
*
* return greenlet.parent;
*
* This could return NULL even if there is no exception active.
* If it does not return NULL, you are responsible for decrementing the
* reference count.
*/
# define PyGreenlet_GetParent \
(*(PyGreenlet* (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_GET_PARENT_NUM])

/*
* deprecated, undocumented alias.
*/
# define PyGreenlet_GET_PARENT PyGreenlet_GetParent

# define PyGreenlet_MAIN \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_MAIN_NUM])

# define PyGreenlet_STARTED \
(*(int (*)(PyGreenlet*)) \
# define PyGreenlet_STARTED \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_STARTED_NUM])

# define PyGreenlet_ACTIVE \
(*(int (*)(PyGreenlet*)) \
# define PyGreenlet_ACTIVE \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_ACTIVE_NUM])

# define PyGreenlet_GET_PARENT \
(*(PyGreenlet* (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_GET_PARENT_NUM])



/* Macro that imports greenlet and initializes C API */
Expand Down
9 changes: 9 additions & 0 deletions src/greenlet/greenlet_greenlet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ namespace greenlet
int use_tracing;
#endif
int recursion_depth;
int trash_delete_nesting;
#if GREENLET_PY311
_PyInterpreterFrame *current_frame;
_PyStackChunk *datastack_chunk;
Expand Down Expand Up @@ -742,6 +743,7 @@ PythonState::PythonState()
,use_tracing(0)
#endif
,recursion_depth(0)
,trash_delete_nesting(0)
#if GREENLET_PY311
,current_frame(nullptr)
,datastack_chunk(nullptr)
Expand Down Expand Up @@ -835,6 +837,9 @@ void PythonState::operator<<(const PyThreadState *const tstate) G_NOEXCEPT
this->recursion_depth = tstate->recursion_depth;
this->_top_frame.steal(tstate->frame);
#endif

// All versions of Python.
this->trash_delete_nesting = tstate->trash_delete_nesting;
}

void PythonState::operator>>(PyThreadState *const tstate) G_NOEXCEPT
Expand Down Expand Up @@ -866,6 +871,8 @@ void PythonState::operator>>(PyThreadState *const tstate) G_NOEXCEPT
tstate->frame = this->_top_frame.relinquish_ownership();
tstate->recursion_depth = this->recursion_depth;
#endif
// All versions of Python.
tstate->trash_delete_nesting = this->trash_delete_nesting;
}

void PythonState::will_switch_from(PyThreadState *const origin_tstate) G_NOEXCEPT
Expand Down Expand Up @@ -933,6 +940,7 @@ const PythonState::OwnedFrame& PythonState::top_frame() const G_NOEXCEPT


using greenlet::StackState;
#ifdef GREENLET_USE_STDIO
#include <iostream>
using std::cerr;
using std::endl;
Expand All @@ -948,6 +956,7 @@ std::ostream& greenlet::operator<<(std::ostream& os, const StackState& s)
<< ")";
return os;
}
#endif

StackState::StackState(void* mark, StackState& current)
: _stack_start(nullptr),
Expand Down
185 changes: 185 additions & 0 deletions src/greenlet/tests/test_greenlet_trash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
"""
Tests for greenlets interacting with the CPython trash can API.
The CPython trash can API is not designed to be re-entered from a
single thread. But this can happen using greenlets, if something
during the object deallocation process switches greenlets, and this second
greenlet then causes the trash can to get entered again. Here, we do this
very explicitly, but in other cases (like gevent) it could be arbitrarily more
complicated: for example, a weakref callback might try to acquire a lock that's
already held by another greenlet; that would allow a greenlet switch to occur.
See https://github.com/gevent/gevent/issues/1909
This test is fragile and relies on details of the CPython
implementation (like most of the rest of this package):
- We enter the trashcan and deferred deallocation after
``_PyTrash_UNWIND_LEVEL`` calls. This constant, defined in
CPython's object.c, is generally 50. That's basically how many objects are required to
get us into the deferred deallocation situation.
- The test fails by hitting an ``assert()`` in object.c; if the
build didn't enable assert, then we don't catch this.
- If the test fails in that way, the interpreter crashes.
"""
from __future__ import print_function, absolute_import, division

import sys
import unittest



class TestTrashCanReEnter(unittest.TestCase):

@unittest.skipUnless(
sys.version_info[0] > 2,
"Python 2 tracks this slightly differently, so our test doesn't catch a problem there. "
)
def test_it(self):
# Try several times to trigger it, because it isn't 100%
# reliable.
for _ in range(10):
self.check_it()

def check_it(self): # pylint:disable=too-many-statements
import greenlet
from greenlet._greenlet import get_tstate_trash_delete_nesting # pylint:disable=no-name-in-module

main = greenlet.getcurrent()

assert get_tstate_trash_delete_nesting() == 0

# We expect to be in deferred deallocation after this many
# deallocations have occurred. TODO: I wish we had a better way to do
# this --- that was before get_tstate_trash_delete_nesting; perhaps
# we can use that API to do better?
TRASH_UNWIND_LEVEL = 50
# How many objects to put in a container; it's the container that
# queues objects for deferred deallocation.
OBJECTS_PER_CONTAINER = 500

class Dealloc: # define the class here because we alter class variables each time we run.
"""
An object with a ``__del__`` method. When it starts getting deallocated
from a deferred trash can run, it switches greenlets, allocates more objects
which then also go in the trash can. If we don't save state appropriately,
nesting gets out of order and we can crash the interpreter.
"""

#: Has our deallocation actually run and switched greenlets?
#: When it does, this will be set to the current greenlet. This should
#: be happening in the main greenlet, so we check that down below.
SPAWNED = False

#: Has the background greenlet run?
BG_RAN = False

BG_GLET = None

#: How many of these things have ever been allocated.
CREATED = 0

#: How many of these things have ever been deallocated.
DESTROYED = 0

#: How many were destroyed not in the main greenlet. There should always
#: be some.
#: If the test is broken or things change in the trashcan implementation,
#: this may not be correct.
DESTROYED_BG = 0

def __init__(self, sequence_number):
"""
:param sequence_number: The ordinal of this object during
one particular creation run. This is used to detect (guess, really)
when we have entered the trash can's deferred deallocation.
"""
self.i = sequence_number
Dealloc.CREATED += 1

def __del__(self):
if self.i == TRASH_UNWIND_LEVEL and not self.SPAWNED:
Dealloc.SPAWNED = greenlet.getcurrent()
other = Dealloc.BG_GLET = greenlet.greenlet(background_greenlet)
x = other.switch()
assert x == 42
# It's important that we don't switch back to the greenlet,
# we leave it hanging there in an incomplete state. But we don't let it
# get collected, either. If we complete it now, while we're still
# in the scope of the initial trash can, things work out and we
# don't see the problem. We need this greenlet to complete
# at some point in the future, after we've exited this trash can invocation.
del other
elif self.i == 40 and greenlet.getcurrent() is not main:
Dealloc.BG_RAN = True
try:
main.switch(42)
except greenlet.GreenletExit as ex:
# We expect this; all references to us go away
# while we're still running, and we need to finish deleting
# ourself.
Dealloc.BG_RAN = type(ex)
del ex

# Record the fact that we're dead last of all. This ensures that
# we actually get returned too.
Dealloc.DESTROYED += 1
if greenlet.getcurrent() is not main:
Dealloc.DESTROYED_BG += 1


def background_greenlet():
# We direct through a second function, instead of
# directly calling ``make_some()``, so that we have complete
# control over when these objects are destroyed: we need them
# to be destroyed in the context of the background greenlet
t = make_some()
del t # Triggere deletion.

def make_some():
t = ()
i = OBJECTS_PER_CONTAINER
while i:
# Nest the tuples; it's the recursion that gets us
# into trash.
t = (Dealloc(i), t)
i -= 1
return t


some = make_some()
self.assertEqual(Dealloc.CREATED, OBJECTS_PER_CONTAINER)
self.assertEqual(Dealloc.DESTROYED, 0)

# If we're going to crash, it should be on the following line.
# We only crash if ``assert()`` is enabled, of course.
del some

# For non-debug builds of CPython, we won't crash. The best we can do is check
# the nesting level explicitly.
self.assertEqual(0, get_tstate_trash_delete_nesting())

# Discard this, raising GreenletExit into where it is waiting.
Dealloc.BG_GLET = None
# The same nesting level maintains.
self.assertEqual(0, get_tstate_trash_delete_nesting())

# We definitely cleaned some up in the background
self.assertGreater(Dealloc.DESTROYED_BG, 0)

# Make sure all the cleanups happened.
self.assertIs(Dealloc.SPAWNED, main)
self.assertTrue(Dealloc.BG_RAN)
self.assertEqual(Dealloc.BG_RAN, greenlet.GreenletExit)
self.assertEqual(Dealloc.CREATED, Dealloc.DESTROYED )
self.assertEqual(Dealloc.CREATED, OBJECTS_PER_CONTAINER * 2)

import gc
gc.collect()


if __name__ == '__main__':
unittest.main()

0 comments on commit 3f67490

Please sign in to comment.