From 9c154857b9f15f540b8fde2844f04d66f91c9f02 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 4 Dec 2018 12:59:48 -0800 Subject: [PATCH 01/23] Factor out struct _pending_call. --- Include/internal/pycore_ceval_state.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 1717ec4f41c36b..e365b85ad91f4d 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -13,17 +13,19 @@ extern "C" { typedef int (*_Py_pending_call_func)(void *); +struct _pending_call { + _Py_pending_call_func func; + void *arg; + int flags; +}; + struct _pending_calls { int busy; PyThread_type_lock lock; /* Request for running pending calls. */ int32_t calls_to_do; #define NPENDINGCALLS 32 - struct _pending_call { - _Py_pending_call_func func; - void *arg; - int flags; - } calls[NPENDINGCALLS]; + struct _pending_call calls[NPENDINGCALLS]; int first; int last; }; From 124b3f6684cf23aa536604646fc6b41992b19568 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 4 Dec 2018 14:20:00 -0800 Subject: [PATCH 02/23] Switch to a linked list. --- Include/internal/pycore_ceval_state.h | 15 +++--- Python/ceval_gil.c | 69 ++++++++++++++------------- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index e365b85ad91f4d..98e7a0a17f787e 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -13,21 +13,24 @@ extern "C" { typedef int (*_Py_pending_call_func)(void *); +struct _pending_call; + struct _pending_call { _Py_pending_call_func func; void *arg; int flags; + struct _pending_call *next; }; +#define NPENDINGCALLS 32 + struct _pending_calls { int busy; PyThread_type_lock lock; - /* Request for running pending calls. */ - int32_t calls_to_do; -#define NPENDINGCALLS 32 - struct _pending_call calls[NPENDINGCALLS]; - int first; - int last; + /* The number of pending calls. */ + int32_t npending; + struct _pending_call *head; + struct _pending_call *tail; }; typedef enum { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 78f0217e5fe821..1a78523ed8a3dc 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -70,9 +70,9 @@ update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstat } if (_Py_IsMainThread()) { - int32_t calls_to_do = _Py_atomic_load_int32_relaxed( - &_PyRuntime.ceval.pending_mainthread.calls_to_do); - if (calls_to_do) { + int32_t npending = _Py_atomic_load_int32_relaxed( + &_PyRuntime.ceval.pending_mainthread.npending); + if (npending) { _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); } if (_Py_ThreadCanHandleSignals(interp)) { @@ -665,34 +665,29 @@ static int _push_pending_call(struct _pending_calls *pending, _Py_pending_call_func func, void *arg, int flags) { - int i = pending->last; - int j = (i + 1) % NPENDINGCALLS; - if (j == pending->first) { + if (pending->npending == NPENDINGCALLS) { return -1; /* Queue full */ } - pending->calls[i].func = func; - pending->calls[i].arg = arg; - pending->calls[i].flags = flags; - pending->last = j; - assert(pending->calls_to_do < NPENDINGCALLS); - pending->calls_to_do++; - return 0; -} -static int -_next_pending_call(struct _pending_calls *pending, - int (**func)(void *), void **arg, int *flags) -{ - int i = pending->first; - if (i == pending->last) { - /* Queue empty */ - assert(pending->calls[i].func == NULL); + struct _pending_call *call = PyMem_RawMalloc(sizeof(struct _pending_call)); + if (call == NULL) { return -1; } - *func = pending->calls[i].func; - *arg = pending->calls[i].arg; - *flags = pending->calls[i].flags; - return i; + call->func = func; + call->arg = arg; + call->flags = flags; + call->next = NULL; + + if (pending->head == NULL) { + pending->head = call; + } + else { + pending->tail->next = call; + } + pending->tail = call; + assert(pending->npending < NPENDINGCALLS); + pending->npending++; + return 0; } /* Pop one item off the queue while holding the lock. */ @@ -700,13 +695,23 @@ static void _pop_pending_call(struct _pending_calls *pending, int (**func)(void *), void **arg, int *flags) { - int i = _next_pending_call(pending, func, arg, flags); - if (i >= 0) { - pending->calls[i] = (struct _pending_call){0}; - pending->first = (i + 1) % NPENDINGCALLS; - assert(pending->calls_to_do > 0); - pending->calls_to_do--; + struct _pending_call *call = pending->head; + if (call == NULL) { + /* Queue empty */ + return; } + + pending->head = call->next; + if (pending->tail == call) { + pending->tail = NULL; + } + pending->npending--; + + *func = call->func; + *arg = call->arg; + *flags = call->flags; + + PyMem_RawFree(call); } /* This implementation is thread-safe. It allows From c27ef64a961331c87e499af2b2b21b3df1ebb64f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 4 Dec 2018 14:21:00 -0800 Subject: [PATCH 03/23] Add a TODO. --- Python/ceval_gil.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 1a78523ed8a3dc..edbd6aa1883bfc 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -665,6 +665,7 @@ static int _push_pending_call(struct _pending_calls *pending, _Py_pending_call_func func, void *arg, int flags) { + // XXX Drop the limit? if (pending->npending == NPENDINGCALLS) { return -1; /* Queue full */ } From 25a2e07adc7860b42b832e6e091e7d0ec6609587 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 11 Dec 2018 15:12:13 -0700 Subject: [PATCH 04/23] Add a NEWS entry. --- .../2018-12-11-15-12-03.gh-issue-79647.5xu737.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-12-11-15-12-03.gh-issue-79647.5xu737.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-12-11-15-12-03.gh-issue-79647.5xu737.rst b/Misc/NEWS.d/next/Core and Builtins/2018-12-11-15-12-03.gh-issue-79647.5xu737.rst new file mode 100644 index 00000000000000..43ae1bf3ce1344 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-12-11-15-12-03.gh-issue-79647.5xu737.rst @@ -0,0 +1 @@ +Simply the ceval pending calls list by using a linked list. From c2178a6736fab4fbde6dedea2d8b63645c0bbfc0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 25 Jan 2019 12:34:57 -0700 Subject: [PATCH 05/23] Add a note about why we kept NPENDINGCALLS. --- Include/internal/pycore_ceval_state.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 98e7a0a17f787e..a945a7dff1c63e 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -22,6 +22,9 @@ struct _pending_call { struct _pending_call *next; }; +// We technically do not need this limit around any longer since we +// moved from a circular queue to a linked list. However, having a +// size limit is still a good idea so we keep the one we already had. #define NPENDINGCALLS 32 struct _pending_calls { From c8976873e29f8d48600ec37fa10c552202b6b116 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 11 Oct 2023 03:32:40 -0600 Subject: [PATCH 06/23] Preallocate the first 32 pending call entries. --- Include/internal/pycore_ceval_state.h | 7 +++++ Python/ceval_gil.c | 38 ++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index a945a7dff1c63e..93b5adc945fa53 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -34,6 +34,13 @@ struct _pending_calls { int32_t npending; struct _pending_call *head; struct _pending_call *tail; + + // We have a set of pre-allocated pending calls, to avoid + // possible allocation failures (at least when the number + // of pending calls is small). + int _first; + int _last; + struct _pending_call _preallocated[NPENDINGCALLS]; }; typedef enum { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index edbd6aa1883bfc..72e729f7a7806b 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -670,15 +670,29 @@ _push_pending_call(struct _pending_calls *pending, return -1; /* Queue full */ } - struct _pending_call *call = PyMem_RawMalloc(sizeof(struct _pending_call)); - if (call == NULL) { - return -1; + // Allocate for the pending call. + struct _pending_call *call; + if (pending->npending < NPENDINGCALLS) { + int i = pending->_last; + int j = (i + 1) % NPENDINGCALLS; + assert(j != pending->_first); + call = &pending->_preallocated[i]; + pending->_last = j; + } + else { + call = PyMem_RawMalloc(sizeof(struct _pending_call)); + if (call == NULL) { + return -1; + } } + + // Initialize the data. call->func = func; call->arg = arg; call->flags = flags; call->next = NULL; + // Add the call to the list. if (pending->head == NULL) { pending->head = call; } @@ -688,6 +702,7 @@ _push_pending_call(struct _pending_calls *pending, pending->tail = call; assert(pending->npending < NPENDINGCALLS); pending->npending++; + return 0; } @@ -699,20 +714,35 @@ _pop_pending_call(struct _pending_calls *pending, struct _pending_call *call = pending->head; if (call == NULL) { /* Queue empty */ + assert(pending->npending == 0); + assert(pending->_first == pending->_last); return; } + assert(pending->npending > 0); + // Remove the next one from the list. pending->head = call->next; if (pending->tail == call) { pending->tail = NULL; } pending->npending--; + // Copy its data. *func = call->func; *arg = call->arg; *flags = call->flags; - PyMem_RawFree(call); + // Deallocate the list entry. + if (pending->npending < NPENDINGCALLS) { + assert(pending->_first != pending->_last); + int i = pending->_first; + pending->_preallocated[i] = (struct _pending_call){0}; + pending->_first = (i + 1) % NPENDINGCALLS; + } + else { + assert(pending->_first == pending->_last); + PyMem_RawFree(call); + } } /* This implementation is thread-safe. It allows From 9bade305afb105c58f051ffffc922b59ba54838a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 05:55:57 -0600 Subject: [PATCH 07/23] Fix a boundary condition. --- Include/internal/pycore_ceval_state.h | 2 +- Python/ceval_gil.c | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 93b5adc945fa53..3954865d50164d 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -39,7 +39,7 @@ struct _pending_calls { // possible allocation failures (at least when the number // of pending calls is small). int _first; - int _last; + int _next; struct _pending_call _preallocated[NPENDINGCALLS]; }; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 72e729f7a7806b..e7a813f47f78e2 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -672,12 +672,20 @@ _push_pending_call(struct _pending_calls *pending, // Allocate for the pending call. struct _pending_call *call; - if (pending->npending < NPENDINGCALLS) { - int i = pending->_last; - int j = (i + 1) % NPENDINGCALLS; - assert(j != pending->_first); + if (pending->npending == 0) { + assert(pending->_next == pending->_first); + assert(pending->head == NULL); + assert(pending->tail == NULL); + call = &pending->_preallocated[0]; + pending->_first = 0; + pending->_next = 1; + } + else if (pending->npending < NPENDINGCALLS) { + int i = pending->_next; + int next = (i + 1) % NPENDINGCALLS; + assert(i != pending->_first); call = &pending->_preallocated[i]; - pending->_last = j; + pending->_next = next; } else { call = PyMem_RawMalloc(sizeof(struct _pending_call)); @@ -715,7 +723,7 @@ _pop_pending_call(struct _pending_calls *pending, if (call == NULL) { /* Queue empty */ assert(pending->npending == 0); - assert(pending->_first == pending->_last); + assert(pending->_first == pending->_next); return; } assert(pending->npending > 0); @@ -734,13 +742,14 @@ _pop_pending_call(struct _pending_calls *pending, // Deallocate the list entry. if (pending->npending < NPENDINGCALLS) { - assert(pending->_first != pending->_last); + assert(pending->_first != pending->_next + || (pending->npending + 1) == NPENDINGCALLS); int i = pending->_first; pending->_preallocated[i] = (struct _pending_call){0}; pending->_first = (i + 1) % NPENDINGCALLS; } else { - assert(pending->_first == pending->_last); + assert(pending->_first == pending->_next); PyMem_RawFree(call); } } From 8c776bd5c81b94a07eea4d8b5deac69dd05ba0fe Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 03:39:24 -0600 Subject: [PATCH 08/23] Add a comment. --- Modules/_testcapimodule.c | 1 + Modules/_testinternalcapi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index ce3d0b1b1b005c..04164933fc0d02 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -819,6 +819,7 @@ pending_threadfunc(PyObject *self, PyObject *arg) Py_DECREF(callable); /* unsuccessful add, destroy the extra reference */ Py_RETURN_FALSE; } + /* The callable is decref'ed in _pending_callback() above. */ Py_RETURN_TRUE; } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 05bac0936b155d..9107dc1059818a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1049,6 +1049,7 @@ pending_threadfunc(PyObject *self, PyObject *args, PyObject *kwargs) } while (r < 0); } + /* The callable is decref'ed in _pending_callback() above. */ Py_RETURN_TRUE; } From bae74fea8b1360268edec3196d055304aefa3a23 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 03:42:52 -0600 Subject: [PATCH 09/23] Move a test to the right place. --- Lib/test/test_capi/test_misc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5ece213e7b2363..fe26d36223d29b 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1050,6 +1050,11 @@ def test_sys_setobject(self): setobject(b'\xff', value) # CRASHES setobject(NULL, value) + def test_gen_get_code(self): + def genf(): yield + gen = genf() + self.assertEqual(_testcapi.gen_get_code(gen), gen.gi_code) + @requires_limited_api class TestHeapTypeRelative(unittest.TestCase): @@ -1289,11 +1294,6 @@ def test_pendingcalls_non_threaded(self): self.pendingcalls_submit(l, n) self.pendingcalls_wait(l, n) - def test_gen_get_code(self): - def genf(): yield - gen = genf() - self.assertEqual(_testcapi.gen_get_code(gen), gen.gi_code) - class PendingTask(types.SimpleNamespace): _add_pending = _testinternalcapi.pending_threadfunc From fcb268c7ef2a022372a150cd71da607508d39676 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 03:49:02 -0600 Subject: [PATCH 10/23] Distinguish tests for main-only pending calls. --- Lib/test/test_capi/test_misc.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe26d36223d29b..e15f47dd03f3bb 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1217,7 +1217,7 @@ class TestPendingCalls(unittest.TestCase): # about when pending calls get run. This is especially relevant # here for creating deterministic tests. - def pendingcalls_submit(self, l, n): + def main_pendingcalls_submit(self, l, n): def callback(): #this function can be interrupted by thread switching so let's #use an atomic operation @@ -1232,7 +1232,7 @@ def callback(): if _testcapi._pending_threadfunc(callback): break - def pendingcalls_wait(self, l, n, context = None): + def main_pendingcalls_wait(self, l, n, context = None): #now, stick around until l[0] has grown to 10 count = 0 while len(l) != n: @@ -1252,7 +1252,7 @@ def pendingcalls_wait(self, l, n, context = None): print("(%i)"%(len(l),)) @threading_helper.requires_working_threading() - def test_pendingcalls_threaded(self): + def test_main_pendingcalls_threaded(self): #do every callback on a separate thread n = 32 #total callbacks @@ -1266,15 +1266,15 @@ class foo(object):pass context.lock = threading.Lock() context.event = threading.Event() - threads = [threading.Thread(target=self.pendingcalls_thread, + threads = [threading.Thread(target=self.main_pendingcalls_thread, args=(context,)) for i in range(context.nThreads)] with threading_helper.start_threads(threads): - self.pendingcalls_wait(context.l, n, context) + self.main_pendingcalls_wait(context.l, n, context) - def pendingcalls_thread(self, context): + def main_pendingcalls_thread(self, context): try: - self.pendingcalls_submit(context.l, context.n) + self.main_pendingcalls_submit(context.l, context.n) finally: with context.lock: context.nFinished += 1 @@ -1284,15 +1284,15 @@ def pendingcalls_thread(self, context): if nFinished == context.nThreads: context.event.set() - def test_pendingcalls_non_threaded(self): + def test_main_pendingcalls_non_threaded(self): #again, just using the main thread, likely they will all be dispatched at #once. It is ok to ask for too many, because we loop until we find a slot. #the loop can be interrupted to dispatch. #there are only 32 dispatch slots, so we go for twice that! l = [] n = 64 - self.pendingcalls_submit(l, n) - self.pendingcalls_wait(l, n) + self.main_pendingcalls_submit(l, n) + self.main_pendingcalls_wait(l, n) class PendingTask(types.SimpleNamespace): From 58b6b4ab846ea2c720972f0ca9f6735e27aa4e7c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 04:41:44 -0600 Subject: [PATCH 11/23] Add tests. --- Lib/test/test_capi/test_misc.py | 64 +++++++++++++++++++++++++++++---- Modules/_testcapimodule.c | 54 +++++++++++++++++++++------- Modules/_testinternalcapi.c | 54 ++++++++++++++++++---------- 3 files changed, 135 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index e15f47dd03f3bb..352f1742e0c16d 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1232,12 +1232,27 @@ def callback(): if _testcapi._pending_threadfunc(callback): break - def main_pendingcalls_wait(self, l, n, context = None): + def pendingcalls_submit(self, l, n, *, main=True, ensure=False): + def callback(): + #this function can be interrupted by thread switching so let's + #use an atomic operation + l.append(None) + + if main: + return _testcapi._pending_threadfunc(callback, n, + blocking=False, + ensure_added=ensure) + else: + return _testinternalcapi.pending_threadfunc(callback, n, + blocking=False, + ensure_added=ensure) + + def pendingcalls_wait(self, l, numadded, context = None): #now, stick around until l[0] has grown to 10 count = 0 - while len(l) != n: + while len(l) != numadded: #this busy loop is where we expect to be interrupted to - #run our callbacks. Note that callbacks are only run on the + #run our callbacks. Note that some callbacks are only run on the #main thread if False and support.verbose: print("(%i)"%(len(l),),) @@ -1247,7 +1262,7 @@ def main_pendingcalls_wait(self, l, n, context = None): continue count += 1 self.assertTrue(count < 10000, - "timeout waiting for %i callbacks, got %i"%(n, len(l))) + "timeout waiting for %i callbacks, got %i"%(numadded, len(l))) if False and support.verbose: print("(%i)"%(len(l),)) @@ -1270,7 +1285,7 @@ class foo(object):pass args=(context,)) for i in range(context.nThreads)] with threading_helper.start_threads(threads): - self.main_pendingcalls_wait(context.l, n, context) + self.pendingcalls_wait(context.l, n, context) def main_pendingcalls_thread(self, context): try: @@ -1292,7 +1307,44 @@ def test_main_pendingcalls_non_threaded(self): l = [] n = 64 self.main_pendingcalls_submit(l, n) - self.main_pendingcalls_wait(l, n) + self.pendingcalls_wait(l, n) + + def test_max_pending(self): + with self.subTest('main-only'): + maxpending = 32 + + l = [] + added = self.pendingcalls_submit(l, 1, main=True) + self.pendingcalls_wait(l, added) + self.assertEqual(added, 1) + + l = [] + added = self.pendingcalls_submit(l, maxpending, main=True) + self.pendingcalls_wait(l, added) + self.assertEqual(added, maxpending) + + l = [] + added = self.pendingcalls_submit(l, maxpending+1, main=True) + self.pendingcalls_wait(l, added) + self.assertEqual(added, maxpending) + + with self.subTest('not main-only'): + maxpending = 32 + + l = [] + added = self.pendingcalls_submit(l, 1, main=False) + self.pendingcalls_wait(l, added) + self.assertEqual(added, 1) + + l = [] + added = self.pendingcalls_submit(l, maxpending, main=False) + self.pendingcalls_wait(l, added) + self.assertEqual(added, maxpending) + + l = [] + added = self.pendingcalls_submit(l, maxpending+1, main=False) + self.pendingcalls_wait(l, added) + self.assertEqual(added, maxpending) class PendingTask(types.SimpleNamespace): diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 04164933fc0d02..0eb98be97d77cf 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -801,26 +801,55 @@ static int _pending_callback(void *arg) * run from any python thread. */ static PyObject * -pending_threadfunc(PyObject *self, PyObject *arg) +pending_threadfunc(PyObject *self, PyObject *arg, PyObject *kwargs) { + static char *kwlist[] = {"callback", "num", + "blocking", "ensure_added", NULL}; PyObject *callable; - int r; - if (PyArg_ParseTuple(arg, "O", &callable) == 0) + unsigned int num = 1; + int blocking = 0; + int ensure_added = 0; + if (!PyArg_ParseTupleAndKeywords(arg, kwargs, + "O|I$pp:_pending_threadfunc", kwlist, + &callable, &num, &blocking, &ensure_added)) + { return NULL; + } /* create the reference for the callbackwhile we hold the lock */ - Py_INCREF(callable); + for (unsigned int i = 0; i < num; i++) { + Py_INCREF(callable); + } - Py_BEGIN_ALLOW_THREADS - r = Py_AddPendingCall(&_pending_callback, callable); - Py_END_ALLOW_THREADS + PyThreadState *save_tstate; + if (!blocking) { + save_tstate = PyEval_SaveThread(); + } + + unsigned int num_added = 0; + for (; num_added < num; num_added++) { + if (ensure_added) { + int r; + do { + r = Py_AddPendingCall(&_pending_callback, callable); + } while (r < 0); + } + else { + if (Py_AddPendingCall(&_pending_callback, callable) < 0) { + break; + } + } + } + + if (!blocking) { + PyEval_RestoreThread(save_tstate); + } - if (r<0) { + for (unsigned int i = num_added; i < num; i++) { Py_DECREF(callable); /* unsuccessful add, destroy the extra reference */ - Py_RETURN_FALSE; } - /* The callable is decref'ed in _pending_callback() above. */ - Py_RETURN_TRUE; + /* The callable is decref'ed above in each added _pending_callback(). */ + return PyLong_FromUnsignedLong((unsigned long)num_added); } /* Test PyOS_string_to_double. */ @@ -3263,7 +3292,8 @@ static PyMethodDef TestMethods[] = { {"_spawn_pthread_waiter", spawn_pthread_waiter, METH_NOARGS}, {"_end_spawned_pthread", end_spawned_pthread, METH_NOARGS}, #endif - {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, + {"_pending_threadfunc", (PyCFunction)pending_threadfunc, + METH_VARARGS|METH_KEYWORDS}, #ifdef HAVE_GETTIMEOFDAY {"profile_int", profile_int, METH_NOARGS}, #endif diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 9107dc1059818a..1d7da1bfa603ec 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1019,38 +1019,54 @@ static PyObject * pending_threadfunc(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *callable; + unsigned int num = 1; + int blocking = 0; int ensure_added = 0; - static char *kwlist[] = {"", "ensure_added", NULL}; + static char *kwlist[] = {"callback", "num", + "blocking", "ensure_added", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O|$p:pending_threadfunc", kwlist, - &callable, &ensure_added)) + "O|I$pp:pending_threadfunc", kwlist, + &callable, &num, &blocking, &ensure_added)) { return NULL; } PyInterpreterState *interp = _PyInterpreterState_GET(); /* create the reference for the callbackwhile we hold the lock */ - Py_INCREF(callable); + for (unsigned int i = 0; i < num; i++) { + Py_INCREF(callable); + } - int r; - Py_BEGIN_ALLOW_THREADS - r = _PyEval_AddPendingCall(interp, &_pending_callback, callable, 0); - Py_END_ALLOW_THREADS - if (r < 0) { - /* unsuccessful add */ - if (!ensure_added) { - Py_DECREF(callable); - Py_RETURN_FALSE; + PyThreadState *save_tstate; + if (!blocking) { + save_tstate = PyEval_SaveThread(); + } + + unsigned int num_added = 0; + for (; num_added < num; num_added++) { + if (ensure_added) { + int r; + do { + r = _PyEval_AddPendingCall(interp, &_pending_callback, callable, 0); + } while (r < 0); + } + else { + if (_PyEval_AddPendingCall(interp, &_pending_callback, callable, 0) < 0) { + break; + } } - do { - Py_BEGIN_ALLOW_THREADS - r = _PyEval_AddPendingCall(interp, &_pending_callback, callable, 0); - Py_END_ALLOW_THREADS - } while (r < 0); + } + + if (!blocking) { + PyEval_RestoreThread(save_tstate); + } + + for (unsigned int i = num_added; i < num; i++) { + Py_DECREF(callable); /* unsuccessful add, destroy the extra reference */ } /* The callable is decref'ed in _pending_callback() above. */ - Py_RETURN_TRUE; + return PyLong_FromUnsignedLong((unsigned long)num_added); } From 1b223e989ca8862ce8c3dbf9f7db9092e4ae987a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 02:37:03 -0600 Subject: [PATCH 12/23] Add _pending_calls.max. --- Include/internal/pycore_ceval_state.h | 1 + Include/internal/pycore_runtime_init.h | 6 ++++++ Python/ceval_gil.c | 12 ++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 3954865d50164d..bfaa4eed8e948b 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -32,6 +32,7 @@ struct _pending_calls { PyThread_type_lock lock; /* The number of pending calls. */ int32_t npending; + int32_t max; struct _pending_call *head; struct _pending_call *tail; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 574a3c1a9db66c..fbe630a091f3fc 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -101,6 +101,9 @@ extern PyTypeObject _PyExc_MemoryError; .parser = _parser_runtime_state_INIT, \ .ceval = { \ .perf = _PyEval_RUNTIME_PERF_INIT, \ + .pending_mainthread = { \ + .max = NPENDINGCALLS, \ + }, \ }, \ .gilstate = { \ .check_enabled = 1, \ @@ -149,6 +152,9 @@ extern PyTypeObject _PyExc_MemoryError; .obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \ .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ + .pending = { \ + .max = NPENDINGCALLS, \ + }, \ }, \ .gc = { \ .enabled = 1, \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index e7a813f47f78e2..e53ba64ff106ae 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -666,8 +666,11 @@ _push_pending_call(struct _pending_calls *pending, _Py_pending_call_func func, void *arg, int flags) { // XXX Drop the limit? - if (pending->npending == NPENDINGCALLS) { - return -1; /* Queue full */ + if (pending->max >= 0) { + if (pending->npending == pending->max) { + return -1; /* Queue full */ + } + assert(pending->npending < pending->max); } // Allocate for the pending call. @@ -708,7 +711,6 @@ _push_pending_call(struct _pending_calls *pending, pending->tail->next = call; } pending->tail = call; - assert(pending->npending < NPENDINGCALLS); pending->npending++; return 0; @@ -812,8 +814,10 @@ handle_signals(PyThreadState *tstate) static int _make_pending_calls(struct _pending_calls *pending) { + assert(pending->max > 0); // XXX Drop this. /* perform a bounded number of calls, in case of recursion */ - for (int i=0; imax; i++) { _Py_pending_call_func func = NULL; void *arg = NULL; int flags = 0; From 273e775fb63a1c6dc63118ff5836e4be13bfc225 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 03:00:13 -0600 Subject: [PATCH 13/23] Add _pending_calls.maxloop. --- Include/internal/pycore_ceval_state.h | 3 +++ Include/internal/pycore_runtime_init.h | 2 ++ Python/ceval_gil.c | 13 +++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index bfaa4eed8e948b..598d0448e61c13 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -33,6 +33,9 @@ struct _pending_calls { /* The number of pending calls. */ int32_t npending; int32_t max; + /* How many pending calls are made at a time, at most. */ + int32_t maxloop; + /* The linked list of pending calls. */ struct _pending_call *head; struct _pending_call *tail; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index fbe630a091f3fc..8cae1509a966f1 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -103,6 +103,7 @@ extern PyTypeObject _PyExc_MemoryError; .perf = _PyEval_RUNTIME_PERF_INIT, \ .pending_mainthread = { \ .max = NPENDINGCALLS, \ + .maxloop = NPENDINGCALLS, \ }, \ }, \ .gilstate = { \ @@ -154,6 +155,7 @@ extern PyTypeObject _PyExc_MemoryError; .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ .pending = { \ .max = NPENDINGCALLS, \ + .maxloop = NPENDINGCALLS, \ }, \ }, \ .gc = { \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index e53ba64ff106ae..d1721f153359b2 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -816,8 +816,7 @@ _make_pending_calls(struct _pending_calls *pending) { assert(pending->max > 0); // XXX Drop this. /* perform a bounded number of calls, in case of recursion */ - // XXX Loop over a fixed maximum and set eval breaker if not empty? - for (int i=0; imax; i++) { + for (int i=0; imaxloop; i++) { _Py_pending_call_func func = NULL; void *arg = NULL; int flags = 0; @@ -829,6 +828,7 @@ _make_pending_calls(struct _pending_calls *pending) /* having released the lock, perform the callback */ if (func == NULL) { + // There are no pending calls left. break; } int res = func(arg); @@ -836,6 +836,7 @@ _make_pending_calls(struct _pending_calls *pending) PyMem_RawFree(arg); } if (res != 0) { + assert(PyErr_Occurred()); return -1; } } @@ -878,6 +879,10 @@ make_pending_calls(PyInterpreterState *interp) SIGNAL_PENDING_CALLS(interp); return -1; } + if (pending->npending > 0) { + /* We hit pending->maxloop. */ + SIGNAL_PENDING_CALLS(interp); + } if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) { if (_make_pending_calls(pending_main) != 0) { @@ -886,6 +891,10 @@ make_pending_calls(PyInterpreterState *interp) SIGNAL_PENDING_CALLS(interp); return -1; } + if (pending_main->npending > 0) { + /* We hit pending_main->maxloop. */ + SIGNAL_PENDING_CALLS(interp); + } } pending->busy = 0; From 21e15518ff5b12f79fdd9a431fc8b1e63c74f4a7 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 12 Oct 2023 03:07:00 -0600 Subject: [PATCH 14/23] NPENDINGCALLS -> NPENDINGCALLSARRAY --- Include/internal/pycore_ceval_state.h | 4 ++-- Include/internal/pycore_runtime_init.h | 8 ++++---- Python/ceval_gil.c | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 598d0448e61c13..261a619a5c3dd8 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -25,7 +25,7 @@ struct _pending_call { // We technically do not need this limit around any longer since we // moved from a circular queue to a linked list. However, having a // size limit is still a good idea so we keep the one we already had. -#define NPENDINGCALLS 32 +#define NPENDINGCALLSARRAY 32 struct _pending_calls { int busy; @@ -44,7 +44,7 @@ struct _pending_calls { // of pending calls is small). int _first; int _next; - struct _pending_call _preallocated[NPENDINGCALLS]; + struct _pending_call _preallocated[NPENDINGCALLSARRAY]; }; typedef enum { diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 8cae1509a966f1..9d3866fa1b1024 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -102,8 +102,8 @@ extern PyTypeObject _PyExc_MemoryError; .ceval = { \ .perf = _PyEval_RUNTIME_PERF_INIT, \ .pending_mainthread = { \ - .max = NPENDINGCALLS, \ - .maxloop = NPENDINGCALLS, \ + .max = NPENDINGCALLSARRAY, \ + .maxloop = NPENDINGCALLSARRAY, \ }, \ }, \ .gilstate = { \ @@ -154,8 +154,8 @@ extern PyTypeObject _PyExc_MemoryError; .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ .pending = { \ - .max = NPENDINGCALLS, \ - .maxloop = NPENDINGCALLS, \ + .max = NPENDINGCALLSARRAY, \ + .maxloop = NPENDINGCALLSARRAY, \ }, \ }, \ .gc = { \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index d1721f153359b2..380f41fd7e521c 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -683,9 +683,9 @@ _push_pending_call(struct _pending_calls *pending, pending->_first = 0; pending->_next = 1; } - else if (pending->npending < NPENDINGCALLS) { + else if (pending->npending < NPENDINGCALLSARRAY) { int i = pending->_next; - int next = (i + 1) % NPENDINGCALLS; + int next = (i + 1) % NPENDINGCALLSARRAY; assert(i != pending->_first); call = &pending->_preallocated[i]; pending->_next = next; @@ -743,12 +743,12 @@ _pop_pending_call(struct _pending_calls *pending, *flags = call->flags; // Deallocate the list entry. - if (pending->npending < NPENDINGCALLS) { + if (pending->npending < NPENDINGCALLSARRAY) { assert(pending->_first != pending->_next - || (pending->npending + 1) == NPENDINGCALLS); + || (pending->npending + 1) == NPENDINGCALLSARRAY); int i = pending->_first; pending->_preallocated[i] = (struct _pending_call){0}; - pending->_first = (i + 1) % NPENDINGCALLS; + pending->_first = (i + 1) % NPENDINGCALLSARRAY; } else { assert(pending->_first == pending->_next); From ff77df7652bf160ab3a90265f2d32bc55869e34f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 13 Oct 2023 05:00:19 -0600 Subject: [PATCH 15/23] Fix deallocation. --- Include/internal/pycore_ceval_state.h | 1 + Python/ceval_gil.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 261a619a5c3dd8..08513946497392 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -19,6 +19,7 @@ struct _pending_call { _Py_pending_call_func func; void *arg; int flags; + int from_array; struct _pending_call *next; }; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 380f41fd7e521c..d1f69f4618836e 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -682,6 +682,7 @@ _push_pending_call(struct _pending_calls *pending, call = &pending->_preallocated[0]; pending->_first = 0; pending->_next = 1; + call->from_array = 1; } else if (pending->npending < NPENDINGCALLSARRAY) { int i = pending->_next; @@ -689,12 +690,14 @@ _push_pending_call(struct _pending_calls *pending, assert(i != pending->_first); call = &pending->_preallocated[i]; pending->_next = next; + call->from_array = 1; } else { call = PyMem_RawMalloc(sizeof(struct _pending_call)); if (call == NULL) { return -1; } + call->from_array = 0; } // Initialize the data. @@ -743,15 +746,12 @@ _pop_pending_call(struct _pending_calls *pending, *flags = call->flags; // Deallocate the list entry. - if (pending->npending < NPENDINGCALLSARRAY) { - assert(pending->_first != pending->_next - || (pending->npending + 1) == NPENDINGCALLSARRAY); + if (call->from_array) { int i = pending->_first; pending->_preallocated[i] = (struct _pending_call){0}; pending->_first = (i + 1) % NPENDINGCALLSARRAY; } else { - assert(pending->_first == pending->_next); PyMem_RawFree(call); } } From 2ba10ab5a9452b4b3d8c750cc16161e5d31177fa Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 13 Oct 2023 05:00:44 -0600 Subject: [PATCH 16/23] Bunp the limit to 1000. --- Include/internal/pycore_ceval_state.h | 10 ++++++++-- Include/internal/pycore_runtime_init.h | 4 ++-- Lib/test/test_capi/test_misc.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 08513946497392..1002406597c9ae 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -23,10 +23,16 @@ struct _pending_call { struct _pending_call *next; }; +// Using an array for the first pending calls gives us some +// extra stability in the case of signals. +// We also use the value as the max and loop max for the main thread. +#define NPENDINGCALLSARRAY 32 // We technically do not need this limit around any longer since we // moved from a circular queue to a linked list. However, having a -// size limit is still a good idea so we keep the one we already had. -#define NPENDINGCALLSARRAY 32 +// size limit is still a good idea for now to reduce +// the possible impact of bugs. +#define MAXPENDINGCALLS 1000 +#define MAXPENDINGCALLSLOOP 100 struct _pending_calls { int busy; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 9d3866fa1b1024..69fab09b95200e 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -154,8 +154,8 @@ extern PyTypeObject _PyExc_MemoryError; .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ .pending = { \ - .max = NPENDINGCALLSARRAY, \ - .maxloop = NPENDINGCALLSARRAY, \ + .max = MAXPENDINGCALLS, \ + .maxloop = MAXPENDINGCALLSLOOP, \ }, \ }, \ .gc = { \ diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 352f1742e0c16d..03ec97714480d2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1329,7 +1329,7 @@ def test_max_pending(self): self.assertEqual(added, maxpending) with self.subTest('not main-only'): - maxpending = 32 + maxpending = 1000 l = [] added = self.pendingcalls_submit(l, 1, main=False) From a5394fa1d7fc4547bae79bc51f987519d9f22a23 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 13 Oct 2023 05:07:26 -0600 Subject: [PATCH 17/23] Drop the limit on the number of pending calls for each interpreter. --- Include/internal/pycore_ceval_state.h | 10 +++++----- Lib/test/test_capi/test_misc.py | 12 ++++-------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 1002406597c9ae..8903656ac8ec67 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -27,11 +27,11 @@ struct _pending_call { // extra stability in the case of signals. // We also use the value as the max and loop max for the main thread. #define NPENDINGCALLSARRAY 32 -// We technically do not need this limit around any longer since we -// moved from a circular queue to a linked list. However, having a -// size limit is still a good idea for now to reduce -// the possible impact of bugs. -#define MAXPENDINGCALLS 1000 + +// We effectively drop the limit for per-interpreter pending calls. +#define MAXPENDINGCALLS INT32_MAX +// We don't want a flood of pending calls to interrupt any one thread +// for too long, so we keep a limit on the number handled per pass. #define MAXPENDINGCALLSLOOP 100 struct _pending_calls { diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 03ec97714480d2..90fa058ec08022 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1329,7 +1329,8 @@ def test_max_pending(self): self.assertEqual(added, maxpending) with self.subTest('not main-only'): - maxpending = 1000 + # Per-interpreter pending calls do not have a limit + # on how many may be pending at a time. l = [] added = self.pendingcalls_submit(l, 1, main=False) @@ -1337,14 +1338,9 @@ def test_max_pending(self): self.assertEqual(added, 1) l = [] - added = self.pendingcalls_submit(l, maxpending, main=False) + added = self.pendingcalls_submit(l, 1000, main=False) self.pendingcalls_wait(l, added) - self.assertEqual(added, maxpending) - - l = [] - added = self.pendingcalls_submit(l, maxpending+1, main=False) - self.pendingcalls_wait(l, added) - self.assertEqual(added, maxpending) + self.assertEqual(added, 1000) class PendingTask(types.SimpleNamespace): From 2f748486cbed09387c06effbca41e4ea800b5b75 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 24 Apr 2024 18:19:13 -0600 Subject: [PATCH 18/23] Make sure we make *all* pending calls when finishing. --- Python/ceval_gil.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index dc456ca303f680..2e62619c96ac4b 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -971,12 +971,31 @@ _Py_FinishPendingCalls(PyThreadState *tstate) assert(PyGILState_Check()); assert(_PyThreadState_CheckConsistency(tstate)); - if (make_pending_calls(tstate) < 0) { - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyErr_BadInternalCall(); - _PyErr_ChainExceptions1(exc); - _PyErr_Print(tstate); - } + struct _pending_calls *pending = &tstate->interp->ceval.pending; + struct _pending_calls *pending_main = + _Py_IsMainThread() && _Py_IsMainInterpreter(tstate->interp) + ? &_PyRuntime.ceval.pending_mainthread + : NULL; + /* make_pending_calls() may return early without making all pending + calls, so we keep trying until we're actually done. */ + int32_t npending = INT32_MAX; + int32_t npending_prev = -1; + do { + assert(npending_prev < 0 || npending_prev > npending); + npending_prev = npending; + + if (make_pending_calls(tstate) < 0) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyErr_BadInternalCall(); + _PyErr_ChainExceptions1(exc); + _PyErr_Print(tstate); + } + + npending = _Py_atomic_load_int32_relaxed(&pending->npending); + if (pending_main != NULL) { + npending += _Py_atomic_load_int32_relaxed(&pending_main->npending); + } + } while (npending > 0); } int From 5f2404ff4603a27f5cd8a8661fcd3860099dbfa3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 24 Apr 2024 18:49:22 -0600 Subject: [PATCH 19/23] Group the pending calls code more closely. --- Python/ceval_gil.c | 58 ++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 2e62619c96ac4b..eec801206f9d3c 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -624,6 +624,34 @@ PyEval_RestoreThread(PyThreadState *tstate) } +void +_PyEval_SignalReceived(void) +{ + _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_SIGNALS_PENDING_BIT); +} + + +#ifndef Py_GIL_DISABLED +static void +signal_active_thread(PyInterpreterState *interp, uintptr_t bit) +{ + struct _gil_runtime_state *gil = interp->ceval.gil; + + // If a thread from the targeted interpreter is holding the GIL, signal + // that thread. Otherwise, the next thread to run from the targeted + // interpreter will have its bit set as part of taking the GIL. + MUTEX_LOCK(gil->mutex); + if (_Py_atomic_load_int_relaxed(&gil->locked)) { + PyThreadState *holder = (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); + if (holder->interp == interp) { + _Py_set_eval_breaker_bit(holder, bit); + } + } + MUTEX_UNLOCK(gil->mutex); +} +#endif + + /* Mechanism whereby asynchronously executing callbacks (e.g. UNIX signal handlers or Mac I/O completion routines) can schedule calls to a function to be called synchronously. @@ -646,13 +674,6 @@ PyEval_RestoreThread(PyThreadState *tstate) threadstate. */ -void -_PyEval_SignalReceived(void) -{ - _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_SIGNALS_PENDING_BIT); -} - - /* Push one item onto the queue while holding the lock. */ static int _push_pending_call(struct _pending_calls *pending, @@ -749,27 +770,6 @@ _pop_pending_call(struct _pending_calls *pending, } } - -#ifndef Py_GIL_DISABLED -static void -signal_active_thread(PyInterpreterState *interp, uintptr_t bit) -{ - struct _gil_runtime_state *gil = interp->ceval.gil; - - // If a thread from the targeted interpreter is holding the GIL, signal - // that thread. Otherwise, the next thread to run from the targeted - // interpreter will have its bit set as part of taking the GIL. - MUTEX_LOCK(gil->mutex); - if (_Py_atomic_load_int_relaxed(&gil->locked)) { - PyThreadState *holder = (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); - if (holder->interp == interp) { - _Py_set_eval_breaker_bit(holder, bit); - } - } - MUTEX_UNLOCK(gil->mutex); -} -#endif - /* This implementation is thread-safe. It allows scheduling to be made from any thread, and even from an executing callback. @@ -941,6 +941,7 @@ make_pending_calls(PyThreadState *tstate) return 0; } + void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) { @@ -976,6 +977,7 @@ _Py_FinishPendingCalls(PyThreadState *tstate) _Py_IsMainThread() && _Py_IsMainInterpreter(tstate->interp) ? &_PyRuntime.ceval.pending_mainthread : NULL; + /* make_pending_calls() may return early without making all pending calls, so we keep trying until we're actually done. */ int32_t npending = INT32_MAX; From 2357fbb5481ab307d4144497e3d5b422d89591af Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 24 Apr 2024 18:57:18 -0600 Subject: [PATCH 20/23] Do not bother with a preallocated array. --- Include/internal/pycore_ceval_state.h | 10 +--- Python/ceval_gil.c | 67 ++++++++++++++------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index a46c93b813fc3a..7995a96c57af62 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -20,7 +20,6 @@ struct _pending_call { _Py_pending_call_func func; void *arg; int flags; - int from_array; struct _pending_call *next; }; @@ -46,13 +45,7 @@ struct _pending_calls { /* The linked list of pending calls. */ struct _pending_call *head; struct _pending_call *tail; - - // We have a set of pre-allocated pending calls, to avoid - // possible allocation failures (at least when the number - // of pending calls is small). - int _first; - int _next; - struct _pending_call _preallocated[NPENDINGCALLSARRAY]; + struct _pending_call *freelist; }; @@ -93,6 +86,7 @@ struct _ceval_runtime_state { PyMutex sys_trace_profile_mutex; }; + #ifdef PY_HAVE_PERF_TRAMPOLINE # define _PyEval_RUNTIME_PERF_INIT \ { \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index eec801206f9d3c..9704bf1f3da4c3 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -674,6 +674,22 @@ signal_active_thread(PyInterpreterState *interp, uintptr_t bit) threadstate. */ +void +_pending_calls_fini(struct _pending_calls *pending) +{ + PyMutex_Lock(&pending->mutex); + struct _pending_call *head = pending->freelist; + pending->freelist = NULL; + PyMutex_Unlock(&pending->mutex); + + /* Deallocate all freelist entries. */ + while (head != NULL) { + struct _pending_call *cur = head; + head = cur->next; + PyMem_RawFree(cur); + } +} + /* Push one item onto the queue while holding the lock. */ static int _push_pending_call(struct _pending_calls *pending, @@ -688,37 +704,23 @@ _push_pending_call(struct _pending_calls *pending, } // Allocate for the pending call. - struct _pending_call *call; - if (pending->npending == 0) { - assert(pending->_next == pending->_first); - assert(pending->head == NULL); - assert(pending->tail == NULL); - call = &pending->_preallocated[0]; - pending->_first = 0; - pending->_next = 1; - call->from_array = 1; - } - else if (pending->npending < NPENDINGCALLSARRAY) { - int i = pending->_next; - int next = (i + 1) % NPENDINGCALLSARRAY; - assert(i != pending->_first); - call = &pending->_preallocated[i]; - pending->_next = next; - call->from_array = 1; + struct _pending_call *call = pending->freelist; + if (call != NULL) { + pending->freelist = call->next; } else { call = PyMem_RawMalloc(sizeof(struct _pending_call)); if (call == NULL) { return -1; } - call->from_array = 0; } // Initialize the data. - call->func = func; - call->arg = arg; - call->flags = flags; - call->next = NULL; + *call = (struct _pending_call){ + .func=func, + .arg=arg, + .flags=flags, + }; // Add the call to the list. if (pending->head == NULL) { @@ -742,10 +744,11 @@ _pop_pending_call(struct _pending_calls *pending, if (call == NULL) { /* Queue empty */ assert(pending->npending == 0); - assert(pending->_first == pending->_next); + assert(pending->tail == NULL); return; } assert(pending->npending > 0); + assert(pending->tail != NULL); // Remove the next one from the list. pending->head = call->next; @@ -759,15 +762,9 @@ _pop_pending_call(struct _pending_calls *pending, *arg = call->arg; *flags = call->flags; - // Deallocate the list entry. - if (call->from_array) { - int i = pending->_first; - pending->_preallocated[i] = (struct _pending_call){0}; - pending->_first = (i + 1) % NPENDINGCALLSARRAY; - } - else { - PyMem_RawFree(call); - } + // "Deallocate" the list entry. + call->next = pending->freelist; + pending->freelist = call; } /* This implementation is thread-safe. It allows @@ -998,6 +995,12 @@ _Py_FinishPendingCalls(PyThreadState *tstate) npending += _Py_atomic_load_int32_relaxed(&pending_main->npending); } } while (npending > 0); + + /* Clean up the lingering pending calls state. */ + _pending_calls_fini(pending); + if (pending_main != NULL) { + _pending_calls_fini(pending_main); + } } int From 814cb0cb5816bc6eaba4d9e6941dbe59448b5d07 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 24 Apr 2024 19:01:09 -0600 Subject: [PATCH 21/23] Use a preallocated array for the initial freelist for the main thread pending calls. --- Include/internal/pycore_ceval_state.h | 46 +++++++++++++++++++++++--- Include/internal/pycore_runtime_init.h | 3 ++ Python/ceval_gil.c | 5 ++- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 7995a96c57af62..540e9a2370683e 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -20,13 +20,10 @@ struct _pending_call { _Py_pending_call_func func; void *arg; int flags; + int from_heap; struct _pending_call *next; }; -// Using an array for the first pending calls gives us some -// extra stability in the case of signals. -// We also use the value as the max and loop max for the main thread. -#define NPENDINGCALLSARRAY 32 // We effectively drop the limit for per-interpreter pending calls. #define MAXPENDINGCALLS INT32_MAX @@ -83,9 +80,50 @@ struct _ceval_runtime_state { } perf; /* Pending calls to be made only on the main thread. */ struct _pending_calls pending_mainthread; + // Using a preallocated array for the first pending calls gives us + // some extra stability in the case of signals. + // We also use this number as the max and loop max for the main thread. +#define NPENDINGCALLSARRAY 32 + struct _pending_call _pending_preallocated[NPENDINGCALLSARRAY]; PyMutex sys_trace_profile_mutex; }; +#define _PyEval_RUNTIME_PENDING_FREELIST_INIT(ceval) \ + { \ + { .next = &(ceval)._pending_preallocated[1] }, \ + { .next = &(ceval)._pending_preallocated[2] }, \ + { .next = &(ceval)._pending_preallocated[3] }, \ + { .next = &(ceval)._pending_preallocated[4] }, \ + { .next = &(ceval)._pending_preallocated[5] }, \ + { .next = &(ceval)._pending_preallocated[6] }, \ + { .next = &(ceval)._pending_preallocated[7] }, \ + { .next = &(ceval)._pending_preallocated[8] }, \ + { .next = &(ceval)._pending_preallocated[9] }, \ + { .next = &(ceval)._pending_preallocated[10] }, \ + { .next = &(ceval)._pending_preallocated[11] }, \ + { .next = &(ceval)._pending_preallocated[12] }, \ + { .next = &(ceval)._pending_preallocated[13] }, \ + { .next = &(ceval)._pending_preallocated[14] }, \ + { .next = &(ceval)._pending_preallocated[15] }, \ + { .next = &(ceval)._pending_preallocated[16] }, \ + { .next = &(ceval)._pending_preallocated[17] }, \ + { .next = &(ceval)._pending_preallocated[18] }, \ + { .next = &(ceval)._pending_preallocated[19] }, \ + { .next = &(ceval)._pending_preallocated[20] }, \ + { .next = &(ceval)._pending_preallocated[21] }, \ + { .next = &(ceval)._pending_preallocated[22] }, \ + { .next = &(ceval)._pending_preallocated[23] }, \ + { .next = &(ceval)._pending_preallocated[24] }, \ + { .next = &(ceval)._pending_preallocated[25] }, \ + { .next = &(ceval)._pending_preallocated[26] }, \ + { .next = &(ceval)._pending_preallocated[27] }, \ + { .next = &(ceval)._pending_preallocated[28] }, \ + { .next = &(ceval)._pending_preallocated[29] }, \ + { .next = &(ceval)._pending_preallocated[30] }, \ + { .next = &(ceval)._pending_preallocated[31] }, \ + /* The last entry has .next set to NULL. */ \ + } + #ifdef PY_HAVE_PERF_TRAMPOLINE # define _PyEval_RUNTIME_PERF_INIT \ diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 3fe06219cf9748..c9b111ab8f6694 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -118,7 +118,10 @@ extern PyTypeObject _PyExc_MemoryError; .pending_mainthread = { \ .max = NPENDINGCALLSARRAY, \ .maxloop = NPENDINGCALLSARRAY, \ + .freelist = (runtime).ceval._pending_preallocated \ }, \ + ._pending_preallocated = \ + _PyEval_RUNTIME_PENDING_FREELIST_INIT((runtime).ceval), \ }, \ .gilstate = { \ .check_enabled = 1, \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 9704bf1f3da4c3..322b0b3cf25b6b 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -686,7 +686,9 @@ _pending_calls_fini(struct _pending_calls *pending) while (head != NULL) { struct _pending_call *cur = head; head = cur->next; - PyMem_RawFree(cur); + if (cur->from_heap) { + PyMem_RawFree(cur); + } } } @@ -713,6 +715,7 @@ _push_pending_call(struct _pending_calls *pending, if (call == NULL) { return -1; } + call->from_heap = 1; } // Initialize the data. From 3783a8d0519c9d2ebd52e8cbdd45fecc0ec9468d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 25 Apr 2024 10:35:50 -0600 Subject: [PATCH 22/23] Make _pending_calls_fini() static. --- Python/ceval_gil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 322b0b3cf25b6b..12f9fbf5576b4c 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -674,7 +674,7 @@ signal_active_thread(PyInterpreterState *interp, uintptr_t bit) threadstate. */ -void +static void _pending_calls_fini(struct _pending_calls *pending) { PyMutex_Lock(&pending->mutex); From bbaf87272e629d01ad6d980719d78350d3f6c9fa Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 25 Apr 2024 12:19:27 -0600 Subject: [PATCH 23/23] Initialize the freelist dynamically. --- Include/internal/pycore_ceval_state.h | 36 -------------------------- Include/internal/pycore_runtime_init.h | 3 --- Python/ceval_gil.c | 32 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 540e9a2370683e..140c6cd1f68df9 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -88,42 +88,6 @@ struct _ceval_runtime_state { PyMutex sys_trace_profile_mutex; }; -#define _PyEval_RUNTIME_PENDING_FREELIST_INIT(ceval) \ - { \ - { .next = &(ceval)._pending_preallocated[1] }, \ - { .next = &(ceval)._pending_preallocated[2] }, \ - { .next = &(ceval)._pending_preallocated[3] }, \ - { .next = &(ceval)._pending_preallocated[4] }, \ - { .next = &(ceval)._pending_preallocated[5] }, \ - { .next = &(ceval)._pending_preallocated[6] }, \ - { .next = &(ceval)._pending_preallocated[7] }, \ - { .next = &(ceval)._pending_preallocated[8] }, \ - { .next = &(ceval)._pending_preallocated[9] }, \ - { .next = &(ceval)._pending_preallocated[10] }, \ - { .next = &(ceval)._pending_preallocated[11] }, \ - { .next = &(ceval)._pending_preallocated[12] }, \ - { .next = &(ceval)._pending_preallocated[13] }, \ - { .next = &(ceval)._pending_preallocated[14] }, \ - { .next = &(ceval)._pending_preallocated[15] }, \ - { .next = &(ceval)._pending_preallocated[16] }, \ - { .next = &(ceval)._pending_preallocated[17] }, \ - { .next = &(ceval)._pending_preallocated[18] }, \ - { .next = &(ceval)._pending_preallocated[19] }, \ - { .next = &(ceval)._pending_preallocated[20] }, \ - { .next = &(ceval)._pending_preallocated[21] }, \ - { .next = &(ceval)._pending_preallocated[22] }, \ - { .next = &(ceval)._pending_preallocated[23] }, \ - { .next = &(ceval)._pending_preallocated[24] }, \ - { .next = &(ceval)._pending_preallocated[25] }, \ - { .next = &(ceval)._pending_preallocated[26] }, \ - { .next = &(ceval)._pending_preallocated[27] }, \ - { .next = &(ceval)._pending_preallocated[28] }, \ - { .next = &(ceval)._pending_preallocated[29] }, \ - { .next = &(ceval)._pending_preallocated[30] }, \ - { .next = &(ceval)._pending_preallocated[31] }, \ - /* The last entry has .next set to NULL. */ \ - } - #ifdef PY_HAVE_PERF_TRAMPOLINE # define _PyEval_RUNTIME_PERF_INIT \ diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index c9b111ab8f6694..3fe06219cf9748 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -118,10 +118,7 @@ extern PyTypeObject _PyExc_MemoryError; .pending_mainthread = { \ .max = NPENDINGCALLSARRAY, \ .maxloop = NPENDINGCALLSARRAY, \ - .freelist = (runtime).ceval._pending_preallocated \ }, \ - ._pending_preallocated = \ - _PyEval_RUNTIME_PENDING_FREELIST_INIT((runtime).ceval), \ }, \ .gilstate = { \ .check_enabled = 1, \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 12f9fbf5576b4c..38d93309ce1f90 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -674,6 +674,30 @@ signal_active_thread(PyInterpreterState *interp, uintptr_t bit) threadstate. */ +static void +_pending_calls_init(struct _pending_calls *pending, + struct _pending_call *preallocated, size_t len) +{ + /* We shouldn't need the lock while initializing. */ + assert(pending->head == NULL); + assert(pending->tail == NULL); + assert(pending->npending == 0); + assert(!pending->busy); + assert(pending->max > 0); + assert(pending->maxloop > 0); + + assert(pending->freelist == NULL); + if (preallocated) { + assert(len > 0); + for (size_t i = len; i > 0; i--) { + struct _pending_call *call = &preallocated[i-1]; + assert(!call->from_heap); + call->next = pending->freelist; + pending->freelist = call; + } + } +} + static void _pending_calls_fini(struct _pending_calls *pending) { @@ -1049,6 +1073,14 @@ Py_MakePendingCalls(void) void _PyEval_InitState(PyInterpreterState *interp) { + _pending_calls_init(&interp->ceval.pending, NULL, 0); + if (_Py_IsMainInterpreter(interp)) { + struct _ceval_runtime_state *ceval = &_PyRuntime.ceval; + _pending_calls_init(&ceval->pending_mainthread, + ceval->_pending_preallocated, + Py_ARRAY_LENGTH(ceval->_pending_preallocated)); + } + _gil_initialize(&interp->_gil); }