diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index ec99f90d669d12..4fad4c60abbafe 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -149,6 +149,13 @@ struct _ts {
 
     struct _py_trashcan trash;
 
+    /* Tagged pointer to top-most critical section, or zero if there is no
+     * active critical section. Critical sections are only used in
+     * `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the
+     * default build, this field is always zero.
+     */
+    uintptr_t critical_section;
+
     /* Called when a thread state is deleted normally, but not when it
      * is destroyed after fork().
      * Pain: to prevent rare but fatal shutdown errors (issue 18808),
diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
new file mode 100644
index 00000000000000..73c2e243f20bcc
--- /dev/null
+++ b/Include/internal/pycore_critical_section.h
@@ -0,0 +1,242 @@
+#ifndef Py_INTERNAL_CRITICAL_SECTION_H
+#define Py_INTERNAL_CRITICAL_SECTION_H
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_lock.h"        // PyMutex
+#include "pycore_pystate.h"     // _PyThreadState_GET()
+#include <stdint.h>             // uintptr_t
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Implementation of Python critical sections
+//
+// Conceptually, critical sections are a deadlock avoidance layer on top of
+// per-object locks. These helpers, in combination with those locks, replace
+// our usage of the global interpreter lock to provide thread-safety for
+// otherwise thread-unsafe objects, such as dict.
+//
+// NOTE: These APIs are no-ops in non-free-threaded builds.
+//
+// Straightforward per-object locking could introduce deadlocks that were not
+// present when running with the GIL. Threads may hold locks for multiple
+// objects simultaneously because Python operations can nest. If threads were
+// to acquire the same locks in different orders, they would deadlock.
+//
+// One way to avoid deadlocks is to allow threads to hold only the lock (or
+// locks) for a single operation at a time (typically a single lock, but some
+// operations involve two locks). When a thread begins a nested operation it
+// could suspend the locks for any outer operation: before beginning the
+// nested operation, the locks for the outer operation are released and when
+// the nested operation completes, the locks for the outer operation are
+// reacquired.
+//
+// To improve performance, this API uses a variation of the above scheme.
+// Instead of immediately suspending locks any time a nested operation begins,
+// locks are only suspended if the thread would block. This reduces the number
+// of lock acquisitions and releases for nested operations, while still
+// avoiding deadlocks.
+//
+// Additionally, the locks for any active operation are suspended around
+// other potentially blocking operations, such as I/O. This is because the
+// interaction between locks and blocking operations can lead to deadlocks in
+// the same way as the interaction between multiple locks.
+//
+// Each thread's critical sections and their corresponding locks are tracked
+// in a stack in `PyThreadState.critical_section`. When a thread calls
+// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
+// waiting to acquire a lock, the thread suspends all of its active critical
+// sections, temporarily releasing the associated locks.
+// When the thread calls `_PyThreadState_Attach()`, it resumes the top-most
+// (i.e., most recent) critical section by reacquiring the associated lock or
+// locks. See `_PyCriticalSection_Resume()`.
+//
+// NOTE: Only the top-most critical section is guaranteed to be active.
+// Operations that need to lock two objects at once must use
+// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
+// to lock more than one object at once, because the inner critical section
+// may suspend the outer critical sections. This API does not provide a way
+// to lock more than two objects at once (though it could be added later if
+// actually needed).
+//
+// NOTE: Critical sections implicitly behave like reentrant locks because
+// attempting to acquire the same lock will suspend any outer (earlier)
+// critical sections. However, they are less efficient for this use case than
+// purposefully designed reentrant locks.
+//
+// Example usage:
+//      Py_BEGIN_CRITICAL_SECTION(op);
+//      ...
+//      Py_END_CRITICAL_SECTION();
+//
+// To lock two objects at once:
+//      Py_BEGIN_CRITICAL_SECTION2(op1, op2);
+//      ...
+//      Py_END_CRITICAL_SECTION2();
+
+
+// Tagged pointers to critical sections use the two least significant bits to
+// mark if the pointed-to critical section is inactive and whether it is a
+// _PyCriticalSection2 object.
+#define _Py_CRITICAL_SECTION_INACTIVE       0x1
+#define _Py_CRITICAL_SECTION_TWO_MUTEXES    0x2
+#define _Py_CRITICAL_SECTION_MASK           0x3
+
+#ifdef Py_NOGIL
+# define Py_BEGIN_CRITICAL_SECTION(op) \
+    { \
+        _PyCriticalSection _cs; \
+        _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION() \
+        _PyCriticalSection_End(&_cs); \
+    }
+
+# define Py_BEGIN_CRITICAL_SECTION2(a, b) \
+    { \
+        _PyCriticalSection2 _cs2; \
+        _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION2() \
+        _PyCriticalSection2_End(&_cs2); \
+    }
+#else  /* !Py_NOGIL */
+// The critical section APIs are no-ops with the GIL.
+# define Py_BEGIN_CRITICAL_SECTION(op)
+# define Py_END_CRITICAL_SECTION()
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)
+# define Py_END_CRITICAL_SECTION2()
+#endif /* !Py_NOGIL */
+
+typedef struct {
+    // Tagged pointer to an outer active critical section (or 0).
+    // The two least-significant-bits indicate whether the pointed-to critical
+    // section is inactive and whether it is a _PyCriticalSection2 object.
+    uintptr_t prev;
+
+    // Mutex used to protect critical section
+    PyMutex *mutex;
+} _PyCriticalSection;
+
+// A critical section protected by two mutexes. Use
+// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
+typedef struct {
+    _PyCriticalSection base;
+
+    PyMutex *mutex2;
+} _PyCriticalSection2;
+
+static inline int
+_PyCriticalSection_IsActive(uintptr_t tag)
+{
+    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
+}
+
+// Resumes the top-most critical section.
+PyAPI_FUNC(void)
+_PyCriticalSection_Resume(PyThreadState *tstate);
+
+// (private) slow path for locking the mutex
+PyAPI_FUNC(void)
+_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);
+
+PyAPI_FUNC(void)
+_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+                              int is_m1_locked);
+
+static inline void
+_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
+{
+    if (PyMutex_LockFast(&m->v)) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        c->mutex = m;
+        c->prev = tstate->critical_section;
+        tstate->critical_section = (uintptr_t)c;
+    }
+    else {
+        _PyCriticalSection_BeginSlow(c, m);
+    }
+}
+
+// Removes the top-most critical section from the thread's stack of critical
+// sections. If the new top-most critical section is inactive, then it is
+// resumed.
+static inline void
+_PyCriticalSection_Pop(_PyCriticalSection *c)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    uintptr_t prev = c->prev;
+    tstate->critical_section = prev;
+
+    if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
+        _PyCriticalSection_Resume(tstate);
+    }
+}
+
+static inline void
+_PyCriticalSection_End(_PyCriticalSection *c)
+{
+    PyMutex_Unlock(c->mutex);
+    _PyCriticalSection_Pop(c);
+}
+
+static inline void
+_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
+{
+    if (m1 == m2) {
+        // If the two mutex arguments are the same, treat this as a critical
+        // section with a single mutex.
+        c->mutex2 = NULL;
+        _PyCriticalSection_Begin(&c->base, m1);
+        return;
+    }
+
+    if ((uintptr_t)m2 < (uintptr_t)m1) {
+        // Sort the mutexes so that the lower address is locked first.
+        // The exact order does not matter, but we need to acquire the mutexes
+        // in a consistent order to avoid lock ordering deadlocks.
+        PyMutex *tmp = m1;
+        m1 = m2;
+        m2 = tmp;
+    }
+
+    if (PyMutex_LockFast(&m1->v)) {
+        if (PyMutex_LockFast(&m2->v)) {
+            PyThreadState *tstate = _PyThreadState_GET();
+            c->base.mutex = m1;
+            c->mutex2 = m2;
+            c->base.prev = tstate->critical_section;
+
+            uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
+            tstate->critical_section = p;
+        }
+        else {
+            _PyCriticalSection2_BeginSlow(c, m1, m2, 1);
+        }
+    }
+    else {
+        _PyCriticalSection2_BeginSlow(c, m1, m2, 0);
+    }
+}
+
+static inline void
+_PyCriticalSection2_End(_PyCriticalSection2 *c)
+{
+    if (c->mutex2) {
+        PyMutex_Unlock(c->mutex2);
+    }
+    PyMutex_Unlock(c->base.mutex);
+    _PyCriticalSection_Pop(&c->base);
+}
+
+PyAPI_FUNC(void)
+_PyCriticalSection_SuspendAll(PyThreadState *tstate);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */
diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h
index c4bb76a40e7b12..fe5e21fad221e4 100644
--- a/Include/internal/pycore_lock.h
+++ b/Include/internal/pycore_lock.h
@@ -32,9 +32,16 @@ extern "C" {
 //     PyMutex_Lock(&m);
 //     ...
 //     PyMutex_Unlock(&m);
-typedef struct _PyMutex {
-    uint8_t v;
-} PyMutex;
+
+// NOTE: In Py_NOGIL builds, `struct _PyMutex` is defined in Include/object.h.
+// The Py_NOGIL builds need the definition in Include/object.h for the
+// `ob_mutex` field in PyObject. For the default (non-free-threaded) build,
+// we define the struct here to avoid exposing it in the public API.
+#ifndef Py_NOGIL
+struct _PyMutex { uint8_t v; };
+#endif
+
+typedef struct _PyMutex PyMutex;
 
 #define _Py_UNLOCKED    0
 #define _Py_LOCKED      1
@@ -46,6 +53,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
 // (private) slow path for unlocking the mutex
 PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
 
+static inline int
+PyMutex_LockFast(uint8_t *lock_bits)
+{
+    uint8_t expected = _Py_UNLOCKED;
+    return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
+}
+
 // Locks the mutex.
 //
 // If the mutex is currently locked, the calling thread will be parked until
diff --git a/Include/object.h b/Include/object.h
index bdc07d9e8d7f83..f6693562d211c5 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count
     { \
         0, \
         0, \
-        0, \
+        { 0 }, \
         0, \
         _Py_IMMORTAL_REFCNT_LOCAL, \
         0, \
@@ -204,10 +204,14 @@ struct _object {
 // Create a shared field from a refcnt and desired flags
 #define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
 
+// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in
+// pycore_lock.h. See pycore_lock.h for more details.
+struct _PyMutex { uint8_t v; };
+
 struct _object {
     uintptr_t ob_tid;           // thread id (or zero)
     uint16_t _padding;
-    uint8_t ob_mutex;           // per-object lock
+    struct _PyMutex ob_mutex;   // per-object lock
     uint8_t ob_gc_bits;         // gc-related state
     uint32_t ob_ref_local;      // local reference count
     Py_ssize_t ob_ref_shared;   // shared (atomic) reference count
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 365491de4e3f57..0629e77dc2dae4 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -409,6 +409,7 @@ PYTHON_OBJS= \
 		Python/codecs.o \
 		Python/compile.o \
 		Python/context.o \
+		Python/critical_section.o \
 		Python/crossinterp.o \
 		Python/dynamic_annotations.o \
 		Python/errors.o \
@@ -1802,6 +1803,7 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_complexobject.h \
 		$(srcdir)/Include/internal/pycore_condvar.h \
 		$(srcdir)/Include/internal/pycore_context.h \
+		$(srcdir)/Include/internal/pycore_critical_section.h \
 		$(srcdir)/Include/internal/pycore_crossinterp.h \
 		$(srcdir)/Include/internal/pycore_dict.h \
 		$(srcdir)/Include/internal/pycore_dict_state.h \
diff --git a/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst
new file mode 100644
index 00000000000000..c2bd3ae36e6439
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst
@@ -0,0 +1,3 @@
+Implement "Python Critical Sections" from :pep:`703`. These are macros to
+help replace the GIL with per-object locks in the ``--disable-gil`` build of
+CPython. The macros are no-ops in the default build.
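Note for reviewers: the sketch below is not part of the patch; it only illustrates how the macros above are intended to be used by code that mutates a shared object. The function name and the mutation it performs are hypothetical. In a Py_NOGIL build, Py_BEGIN_CRITICAL_SECTION(op) locks op->ob_mutex and pushes the section onto tstate->critical_section; in the default build both macros expand to nothing and the function degenerates to ordinary GIL-protected code.

    /* Hypothetical usage sketch (not in this patch). */
    static int
    example_set_value(PyObject *op, PyObject *value)
    {
        int ret = -1;
        Py_BEGIN_CRITICAL_SECTION(op);   // Py_NOGIL: locks op->ob_mutex and
                                         // pushes the section onto the
                                         // thread's critical-section stack
        // ... mutate op's state while op is locked ...
        ret = 0;
        Py_END_CRITICAL_SECTION();       // unlocks op, pops the section
        return ret;
    }

Because Py_BEGIN_CRITICAL_SECTION() opens a C block that Py_END_CRITICAL_SECTION() closes, the two macros must be paired in the same scope, and an early return between them would skip the unlock.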
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 9e1188304de9f6..d144ad312ef560 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -158,7 +158,7 @@
 @MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c
 @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
 @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
-@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c
+@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
 @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/bytearray.c _testcapi/bytes.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c
 @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
 @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 7dba29a54d79b7..604a59e7e19e43 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1687,6 +1687,9 @@ module_exec(PyObject *module)
     if (_PyTestInternalCapi_Init_Set(module) < 0) {
         return 1;
     }
+    if (_PyTestInternalCapi_Init_CriticalSection(module) < 0) {
+        return 1;
+    }
 
     if (PyModule_Add(module, "SIZEOF_PYGC_HEAD",
                      PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) {
diff --git a/Modules/_testinternalcapi/parts.h b/Modules/_testinternalcapi/parts.h
index 3d2774e3f1b404..49a1395f499fc3 100644
--- a/Modules/_testinternalcapi/parts.h
+++ b/Modules/_testinternalcapi/parts.h
@@ -13,5 +13,6 @@
 int _PyTestInternalCapi_Init_Lock(PyObject *module);
 int _PyTestInternalCapi_Init_PyTime(PyObject *module);
 int _PyTestInternalCapi_Init_Set(PyObject *module);
+int _PyTestInternalCapi_Init_CriticalSection(PyObject *module);
 
 #endif // Py_TESTINTERNALCAPI_PARTS_H
diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c
new file mode 100644
index 00000000000000..238f29c3c62e64
--- /dev/null
+++ b/Modules/_testinternalcapi/test_critical_sections.c
@@ -0,0 +1,213 @@
+/*
+ * C Extension module to test pycore_critical_section.h API.
+ */
+
+#include "parts.h"
+
+#include "pycore_critical_section.h"
+
+#ifdef Py_NOGIL
+#define assert_nogil assert
+#define assert_gil(x)
+#else
+#define assert_gil assert
+#define assert_nogil(x)
+#endif
+
+
+static PyObject *
+test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyObject *d1 = PyDict_New();
+    assert(d1 != NULL);
+
+    PyObject *d2 = PyDict_New();
+    assert(d2 != NULL);
+
+    // Beginning a critical section should lock the associated object and
+    // push the critical section onto the thread's stack (in Py_NOGIL builds).
+    Py_BEGIN_CRITICAL_SECTION(d1);
+    assert_nogil(PyMutex_IsLocked(&d1->ob_mutex));
+    assert_nogil(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section));
+    assert_gil(PyThreadState_GET()->critical_section == 0);
+    Py_END_CRITICAL_SECTION();
+    assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+
+    assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+    assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+    Py_BEGIN_CRITICAL_SECTION2(d1, d2);
+    assert_nogil(PyMutex_IsLocked(&d1->ob_mutex));
+    assert_nogil(PyMutex_IsLocked(&d2->ob_mutex));
+    Py_END_CRITICAL_SECTION2();
+    assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+    assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+
+    // Passing the same object twice should work (and not deadlock).
+    assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+    Py_BEGIN_CRITICAL_SECTION2(d2, d2);
+    assert_nogil(PyMutex_IsLocked(&d2->ob_mutex));
+    Py_END_CRITICAL_SECTION2();
+    assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+
+    Py_DECREF(d2);
+    Py_DECREF(d1);
+    Py_RETURN_NONE;
+}
+
+static void
+lock_unlock_object(PyObject *obj, int recurse_depth)
+{
+    Py_BEGIN_CRITICAL_SECTION(obj);
+    if (recurse_depth > 0) {
+        lock_unlock_object(obj, recurse_depth - 1);
+    }
+    Py_END_CRITICAL_SECTION();
+}
+
+static void
+lock_unlock_two_objects(PyObject *a, PyObject *b, int recurse_depth)
+{
+    Py_BEGIN_CRITICAL_SECTION2(a, b);
+    if (recurse_depth > 0) {
+        lock_unlock_two_objects(a, b, recurse_depth - 1);
+    }
+    Py_END_CRITICAL_SECTION2();
+}
+
+
+// Test that nested critical sections do not deadlock if they attempt to lock
+// the same object.
+static PyObject *
+test_critical_sections_nest(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyObject *a = PyDict_New();
+    assert(a != NULL);
+    PyObject *b = PyDict_New();
+    assert(b != NULL);
+
+    // Locking an object recursively with this API should not deadlock.
+    assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+    Py_BEGIN_CRITICAL_SECTION(a);
+    assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+    lock_unlock_object(a, 10);
+    assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+    Py_END_CRITICAL_SECTION();
+    assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+
+    // Same test but with two objects.
+    Py_BEGIN_CRITICAL_SECTION2(b, a);
+    lock_unlock_two_objects(a, b, 10);
+    assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+    assert_nogil(PyMutex_IsLocked(&b->ob_mutex));
+    Py_END_CRITICAL_SECTION2();
+
+    Py_DECREF(b);
+    Py_DECREF(a);
+    Py_RETURN_NONE;
+}
+
+// Test that a critical section is suspended by a Py_BEGIN_ALLOW_THREADS and
+// resumed by a Py_END_ALLOW_THREADS.
+static PyObject *
+test_critical_sections_suspend(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyObject *a = PyDict_New();
+    assert(a != NULL);
+
+    Py_BEGIN_CRITICAL_SECTION(a);
+    assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+
+    // Py_BEGIN_ALLOW_THREADS should suspend the active critical section.
+    Py_BEGIN_ALLOW_THREADS
+    assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+    Py_END_ALLOW_THREADS;
+
+    // After Py_END_ALLOW_THREADS the critical section should be resumed.
+    assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+    Py_END_CRITICAL_SECTION();
+
+    Py_DECREF(a);
+    Py_RETURN_NONE;
+}
+
+struct test_data {
+    PyObject *obj1;
+    PyObject *obj2;
+    PyObject *obj3;
+    Py_ssize_t countdown;
+    PyEvent done_event;
+};
+
+static void
+thread_critical_sections(void *arg)
+{
+    const Py_ssize_t NUM_ITERS = 200;
+    struct test_data *test_data = arg;
+    PyGILState_STATE gil = PyGILState_Ensure();
+
+    for (Py_ssize_t i = 0; i < NUM_ITERS; i++) {
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj1);
+        Py_END_CRITICAL_SECTION();
+
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj2);
+        lock_unlock_object(test_data->obj1, 1);
+        Py_END_CRITICAL_SECTION();
+
+        Py_BEGIN_CRITICAL_SECTION2(test_data->obj3, test_data->obj1);
+        lock_unlock_object(test_data->obj2, 2);
+        Py_END_CRITICAL_SECTION2();
+
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj3);
+        Py_BEGIN_ALLOW_THREADS
+        Py_END_ALLOW_THREADS
+        Py_END_CRITICAL_SECTION();
+    }
+
+    PyGILState_Release(gil);
+    if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) {
+        // last thread to finish sets done_event
+        _PyEvent_Notify(&test_data->done_event);
+    }
+}
+
+static PyObject *
+test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    const Py_ssize_t NUM_THREADS = 4;
+    struct test_data test_data = {
+        .obj1 = PyDict_New(),
+        .obj2 = PyDict_New(),
+        .obj3 = PyDict_New(),
+        .countdown = NUM_THREADS,
+    };
+    assert(test_data.obj1 != NULL);
+    assert(test_data.obj2 != NULL);
+    assert(test_data.obj3 != NULL);
+
+    for (int i = 0; i < NUM_THREADS; i++) {
+        PyThread_start_new_thread(&thread_critical_sections, &test_data);
+    }
+    PyEvent_Wait(&test_data.done_event);
+
+    Py_DECREF(test_data.obj3);
+    Py_DECREF(test_data.obj2);
+    Py_DECREF(test_data.obj1);
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef test_methods[] = {
+    {"test_critical_sections", test_critical_sections, METH_NOARGS},
+    {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
+    {"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS},
+    {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
+    {NULL, NULL} /* sentinel */
+};
+
+int
+_PyTestInternalCapi_Init_CriticalSection(PyObject *mod)
+{
+    if (PyModule_AddFunctions(mod, test_methods) < 0) {
+        return -1;
+    }
+    return 0;
+}
diff --git a/Objects/object.c b/Objects/object.c
index b561da7fca3aa0..b7662783a061a9 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2371,7 +2371,7 @@ new_reference(PyObject *op)
 #else
     op->ob_tid = _Py_ThreadId();
     op->_padding = 0;
-    op->ob_mutex = 0;
+    op->ob_mutex = (struct _PyMutex){ 0 };
     op->ob_gc_bits = 0;
     op->ob_ref_local = 1;
     op->ob_ref_shared = 0;
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index e4ce3fefc91008..a1c37e183f21c7 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -195,6 +195,7 @@
+    <ClCompile Include="..\Python\critical_section.c" />
@@ -220,12 +221,14 @@
+
+
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 8e987cdf2b86e0..1c5a6d623f4dad 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -103,6 +103,9 @@
+    <ClCompile Include="..\Python\critical_section.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
@@ -223,6 +226,9 @@
+
+      Source Files
+
@@ -289,6 +295,9 @@
+
+      Source Files
+
diff --git a/PCbuild/_testinternalcapi.vcxproj b/PCbuild/_testinternalcapi.vcxproj
index a729ab3877d91f..558f66ca95cd33 100644
--- a/PCbuild/_testinternalcapi.vcxproj
+++ b/PCbuild/_testinternalcapi.vcxproj
@@ -95,6 +95,7 @@
+    <ClCompile Include="..\Modules\_testinternalcapi\test_critical_sections.c" />
diff --git a/PCbuild/_testinternalcapi.vcxproj.filters b/PCbuild/_testinternalcapi.vcxproj.filters
index 9c8a5d793ee0f4..abfeeb39630daf 100644
--- a/PCbuild/_testinternalcapi.vcxproj.filters
+++ b/PCbuild/_testinternalcapi.vcxproj.filters
@@ -15,6 +15,9 @@
+    <ClCompile Include="..\Modules\_testinternalcapi\test_critical_sections.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index eca2671b0cc090..f6fbd0fccc3f9b 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -216,6 +216,7 @@
+    <ClInclude Include="..\Include\internal\pycore_critical_section.h" />
@@ -548,6 +549,7 @@
+    <ClCompile Include="..\Python\critical_section.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 447bb266d7587e..eb7ba0417c8146 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -579,6 +579,9 @@
+    <ClInclude Include="..\Include\internal\pycore_critical_section.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
@@ -1241,6 +1244,9 @@
+    <ClCompile Include="..\Python\critical_section.c">
+      <Filter>Python</Filter>
+    </ClCompile>
diff --git a/Python/critical_section.c b/Python/critical_section.c
new file mode 100644
index 00000000000000..2214d80eeb297b
--- /dev/null
+++ b/Python/critical_section.c
@@ -0,0 +1,100 @@
+#include "Python.h"
+
+#include "pycore_lock.h"
+#include "pycore_critical_section.h"
+
+static_assert(_Alignof(_PyCriticalSection) >= 4,
+              "critical section must be aligned to at least 4 bytes");
+
+void
+_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    c->mutex = NULL;
+    c->prev = (uintptr_t)tstate->critical_section;
+    tstate->critical_section = (uintptr_t)c;
+
+    _PyMutex_LockSlow(m);
+    c->mutex = m;
+}
+
+void
+_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+                              int is_m1_locked)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    c->base.mutex = NULL;
+    c->mutex2 = NULL;
+    c->base.prev = tstate->critical_section;
+    tstate->critical_section = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
+
+    if (!is_m1_locked) {
+        PyMutex_Lock(m1);
+    }
+    PyMutex_Lock(m2);
+    c->base.mutex = m1;
+    c->mutex2 = m2;
+}
+
+static _PyCriticalSection *
+untag_critical_section(uintptr_t tag)
+{
+    return (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
+}
+
+// Release all locks held by critical sections. This is called by
+// _PyThreadState_Detach.
+void
+_PyCriticalSection_SuspendAll(PyThreadState *tstate)
+{
+    uintptr_t *tagptr = &tstate->critical_section;
+    while (_PyCriticalSection_IsActive(*tagptr)) {
+        _PyCriticalSection *c = untag_critical_section(*tagptr);
+
+        if (c->mutex) {
+            PyMutex_Unlock(c->mutex);
+            if ((*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) {
+                _PyCriticalSection2 *c2 = (_PyCriticalSection2 *)c;
+                if (c2->mutex2) {
+                    PyMutex_Unlock(c2->mutex2);
+                }
+            }
+        }
+
+        *tagptr |= _Py_CRITICAL_SECTION_INACTIVE;
+        tagptr = &c->prev;
+    }
+}
+
+void
+_PyCriticalSection_Resume(PyThreadState *tstate)
+{
+    uintptr_t p = tstate->critical_section;
+    _PyCriticalSection *c = untag_critical_section(p);
+    assert(!_PyCriticalSection_IsActive(p));
+
+    PyMutex *m1 = c->mutex;
+    c->mutex = NULL;
+
+    PyMutex *m2 = NULL;
+    _PyCriticalSection2 *c2 = NULL;
+    if ((p & _Py_CRITICAL_SECTION_TWO_MUTEXES)) {
+        c2 = (_PyCriticalSection2 *)c;
+        m2 = c2->mutex2;
+        c2->mutex2 = NULL;
+    }
+
+    if (m1) {
+        PyMutex_Lock(m1);
+    }
+    if (m2) {
+        PyMutex_Lock(m2);
+    }
+
+    c->mutex = m1;
+    if (m2) {
+        c2->mutex2 = m2;
+    }
+
+    tstate->critical_section &= ~_Py_CRITICAL_SECTION_INACTIVE;
+}
diff --git a/Python/pystate.c b/Python/pystate.c
index b369a56d6d5a08..991d8d204a1c25 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -4,6 +4,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"
 #include "pycore_code.h"          // stats
+#include "pycore_critical_section.h" // _PyCriticalSection_Resume()
 #include "pycore_dtoa.h"          // _dtoa_state_INIT()
 #include "pycore_emscripten_trampoline.h"  // _Py_EmscriptenTrampoline_Init()
 #include "pycore_frame.h"
@@ -1911,6 +1912,12 @@ _PyThreadState_Attach(PyThreadState *tstate)
         Py_FatalError("thread attach failed");
     }
 
+    // Resume previous critical section. This acquires the lock(s) from the
+    // top-most critical section.
+    if (tstate->critical_section != 0) {
+        _PyCriticalSection_Resume(tstate);
+    }
+
 #if defined(Py_DEBUG)
     errno = err;
 #endif
@@ -1922,6 +1929,9 @@ _PyThreadState_Detach(PyThreadState *tstate)
     // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate));
     assert(tstate->state == _Py_THREAD_ATTACHED);
     assert(tstate == current_fast_get(&_PyRuntime));
+    if (tstate->critical_section != 0) {
+        _PyCriticalSection_SuspendAll(tstate);
+    }
     tstate_set_detached(tstate);
     tstate_deactivate(tstate);
     current_fast_clear(&_PyRuntime);
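Closing note for reviewers: the sketch below ties the pieces together, showing how a critical section interacts with Py_BEGIN_ALLOW_THREADS, which detaches the thread state and therefore goes through the _PyCriticalSection_SuspendAll() and _PyCriticalSection_Resume() hooks added to Python/pystate.c above. The function, its parameters, and its I/O call are hypothetical and not part of the patch; it assumes a Py_NOGIL build and POSIX write().

    /* Hypothetical illustration (not in this patch). In a Py_NOGIL build:
     *  1. Py_BEGIN_CRITICAL_SECTION(op) locks op->ob_mutex.
     *  2. Py_BEGIN_ALLOW_THREADS detaches the thread state, which calls
     *     _PyCriticalSection_SuspendAll(): op->ob_mutex is released and the
     *     section is tagged _Py_CRITICAL_SECTION_INACTIVE.
     *  3. Py_END_ALLOW_THREADS re-attaches, which calls
     *     _PyCriticalSection_Resume(): op->ob_mutex is reacquired.
     */
    static int
    example_flush(PyObject *op, int fd, const char *buf, Py_ssize_t len)
    {
        Py_ssize_t n;
        Py_BEGIN_CRITICAL_SECTION(op);
        // ... read op's state while op is locked ...
        Py_BEGIN_ALLOW_THREADS              // suspends the critical section
        n = write(fd, buf, (size_t)len);    // other threads may lock op here
        Py_END_ALLOW_THREADS                // resumes: reacquires op->ob_mutex
        // op is locked again, but its state may have changed while the
        // section was suspended, so any invariants must be re-checked.
        Py_END_CRITICAL_SECTION();
        return n < 0 ? -1 : 0;
    }

This is the key behavioral difference from a plain mutex: any operation that can detach the thread state is a point where the protected object may be mutated by another thread, which is exactly the property the test_critical_sections_suspend test above asserts.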