diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ddf79ab1d130e8..c4ef6eff37b199 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -317,7 +317,7 @@ _PyLong_FlipSign(PyLongObject *op) { #define _PyLong_DIGIT_INIT(val) \ { \ - .ob_base = _PyObject_HEAD_INIT(&PyLong_Type) \ + .ob_base = _PyObject_HEAD_INIT(&PyLong_Type), \ .long_value = { \ .lv_tag = TAG_FROM_SIGN_AND_SIZE( \ (val) == 0 ? 0 : ((val) < 0 ? -1 : 1), \ diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 2d50f42c9c614d..206d8a5d4cc5e1 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -54,16 +54,24 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); Furthermore, we can't use designated initializers in Extensions since these are not supported pre-C++20. Thus, keeping an internal copy here is the most backwards compatible solution */ +#if defined(Py_NOGIL) +#define _PyObject_HEAD_INIT(type) \ + { \ + .ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL, \ + .ob_type = (type) \ + } +#else #define _PyObject_HEAD_INIT(type) \ { \ .ob_refcnt = _Py_IMMORTAL_REFCNT, \ .ob_type = (type) \ - }, + } +#endif #define _PyVarObject_HEAD_INIT(type, size) \ { \ - .ob_base = _PyObject_HEAD_INIT(type) \ + .ob_base = _PyObject_HEAD_INIT(type), \ .ob_size = size \ - }, + } extern void _Py_NO_RETURN _Py_FatalRefcountErrorFunc( const char *func, @@ -95,24 +103,63 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) #ifdef Py_REF_DEBUG _Py_AddRefTotal(_PyInterpreterState_GET(), n); #endif +#if !defined(Py_NOGIL) op->ob_refcnt += n; +#else + if (_Py_IsOwnedByCurrentThread(op)) { + uint32_t local = op->ob_ref_local; + Py_ssize_t refcnt = (Py_ssize_t)local + n; +# if PY_SSIZE_T_MAX > UINT32_MAX + if (refcnt > (Py_ssize_t)UINT32_MAX) { + // Make the object immortal if the 32-bit local reference count + // would overflow. 
+ refcnt = _Py_IMMORTAL_REFCNT_LOCAL; + } +# endif + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, (uint32_t)refcnt); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (n << _Py_REF_SHARED_SHIFT)); + } +#endif } #define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n) static inline void _Py_SetImmortal(PyObject *op) { if (op) { +#ifdef Py_NOGIL + op->ob_tid = _Py_UNOWNED_TID; + op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; + op->ob_ref_shared = 0; +#else op->ob_refcnt = _Py_IMMORTAL_REFCNT; +#endif } } #define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op)) +// Makes an immortal object mortal again with the specified refcnt. Should only +// be used during runtime finalization. +static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) +{ + if (op) { + assert(_Py_IsImmortal(op)); +#ifdef Py_NOGIL + op->ob_tid = _Py_UNOWNED_TID; + op->ob_ref_local = 0; + op->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); +#else + op->ob_refcnt = refcnt; +#endif + } +} + /* _Py_ClearImmortal() should only be used during runtime finalization. 
*/ static inline void _Py_ClearImmortal(PyObject *op) { if (op) { - assert(op->ob_refcnt == _Py_IMMORTAL_REFCNT); - op->ob_refcnt = 1; + _Py_SetMortal(op, 1); Py_DECREF(op); } } @@ -122,6 +169,7 @@ static inline void _Py_ClearImmortal(PyObject *op) op = NULL; \ } while (0) +#if !defined(Py_NOGIL) static inline void _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) { @@ -161,6 +209,37 @@ _Py_DECREF_NO_DEALLOC(PyObject *op) #endif } +#else +// TODO: implement Py_DECREF specializations for Py_NOGIL build +static inline void +_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) +{ + Py_DECREF(op); +} + +static inline void +_Py_DECREF_NO_DEALLOC(PyObject *op) +{ + Py_DECREF(op); +} + +static inline int +_Py_REF_IS_MERGED(Py_ssize_t ob_ref_shared) +{ + return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_MERGED; +} + +static inline int +_Py_REF_IS_QUEUED(Py_ssize_t ob_ref_shared) +{ + return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_QUEUED; +} + +// Merge the local and shared reference count fields and add `extra` to the +// refcount when merging. 
+Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra); +#endif // !defined(Py_NOGIL) + #ifdef Py_REF_DEBUG # undef _Py_DEC_REFTOTAL #endif diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 73fa5f553cb81b..0799b7e701ce95 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -129,13 +129,13 @@ extern PyTypeObject _PyExc_MemoryError; .latin1 = _Py_str_latin1_INIT, \ }, \ .tuple_empty = { \ - .ob_base = _PyVarObject_HEAD_INIT(&PyTuple_Type, 0) \ + .ob_base = _PyVarObject_HEAD_INIT(&PyTuple_Type, 0), \ }, \ .hamt_bitmap_node_empty = { \ - .ob_base = _PyVarObject_HEAD_INIT(&_PyHamt_BitmapNode_Type, 0) \ + .ob_base = _PyVarObject_HEAD_INIT(&_PyHamt_BitmapNode_Type, 0), \ }, \ .context_token_missing = { \ - .ob_base = _PyObject_HEAD_INIT(&_PyContextTokenMissing_Type) \ + .ob_base = _PyObject_HEAD_INIT(&_PyContextTokenMissing_Type), \ }, \ }, \ }, \ @@ -172,11 +172,11 @@ extern PyTypeObject _PyExc_MemoryError; .singletons = { \ ._not_used = 1, \ .hamt_empty = { \ - .ob_base = _PyObject_HEAD_INIT(&_PyHamt_Type) \ + .ob_base = _PyObject_HEAD_INIT(&_PyHamt_Type), \ .h_root = (PyHamtNode*)&_Py_SINGLETON(hamt_bitmap_node_empty), \ }, \ .last_resort_memory_error = { \ - _PyObject_HEAD_INIT(&_PyExc_MemoryError) \ + _PyObject_HEAD_INIT(&_PyExc_MemoryError), \ .args = (PyObject*)&_Py_SINGLETON(tuple_empty) \ }, \ }, \ @@ -206,7 +206,7 @@ extern PyTypeObject _PyExc_MemoryError; #define _PyBytes_SIMPLE_INIT(CH, LEN) \ { \ - _PyVarObject_HEAD_INIT(&PyBytes_Type, (LEN)) \ + _PyVarObject_HEAD_INIT(&PyBytes_Type, (LEN)), \ .ob_shash = -1, \ .ob_sval = { (CH) }, \ } @@ -217,7 +217,7 @@ extern PyTypeObject _PyExc_MemoryError; #define _PyUnicode_ASCII_BASE_INIT(LITERAL, ASCII) \ { \ - .ob_base = _PyObject_HEAD_INIT(&PyUnicode_Type) \ + .ob_base = _PyObject_HEAD_INIT(&PyUnicode_Type), \ .length = sizeof(LITERAL) - 1, \ .hash = -1, \ .state = { \ diff --git a/Include/object.h 
b/Include/object.h index 9058558e3cd4d9..6f116ef35a790c 100644 --- a/Include/object.h +++ b/Include/object.h @@ -106,9 +106,26 @@ check by comparing the reference count field to the immortality reference count. #define _Py_IMMORTAL_REFCNT (UINT_MAX >> 2) #endif +// Py_NOGIL builds indicate immortal objects using `ob_ref_local`, which is +// always 32-bits. +#ifdef Py_NOGIL +#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX +#endif + // Make all internal uses of PyObject_HEAD_INIT immortal while preserving the // C-API expectation that the refcnt will be set to 1. -#ifdef Py_BUILD_CORE +#if defined(Py_NOGIL) +#define PyObject_HEAD_INIT(type) \ + { \ + 0, \ + 0, \ + 0, \ + 0, \ + _Py_IMMORTAL_REFCNT_LOCAL, \ + 0, \ + (type), \ + }, +#elif defined(Py_BUILD_CORE) #define PyObject_HEAD_INIT(type) \ { \ { _Py_IMMORTAL_REFCNT }, \ @@ -142,6 +159,7 @@ check by comparing the reference count field to the immortality reference count. * by hand. Similarly every pointer to a variable-size Python object can, * in addition, be cast to PyVarObject*. */ +#ifndef Py_NOGIL struct _object { #if (defined(__GNUC__) || defined(__clang__)) \ && !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L) @@ -166,6 +184,36 @@ struct _object { PyTypeObject *ob_type; }; +#else +// Objects that are not owned by any thread use a thread id (tid) of zero. +// This includes both immortal objects and objects whose reference count +// fields have been merged. +#define _Py_UNOWNED_TID 0 + +// The shared reference count uses the two least-significant bits to store +// flags. The remaining bits are used to store the reference count. +#define _Py_REF_SHARED_SHIFT 2 +#define _Py_REF_SHARED_FLAG_MASK 0x3 + +// The shared flags are initialized to zero. 
+#define _Py_REF_SHARED_INIT 0x0 +#define _Py_REF_MAYBE_WEAKREF 0x1 +#define _Py_REF_QUEUED 0x2 +#define _Py_REF_MERGED 0x3 + +// Create a shared field from a refcnt and desired flags +#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) + +struct _object { + uintptr_t ob_tid; // thread id (or zero) + uint16_t _padding; + uint8_t ob_mutex; // per-object lock + uint8_t ob_gc_bits; // gc-related state + uint32_t ob_ref_local; // local reference count + Py_ssize_t ob_ref_shared; // shared (atomic) reference count + PyTypeObject *ob_type; +}; +#endif /* Cast argument to PyObject* type. */ #define _PyObject_CAST(op) _Py_CAST(PyObject*, (op)) @@ -183,9 +231,56 @@ typedef struct { PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y); #define Py_Is(x, y) ((x) == (y)) +#if defined(Py_NOGIL) && !defined(Py_LIMITED_API) // only the Py_NOGIL refcounting code needs a thread id +static inline uintptr_t +_Py_ThreadId(void) +{ + uintptr_t tid; +#if defined(_MSC_VER) && defined(_M_X64) + tid = __readgsqword(48); +#elif defined(_MSC_VER) && defined(_M_IX86) + tid = __readfsdword(24); +#elif defined(_MSC_VER) && defined(_M_ARM64) + tid = __getReg(18); +#elif defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r" (tid)); // 32-bit always uses GS +#elif defined(__MACH__) && defined(__x86_64__) + __asm__("movq %%gs:0, %0" : "=r" (tid)); // x86_64 macOSX uses GS +#elif defined(__x86_64__) + __asm__("movq %%fs:0, %0" : "=r" (tid)); // x86_64 Linux, BSD uses FS +#elif defined(__arm__) + __asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid)); +#elif defined(__aarch64__) && defined(__APPLE__) + __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid)); +#elif defined(__aarch64__) + __asm__ ("mrs %0, tpidr_el0" : "=r" (tid)); +#else + # error "define _Py_ThreadId for this platform" +#endif + return tid; +} +#endif // Py_NOGIL && !Py_LIMITED_API + +#if defined(Py_NOGIL) && !defined(Py_LIMITED_API) +static inline Py_ALWAYS_INLINE int +_Py_IsOwnedByCurrentThread(PyObject *ob) +{ + return ob->ob_tid == _Py_ThreadId(); +} +#endif static inline Py_ssize_t Py_REFCNT(PyObject *ob) { +#if 
!defined(Py_NOGIL) return ob->ob_refcnt; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return _Py_IMMORTAL_REFCNT; + } + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); + return _Py_STATIC_CAST(Py_ssize_t, local) + + Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); +#endif } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob)) @@ -216,7 +311,9 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) { static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) { -#if SIZEOF_VOID_P > 4 +#if defined(Py_NOGIL) + return op->ob_ref_local == _Py_IMMORTAL_REFCNT_LOCAL; +#elif SIZEOF_VOID_P > 4 return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0; #else return op->ob_refcnt == _Py_IMMORTAL_REFCNT; @@ -240,7 +337,24 @@ static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { if (_Py_IsImmortal(ob)) { return; } +#if !defined(Py_NOGIL) ob->ob_refcnt = refcnt; +#else + if (_Py_IsOwnedByCurrentThread(ob)) { + // Set local refcount to desired refcount and shared refcount to zero, + // but preserve the shared refcount flags. + assert(refcnt < UINT32_MAX); + ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt); + ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK; + } + else { + // Set local refcount to zero and shared refcount to desired refcount. + // Mark the object as merged. + ob->ob_tid = _Py_UNOWNED_TID; + ob->ob_ref_local = 0; + ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); + } +#endif } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt)) @@ -618,7 +732,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) #else // Non-limited C API and limited C API for Python 3.9 and older access // directly PyObject.ob_refcnt. 
-#if SIZEOF_VOID_P > 4 +#if defined(Py_NOGIL) + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + uint32_t new_local = local + 1; + if (new_local == 0) { + return; + } + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); + } +#elif SIZEOF_VOID_P > 4 // Portable saturated add, branching on the carry flag and set low bits PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; PY_UINT32_T new_refcnt = cur_refcnt + 1; @@ -643,6 +769,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) # define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op)) #endif + +#if !defined(Py_LIMITED_API) && defined(Py_NOGIL) +// Implements Py_DECREF on objects not owned by the current thread. +PyAPI_FUNC(void) _Py_DecRefShared(PyObject *); +PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int); + +// Called from Py_DECREF by the owning thread when the local refcount reaches +// zero. The call will deallocate the object if the shared refcount is also +// zero. Otherwise, the thread gives up ownership and merges the reference +// count fields. +PyAPI_FUNC(void) _Py_MergeZeroLocalRefcount(PyObject *); +#endif + #if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) // Stable ABI implements Py_DECREF() as a function call on limited C API // version 3.12 and newer, and on Python built in debug mode. 
_Py_DecRef() was @@ -657,6 +796,52 @@ static inline void Py_DECREF(PyObject *op) { } #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) +#elif defined(Py_NOGIL) && defined(Py_REF_DEBUG) +static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (_Py_IsOwnedByCurrentThread(op)) { + if (local == 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefSharedDebug(op, filename, lineno); + } +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#elif defined(Py_NOGIL) +static inline void Py_DECREF(PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + if (_Py_IsOwnedByCurrentThread(op)) { + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefShared(op); + } +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) + #elif defined(Py_REF_DEBUG) static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) { diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index cb09a7ff438243..de7db70275441a 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -796,7 +796,10 @@ def check_cflags_pgo(): return any(option in cflags_nodist for option in pgo_options) -_header = 'nP' +if sysconfig.get_config_var('Py_NOGIL'): + _header = 'PHBBInP' +else: + _header = 'nP' _align = '0n' _vheader = _header + 'n' diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst b/Misc/NEWS.d/next/Core and 
Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst new file mode 100644 index 00000000000000..984548aced3df8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst @@ -0,0 +1 @@ +Implement biased reference counting in ``--disable-gil`` builds. diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index df11c00237f9a6..12d372ff167021 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -1,5 +1,11 @@ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif // gh-85283: On Windows, Py_LIMITED_API requires Py_BUILD_CORE to not attempt // linking the extension to python3.lib (which fails). Py_BUILD_CORE_MODULE is diff --git a/Modules/_multiprocessing/posixshmem.c b/Modules/_multiprocessing/posixshmem.c index c4d1138534d8c5..dc3c59d01e0cb1 100644 --- a/Modules/_multiprocessing/posixshmem.c +++ b/Modules/_multiprocessing/posixshmem.c @@ -2,8 +2,12 @@ posixshmem - A Python extension that provides shm_open() and shm_unlink() */ +#include "pyconfig.h" // Py_NOGIL + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include diff --git a/Modules/_scproxy.c b/Modules/_scproxy.c index 2f1b8618eb9fcc..c8e120e24b18b9 100644 --- a/Modules/_scproxy.c +++ b/Modules/_scproxy.c @@ -3,8 +3,14 @@ * using the SystemConfiguration framework. 
*/ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include #include diff --git a/Modules/_stat.c b/Modules/_stat.c index 402fbbaecf8dd8..d0035608aad0b3 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -11,8 +11,14 @@ * */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + // Need limited C API version 3.13 for PyModule_Add() on Windows +#ifndef Py_NOGIL #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" diff --git a/Modules/_testcapi/heaptype_relative.c b/Modules/_testcapi/heaptype_relative.c index 53dd01d1ed4f80..b58d26cddc723a 100644 --- a/Modules/_testcapi/heaptype_relative.c +++ b/Modules/_testcapi/heaptype_relative.c @@ -1,4 +1,11 @@ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL #define Py_LIMITED_API 0x030c0000 // 3.12 +#endif + #include "parts.h" #include // max_align_t #include // memset diff --git a/Modules/_testcapi/vectorcall_limited.c b/Modules/_testcapi/vectorcall_limited.c index 3e81903098f954..857cb30b3da1ca 100644 --- a/Modules/_testcapi/vectorcall_limited.c +++ b/Modules/_testcapi/vectorcall_limited.c @@ -1,6 +1,13 @@ /* Test Vectorcall in the limited API */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL #define Py_LIMITED_API 0x030c0000 // 3.12 +#endif + #include "parts.h" #include "clinic/vectorcall_limited.c.h" diff --git a/Modules/_testclinic_limited.c b/Modules/_testclinic_limited.c index 4273383816a0dd..63ebb5292602f8 100644 --- a/Modules/_testclinic_limited.c +++ b/Modules/_testclinic_limited.c @@ -4,8 +4,14 @@ #undef Py_BUILD_CORE_MODULE #undef Py_BUILD_CORE_BUILTIN +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + // For now, only limited C API 3.13 is supported +#ifndef Py_NOGIL #define Py_LIMITED_API 0x030d0000 +#endif /* Always enable assertions */ #undef NDEBUG diff --git 
a/Modules/_testimportmultiple.c b/Modules/_testimportmultiple.c index a13024d6cd029e..99d48b6617f49c 100644 --- a/Modules/_testimportmultiple.c +++ b/Modules/_testimportmultiple.c @@ -4,7 +4,13 @@ * foo, bar), only the first one is called the same as the compiled file. */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL #define Py_LIMITED_API 0x03020000 +#endif #include diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index b35cccb5bdc406..8bda1d602473a1 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -3,8 +3,14 @@ * DCE compatible Universally Unique Identifier library. */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" #if defined(HAVE_UUID_H) diff --git a/Modules/errnomodule.c b/Modules/errnomodule.c index e4fd3b47762702..5ac4d298343254 100644 --- a/Modules/errnomodule.c +++ b/Modules/errnomodule.c @@ -1,7 +1,13 @@ /* Errno module */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" #include // EPIPE diff --git a/Modules/md5module.c b/Modules/md5module.c index 9d412ba580c336..ff7fc09bca3996 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -16,8 +16,14 @@ /* MD5 objects */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" #include "hashlib.h" diff --git a/Modules/resource.c b/Modules/resource.c index c973008aaa6bd4..eb9c2e27fdba85 100644 --- a/Modules/resource.c +++ b/Modules/resource.c @@ -1,5 +1,11 @@ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need 
limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" #include // errno diff --git a/Modules/xxlimited.c b/Modules/xxlimited.c index df6e593b320e52..3dcf74ab1190d1 100644 --- a/Modules/xxlimited.c +++ b/Modules/xxlimited.c @@ -62,8 +62,14 @@ pass */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include "Python.h" #include diff --git a/Modules/xxlimited_35.c b/Modules/xxlimited_35.c index 361c7e76d77f50..04673ea68cfee5 100644 --- a/Modules/xxlimited_35.c +++ b/Modules/xxlimited_35.c @@ -5,7 +5,13 @@ * See the xxlimited module for an extension module template. */ +#ifndef _MSC_VER +#include "pyconfig.h" // Py_NOGIL +#endif + +#ifndef Py_NOGIL #define Py_LIMITED_API 0x03050000 +#endif #include "Python.h" diff --git a/Objects/object.c b/Objects/object.c index 791db755663e68..35c7e7bf33b135 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -296,6 +296,124 @@ _Py_DecRef(PyObject *o) Py_DECREF(o); } +#ifdef Py_NOGIL +static inline int +is_shared_refcnt_dead(Py_ssize_t shared) +{ +#if SIZEOF_SIZE_T == 8 + return shared == (Py_ssize_t)0xDDDDDDDDDDDDDDDD; +#else + return shared == (Py_ssize_t)0xDDDDDDDD; +#endif +} + +void +_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +{ + // Should we queue the object for the owning thread to merge? + int should_queue; + + Py_ssize_t new_shared; + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&o->ob_ref_shared); + do { + should_queue = (shared == 0 || shared == _Py_REF_MAYBE_WEAKREF); + + if (should_queue) { + // If the object had refcount zero, not queued, and not merged, + // then we enqueue the object to be merged by the owning thread. + // In this case, we don't subtract one from the reference count + // because the queue holds a reference. 
+ new_shared = _Py_REF_QUEUED; + } + else { + // Otherwise, subtract one from the reference count. This might + // be negative! + new_shared = shared - (1 << _Py_REF_SHARED_SHIFT); + } + +#ifdef Py_REF_DEBUG + if ((_Py_REF_IS_MERGED(new_shared) && new_shared < 0) || + is_shared_refcnt_dead(shared)) + { + _Py_NegativeRefcount(filename, lineno, o); + } +#endif + } while (!_Py_atomic_compare_exchange_ssize(&o->ob_ref_shared, + &shared, new_shared)); + + if (should_queue) { + // TODO: the inter-thread queue is not yet implemented. For now, + // we just merge the refcount here. + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1); + if (refcount == 0) { + _Py_Dealloc(o); + } + } + else if (new_shared == _Py_REF_MERGED) { + // refcount is zero AND merged + _Py_Dealloc(o); + } +} + +void +_Py_DecRefShared(PyObject *o) +{ + _Py_DecRefSharedDebug(o, NULL, 0); +} + +void +_Py_MergeZeroLocalRefcount(PyObject *op) +{ + assert(op->ob_ref_local == 0); + + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + if (shared == 0) { + // Fast-path: shared refcount is zero (including flags) + _Py_Dealloc(op); + return; + } + + // Slow-path: atomically set the flags (low two bits) to _Py_REF_MERGED. + Py_ssize_t new_shared; + do { + new_shared = (shared & ~_Py_REF_SHARED_FLAG_MASK) | _Py_REF_MERGED; + } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared, + &shared, new_shared)); + + if (new_shared == _Py_REF_MERGED) { + // i.e., the shared refcount is zero (only the flags are set) so we + // deallocate the object. 
+ _Py_Dealloc(op); + } +} + +Py_ssize_t +_Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) +{ + assert(!_Py_IsImmortal(op)); + Py_ssize_t refcnt; + Py_ssize_t new_shared; + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + do { + refcnt = Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); + if (_Py_REF_IS_MERGED(shared)) { + return refcnt; + } + + refcnt += (Py_ssize_t)op->ob_ref_local; + refcnt += extra; + + new_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); + } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared, + &shared, new_shared)); + + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); + return refcnt; +} +#endif + /**************************************/ @@ -1926,10 +2044,7 @@ PyTypeObject _PyNone_Type = { none_new, /*tp_new */ }; -PyObject _Py_NoneStruct = { - { _Py_IMMORTAL_REFCNT }, - &_PyNone_Type -}; +PyObject _Py_NoneStruct = _PyObject_HEAD_INIT(&_PyNone_Type); /* NotImplemented is an object that can be used to signal that an operation is not implemented for the given type combination. 
*/ @@ -2028,10 +2143,7 @@ PyTypeObject _PyNotImplemented_Type = { notimplemented_new, /*tp_new */ }; -PyObject _Py_NotImplementedStruct = { - { _Py_IMMORTAL_REFCNT }, - &_PyNotImplemented_Type -}; +PyObject _Py_NotImplementedStruct = _PyObject_HEAD_INIT(&_PyNotImplemented_Type); PyStatus @@ -2248,7 +2360,16 @@ new_reference(PyObject *op) _PyTraceMalloc_NewReference(op); } // Skip the immortal object check in Py_SET_REFCNT; always set refcnt to 1 +#if !defined(Py_NOGIL) op->ob_refcnt = 1; +#else + op->ob_tid = _Py_ThreadId(); + op->_padding = 0; + op->ob_mutex = 0; + op->ob_gc_bits = 0; + op->ob_ref_local = 1; + op->ob_ref_shared = 0; +#endif #ifdef Py_TRACE_REFS _Py_AddToAllObjects(op); #endif diff --git a/Objects/setobject.c b/Objects/setobject.c index 2a3514f2028c3d..76449c6a2a9ee7 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -2394,7 +2394,4 @@ static PyTypeObject _PySetDummy_Type = { Py_TPFLAGS_DEFAULT, /*tp_flags */ }; -static PyObject _dummy_struct = { - { _Py_IMMORTAL_REFCNT }, - &_PySetDummy_Type -}; +static PyObject _dummy_struct = _PyObject_HEAD_INIT(&_PySetDummy_Type); diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 1513f3e49abb44..a3ed0c096d84ed 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -98,10 +98,7 @@ PyTypeObject PyEllipsis_Type = { ellipsis_new, /* tp_new */ }; -PyObject _Py_EllipsisObject = { - { _Py_IMMORTAL_REFCNT }, - &PyEllipsis_Type -}; +PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); /* Slice object implementation */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 80b19567c63d20..87636efcfca050 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14967,7 +14967,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) // Skip the Immortal Instance check and restore // the two references (key and value) ignored // by PyUnicode_InternInPlace(). 
- s->ob_refcnt = 2; + _Py_SetMortal(s, 2); #ifdef INTERNED_STATS total_length += PyUnicode_GET_LENGTH(s); #endif diff --git a/PC/winsound.c b/PC/winsound.c index ae36936703b0c3..36e4d07420e650 100644 --- a/PC/winsound.c +++ b/PC/winsound.c @@ -35,8 +35,10 @@ winsound.PlaySound(None, 0) */ +#ifndef Py_NOGIL // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED #define Py_LIMITED_API 0x030d0000 +#endif #include #include diff --git a/Python/ceval.c b/Python/ceval.c index e3a7c5f38403a7..6f8584c15b7bde 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -46,12 +46,13 @@ # error "ceval.c must be build with Py_BUILD_CORE define for best performance" #endif -#if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) +#if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_NOGIL) // GH-89279: The MSVC compiler does not inline these static inline functions // in PGO build in _PyEval_EvalFrameDefault(), because this function is over // the limit of PGO, and that limit cannot be configured. // Define them as macros to make sure that they are always inlined by the // preprocessor. 
+// TODO: implement Py_DECREF macro for Py_NOGIL #undef Py_DECREF #define Py_DECREF(arg) \ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 5fd65d53c503f8..9ee11588e448ae 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -19,17 +19,9 @@ /* Uncomment this to dump debugging output when assertions fail */ // #define INSTRUMENT_DEBUG 1 -PyObject _PyInstrumentation_DISABLE = -{ - .ob_refcnt = _Py_IMMORTAL_REFCNT, - .ob_type = &PyBaseObject_Type -}; +PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); -PyObject _PyInstrumentation_MISSING = -{ - .ob_refcnt = _Py_IMMORTAL_REFCNT, - .ob_type = &PyBaseObject_Type -}; +PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type); static const int8_t EVENT_FOR_OPCODE[256] = { [RETURN_CONST] = PY_MONITORING_EVENT_PY_RETURN, diff --git a/Python/specialize.c b/Python/specialize.c index 07fd93d29b09dc..d74c4c58a94523 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2525,7 +2525,7 @@ static const PyBytesObject no_location = { }; const struct _PyCode_DEF(8) _Py_InitCleanup = { - _PyVarObject_HEAD_INIT(&PyCode_Type, 3) + _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), .co_names = (PyObject *)&_Py_SINGLETON(tuple_empty), .co_exceptiontable = (PyObject *)&_Py_SINGLETON(bytes_empty), diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index c3231a5a40c326..fed7cf3ac6d396 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -115,6 +115,7 @@ def __init__(self, file: TextIO) -> None: self.inits: list[str] = [] self.identifiers, self.strings = self.get_identifiers_and_strings() self.write('#include "Python.h"') + self.write('#include "internal/pycore_object.h"') self.write('#include "internal/pycore_gc.h"') self.write('#include "internal/pycore_code.h"') self.write('#include "internal/pycore_frame.h"') @@ -154,14 +155,10 @@ def block(self, prefix: str, 
suffix: str = "") -> None: self.write("}" + suffix) def object_head(self, typename: str) -> None: - with self.block(".ob_base =", ","): - self.write(f".ob_refcnt = _Py_IMMORTAL_REFCNT,") - self.write(f".ob_type = &{typename},") + self.write(f".ob_base = _PyObject_HEAD_INIT(&{typename}),") def object_var_head(self, typename: str, size: int) -> None: - with self.block(".ob_base =", ","): - self.object_head(typename) - self.write(f".ob_size = {size},") + self.write(f".ob_base = _PyVarObject_HEAD_INIT(&{typename}, {size}),") def field(self, obj: object, name: str) -> None: self.write(f".{name} = {getattr(obj, name)},")