diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 9e4504479cd9f0..9671f3457e6272 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -930,7 +930,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; - bool jit; + uint8_t jit; bool compiling; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 0307a174e77346..c0f9f545b30799 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -372,8 +372,6 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, void _PyJit_FinalizeTracing(PyThreadState *tstate); -void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj); - #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 25372fee58e0d7..f336ae73bf1462 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -9,9 +9,13 @@ import _opcode -from test.support import (script_helper, requires_specialization, - import_helper, Py_GIL_DISABLED, requires_jit_enabled, - reset_code) +from test.support import ( + script_helper, + import_helper, + Py_GIL_DISABLED, + requires_jit_enabled, + reset_code +) _testinternalcapi = import_helper.import_module("_testinternalcapi") @@ -61,8 +65,6 @@ def get_opnames(ex): return list(iter_opnames(ex)) -@requires_specialization -@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @requires_jit_enabled class TestExecutorInvalidation(unittest.TestCase): @@ -130,8 +132,6 @@ def f(): self.assertIsNone(exe) -@requires_specialization -@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @requires_jit_enabled @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUops(unittest.TestCase): @@ -434,8 +434,6 @@ def testfunc(n, m): self.assertIn("_FOR_ITER_TIER_TWO", uops) -@requires_specialization -@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @requires_jit_enabled @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUopsOptimization(unittest.TestCase): @@ -2052,6 +2050,7 @@ def testfunc(n): self.assertNotIn("_GUARD_NOS_INT", uops) self.assertNotIn("_GUARD_TOS_INT", uops) + @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants") def test_call_len_known_length_small_int(self): # Make sure that len(t) is optimized for a tuple of length 5. # See https://github.com/python/cpython/issues/139393. @@ -2076,6 +2075,7 @@ def testfunc(n): self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants") def test_call_len_known_length(self): # Make sure that len(t) is not optimized for a tuple of length 2048. # See https://github.com/python/cpython/issues/139393. 
@@ -2497,6 +2497,7 @@ def testfunc(n):
 
         self.assertIn("_POP_TOP_NOP", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_int(self):
         def testfunc(n):
             for _ in range(n):
@@ -2510,6 +2511,7 @@ def testfunc(n):
 
         self.assertIn("_POP_TOP_INT", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_float(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
new file mode 100644
index 00000000000000..5db6330ca540b0
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
@@ -0,0 +1,3 @@
+Add free-threading support to the JIT. On the free-threaded build, the JIT is
+enabled only while a single thread is running, and is disabled once multiple
+threads are spawned. Patch by Ken Jin.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 89e558b0fe8933..31900a16b9e044 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1240,8 +1240,7 @@ add_executor_dependency(PyObject *self, PyObject *args)
 
 static PyObject *
 invalidate_executors(PyObject *self, PyObject *obj)
 {
-    PyInterpreterState *interp = PyInterpreterState_Get();
-    _Py_Executors_InvalidateDependency(interp, obj, 1);
+    _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), obj, 1);
     Py_RETURN_NONE;
 }
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 3aea2038fd17e7..b61d86d110fb1b 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2432,7 +2432,7 @@ code_dealloc(PyObject *self)
         PyMem_Free(co_extra);
     }
 #ifdef _Py_TIER2
-    _PyJit_Tracer_InvalidateDependency(tstate, self);
+    _Py_Executors_InvalidateDependency(tstate->interp, self, 1);
     if (co->co_executors != NULL) {
         clear_executors(co);
     }
@@ -3363,8 +3363,12 @@ deopt_code_unit(PyCodeObject *code, int i)
         inst.op.code = _PyOpcode_Deopt[opcode];
         assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
     }
-    // JIT should not be enabled with free-threading
-    assert(inst.op.code != ENTER_EXECUTOR);
+    if (inst.op.code == ENTER_EXECUTOR) {
+        _PyExecutorObject *exec = code->co_executors->executors[inst.op.arg];
+        assert(exec != NULL);
+        inst.op.code = exec->vm_data.opcode;
+        inst.op.arg = exec->vm_data.oparg;
+    }
     return inst;
 }
 
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index b652973600c17d..05ecf74d7d1e2f 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -263,7 +263,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
 
 #if _Py_TIER2
             _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
-            _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
 #endif
 
             _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index b659ac8023373b..e11db261bc23f3 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -11,7 +11,7 @@
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_stats.h"
 #include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h"     // _PyJit_Tracer_InvalidateDependency
+#include "pycore_optimizer.h"     // _Py_Executors_InvalidateDependency
 
 static const char *
 func_event_name(PyFunction_WatchEvent event) {
@@ -298,7 +298,7 @@ functions is running.
*/ -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 static inline struct _func_version_cache_item * get_cache_item(PyInterpreterState *interp, uint32_t version) { @@ -315,11 +315,13 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) // This should only be called from MAKE_FUNCTION. No code is specialized // based on the version, so we do not need to stop the world to set it. func->func_version = version; -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 PyInterpreterState *interp = _PyInterpreterState_GET(); + FT_MUTEX_LOCK(&interp->func_state.mutex); struct _func_version_cache_item *slot = get_cache_item(interp, version); slot->func = func; slot->code = func->func_code; + FT_MUTEX_UNLOCK(&interp->func_state.mutex); #endif } @@ -330,13 +332,15 @@ func_clear_version(PyInterpreterState *interp, PyFunctionObject *func) // Version was never set or has already been cleared. return; } -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 + FT_MUTEX_LOCK(&interp->func_state.mutex); struct _func_version_cache_item *slot = get_cache_item(interp, func->func_version); if (slot->func == func) { slot->func = NULL; // Leave slot->code alone, there may be use for it. } + FT_MUTEX_UNLOCK(&interp->func_state.mutex); #endif func->func_version = FUNC_VERSION_CLEARED; } @@ -358,8 +362,9 @@ _PyFunction_ClearVersion(PyFunctionObject *func) void _PyFunction_ClearCodeByVersion(uint32_t version) { -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 PyInterpreterState *interp = _PyInterpreterState_GET(); + FT_MUTEX_LOCK(&interp->func_state.mutex); struct _func_version_cache_item *slot = get_cache_item(interp, version); if (slot->code) { assert(PyCode_Check(slot->code)); @@ -369,15 +374,17 @@ _PyFunction_ClearCodeByVersion(uint32_t version) slot->func = NULL; } } + FT_MUTEX_UNLOCK(&interp->func_state.mutex); #endif } PyFunctionObject * _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code) { -#ifdef Py_GIL_DISABLED - return NULL; -#else +#if _Py_TIER2 + // This function does not need locking/atomics as it can only be + // called from the optimizer, which is currently disabled + // when there are multiple threads. PyInterpreterState *interp = _PyInterpreterState_GET(); struct _func_version_cache_item *slot = get_cache_item(interp, version); if (slot->code) { @@ -395,12 +402,18 @@ _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code) return slot->func; } return NULL; +#else + return NULL; #endif } uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) { + // This function does not need locking/atomics as it can only be + // called from the specializing interpreter or optimizer. + // The specializing interpreter holds a strong reference to the function. + // The optimizer is currently disabled when there are multiple threads. return func->func_version; } @@ -1153,7 +1166,6 @@ func_dealloc(PyObject *self) } #if _Py_TIER2 _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1); - _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self); #endif _PyObject_GC_UNTRACK(op); FT_CLEAR_WEAKREFS(self, op->func_weakreflist); diff --git a/Objects/listobject.c b/Objects/listobject.c index 1722ea60cdc68f..62ba2ba641740e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -79,7 +79,9 @@ ensure_shared_on_resize(PyListObject *self) // We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because // the `CALL_LIST_APPEND` bytecode handler may lock the list without // a critical section. 
- assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex)); + assert(Py_REFCNT(self) == 1 || + (_Py_IsOwnedByCurrentThread((PyObject *)self) && !_PyObject_GC_IS_SHARED(self)) || + PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex)); // Ensure that the list array is freed using QSBR if we are not the // owning thread. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 61bcc21ce13d47..33a94ca17e74ad 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1148,8 +1148,9 @@ static void set_version_unlocked(PyTypeObject *tp, unsigned int version) { assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED)); -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 PyInterpreterState *interp = _PyInterpreterState_GET(); + BEGIN_TYPE_LOCK(); // lookup the old version and set to null if (tp->tp_version_tag != 0) { PyTypeObject **slot = @@ -1157,6 +1158,8 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE); *slot = NULL; } +#endif +#ifndef Py_GIL_DISABLED if (version) { tp->tp_versions_used++; } @@ -1166,13 +1169,14 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) } #endif FT_ATOMIC_STORE_UINT_RELAXED(tp->tp_version_tag, version); -#ifndef Py_GIL_DISABLED +#if _Py_TIER2 if (version != 0) { PyTypeObject **slot = interp->types.type_version_cache + (version % TYPE_VERSION_CACHE_SIZE); *slot = tp; } + END_TYPE_LOCK(); #endif } @@ -1357,9 +1361,12 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version) PyTypeObject * _PyType_LookupByVersion(unsigned int version) { -#ifdef Py_GIL_DISABLED +#ifndef _Py_TIER2 return NULL; #else + // This function does not need locking/atomics as it can only be + // called from the optimizer, which is currently disabled + // when there are multiple threads. PyInterpreterState *interp = _PyInterpreterState_GET(); PyTypeObject **slot = interp->types.type_version_cache diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 565eaa7a599175..8f391049c8dc1b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2939,9 +2939,9 @@ dummy_func( }; specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (this_instr->op.code == JUMP_BACKWARD) { - uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; + uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired); // Need to re-dispatch so the warmup counter isn't off by one: next_instr = this_instr; @@ -3284,11 +3284,9 @@ dummy_func( // Only used by Tier 2 op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) { -#ifndef Py_GIL_DISABLED PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(list_o) == &PyList_Type); EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)); -#endif } replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) { @@ -5281,6 +5279,19 @@ dummy_func( } tier2 op(_CHECK_VALIDITY, (--)) { + // For FT: + // This doesn't need atomics (for now) as there is only a single time + // where a write from another thread is possible: + // when a new thread is spawned and it invalidates all current + // executors. + // The new thread can only be created by an executing uop prior to the + // _CHECK_VALIDITY check. 
New thread creation is synchronized by + // locking of the runtime, and the current thread is naturally + // paused/waiting for the new thread to be created. Thus, + // there is a strict happens-before relation between that + // uop's invalidation of validity and this check. + // So for now, while the JIT does not run on multiple threads, + // it is safe for this to be non-atomic. DEOPT_IF(!current_executor->vm_data.valid); } diff --git a/Python/ceval.c b/Python/ceval.c index 31b81a37464718..7604d7c8059a8f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1300,7 +1300,7 @@ _PyTier2Interpreter( for (;;) { uopcode = next_uop->opcode; #ifdef Py_DEBUG - if (frame->lltrace >= 3) { + if (frame->lltrace >= 4) { dump_stack(frame, stack_pointer); if (next_uop->opcode == _START_EXECUTOR) { printf("%4d uop: ", 0); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 9b6506ac3326b3..e42c5f237e45b3 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1395,11 +1395,13 @@ _Py_HandlePending(PyThreadState *tstate) _Py_RunGC(tstate); } +#ifdef _Py_TIER2 if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); _Py_Executors_InvalidateCold(tstate->interp); tstate->interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD; } +#endif /* GIL drop request */ if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6796abf84ac5f4..7dd38f03b3f7db 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4436,14 +4436,12 @@ _PyStackRef iter; null_or_index = stack_pointer[-1]; iter = stack_pointer[-2]; - #ifndef Py_GIL_DISABLED PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(list_o) == &PyList_Type); if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #endif break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0d4678df68ce2d..c94bfa955c3a86 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7588,9 +7588,9 @@ /* Skip 1 cache entry */ // _SPECIALIZE_JUMP_BACKWARD { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (this_instr->op.code == JUMP_BACKWARD) { - uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; + uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? 
JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
                FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                next_instr = this_instr;
                DISPATCH_SAME_OPARG();
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 81e46a331e0b9e..7c9f9381e41c54 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -1786,7 +1786,6 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
         _PyCode_Clear_Executors(code);
     }
     _Py_Executors_InvalidateDependency(interp, code, 1);
-    _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code);
 #endif
     int code_len = (int)Py_SIZE(code);
     /* Exit early to avoid creating instrumentation
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 9db894f0bf054a..f79c0d5c893cca 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -140,7 +140,6 @@ _PyOptimizer_Optimize(
     )
     }
     assert(!interp->compiling);
     assert(_tstate->jit_tracer_state.initial_state.stack_depth >= 0);
-#ifndef Py_GIL_DISABLED
     assert(_tstate->jit_tracer_state.initial_state.func != NULL);
     interp->compiling = true;
     // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
@@ -193,9 +192,6 @@ _PyOptimizer_Optimize(
     assert(executor->vm_data.valid);
     interp->compiling = false;
     return 1;
-#else
-    return 0;
-#endif
 }
 
 static _PyExecutorObject *
@@ -245,8 +241,9 @@ get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
 
 ///////////////////// Experimental UOp Optimizer /////////////////////
 
-static int executor_clear(PyObject *executor);
-static void unlink_executor(_PyExecutorObject *executor);
+static int executor_clear(PyInterpreterState *interp, PyObject *executor);
+static int executor_clear_implicit_interp(PyObject *executor);
+static void unlink_executor(PyInterpreterState *interp, _PyExecutorObject *executor);
 
 
 void
@@ -312,7 +309,7 @@ uop_dealloc(PyObject *op) {
     _PyExecutorObject *self = _PyExecutorObject_CAST(op);
     _PyObject_GC_UNTRACK(self);
     assert(self->vm_data.code == NULL);
-    unlink_executor(self);
+    unlink_executor(_PyInterpreterState_GET(), self);
     // Once unlinked it becomes impossible to invalidate an executor, so do it here.
     self->vm_data.valid = 0;
     add_to_pending_deletion_list(self);
@@ -451,7 +448,11 @@ static PyMethodDef uop_executor_methods[] = {
 static int
 executor_is_gc(PyObject *o)
 {
+#ifdef Py_GIL_DISABLED
+    return 1;
+#else
     return !_Py_IsImmortal(o);
+#endif
 }
 
 PyTypeObject _PyUOpExecutor_Type = {
@@ -464,7 +465,7 @@ PyTypeObject _PyUOpExecutor_Type = {
     .tp_as_sequence = &uop_as_sequence,
     .tp_methods = uop_executor_methods,
     .tp_traverse = executor_traverse,
-    .tp_clear = executor_clear,
+    .tp_clear = executor_clear_implicit_interp,
     .tp_is_gc = executor_is_gc,
 };
 
@@ -1525,7 +1526,7 @@ link_executor(_PyExecutorObject *executor)
 }
 static void
-unlink_executor(_PyExecutorObject *executor)
+unlink_executor(PyInterpreterState *interp, _PyExecutorObject *executor)
 {
     if (!executor->vm_data.linked) {
         return;
     }
@@ -1542,7 +1543,6 @@ unlink_executor(_PyExecutorObject *executor)
     }
     else {
         // prev == NULL implies that executor is the list head
-        PyInterpreterState *interp = PyInterpreterState_Get();
        assert(interp->executor_list_head == executor);
         interp->executor_list_head = next;
     }
@@ -1653,15 +1653,19 @@ _Py_ExecutorDetach(_PyExecutorObject *executor)
     Py_DECREF(executor);
 }
 
+// Note: we must use the interpreter state supplied by the caller and pass
+// it on to unlink_executor. This function might be called while a new
+// interpreter is being created or torn down, so the current interpreter
+// state may not be the one the executor actually belongs to.
 static int
-executor_clear(PyObject *op)
+executor_clear(PyInterpreterState *interp, PyObject *op)
 {
     _PyExecutorObject *executor = _PyExecutorObject_CAST(op);
     if (!executor->vm_data.valid) {
         return 0;
     }
     assert(executor->vm_data.valid == 1);
-    unlink_executor(executor);
+    unlink_executor(interp, executor);
     executor->vm_data.valid = 0;
 
     /* It is possible for an executor to form a reference
@@ -1669,18 +1673,27 @@
      * free the executor unless we hold a strong reference to it
      */
     _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+    _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
     Py_INCREF(executor);
     for (uint32_t i = 0; i < executor->exit_count; i++) {
         executor->exits[i].temperature = initial_unreachable_backoff_counter();
         _PyExecutorObject *e = executor->exits[i].executor;
-        executor->exits[i].executor = cold;
-        Py_DECREF(e);
+        executor->exits[i].executor = NULL;
+        if (e != cold && e != cold_dynamic && e->vm_data.code == NULL) {
+            executor_clear(interp, (PyObject *)e);
+        }
     }
     _Py_ExecutorDetach(executor);
     Py_DECREF(executor);
     return 0;
 }
 
+static int
+executor_clear_implicit_interp(PyObject *op)
+{
+    return executor_clear(_PyInterpreterState_GET(), op);
+}
+
 void
 _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
 {
@@ -1688,6 +1701,8 @@ _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
     _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
 }
 
+static void jit_tracer_invalidate_dependency(PyThreadState *tstate, void *obj);
+
 /* Invalidate all executors that depend on `obj`
  * May cause other executors to be invalidated as well
  */
@@ -1703,6 +1718,12 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
     if (invalidate == NULL) {
         goto error;
     }
+
+    // It doesn't matter if we don't invalidate all threads.
+    // If more threads are spawned, we force the JIT not to compile anyway,
+    // so the trace gets abandoned.
+ jit_tracer_invalidate_dependency(_PyThreadState_GET(), obj); + /* Clearing an executor can deallocate others, so we need to make a list of * executors to invalidate first */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { @@ -1717,7 +1738,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is } for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { PyObject *exec = PyList_GET_ITEM(invalidate, i); - executor_clear(exec); + executor_clear(interp, exec); if (is_invalidation) { OPT_STAT_INC(executors_invalidated); } @@ -1731,8 +1752,8 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is _Py_Executors_InvalidateAll(interp, is_invalidation); } -void -_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj) +static void +jit_tracer_invalidate_dependency(PyThreadState *tstate, void *obj) { _PyBloomFilter obj_filter; _Py_BloomFilter_Init(&obj_filter); @@ -1755,7 +1776,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) _PyCode_Clear_Executors(executor->vm_data.code); } else { - executor_clear((PyObject *)executor); + executor_clear(interp, (PyObject *)executor); } if (is_invalidation) { OPT_STAT_INC(executors_invalidated); @@ -1790,7 +1811,7 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) } for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { PyObject *exec = PyList_GET_ITEM(invalidate, i); - executor_clear(exec); + executor_clear(interp, exec); } Py_DECREF(invalidate); return; @@ -1937,4 +1958,4 @@ _PyExecutor_Free(struct _PyExecutorObject *self) Py_UNREACHABLE(); } -#endif /* _Py_TIER2 */ +#endif /* _Py_TIER2 */ \ No newline at end of file diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8d7b734e17cb0b..e60e927aba6f21 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -114,7 +114,7 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop) if (res == NULL) { return NULL; } - if (_Py_IsImmortal(res)) { + if (_Py_IsImmortal(res) || _PyObject_HasDeferredRefcount(res)) { inst->opcode = pop ? _POP_TOP_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE_BORROW; } else { @@ -226,14 +226,14 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) static JitOptRef lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, - PyTypeObject *type, PyObject *name, uint16_t immortal, + PyTypeObject *type, PyObject *name, uint16_t deferred_refcount, uint16_t mortal) { // The cached value may be dead, so we need to do the lookup again... :( if (type && PyType_Check(type)) { PyObject *lookup = _PyType_Lookup(type, name); if (lookup) { - int opcode = _Py_IsImmortal(lookup) ? immortal : mortal; + int opcode = _Py_IsImmortal(lookup) || _PyObject_HasDeferredRefcount(lookup) ? 
deferred_refcount : mortal; REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup); return sym_new_const(ctx, lookup); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67368b5ce077aa..b3048c64d21f73 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1361,7 +1361,7 @@ init_interp_main(PyThreadState *tstate) } else #endif { - interp->jit = true; + interp->jit = 1; } } } @@ -1718,7 +1718,7 @@ finalize_modules(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; // Invalidate all executors and turn off JIT: - interp->jit = false; + interp->jit = 0; interp->compiling = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); diff --git a/Python/pystate.c b/Python/pystate.c index c12a1418e74309..425e39a293bfc0 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -566,7 +566,7 @@ init_interpreter(PyInterpreterState *interp, interp->monitoring_tool_versions[t] = 0; } interp->_code_object_generation = 0; - interp->jit = false; + interp->jit = 0; interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; @@ -1522,6 +1522,12 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, { assert(interp->threads.head != tstate); if (next != NULL) { +#if defined(_Py_TIER2) && defined(Py_GIL_DISABLED) + FT_ATOMIC_STORE_UINT8(interp->jit, 0); + // There's more than one thread. In FT mode, + // disable the JIT completely for now. + _Py_Executors_InvalidateAll(interp, 1); +#endif assert(next->prev == NULL || next->prev == tstate); next->prev = tstate; } @@ -1816,6 +1822,13 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE); _tstate->jit_tracer_state.code_buffer = NULL; } +#ifdef Py_GIL_DISABLED + // There's only one thread. Re-enable JIT. + PyThreadState *curr = interp->threads.head; + if (curr != NULL && curr->prev == NULL && curr->next == NULL) { + FT_ATOMIC_STORE_UINT8(interp->jit, 1); + } +#endif #endif HEAD_UNLOCK(runtime); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b4b441bf4d9519..1d9da9b0f0610b 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2365,7 +2365,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend) { #ifdef PY_HAVE_PERF_TRAMPOLINE #ifdef _Py_JIT - if (_PyInterpreterState_GET()->jit) { + if (FT_ATOMIC_LOAD_UINT8(_PyInterpreterState_GET()->jit)) { PyErr_SetString(PyExc_ValueError, "Cannot activate the perf trampoline if the JIT is active"); return NULL; }
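The pystate.c hunks above implement a simple policy on the free-threaded build: interp->jit is cleared as soon as a second thread state is registered (and all executors are invalidated), and it is set again once only one thread state remains. The following standalone C sketch models that policy only; toy_interp, toy_add_thread and toy_delete_thread are invented names for illustration and this is not CPython code.

/* Standalone sketch of the single-thread JIT policy described above.
 * All names are invented; this is not part of the patch. */
#include <stdatomic.h>
#include <stdio.h>

typedef struct {
    atomic_uchar jit;   /* plays the role of interp->jit (uint8_t) */
    int nthreads;       /* plays the role of the interp->threads list length */
} toy_interp;

/* Like add_threadstate() in the patch: once a second thread exists,
 * clear the JIT flag (the real code also invalidates all executors). */
static void
toy_add_thread(toy_interp *interp)
{
    if (++interp->nthreads > 1) {
        atomic_store(&interp->jit, 0);
    }
}

/* Like tstate_delete_common() in the patch: when the thread count
 * drops back to one, set the JIT flag again. */
static void
toy_delete_thread(toy_interp *interp)
{
    if (--interp->nthreads == 1) {
        atomic_store(&interp->jit, 1);
    }
}

int
main(void)
{
    toy_interp interp = { .nthreads = 1 };
    atomic_store(&interp.jit, 1);   /* JIT on while only the main thread runs */

    toy_add_thread(&interp);        /* second thread spawned */
    printf("jit after spawn: %d\n", atomic_load(&interp.jit));   /* prints 0 */

    toy_delete_thread(&interp);     /* back to a single thread */
    printf("jit after join:  %d\n", atomic_load(&interp.jit));   /* prints 1 */
    return 0;
}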