From f89efaea5269eb8bd938fc0525d9bf0f15f02fb7 Mon Sep 17 00:00:00 2001 From: Alm Date: Fri, 29 Aug 2025 15:06:29 +0300 Subject: [PATCH 1/2] JIT: Streamline MAKE_WARM - move coldness check to executor creation --- Include/internal/pycore_optimizer.h | 3 ++- Python/bytecodes.c | 5 ----- Python/executor_cases.c.h | 3 --- Python/optimizer.c | 9 +++++++++ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 9f930f2107ed5e..e2505912a2d770 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -116,7 +116,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // Used as the threshold to trigger executor invalidation when // trace_run_counter is greater than this value. -#define JIT_CLEANUP_THRESHOLD 100000 +// TODO: Test what should be the optimal value for this. +#define JIT_CLEANUP_THRESHOLD 1000 // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 800 diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7f89c312b9a815..107d88fdb8e3b5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -11,7 +11,6 @@ #include "pycore_audit.h" // _PySys_Audit() #include "pycore_backoff.h" #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" #include "pycore_code.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS #include "pycore_function.h" @@ -5367,10 +5366,6 @@ dummy_func( tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; - // It's okay if this ends up going negative. - if (--tstate->interp->trace_run_counter == 0) { - _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); - } } tier2 op(_FATAL_ERROR, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3dcb2decc43737..41011b243b1fcc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7420,9 +7420,6 @@ case _MAKE_WARM: { current_executor->vm_data.warm = true; - if (--tstate->interp->trace_run_counter == 0) { - _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); - } break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index bae5cfa50ead58..5c3c96d0848f8c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -6,6 +6,7 @@ #include "pycore_interp.h" #include "pycore_backoff.h" #include "pycore_bitutils.h" // _Py_popcount32() +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit #include "pycore_code.h" // _Py_GetBaseCodeUnit #include "pycore_function.h" // _PyFunction_LookupByVersion() #include "pycore_interpframe.h" @@ -1322,6 +1323,14 @@ uop_optimize( return -1; } assert(length <= UOP_MAX_TRACE_LENGTH); + + // Check executor coldness + PyThreadState *tstate = PyThreadState_Get(); + // It's okay if this ends up going negative. + if (--tstate->interp->trace_run_counter == 0) { + _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); + } + *exec_ptr = executor; return 1; } From 0aeba969421bc6052e775da8292a99dedf345a0e Mon Sep 17 00:00:00 2001 From: Alm Date: Thu, 16 Oct 2025 22:43:38 +0300 Subject: [PATCH 2/2] Rename counter --- Include/internal/pycore_interp_structs.h | 2 +- Include/internal/pycore_optimizer.h | 4 ++-- Python/ceval_gil.c | 2 +- Python/optimizer.c | 2 +- Python/pystate.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index fa9568ab4d0e85..03a12b927c6d9d 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -953,7 +953,7 @@ struct _is { struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; int executor_deletion_list_remaining_capacity; - size_t trace_run_counter; + size_t executor_creation_counter; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e2505912a2d770..06a264a1b32537 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -115,8 +115,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); #endif // Used as the threshold to trigger executor invalidation when -// trace_run_counter is greater than this value. -// TODO: Test what should be the optimal value for this. +// executor_creation_counter is greater than this value. +// This value is arbitrary and was not optimized. #define JIT_CLEANUP_THRESHOLD 1000 // This is the length of the trace we project initially. diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 6bf64868cbb2d3..9b6506ac3326b3 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1398,7 +1398,7 @@ _Py_HandlePending(PyThreadState *tstate) if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); _Py_Executors_InvalidateCold(tstate->interp); - tstate->interp->trace_run_counter = JIT_CLEANUP_THRESHOLD; + tstate->interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD; } /* GIL drop request */ diff --git a/Python/optimizer.c b/Python/optimizer.c index 5c3c96d0848f8c..410469d66dd8a0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1327,7 +1327,7 @@ uop_optimize( // Check executor coldness PyThreadState *tstate = PyThreadState_Get(); // It's okay if this ends up going negative. - if (--tstate->interp->trace_run_counter == 0) { + if (--tstate->interp->executor_creation_counter == 0) { _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); } diff --git a/Python/pystate.c b/Python/pystate.c index 2465d8667472dc..8380336609ed7f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -574,7 +574,7 @@ init_interpreter(PyInterpreterState *interp, interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0; - interp->trace_run_counter = JIT_CLEANUP_THRESHOLD; + interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD; if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp);