Commits
42 commits
6a3bd75
Move all JIT fields to thread state
Fidget-Spinner Nov 13, 2025
ba9a65a
fix a bug with traversing states
Fidget-Spinner Nov 13, 2025
725894d
fix JIT invalidation mechanism for FT
Fidget-Spinner Nov 14, 2025
1e5713d
fix re-entrant finalizers
Fidget-Spinner Nov 14, 2025
f0d4c57
Re-enable the JIT
Fidget-Spinner Nov 14, 2025
67de7d6
cleanup a little
Fidget-Spinner Nov 14, 2025
7f0bc57
fix weird GC bugs
Fidget-Spinner Nov 14, 2025
f05e61c
re-enable jit on some stuff
Fidget-Spinner Nov 14, 2025
f78e8c8
fix test, more locks!
Fidget-Spinner Nov 14, 2025
e1f1b30
fix JIT builds
Fidget-Spinner Nov 14, 2025
53c5e1d
only clear at end
Fidget-Spinner Nov 14, 2025
cc38ee4
remove locks in JIT code
Fidget-Spinner Nov 14, 2025
f8fefb3
fix a few bugs
Fidget-Spinner Nov 14, 2025
d76a24b
Improve tracer thread safety
Fidget-Spinner Nov 15, 2025
fa99108
set immortal before GC
Fidget-Spinner Nov 15, 2025
fcfed96
📜🤖 Added by blurb_it.
blurb-it[bot] Nov 15, 2025
ba67ab7
fix default builkd
Fidget-Spinner Nov 15, 2025
b46385b
Merge branch 'jit_ft' of github.com:Fidget-Spinner/cpython into jit_ft
Fidget-Spinner Nov 15, 2025
46285be
Merge remote-tracking branch 'upstream/main' into jit_ft
Fidget-Spinner Nov 15, 2025
8ce83cd
fix comment
Fidget-Spinner Nov 15, 2025
fa55643
lint
Fidget-Spinner Nov 15, 2025
92f3dbf
fix nojit builds
Fidget-Spinner Nov 15, 2025
7240b15
Remove heavywieght locking--not needed
Fidget-Spinner Nov 15, 2025
cb87676
address review, fix bug
Fidget-Spinner Nov 15, 2025
f972637
GC executors
Fidget-Spinner Nov 15, 2025
3ef237b
Change the name
Fidget-Spinner Nov 16, 2025
44356d6
Address review, remove more refcounting
Fidget-Spinner Nov 16, 2025
b819053
Fix TSAN races
Fidget-Spinner Nov 16, 2025
527aac1
Remove atomics from _CHECK_VALIDITY
Fidget-Spinner Nov 16, 2025
b80c02e
fix comment
Fidget-Spinner Nov 16, 2025
4278c9d
fix typo
Fidget-Spinner Nov 16, 2025
d84215d
Re-enable the type/function reverse cache
Fidget-Spinner Nov 16, 2025
b08ef60
allow allow deferred things to be borrrowed too
Fidget-Spinner Nov 16, 2025
c2c8fbe
fix warnings
Fidget-Spinner Nov 16, 2025
162b1ec
skip bad tests
Fidget-Spinner Nov 16, 2025
e0890ff
skip bad tests
Fidget-Spinner Nov 16, 2025
197e9f4
reduce diff
Fidget-Spinner Nov 17, 2025
46413cf
Reduce diff to minmal
Fidget-Spinner Nov 17, 2025
b5d6571
reduce diff further
Fidget-Spinner Nov 17, 2025
a93c26c
Reduce diff to near minimal
Fidget-Spinner Nov 17, 2025
97d5f2b
typo fix
Fidget-Spinner Nov 17, 2025
c381903
Address review
Fidget-Spinner Nov 17, 2025
2 changes: 1 addition & 1 deletion Include/internal/pycore_interp_structs.h
@@ -930,7 +930,7 @@ struct _is {
struct types_state types;
struct callable_cache callable_cache;
PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit;
uint8_t jit;
bool compiling;
struct _PyExecutorObject *executor_list_head;
struct _PyExecutorObject *executor_deletion_list_head;
2 changes: 0 additions & 2 deletions Include/internal/pycore_optimizer.h
@@ -372,8 +372,6 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame,

void _PyJit_FinalizeTracing(PyThreadState *tstate);

void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);

#ifdef __cplusplus
}
#endif
20 changes: 11 additions & 9 deletions Lib/test/test_capi/test_opt.py
@@ -9,9 +9,13 @@

import _opcode

from test.support import (script_helper, requires_specialization,
import_helper, Py_GIL_DISABLED, requires_jit_enabled,
reset_code)
from test.support import (
script_helper,
import_helper,
Py_GIL_DISABLED,
requires_jit_enabled,
reset_code
)

_testinternalcapi = import_helper.import_module("_testinternalcapi")

@@ -61,8 +65,6 @@ def get_opnames(ex):
return list(iter_opnames(ex))


@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@requires_jit_enabled
class TestExecutorInvalidation(unittest.TestCase):

@@ -130,8 +132,6 @@ def f():
self.assertIsNone(exe)


@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@requires_jit_enabled
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
class TestUops(unittest.TestCase):
@@ -434,8 +434,6 @@ def testfunc(n, m):
self.assertIn("_FOR_ITER_TIER_TWO", uops)


@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@requires_jit_enabled
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
class TestUopsOptimization(unittest.TestCase):
@@ -2052,6 +2050,7 @@ def testfunc(n):
self.assertNotIn("_GUARD_NOS_INT", uops)
self.assertNotIn("_GUARD_TOS_INT", uops)

@unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
def test_call_len_known_length_small_int(self):
# Make sure that len(t) is optimized for a tuple of length 5.
# See https://github.com/python/cpython/issues/139393.
@@ -2076,6 +2075,7 @@ def testfunc(n):
self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops)
self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops)

@unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
def test_call_len_known_length(self):
# Make sure that len(t) is not optimized for a tuple of length 2048.
# See https://github.com/python/cpython/issues/139393.
@@ -2497,6 +2497,7 @@ def testfunc(n):

self.assertIn("_POP_TOP_NOP", uops)

@unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
def test_pop_top_specialize_int(self):
def testfunc(n):
for _ in range(n):
@@ -2510,6 +2511,7 @@ def testfunc(n):

self.assertIn("_POP_TOP_INT", uops)

@unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
def test_pop_top_specialize_float(self):
def testfunc(n):
for _ in range(n):
@@ -0,0 +1,3 @@
Add free-threading support to the JIT. The JIT is only enabled on
single-threaded code in free-threading, and is disabled when multiple
threads are spawned. Patch by Ken Jin.
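
A minimal sketch of the policy described above, assuming a hypothetical hook invoked during thread creation (the helper name and the exact invalidation call are illustrative, not the PR's actual wiring):

/* Hypothetical sketch -- not the PR's implementation. */
static void
disable_jit_for_second_thread(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    /* interp->jit is a uint8_t (see pycore_interp_structs.h above), so the
     * typed FT atomic wrappers can clear it without taking a lock. */
    FT_ATOMIC_STORE_UINT8_RELAXED(interp->jit, 0);
    /* Executors that were already compiled must stop being entered:
     * invalidation clears vm_data.valid, so _CHECK_VALIDITY deopts
     * back to tier 1. */
    _Py_Executors_InvalidateAll(interp, 1);
}
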
3 changes: 1 addition & 2 deletions Modules/_testinternalcapi.c
@@ -1240,8 +1240,7 @@ add_executor_dependency(PyObject *self, PyObject *args)
static PyObject *
invalidate_executors(PyObject *self, PyObject *obj)
{
PyInterpreterState *interp = PyInterpreterState_Get();
_Py_Executors_InvalidateDependency(interp, obj, 1);
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), obj, 1);
Member:
This can be reverted too.

Py_RETURN_NONE;
}

10 changes: 7 additions & 3 deletions Objects/codeobject.c
@@ -2432,7 +2432,7 @@ code_dealloc(PyObject *self)
PyMem_Free(co_extra);
}
#ifdef _Py_TIER2
_PyJit_Tracer_InvalidateDependency(tstate, self);
_Py_Executors_InvalidateDependency(tstate->interp, self, 1);
if (co->co_executors != NULL) {
clear_executors(co);
}
@@ -3363,8 +3363,12 @@ deopt_code_unit(PyCodeObject *code, int i)
inst.op.code = _PyOpcode_Deopt[opcode];
assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
}
// JIT should not be enabled with free-threading
assert(inst.op.code != ENTER_EXECUTOR);
if (inst.op.code == ENTER_EXECUTOR) {
_PyExecutorObject *exec = code->co_executors->executors[inst.op.arg];
assert(exec != NULL);
inst.op.code = exec->vm_data.opcode;
inst.op.arg = exec->vm_data.oparg;
}
return inst;
}

1 change: 0 additions & 1 deletion Objects/frameobject.c
@@ -263,7 +263,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)

#if _Py_TIER2
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
_PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
#endif

_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
30 changes: 21 additions & 9 deletions Objects/funcobject.c
@@ -11,7 +11,7 @@
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
#include "pycore_optimizer.h" // _PyJit_Tracer_InvalidateDependency
#include "pycore_optimizer.h" // _Py_Executors_InvalidateDependency

static const char *
func_event_name(PyFunction_WatchEvent event) {
@@ -298,7 +298,7 @@ functions is running.

*/

#ifndef Py_GIL_DISABLED
#if _Py_TIER2
static inline struct _func_version_cache_item *
get_cache_item(PyInterpreterState *interp, uint32_t version)
{
@@ -315,11 +315,13 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
// This should only be called from MAKE_FUNCTION. No code is specialized
// based on the version, so we do not need to stop the world to set it.
func->func_version = version;
#ifndef Py_GIL_DISABLED
#if _Py_TIER2
Member:
This changes the tier 1 with-gil behavior.
I don't see why we would need the cache in tier 1, but can you double check?

Member Author:
Yes I double checked.

PyInterpreterState *interp = _PyInterpreterState_GET();
FT_MUTEX_LOCK(&interp->func_state.mutex);
struct _func_version_cache_item *slot = get_cache_item(interp, version);
slot->func = func;
slot->code = func->func_code;
FT_MUTEX_UNLOCK(&interp->func_state.mutex);
#endif
}

@@ -330,13 +332,15 @@ func_clear_version(PyInterpreterState *interp, PyFunctionObject *func)
// Version was never set or has already been cleared.
return;
}
#ifndef Py_GIL_DISABLED
#if _Py_TIER2
FT_MUTEX_LOCK(&interp->func_state.mutex);
struct _func_version_cache_item *slot =
get_cache_item(interp, func->func_version);
if (slot->func == func) {
slot->func = NULL;
// Leave slot->code alone, there may be use for it.
}
FT_MUTEX_UNLOCK(&interp->func_state.mutex);
#endif
func->func_version = FUNC_VERSION_CLEARED;
}
@@ -358,8 +362,9 @@ _PyFunction_ClearVersion(PyFunctionObject *func)
void
_PyFunction_ClearCodeByVersion(uint32_t version)
{
#ifndef Py_GIL_DISABLED
#if _Py_TIER2
PyInterpreterState *interp = _PyInterpreterState_GET();
FT_MUTEX_LOCK(&interp->func_state.mutex);
struct _func_version_cache_item *slot = get_cache_item(interp, version);
if (slot->code) {
assert(PyCode_Check(slot->code));
@@ -369,15 +374,17 @@ _PyFunction_ClearCodeByVersion(uint32_t version)
slot->func = NULL;
}
}
FT_MUTEX_UNLOCK(&interp->func_state.mutex);
#endif
}

PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
Member:
Is this function really used somewhere?
Maybe we should remove it on main in a separate PR?

Member Author:
That's strange, you're right, it's not used.

It's useful for the optimizer though, so we might want to use it in the future.

{
#ifdef Py_GIL_DISABLED
return NULL;
#else
#if _Py_TIER2
// This function does not need locking/atomics as it can only be
// called from the optimizer, which is currently disabled
// when there are multiple threads.
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _func_version_cache_item *slot = get_cache_item(interp, version);
if (slot->code) {
@@ -395,12 +402,18 @@ _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
return slot->func;
}
return NULL;
#else
return NULL;
#endif
}

uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
// This function does not need locking/atomics as it can only be
Member:
We use the function version in specialized instructions, so it is used in tier 1.
I'm surprised this isn't already synchronized.

Member Author:
The use in the specializer is protected because it's accessing a stack variable which holds a strong reference to it. I will clarify that the unsynchronized use is only in the optimizer and it's fine there.

// called from the specializing interpreter or optimizer.
// The specializing interpreter holds a strong reference to the function.
// The optimizer is currently disabled when there are multiple threads.
return func->func_version;
}

@@ -1153,7 +1166,6 @@ func_dealloc(PyObject *self)
}
#if _Py_TIER2
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1);
_PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self);
#endif
_PyObject_GC_UNTRACK(op);
FT_CLEAR_WEAKREFS(self, op->func_weakreflist);
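
The FT_MUTEX_LOCK/FT_MUTEX_UNLOCK pairs added around the function-version cache above presumably expand to real locking only on free-threaded builds. A sketch of that wrapper pattern under that assumption (the actual macros live in a pycore header and may be defined differently):

/* Assumed shape of the wrappers; illustrative only. */
#ifdef Py_GIL_DISABLED
#  define FT_MUTEX_LOCK(m)   PyMutex_Lock(m)    /* real lock on the FT build */
#  define FT_MUTEX_UNLOCK(m) PyMutex_Unlock(m)
#else
#  define FT_MUTEX_LOCK(m)   ((void)(m))        /* the GIL already serializes access */
#  define FT_MUTEX_UNLOCK(m) ((void)(m))
#endif
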
4 changes: 3 additions & 1 deletion Objects/listobject.c
@@ -79,7 +79,9 @@ ensure_shared_on_resize(PyListObject *self)
// We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because
// the `CALL_LIST_APPEND` bytecode handler may lock the list without
// a critical section.
assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));
assert(Py_REFCNT(self) == 1 ||
(_Py_IsOwnedByCurrentThread((PyObject *)self) && !_PyObject_GC_IS_SHARED(self)) ||
PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));

// Ensure that the list array is freed using QSBR if we are not the
// owning thread.
13 changes: 10 additions & 3 deletions Objects/typeobject.c
@@ -1148,15 +1148,18 @@ static void
set_version_unlocked(PyTypeObject *tp, unsigned int version)
{
assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED));
#ifndef Py_GIL_DISABLED
#if _Py_TIER2
PyInterpreterState *interp = _PyInterpreterState_GET();
BEGIN_TYPE_LOCK();
// lookup the old version and set to null
if (tp->tp_version_tag != 0) {
PyTypeObject **slot =
interp->types.type_version_cache
+ (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE);
*slot = NULL;
}
#endif
#ifndef Py_GIL_DISABLED
if (version) {
tp->tp_versions_used++;
}
@@ -1166,13 +1169,14 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
}
#endif
FT_ATOMIC_STORE_UINT_RELAXED(tp->tp_version_tag, version);
#ifndef Py_GIL_DISABLED
#if _Py_TIER2
if (version != 0) {
PyTypeObject **slot =
interp->types.type_version_cache
+ (version % TYPE_VERSION_CACHE_SIZE);
*slot = tp;
}
END_TYPE_LOCK();
#endif
}

@@ -1357,9 +1361,12 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version)
PyTypeObject *
_PyType_LookupByVersion(unsigned int version)
{
#ifdef Py_GIL_DISABLED
#ifndef _Py_TIER2
return NULL;
#else
// This function does not need locking/atomics as it can only be
// called from the optimizer, which is currently disabled
// when there are multiple threads.
PyInterpreterState *interp = _PyInterpreterState_GET();
PyTypeObject **slot =
interp->types.type_version_cache
19 changes: 15 additions & 4 deletions Python/bytecodes.c
@@ -2939,9 +2939,9 @@ dummy_func(
};

specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (this_instr->op.code == JUMP_BACKWARD) {
uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
// Need to re-dispatch so the warmup counter isn't off by one:
next_instr = this_instr;
@@ -3284,11 +3284,9 @@

// Only used by Tier 2
op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) {
#ifndef Py_GIL_DISABLED
PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(list_o) == &PyList_Type);
EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o));
#endif
}

replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) {
@@ -5281,6 +5279,19 @@ dummy_func(
}

tier2 op(_CHECK_VALIDITY, (--)) {
// For FT:
// This doesn't need atomics (for now) as there is only a single time
// where a write from another thread is possible:
// when a new thread is spawned and it invalidates all current
// executors.
// The new thread can only be created by an executing uop prior to the
// _CHECK_VALIDITY check. New thread creation is synchronized by
// locking of the runtime, and the current thread is naturally
// paused/waiting for the new thread to be created. Thus,
// there is a strict happens-before relation between that
// uop's invalidation of validity and this check.
// So for now, while the JIT does not run on multiple threads,
// it is safe for this to be non-atomic.
DEOPT_IF(!current_executor->vm_data.valid);
}
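
The happens-before argument in _CHECK_VALIDITY above relies on there being a single cross-thread writer of vm_data.valid, running while the spawning thread is blocked in thread creation. A schematic of that writer (hypothetical helper; the real invalidation lives in Python/optimizer.c, and the executor-list field names are assumed from pycore_optimizer.h):

/* Schematic only -- not the PR's code. Runs with the runtime locked for
 * new-thread creation, so no tier-2 code reads vm_data.valid concurrently. */
static void
invalidate_executors_for_new_thread(PyInterpreterState *interp)
{
    for (_PyExecutorObject *exec = interp->executor_list_head;
         exec != NULL;
         exec = exec->vm_data.links.next) {
        exec->vm_data.valid = 0;
    }
    FT_ATOMIC_STORE_UINT8_RELAXED(interp->jit, 0);  /* keep new traces from starting */
}
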

2 changes: 1 addition & 1 deletion Python/ceval.c
@@ -1300,7 +1300,7 @@ _PyTier2Interpreter(
for (;;) {
uopcode = next_uop->opcode;
#ifdef Py_DEBUG
if (frame->lltrace >= 3) {
if (frame->lltrace >= 4) {
dump_stack(frame, stack_pointer);
if (next_uop->opcode == _START_EXECUTOR) {
printf("%4d uop: ", 0);
2 changes: 2 additions & 0 deletions Python/ceval_gil.c
@@ -1395,11 +1395,13 @@ _Py_HandlePending(PyThreadState *tstate)
_Py_RunGC(tstate);
}

#ifdef _Py_TIER2
if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) {
_Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
_Py_Executors_InvalidateCold(tstate->interp);
tstate->interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
}
#endif

/* GIL drop request */
if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) {
2 changes: 0 additions & 2 deletions Python/executor_cases.c.h

(Generated file; diff not rendered by default.)