-
-
Notifications
You must be signed in to change notification settings - Fork 33.1k
gh-139922: Tail calling for MSVC (VS 2026) #139962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
82d1259
3248658
9ac430d
085c1d7
0b12f2e
acf48f5
35e96c1
d7737e9
86f19cf
40013cc
48db59e
bc9d23c
19e02c2
66ec774
50f8ff7
5d908b4
0786133
e699d40
66d6c39
5584fec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -303,9 +303,9 @@ PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, Py | |
PyAPI_FUNC(void) _PyEval_MonitorRaise(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr); | ||
PyAPI_FUNC(int) _PyEval_UnpackIterableStackRef(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, _PyStackRef *sp); | ||
PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); | ||
PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_ssize_t nargs, PyObject **scratch); | ||
PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyThreadStateImpl *_tstate, _PyStackRef *input, Py_ssize_t nargs); | ||
|
||
PyAPI_FUNC(void) _PyObjectArray_Free(PyObject **array, PyObject **scratch); | ||
PyAPI_FUNC(void) _PyObjectArray_Free(_PyThreadStateImpl *_tstate, PyObject **array, Py_ssize_t nargs, PyObject **temp_arr); | ||
|
||
PyAPI_FUNC(PyObject *) _PyEval_GetANext(PyObject *aiter); | ||
PyAPI_FUNC(void) _PyEval_LoadGlobalStackRef(PyObject *globals, PyObject *builtins, PyObject *name, _PyStackRef *writeto); | ||
|
@@ -391,6 +391,28 @@ _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame | |
#define SPECIAL___AEXIT__ 3 | ||
#define SPECIAL_MAX 3 | ||
|
||
// Special counterparts of ceval functions for performance reasons | ||
PyAPI_FUNC(int) _PyEval_Mapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result); | ||
|
||
#if defined(_MSC_VER) && !defined(__clang__) && _Py_TAIL_CALL_INTERP | ||
# define Py_NO_INLINE_MSVC_TAILCALL Py_NO_INLINE | ||
#else | ||
# define Py_NO_INLINE_MSVC_TAILCALL | ||
#endif | ||
|
||
// Tells the compiler that this variable cannot be alised. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Grammar nit, but "cannot be aliased" is ambiguous (because it's not clear whether we are choosing not to alias it, or telling the compiler that it cannot choose to alias it). I think we're promising that it won't be aliased? So "Tells the compiler to trust that we haven't aliased this variable" better explains what it's doing. Or just "... this variable is not going to be aliased". Alternatively, "Tells the compiler not to alias the variable" (but I don't think that even makes sense, let alone being correct here). |
||
#if defined(_MSC_VER) && !defined(__clang__) | ||
# define Py_UNALIASED(var) restrict var | ||
#else | ||
# define Py_UNALIASED(var) var | ||
#endif | ||
|
||
// Just a scope. Hints to the programmer and compiler | ||
// That any local variable defined within this block MUST | ||
// not escape from the current definition. | ||
# define Py_BEGIN_LOCALS_MUST_NOT_ESCAPE { | ||
# define Py_END_LOCALS_MUST_NOT_ESCAPE } | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,9 @@ struct _gc_thread_state { | |
}; | ||
#endif | ||
|
||
/* How much scratch space to give stackref to PyObject* conversion. */ | ||
#define MAX_STACKREF_SCRATCH 1024 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How arbitrary is this amount? What is the usual amount of space required for conversions? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We used to arbitrarily always consume 10 PyObject/PyStackRef per call. Meaning even if you had a single item, the whole 10 slot space would be used. |
||
|
||
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The | ||
// PyThreadState fields are exposed as part of the C API, although most fields | ||
// are intended to be private. The _PyThreadStateImpl fields not exposed. | ||
|
@@ -47,6 +50,8 @@ typedef struct _PyThreadStateImpl { | |
struct _qsbr_thread_state *qsbr; // only used by free-threaded build | ||
struct llist_node mem_free_queue; // delayed free queue | ||
|
||
PyObject *stackref_scratch[MAX_STACKREF_SCRATCH]; | ||
int n_stackref_scratch_used; | ||
#ifdef Py_GIL_DISABLED | ||
// Stack references for the current thread that exist on the C stack | ||
struct _PyCStackRef *c_stack_refs; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Allow building CPython with the tail calling interpreter on Visual Studio 2026 MSVC. This provides a performance gain over the prior interpreter for MSVC. Patch by Ken Jin, Brandt Bucher, and Chris Eibl. With help from the MSVC team including Hulon Jenkins. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -595,7 +595,9 @@ | |
<ClCompile Include="..\Python\bltinmodule.c" /> | ||
<ClCompile Include="..\Python\bootstrap_hash.c" /> | ||
<ClCompile Include="..\Python\brc.c" /> | ||
<ClCompile Include="..\Python\ceval.c" /> | ||
<ClCompile Include="..\Python\ceval.c"> | ||
<AdditionalOptions Condition="'$(UseTailCallInterp)' == 'true' and $(PlatformToolset) != 'ClangCL'">/std:clatest %(AdditionalOptions)</AdditionalOptions> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if we pass this option to clang-cl? Does it break? Any possibility of passing a specific /std:c rather than "latest"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I've worked with Chris on this and apparently it builds with clang-cl still the last time I checked with him? @chris-eibl There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yupp, clang-cl doesn't like
But I assume MSVC needs
Yeah, we could explicitely enforce
Yes, it still does. Having done a lot of builds recently and still playing around. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm under the impression we need clatest for msvc musttail as well. |
||
</ClCompile> | ||
<ClCompile Include="..\Python\codecs.c" /> | ||
<ClCompile Include="..\Python\codegen.c" /> | ||
<ClCompile Include="..\Python\compile.c" /> | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1535,7 +1535,7 @@ dummy_func( | |||||
|
||||||
inst(LOAD_BUILD_CLASS, ( -- bc)) { | ||||||
PyObject *bc_o; | ||||||
int err = PyMapping_GetOptionalItem(BUILTINS(), &_Py_ID(__build_class__), &bc_o); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(BUILTINS(), &_Py_ID(__build_class__), &bc_o); | ||||||
ERROR_IF(err < 0); | ||||||
if (bc_o == NULL) { | ||||||
_PyErr_SetString(tstate, PyExc_NameError, | ||||||
|
@@ -1739,7 +1739,7 @@ dummy_func( | |||||
inst(LOAD_FROM_DICT_OR_GLOBALS, (mod_or_class_dict -- v)) { | ||||||
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); | ||||||
PyObject *v_o; | ||||||
int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o); | ||||||
PyStackRef_CLOSE(mod_or_class_dict); | ||||||
ERROR_IF(err < 0); | ||||||
if (v_o == NULL) { | ||||||
|
@@ -1762,11 +1762,11 @@ dummy_func( | |||||
else { | ||||||
/* Slow-path if globals or builtins is not a dict */ | ||||||
/* namespace 1: globals */ | ||||||
int err = PyMapping_GetOptionalItem(GLOBALS(), name, &v_o); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(GLOBALS(), name, &v_o); | ||||||
ERROR_IF(err < 0); | ||||||
if (v_o == NULL) { | ||||||
/* namespace 2: builtins */ | ||||||
int err = PyMapping_GetOptionalItem(BUILTINS(), name, &v_o); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(BUILTINS(), name, &v_o); | ||||||
ERROR_IF(err < 0); | ||||||
if (v_o == NULL) { | ||||||
_PyEval_FormatExcCheckArg( | ||||||
|
@@ -1932,7 +1932,7 @@ dummy_func( | |||||
assert(class_dict); | ||||||
assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus); | ||||||
name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg); | ||||||
int err = PyMapping_GetOptionalItem(class_dict, name, &value_o); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(class_dict, name, &value_o); | ||||||
if (err < 0) { | ||||||
ERROR_NO_POP(); | ||||||
} | ||||||
|
@@ -1983,7 +1983,7 @@ dummy_func( | |||||
ERROR_IF(true); | ||||||
} | ||||||
PyObject *str_o = _PyUnicode_JoinArray(&_Py_STR(empty), pieces_o, oparg); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(pieces_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(pieces_o, oparg); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(str_o == NULL); | ||||||
str = PyStackRef_FromPyObjectSteal(str_o); | ||||||
|
@@ -2108,7 +2108,7 @@ dummy_func( | |||||
values_o, 2, | ||||||
values_o+1, 2, | ||||||
oparg); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(values_o, oparg*2); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(map_o == NULL); | ||||||
map = PyStackRef_FromPyObjectStealMortal(map_o); | ||||||
|
@@ -2122,7 +2122,7 @@ dummy_func( | |||||
ERROR_IF(true); | ||||||
} | ||||||
/* check if __annotations__ in locals()... */ | ||||||
int err = PyMapping_GetOptionalItem(LOCALS(), &_Py_ID(__annotations__), &ann_dict); | ||||||
int err = _PyEval_Mapping_GetOptionalItem(LOCALS(), &_Py_ID(__annotations__), &ann_dict); | ||||||
ERROR_IF(err < 0); | ||||||
if (ann_dict == NULL) { | ||||||
ann_dict = PyDict_New(); | ||||||
|
@@ -2226,8 +2226,11 @@ dummy_func( | |||||
} | ||||||
// we make no attempt to optimize here; specializations should | ||||||
// handle any case whose performance we care about | ||||||
PyObject *super; | ||||||
Py_BEGIN_LOCALS_MUST_NOT_ESCAPE; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
PyObject *stack[] = {class, self}; | ||||||
PyObject *super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL); | ||||||
super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL); | ||||||
Py_END_LOCALS_MUST_NOT_ESCAPE; | ||||||
if (opcode == INSTRUMENTED_LOAD_SUPER_ATTR) { | ||||||
PyObject *arg = oparg & 2 ? class : &_PyInstrumentation_MISSING; | ||||||
if (super == NULL) { | ||||||
|
@@ -2286,8 +2289,12 @@ dummy_func( | |||||
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2); | ||||||
PyTypeObject *cls = (PyTypeObject *)class; | ||||||
int method_found = 0; | ||||||
PyObject *attr_o = _PySuper_Lookup(cls, self, name, | ||||||
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? &method_found : NULL); | ||||||
PyObject *attr_o; | ||||||
Py_BEGIN_LOCALS_MUST_NOT_ESCAPE; | ||||||
int *method_found_ptr = &method_found; | ||||||
attr_o = _PySuper_Lookup(cls, self, name, | ||||||
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? method_found_ptr : NULL); | ||||||
Py_END_LOCALS_MUST_NOT_ESCAPE; | ||||||
if (attr_o == NULL) { | ||||||
ERROR_NO_POP(); | ||||||
} | ||||||
|
@@ -3510,10 +3517,13 @@ dummy_func( | |||||
} | ||||||
assert(PyStackRef_IsTaggedInt(lasti)); | ||||||
(void)lasti; // Shut up compiler warning if asserts are off | ||||||
PyObject* res_o; | ||||||
Py_BEGIN_LOCALS_MUST_NOT_ESCAPE; | ||||||
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb}; | ||||||
int has_self = !PyStackRef_IsNull(exit_self); | ||||||
PyObject *res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self, | ||||||
res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self, | ||||||
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); | ||||||
Py_END_LOCALS_MUST_NOT_ESCAPE; | ||||||
Py_XDECREF(original_tb); | ||||||
ERROR_IF(res_o == NULL); | ||||||
res = PyStackRef_FromPyObjectSteal(res_o); | ||||||
|
@@ -3755,7 +3765,7 @@ dummy_func( | |||||
callable_o, args_o, | ||||||
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, | ||||||
NULL); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
if (opcode == INSTRUMENTED_CALL) { | ||||||
PyObject *arg = total_args == 0 ? | ||||||
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]); | ||||||
|
@@ -3910,7 +3920,7 @@ dummy_func( | |||||
callable_o, args_o, | ||||||
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, | ||||||
NULL); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
@@ -4193,7 +4203,7 @@ dummy_func( | |||||
ERROR_IF(true); | ||||||
} | ||||||
PyObject *res_o = tp->tp_vectorcall((PyObject *)tp, args_o, total_args, NULL); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
res = PyStackRef_FromPyObjectSteal(res_o); | ||||||
|
@@ -4264,7 +4274,7 @@ dummy_func( | |||||
PyCFunction_GET_SELF(callable_o), | ||||||
args_o, | ||||||
total_args); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
@@ -4300,7 +4310,7 @@ dummy_func( | |||||
ERROR_IF(true); | ||||||
} | ||||||
PyObject *res_o = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
@@ -4480,7 +4490,7 @@ dummy_func( | |||||
PyCFunctionFastWithKeywords cfunc = | ||||||
_PyCFunctionFastWithKeywords_CAST(meth->ml_meth); | ||||||
PyObject *res_o = cfunc(self, (args_o + 1), nargs, NULL); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
@@ -4559,7 +4569,7 @@ dummy_func( | |||||
} | ||||||
PyCFunctionFast cfunc = _PyCFunctionFast_CAST(meth->ml_meth); | ||||||
PyObject *res_o = cfunc(self, (args_o + 1), nargs); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
@@ -4658,7 +4668,7 @@ dummy_func( | |||||
callable_o, args_o, | ||||||
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, | ||||||
kwnames_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
if (opcode == INSTRUMENTED_CALL_KW) { | ||||||
PyObject *arg = total_args == 0 ? | ||||||
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]); | ||||||
|
@@ -4815,7 +4825,7 @@ dummy_func( | |||||
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, | ||||||
kwnames_o); | ||||||
PyStackRef_CLOSE(kwnames); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); | ||||||
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o, total_args); | ||||||
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); | ||||||
DECREF_INPUTS(); | ||||||
ERROR_IF(res_o == NULL); | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the only difference the restrict annotation?
We could just add the annotation to
PyMapping_GetOptionalItem
theresult
pointer can never alias the other parameters.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. We can't because
PyMapping_GetOptionalItem
is part of the public API, and we can't change the signature of it.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We aren't changing the signature, just telling the compiler than one of its parameters can't alias the others.
I'm surprised we need to do this anyway. I would have thought that strict aliasing already tells MSVC that they can't alias.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My understanding is, that strict aliasing can't help here, since in
all parameters are of type
PyObject
. And by usingrestrict
we tell the compiler that "we assure you, that none of thesePyObject
s point to the same memory", and if we brake that contract, UB kicks in.