Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions .github/workflows/tail-call.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,17 @@ jobs:
with:
python-version: '3.11'

- name: Native Windows (debug)
- name: Native Windows MSVC (release)
if: runner.os == 'Windows' && matrix.architecture != 'ARM64'
shell: cmd
run: |
choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}.1.0
set PlatformToolset=clangcl
set LLVMToolsVersion=${{ matrix.llvm }}.1.0
set LLVMInstallDir=C:\Program Files\LLVM
call ./PCbuild/build.bat --tail-call-interp -d -p ${{ matrix.architecture }}
call ./PCbuild/rt.bat -d -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
choco install visualstudio2026buildtools --no-progress -y --force --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --locale en-US --passive"
$env:PATH = "C:\Program Files (x86)\Microsoft Visual Studio\18\BuildTools\MSBuild\Current\bin;$env:PATH"
./PCbuild/build.bat --tail-call-interp -c Release -p ${{ matrix.architecture }} "/p:PlatformToolset=v145"
./PCbuild/rt.bat -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
# No tests (yet):
- name: Emulated Windows (release)
- name: Emulated Windows Clang (release)
if: runner.os == 'Windows' && matrix.architecture == 'ARM64'
shell: cmd
run: |
Expand Down
16 changes: 16 additions & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,17 @@ _Py_VectorCall_StackRefSteal(
int total_args,
_PyStackRef kwnames);

// Vectorcall `callable` with the `total_args` stack references starting at
// `arguments`. If `kwnames` is not NULL, its tuple size is subtracted from
// `total_args` to obtain the positional argument count.
// When `call_instrumentation` is true, the outcome is reported for
// `this_instr` in `frame`: PY_MONITORING_EVENT_C_RAISE if the call returned
// NULL, PY_MONITORING_EVENT_C_RETURN otherwise (and if reporting the return
// fails, the result is cleared).
// "Steal": `kwnames`, every entry of `arguments` (each slot is overwritten
// with PyStackRef_NULL first) and `callable` are closed before returning,
// whether or not the call succeeded.
// Returns the call result, or NULL on failure.
PyAPI_FUNC(PyObject*)
_Py_VectorCallInstrumentation_StackRefSteal(
_PyStackRef callable,
_PyStackRef* arguments,
int total_args,
_PyStackRef kwnames,
bool call_instrumentation,
_PyInterpreterFrame* frame,
_Py_CODEUNIT* this_instr,
PyThreadState* tstate);

PyAPI_FUNC(PyObject *)
_Py_BuiltinCallFast_StackRefSteal(
_PyStackRef callable,
Expand Down Expand Up @@ -464,6 +475,11 @@ _Py_assert_within_stack_bounds(
_PyInterpreterFrame *frame, _PyStackRef *stack_pointer,
const char *filename, int lineno);

// Like PyMapping_GetOptionalItem, but returns the PyObject* instead of taking
// it as an out parameter. This helps MSVC's escape analysis when used with
// tail calling.
// The int status that PyMapping_GetOptionalItem returns is stored in `*err`;
// the return value is whatever PyMapping_GetOptionalItem wrote to its own
// `result` out parameter.
PyAPI_FUNC(PyObject*) _PyMapping_GetOptionalItem2(PyObject* obj, PyObject* key, int* err);

#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow building CPython with the tail-calling interpreter on Visual Studio 2026 MSVC. This provides a performance gain over the prior interpreter for MSVC. Patch by Ken Jin, Brandt Bucher, and Chris Eibl, with help from the MSVC team, including Hulon Jenkins.
8 changes: 8 additions & 0 deletions Objects/abstract.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,14 @@ PyMapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result)
return 0;
}

// Return-value flavour of PyMapping_GetOptionalItem(): the looked-up object
// is returned directly and the int status of the underlying call is written
// to `*err`. The address-taken local stays confined to this helper, so
// callers never have to take the address of one of their own locals.
PyObject*
_PyMapping_GetOptionalItem2(PyObject *obj, PyObject *key, int *err)
{
    PyObject *item;
    int status = PyMapping_GetOptionalItem(obj, key, &item);
    *err = status;
    return item;
}

int
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
{
Expand Down
4 changes: 3 additions & 1 deletion PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,9 @@
<ClCompile Include="..\Python\bltinmodule.c" />
<ClCompile Include="..\Python\bootstrap_hash.c" />
<ClCompile Include="..\Python\brc.c" />
<ClCompile Include="..\Python\ceval.c" />
<ClCompile Include="..\Python\ceval.c">
<AdditionalOptions Condition="'$(UseTailCallInterp)' == 'true' and $(PlatformToolset) != 'ClangCL'">/std:clatest %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<ClCompile Include="..\Python\codecs.c" />
<ClCompile Include="..\Python\codegen.c" />
<ClCompile Include="..\Python\compile.c" />
Expand Down
134 changes: 57 additions & 77 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1507,8 +1507,8 @@ dummy_func(
}

inst(LOAD_BUILD_CLASS, ( -- bc)) {
PyObject *bc_o;
int err = PyMapping_GetOptionalItem(BUILTINS(), &_Py_ID(__build_class__), &bc_o);
int err;
PyObject *bc_o = _PyMapping_GetOptionalItem2(BUILTINS(), &_Py_ID(__build_class__), &err);
ERROR_IF(err < 0);
if (bc_o == NULL) {
_PyErr_SetString(tstate, PyExc_NameError,
Expand Down Expand Up @@ -1711,8 +1711,9 @@ dummy_func(

inst(LOAD_FROM_DICT_OR_GLOBALS, (mod_or_class_dict -- v)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
PyObject *v_o;
int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o);
int err;
PyObject *v_o = _PyMapping_GetOptionalItem2(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &err);

PyStackRef_CLOSE(mod_or_class_dict);
ERROR_IF(err < 0);
if (v_o == NULL) {
Expand All @@ -1735,11 +1736,11 @@ dummy_func(
else {
/* Slow-path if globals or builtins is not a dict */
/* namespace 1: globals */
int err = PyMapping_GetOptionalItem(GLOBALS(), name, &v_o);
v_o = _PyMapping_GetOptionalItem2(GLOBALS(), name, &err);
ERROR_IF(err < 0);
if (v_o == NULL) {
/* namespace 2: builtins */
int err = PyMapping_GetOptionalItem(BUILTINS(), name, &v_o);
v_o = _PyMapping_GetOptionalItem2(BUILTINS(), name, &err);
ERROR_IF(err < 0);
if (v_o == NULL) {
_PyEval_FormatExcCheckArg(
Expand Down Expand Up @@ -1898,14 +1899,14 @@ dummy_func(
}

inst(LOAD_FROM_DICT_OR_DEREF, (class_dict_st -- value)) {
PyObject *value_o;
PyObject *name;
PyObject *class_dict = PyStackRef_AsPyObjectBorrow(class_dict_st);

assert(class_dict);
assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus);
name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg);
int err = PyMapping_GetOptionalItem(class_dict, name, &value_o);
int err;
PyObject* value_o = _PyMapping_GetOptionalItem2(class_dict, name, &err);
if (err < 0) {
ERROR_NO_POP();
}
Expand Down Expand Up @@ -2074,14 +2075,14 @@ dummy_func(
}

inst(SETUP_ANNOTATIONS, (--)) {
PyObject *ann_dict;
if (LOCALS() == NULL) {
_PyErr_Format(tstate, PyExc_SystemError,
"no locals found when setting up annotations");
ERROR_IF(true);
}
/* check if __annotations__ in locals()... */
int err = PyMapping_GetOptionalItem(LOCALS(), &_Py_ID(__annotations__), &ann_dict);
int err;
PyObject* ann_dict = _PyMapping_GetOptionalItem2(LOCALS(), &_Py_ID(__annotations__), &err);
ERROR_IF(err < 0);
if (ann_dict == NULL) {
ann_dict = PyDict_New();
Expand Down Expand Up @@ -2185,8 +2186,12 @@ dummy_func(
}
// we make no attempt to optimize here; specializations should
// handle any case whose performance we care about
PyObject *stack[] = {class, self};
PyObject *super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
PyObject *super;
{
// scope to tell MSVC that stack is not escaping
PyObject *stack[] = {class, self};
super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
}
if (opcode == INSTRUMENTED_LOAD_SUPER_ATTR) {
PyObject *arg = oparg & 2 ? class : &_PyInstrumentation_MISSING;
if (super == NULL) {
Expand Down Expand Up @@ -2245,8 +2250,13 @@ dummy_func(
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2);
PyTypeObject *cls = (PyTypeObject *)class;
int method_found = 0;
PyObject *attr_o = _PySuper_Lookup(cls, self, name,
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? &method_found : NULL);
PyObject *attr_o;
{
// scope to tell MSVC that method_found_ptr is not escaping
int *method_found_ptr = &method_found;
attr_o = _PySuper_Lookup(cls, self, name,
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? method_found_ptr : NULL);
}
if (attr_o == NULL) {
ERROR_NO_POP();
}
Expand Down Expand Up @@ -3472,10 +3482,14 @@ dummy_func(
}
assert(PyStackRef_IsTaggedInt(lasti));
(void)lasti; // Shut up compiler warning if asserts are off
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
int has_self = !PyStackRef_IsNull(exit_self);
PyObject *res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
PyObject* res_o;
{
// scope to tell MSVC that stack is not escaping
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
int has_self = !PyStackRef_IsNull(exit_self);
res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
}
Py_XDECREF(original_tb);
ERROR_IF(res_o == NULL);
res = PyStackRef_FromPyObjectSteal(res_o);
Expand Down Expand Up @@ -3707,36 +3721,18 @@ dummy_func(
frame->return_offset = INSTRUCTION_SIZE;
DISPATCH_INLINED(new_frame);
}
/* Callable is not a normal Python function */
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
if (CONVERSION_FAILED(args_o)) {
DECREF_INPUTS();
ERROR_IF(true);
}
PyObject *res_o = PyObject_Vectorcall(
callable_o, args_o,
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
NULL);
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
if (opcode == INSTRUMENTED_CALL) {
PyObject *arg = total_args == 0 ?
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
if (res_o == NULL) {
_Py_call_instrumentation_exc2(
tstate, PY_MONITORING_EVENT_C_RAISE,
frame, this_instr, callable_o, arg);
}
else {
int err = _Py_call_instrumentation_2args(
tstate, PY_MONITORING_EVENT_C_RETURN,
frame, this_instr, callable_o, arg);
if (err < 0) {
Py_CLEAR(res_o);
}
}
}
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
DECREF_INPUTS();
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
callable,
arguments,
total_args,
PyStackRef_NULL,
opcode == INSTRUMENTED_CALL,
frame,
this_instr,
tstate);
DEAD(args);
DEAD(self_or_null);
DEAD(callable);
ERROR_IF(res_o == NULL);
res = PyStackRef_FromPyObjectSteal(res_o);
}
Expand Down Expand Up @@ -4587,35 +4583,19 @@ dummy_func(
frame->return_offset = INSTRUCTION_SIZE;
DISPATCH_INLINED(new_frame);
}
/* Callable is not a normal Python function */
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
if (CONVERSION_FAILED(args_o)) {
DECREF_INPUTS();
ERROR_IF(true);
}
PyObject *res_o = PyObject_Vectorcall(
callable_o, args_o,
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
kwnames_o);
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
if (opcode == INSTRUMENTED_CALL_KW) {
PyObject *arg = total_args == 0 ?
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
if (res_o == NULL) {
_Py_call_instrumentation_exc2(
tstate, PY_MONITORING_EVENT_C_RAISE,
frame, this_instr, callable_o, arg);
}
else {
int err = _Py_call_instrumentation_2args(
tstate, PY_MONITORING_EVENT_C_RETURN,
frame, this_instr, callable_o, arg);
if (err < 0) {
Py_CLEAR(res_o);
}
}
}
DECREF_INPUTS();
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
callable,
arguments,
total_args,
kwnames,
opcode == INSTRUMENTED_CALL_KW,
frame,
this_instr,
tstate);
DEAD(kwnames);
DEAD(args);
DEAD(self_or_null);
DEAD(callable);
ERROR_IF(res_o == NULL);
res = PyStackRef_FromPyObjectSteal(res_o);
}
Expand Down
59 changes: 59 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,65 @@ _Py_VectorCall_StackRefSteal(
return res;
}

// Call `callable` through the vectorcall protocol with the `total_args`
// stack references at `arguments`, optionally report the outcome to the
// monitoring machinery, and release every input reference before returning.
//
// If `kwnames` is not NULL, its tuple size is subtracted from `total_args`
// to get the positional count passed to PyObject_Vectorcall().
// When `call_instrumentation` is true, PY_MONITORING_EVENT_C_RAISE is
// reported if the call returned NULL and PY_MONITORING_EVENT_C_RETURN
// otherwise; if reporting the return event fails, the result is cleared and
// NULL is returned instead.
//
// Returns the call result, or NULL on failure.
PyObject*
_Py_VectorCallInstrumentation_StackRefSteal(
    _PyStackRef callable,
    _PyStackRef* arguments,
    int total_args,
    _PyStackRef kwnames,
    bool call_instrumentation,
    _PyInterpreterFrame* frame,
    _Py_CODEUNIT* this_instr,
    PyThreadState* tstate)
{
    PyObject *result = NULL;
    STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
    if (CONVERSION_FAILED(args_o)) {
        // Nothing was called; the inputs still have to be released below.
        goto cleanup;
    }
    PyObject *func = PyStackRef_AsPyObjectBorrow(callable);
    PyObject *kw_tuple = PyStackRef_AsPyObjectBorrow(kwnames);
    int npositional = total_args;
    if (kw_tuple != NULL) {
        npositional -= (int)PyTuple_GET_SIZE(kw_tuple);
    }
    result = PyObject_Vectorcall(
        func, args_o,
        npositional | PY_VECTORCALL_ARGUMENTS_OFFSET,
        kw_tuple);
    STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
    if (call_instrumentation) {
        // The events carry the first argument, or the MISSING sentinel
        // when the call had no arguments at all.
        PyObject *first_arg = &_PyInstrumentation_MISSING;
        if (total_args != 0) {
            first_arg = PyStackRef_AsPyObjectBorrow(arguments[0]);
        }
        if (result == NULL) {
            _Py_call_instrumentation_exc2(
                tstate, PY_MONITORING_EVENT_C_RAISE,
                frame, this_instr, func, first_arg);
        }
        else if (_Py_call_instrumentation_2args(
                     tstate, PY_MONITORING_EVENT_C_RETURN,
                     frame, this_instr, func, first_arg) < 0) {
            Py_CLEAR(result);
        }
    }
    assert((result != NULL) ^ (PyErr_Occurred() != NULL));
cleanup:
    PyStackRef_XCLOSE(kwnames);
    // arguments is a pointer into the GC visible stack,
    // so each slot must be set to NULL before its old value is closed.
    for (int idx = total_args - 1; idx >= 0; idx--) {
        _PyStackRef victim = arguments[idx];
        arguments[idx] = PyStackRef_NULL;
        PyStackRef_CLOSE(victim);
    }
    PyStackRef_CLOSE(callable);
    return result;
}

PyObject *
_Py_BuiltinCallFast_StackRefSteal(
_PyStackRef callable,
Expand Down
15 changes: 9 additions & 6 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,19 @@
# elif defined(_MSC_VER) && (_MSC_VER < 1950)
# error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
# endif

// Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
# define Py_MUSTTAIL [[clang::musttail]]
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
# if defined(_MSC_VER) && !defined(__clang__)
# define Py_MUSTTAIL [[msvc::musttail]]
# define Py_PRESERVE_NONE_CC __preserve_none
# else
# define Py_MUSTTAIL __attribute__((musttail))
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
# endif
typedef PyObject *(Py_PRESERVE_NONE_CC *py_tail_call_funcptr)(TAIL_CALL_PARAMS);

# define DISPATCH_TABLE_VAR instruction_funcptr_table
# define DISPATCH_TABLE instruction_funcptr_handler_table
# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
# define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
# define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)

# define DISPATCH_GOTO() \
do { \
Expand Down
Loading
Loading