Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-98003: Inline call frames for CALL_FUNCTION_EX #98004

Merged
merged 20 commits into from
Apr 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions Include/internal/pycore_call.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const
return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL);
}

PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames);

void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames);

void _PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames);

#ifdef __cplusplus
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Complex function calls are now faster and consume no C stack
space.

20 changes: 8 additions & 12 deletions Objects/call.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,6 @@
#include "pycore_tuple.h" // _PyTuple_ITEMS()


static PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames);

static void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames);


static PyObject *
null_error(PyThreadState *tstate)
{
Expand Down Expand Up @@ -965,7 +955,7 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.

When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
static PyObject *const *
PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames)
Expand Down Expand Up @@ -1034,14 +1024,20 @@ _PyStack_UnpackDict(PyThreadState *tstate,
return stack;
}

static void
void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames)
{
Py_ssize_t n = PyTuple_GET_SIZE(kwnames) + nargs;
for (Py_ssize_t i = 0; i < n; i++) {
Py_DECREF(stack[i]);
}
_PyStack_UnpackDict_FreeNoDecRef(stack, kwnames);
}

void
_PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames)
{
PyMem_Free((PyObject **)stack - 1);
Py_DECREF(kwnames);
}
Expand Down
19 changes: 19 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3103,6 +3103,25 @@ dummy_func(
}
}
else {
if (Py_TYPE(func) == &PyFunction_Type &&
tstate->interp->eval_frame == NULL &&
((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
assert(PyTuple_CheckExact(callargs));
Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));

_PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
(PyFunctionObject *)func, locals,
nargs, callargs, kwargs);
// Need to manually shrink the stack since we exit with DISPATCH_INLINED.
STACK_SHRINK(oparg + 3);
if (new_frame == NULL) {
goto error;
}
frame->return_offset = 0;
DISPATCH_INLINED(new_frame);
}
result = PyObject_Call(func, callargs, kwargs);
}
DECREF_INPUTS();
Expand Down
46 changes: 46 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ static _PyInterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames);
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);

Expand Down Expand Up @@ -1501,6 +1504,49 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
return NULL;
}

/* Same as _PyEvalFramePushAndInit but takes an args tuple and kwargs dict.
Steals references to func, callargs and kwargs.
*/
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs)
{
bool has_dict = (kwargs != NULL && PyDict_GET_SIZE(kwargs) > 0);
PyObject *kwnames = NULL;
PyObject *const *newargs;
if (has_dict) {
newargs = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames);
if (newargs == NULL) {
Py_DECREF(func);
goto error;
}
}
else {
newargs = &PyTuple_GET_ITEM(callargs, 0);
/* We need to incref all our args since the new frame steals the references. */
for (Py_ssize_t i = 0; i < nargs; ++i) {
Py_INCREF(PyTuple_GET_ITEM(callargs, i));
}
}
_PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than transforming callargs and kwargs into a shape suitable for _PyEvalFramePushAndInit, it might make sense to push the frame, then unpack the tuple and dict into the the frame without the intermediate objects.
It complicates the errror handling a bit, and the frame will need to be cleaned up if there is an error.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can work on this in another PR.

tstate, (PyFunctionObject *)func, locals,
newargs, nargs, kwnames
);
if (has_dict) {
_PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames);
}
/* No need to decref func here because the reference has been stolen by
_PyEvalFramePushAndInit.
*/
Py_DECREF(callargs);
Py_XDECREF(kwargs);
return new_frame;
error:
Py_DECREF(callargs);
Py_XDECREF(kwargs);
return NULL;
}

PyObject *
_PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals,
Expand Down