Skip to content

Move large uop bodies into functions. #117224

@markshannon

Description

@markshannon

Many of the micro-op bodies are quite large, and are likely to bloat jitted code, harming performance.

We should move these larger bodies into helper functions in the tier2 code generator.

For example, the generated code for _INIT_CALL_PY_EXACT_ARGS looks like this:

/* Inline body of the _INIT_CALL_PY_EXACT_ARGS micro-op: pushes a new
   interpreter frame for `callable`, copies the call arguments into its
   locals, and leaves the new frame on the stack in place of the callable. */
case _INIT_CALL_PY_EXACT_ARGS: {
    PyObject **args;
    PyObject *self_or_null;
    PyObject *callable;
    _PyInterpreterFrame *new_frame;
    oparg = CURRENT_OPARG();
    /* Stack on entry, bottom to top: callable, self_or_null, then oparg
       argument slots ending at the stack pointer. */
    args = &stack_pointer[-oparg];
    self_or_null = stack_pointer[-1 - oparg];
    callable = stack_pointer[-2 - oparg];
    /* 1 when a self object is present, else 0; it is counted as an extra
       argument when the frame is pushed. */
    int has_self = (self_or_null != NULL);
    STAT_INC(CALL, hit);
    PyFunctionObject *func = (PyFunctionObject *)callable;
    new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self);
    PyObject **first_non_self_local = new_frame->localsplus + has_self;
    /* Slot 0 is written unconditionally. When has_self is 0 the copy loop
       below starts at slot 0 too, so args[0] overwrites it if oparg > 0. */
    new_frame->localsplus[0] = self_or_null;
    for (int i = 0; i < oparg; i++) {
        first_non_self_local[i] = args[i];
    }
    /* The new frame takes over the callable's slot; the stack then shrinks
       by oparg + 1 entries so that slot becomes the top of the stack. */
    stack_pointer[-2 - oparg] = (PyObject *)new_frame;
    stack_pointer += -1 - oparg;
    break;
}

By moving the bulk of this into a helper function, we can generate the much shorter:

case _INIT_CALL_PY_EXACT_ARGS: {
    /* Load this instruction's operand before the call: the helper takes
       oparg by value, so it must be current here, exactly as in the
       long-form body of this micro-op. */
    oparg = CURRENT_OPARG();
    /* The helper performs the whole micro-op and hands back the adjusted
       stack pointer. */
    stack_pointer = _INIT_CALL_PY_EXACT_ARGS_func(tstate, frame, stack_pointer, oparg);
    break;
}

with the helper function:

/* Out-of-line body for the _INIT_CALL_PY_EXACT_ARGS micro-op.
 *
 * Expects the stack to hold, bottom to top: the callable, a self-or-NULL
 * slot, and `oparg` arguments ending at `stack_pointer`.  Pushes a new
 * frame for the callable, copies the arguments (preceded by self, when it
 * is non-NULL) into the frame's locals, and stores the new frame in the
 * slot the callable occupied.
 *
 *   tstate         thread state handed to _PyFrame_PushUnchecked
 *   frame          accepted for a uniform helper signature; not read here
 *   stack_pointer  stack pointer on entry
 *   oparg          number of arguments on the stack (self not included)
 *
 * Returns the updated stack pointer, which points just past the slot that
 * now holds the new frame.
 */
PyObject ** _INIT_CALL_PY_EXACT_ARGS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject ** stack_pointer, int oparg) {
    /* Address everything relative to the callable's slot at the bottom. */
    PyObject **base = stack_pointer - 2 - oparg;
    PyObject *callable = base[0];
    PyObject *self_or_null = base[1];
    PyObject **call_args = base + 2;

    /* A present self counts as one extra argument for the new frame. */
    int self_count = (self_or_null != NULL);
    STAT_INC(CALL, hit);

    _PyInterpreterFrame *pushed = _PyFrame_PushUnchecked(
        tstate, (PyFunctionObject *)callable, oparg + self_count);

    /* Slot 0 always receives self_or_null; without a self the copy below
       begins at slot 0 as well and overwrites it when oparg > 0. */
    pushed->localsplus[0] = self_or_null;
    PyObject **dest = pushed->localsplus + self_count;
    for (int k = 0; k < oparg; k++) {
        dest[k] = call_args[k];
    }

    /* The new frame replaces the callable; self and the arguments are
       dropped from the stack. */
    base[0] = (PyObject *)pushed;
    return base + 1;
}

Linked PRs

Metadata

Metadata

Assignees

Labels

interpreter-core (Objects, Python, Grammar, and Parser dirs) · performance (Performance or resource usage)

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions