Skip to content

Commit

Permalink
gh-89279: In ceval.c, redefine some macros for speed (#32387)
Browse files Browse the repository at this point in the history
Macros Py_DECREF, Py_XDECREF, Py_IS_TYPE, _Py_atomic_load_32bit_impl
and _Py_DECREF_SPECIALIZED are redefined as macros
that completely replace the inline functions of the same name.
These three came out in the top four of functions that (in MSVC)
somehow weren't inlined.

Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com>
  • Loading branch information
3 people committed Apr 22, 2022
1 parent 9fe82d0 commit 2f233fc
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 13 deletions.
@@ -0,0 +1 @@
Improve interpreter performance on Windows by inlining a few specific macros.
73 changes: 60 additions & 13 deletions Python/ceval.c
Expand Up @@ -44,6 +44,55 @@
# error "ceval.c must be build with Py_BUILD_CORE define for best performance"
#endif

#ifndef Py_DEBUG
// GH-89279: The MSVC compiler does not inline these static inline functions
// in PGO build in _PyEval_EvalFrameDefault(), because this function is over
// the limit of PGO, and that limit cannot be configured.
// Define them as macros to make sure that they are always inlined by the
// preprocessor.

#undef Py_DECREF
#define Py_DECREF(arg) \
do { \
PyObject *op = _PyObject_CAST(arg); \
if (--op->ob_refcnt == 0) { \
destructor dealloc = Py_TYPE(op)->tp_dealloc; \
(*dealloc)(op); \
} \
} while (0)

#undef Py_XDECREF
#define Py_XDECREF(arg) \
do { \
PyObject *xop = _PyObject_CAST(arg); \
if (xop != NULL) { \
Py_DECREF(xop); \
} \
} while (0)

#undef Py_IS_TYPE
#define Py_IS_TYPE(ob, type) \
(_PyObject_CAST(ob)->ob_type == (type))

#undef _Py_DECREF_SPECIALIZED
#define _Py_DECREF_SPECIALIZED(arg, dealloc) \
do { \
PyObject *op = _PyObject_CAST(arg); \
if (--op->ob_refcnt == 0) { \
destructor d = (destructor)(dealloc); \
d(op); \
} \
} while (0)
#endif

// GH-89279: Similar to above, force inlining by using a macro.
#if defined(_MSC_VER) && SIZEOF_INT == 4
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) (assert(sizeof((ATOMIC_VAL)->_value) == 4), *((volatile int*)&((ATOMIC_VAL)->_value)))
#else
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
#endif


/* Forward declarations */
static PyObject *trace_call_function(
PyThreadState *tstate, PyObject *callable, PyObject **stack,
Expand Down Expand Up @@ -192,10 +241,10 @@ COMPUTE_EVAL_BREAKER(PyInterpreterState *interp,
struct _ceval_state *ceval2)
{
_Py_atomic_store_relaxed(&ceval2->eval_breaker,
_Py_atomic_load_relaxed(&ceval2->gil_drop_request)
| (_Py_atomic_load_relaxed(&ceval->signals_pending)
_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)
| (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)
&& _Py_ThreadCanHandleSignals(interp))
| (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)
| (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)
&& _Py_ThreadCanHandlePendingCalls())
| ceval2->pending.async_exc);
}
Expand Down Expand Up @@ -740,7 +789,7 @@ _Py_FinishPendingCalls(PyThreadState *tstate)

struct _pending_calls *pending = &tstate->interp->ceval.pending;

if (!_Py_atomic_load_relaxed(&(pending->calls_to_do))) {
if (!_Py_atomic_load_relaxed_int32(&(pending->calls_to_do))) {
return;
}

Expand Down Expand Up @@ -1187,22 +1236,22 @@ eval_frame_handle_pending(PyThreadState *tstate)
struct _ceval_runtime_state *ceval = &runtime->ceval;

/* Pending signals */
if (_Py_atomic_load_relaxed(&ceval->signals_pending)) {
if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) {
if (handle_signals(tstate) != 0) {
return -1;
}
}

/* Pending calls */
struct _ceval_state *ceval2 = &tstate->interp->ceval;
if (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)) {
if (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)) {
if (make_pending_calls(tstate->interp) != 0) {
return -1;
}
}

/* GIL drop request */
if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) {
if (_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)) {
/* Give another thread a chance */
if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) {
Py_FatalError("tstate mix-up");
Expand Down Expand Up @@ -1360,7 +1409,7 @@ eval_frame_handle_pending(PyThreadState *tstate)

#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
if (_Py_atomic_load_relaxed(eval_breaker)) { \
if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
goto handle_eval_breaker; \
}

Expand Down Expand Up @@ -1640,10 +1689,8 @@ typedef struct {
PyObject *kwnames;
} CallShape;

static inline bool
is_method(PyObject **stack_pointer, int args) {
return PEEK(args+2) != NULL;
}
// GH-89279: Must be a macro to be sure it's inlined by MSVC.
#define is_method(stack_pointer, args) (PEEK((args)+2) != NULL)

#define KWNAMES_LEN() \
(call_shape.kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(call_shape.kwnames)))
Expand Down Expand Up @@ -1796,7 +1843,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PREDICTED(RESUME_QUICK);
assert(tstate->cframe == &cframe);
assert(frame == cframe.current_frame);
if (_Py_atomic_load_relaxed(eval_breaker) && oparg < 2) {
if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) {
goto handle_eval_breaker;
}
DISPATCH();
Expand Down

0 comments on commit 2f233fc

Please sign in to comment.