Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-109369: Merge all eval-breaker flags and monitoring version into one word. #109846

Merged
merged 20 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ typedef struct {
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
_PyExecutorArray *co_executors; /* executors from optimizer */ \
_PyCoCached *_co_cached; /* cached co_* attributes */ \
uint64_t _co_instrumentation_version; /* current instrumentation version */ \
uintptr_t _co_instrumentation_version; /* current instrumentation version */ \
_PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \
int _co_firsttraceable; /* index of first traceable instruction */ \
/* Scratch space for extra data relating to the code object. \
Expand Down
33 changes: 33 additions & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,39 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);


#define _PY_GIL_DROP_REQUEST_BIT 0
#define _PY_SIGNALS_PENDING_BIT 1
#define _PY_CALLS_TO_DO_BIT 2
#define _PY_ASYNC_EXCEPTION_BIT 3
#define _PY_GC_SCHEDULED_BIT 4

/* Reserve a few bits for future use */
#define _PY_EVAL_EVENTS_BITS 8
#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1)

static inline void
_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set)
{
assert(set == 0 || set == 1);
uintptr_t to_set = set << bit;
uintptr_t mask = ((uintptr_t)1) << bit;
uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
if ((old & mask) == to_set) {
return;
}
uintptr_t new;
do {
new = (old & ~mask) | to_set;
} while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new));
}

static inline bool
_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
{
return _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit);
}


#ifdef __cplusplus
}
#endif
Expand Down
19 changes: 4 additions & 15 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ struct _pending_calls {
int busy;
PyThread_type_lock lock;
/* Request for running pending calls. */
_Py_atomic_int calls_to_do;
/* Request for looking at the `async_exc` field of the current
thread state.
Guarded by the GIL. */
int async_exc;
int32_t calls_to_do;
#define NPENDINGCALLS 32
struct _pending_call {
_Py_pending_call_func func;
Expand Down Expand Up @@ -62,11 +58,6 @@ struct _ceval_runtime_state {
int _not_used;
#endif
} perf;
/* Request for checking signals. It is shared by all interpreters (see
bpo-40513). Any thread of any interpreter can receive a signal, but only
the main thread of the main interpreter can handle signals: see
_Py_ThreadCanHandleSignals(). */
_Py_atomic_int signals_pending;
/* Pending calls to be made only on the main thread. */
struct _pending_calls pending_mainthread;
};
Expand All @@ -87,14 +78,12 @@ struct _ceval_state {
* the fast path in the eval loop.
* It is by far the hottest field in this struct and
* should be placed at the beginning. */
_Py_atomic_int eval_breaker;
/* Request for dropping the GIL */
_Py_atomic_int gil_drop_request;
uintptr_t eval_breaker;
/* Avoid false sharing */
int64_t padding[7];
int recursion_limit;
struct _gil_runtime_state *gil;
int own_gil;
/* The GC is ready to be executed */
_Py_atomic_int gc_scheduled;
struct _pending_calls pending;
};

Expand Down
3 changes: 1 addition & 2 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ struct _is {
int _initialized;
int finalizing;

uint64_t monitoring_version;
uint64_t last_restart_version;
uintptr_t last_restart_version;
struct pythreads {
uint64_t next_unique_id;
/* The linked list of threads, newest first. */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The internal eval_breaker and supporting flags, plus the monitoring version
have been merged into a single atomic integer to speed up checks.
7 changes: 2 additions & 5 deletions Modules/gcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/

#include "Python.h"
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_initconfig.h"
Expand Down Expand Up @@ -2274,11 +2275,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
if (gcstate->collecting == 1) {
return;
}
struct _ceval_state *ceval = &interp->ceval;
if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) {
_Py_atomic_store_relaxed(&ceval->gc_scheduled, 1);
_Py_atomic_store_relaxed(&ceval->eval_breaker, 1);
}
_Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
}

void
Expand Down
5 changes: 2 additions & 3 deletions Modules/signalmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1767,9 +1767,8 @@ PyErr_CheckSignals(void)
Python code to ensure signals are handled. Checking for the GC here
allows long running native code to clean cycles created using the C-API
even if it doesn't run the evaluation loop */
struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
if (_Py_atomic_load_relaxed(&interp_ceval_state->gc_scheduled)) {
_Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
_Py_RunGC(tstate);
}

Expand Down
23 changes: 13 additions & 10 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ dummy_func(
inst(RESUME, (--)) {
TIER_ONE_ONLY
assert(frame == tstate->current_frame);
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
uintptr_t global_version =
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((code_version & 255) == 0);
if (code_version != global_version) {
int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
ERROR_IF(err, error);
next_instr--;
Expand All @@ -156,18 +161,16 @@ dummy_func(
DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME);
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
/* Possibly combine these two checks */
DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version
!= tstate->interp->monitoring_version, RESUME);
DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME);
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
DEOPT_IF(eval_breaker != version, RESUME);
}

inst(INSTRUMENTED_RESUME, (--)) {
/* Possible performance enhancement:
* We need to check the eval breaker anyway, can we
* combine the instrument verison check and the eval breaker test?
*/
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
goto error;
}
Expand Down