From bbcf42449e13c0b62f145cd49d12674ef3d5bf64 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 27 May 2022 16:31:41 +0100 Subject: [PATCH] GH-90230: Add stats to breakdown the origin of calls to `PyEval_EvalFrame` (GH-93284) --- Include/internal/pycore_call.h | 2 ++ Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_code.h | 5 +++++ Include/pystats.h | 15 +++++++++++++++ Modules/_asynciomodule.c | 3 ++- Objects/call.c | 16 +++++++++++++--- Objects/descrobject.c | 1 + Objects/genobject.c | 2 ++ Objects/typeobject.c | 1 + Python/bltinmodule.c | 1 + Python/ceval.c | 5 ++++- Python/specialize.c | 5 ++++- Tools/scripts/summarize_stats.py | 17 ++++++++++++----- 13 files changed, 63 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h index 3ccacfa0b8b038..55378e3dfebf24 100644 --- a/Include/internal/pycore_call.h +++ b/Include/internal/pycore_call.h @@ -103,6 +103,7 @@ _PyObject_CallNoArgsTstate(PyThreadState *tstate, PyObject *func) { // Private static inline function variant of public PyObject_CallNoArgs() static inline PyObject * _PyObject_CallNoArgs(PyObject *func) { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); PyThreadState *tstate = _PyThreadState_GET(); return _PyObject_VectorcallTstate(tstate, func, NULL, 0, NULL); } @@ -111,6 +112,7 @@ _PyObject_CallNoArgs(PyObject *func) { static inline PyObject * _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const *args, Py_ssize_t nargs) { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL); } diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 3efd6beb035a1a..972eb926b3fa26 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -68,6 +68,7 @@ extern PyObject* _PyEval_BuiltinsFromGlobals( static inline PyObject* _PyEval_EvalFrame(PyThreadState *tstate, struct _PyInterpreterFrame *frame, int throwflag) { + EVAL_CALL_STAT_INC(EVAL_CALL_TOTAL); if (tstate->interp->eval_frame == NULL) { return _PyEval_EvalFrameDefault(tstate, frame, throwflag); } diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 3d198432741129..2cfa319f2d2902 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -265,6 +265,9 @@ extern int _PyStaticCode_InternStrings(PyCodeObject *co); #define OBJECT_STAT_INC(name) _py_stats.object_stats.name++ #define OBJECT_STAT_INC_COND(name, cond) \ do { if (cond) _py_stats.object_stats.name++; } while (0) +#define EVAL_CALL_STAT_INC(name) _py_stats.call_stats.eval_calls[name]++ +#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \ + do { if (PyFunction_Check(callable)) _py_stats.call_stats.eval_calls[name]++; } while (0) // Used by the _opcode extension which is built as a shared library PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); @@ -276,6 +279,8 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define CALL_STAT_INC(name) ((void)0) #define OBJECT_STAT_INC(name) ((void)0) #define OBJECT_STAT_INC_COND(name, cond) ((void)0) +#define EVAL_CALL_STAT_INC(name) ((void)0) +#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) #endif // !Py_STATS // Cache values are only valid in memory, so use native endianness. diff --git a/Include/pystats.h b/Include/pystats.h index 4375614b05e483..bea471bfc1de94 100644 --- a/Include/pystats.h +++ b/Include/pystats.h @@ -10,6 +10,20 @@ extern "C" { #define SPECIALIZATION_FAILURE_KINDS 32 +/* Stats for determining who is calling PyEval_EvalFrame */ +#define EVAL_CALL_TOTAL 0 +#define EVAL_CALL_VECTOR 1 +#define EVAL_CALL_GENERATOR 2 +#define EVAL_CALL_LEGACY 3 +#define EVAL_CALL_FUNCTION_VECTORCALL 4 +#define EVAL_CALL_BUILD_CLASS 5 +#define EVAL_CALL_SLOT 6 +#define EVAL_CALL_FUNCTION_EX 7 +#define EVAL_CALL_API 8 +#define EVAL_CALL_METHOD 9 + +#define EVAL_CALL_KINDS 10 + typedef struct _specialization_stats { uint64_t success; uint64_t failure; @@ -31,6 +45,7 @@ typedef struct _call_stats { uint64_t pyeval_calls; uint64_t frames_pushed; uint64_t frame_objects_created; + uint64_t eval_calls[EVAL_CALL_KINDS]; } CallStats; typedef struct _object_stats { diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index c2b0e242e277f1..bb7c5a7c3ec550 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -384,7 +384,7 @@ call_soon(PyObject *loop, PyObject *func, PyObject *arg, PyObject *ctx) nargs++; } stack[nargs] = (PyObject *)ctx; - + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, callable); handle = PyObject_Vectorcall(callable, stack, nargs, context_kwname); Py_DECREF(callable); } @@ -2950,6 +2950,7 @@ task_step_impl(TaskObj *task, PyObject *exc) PyObject *stack[2]; stack[0] = wrapper; stack[1] = (PyObject *)task->task_context; + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, add_cb); tmp = PyObject_Vectorcall(add_cb, stack, 1, context_kwname); Py_DECREF(add_cb); Py_DECREF(wrapper); diff --git a/Objects/call.c b/Objects/call.c index 678d16269f2169..3b5259b3383f43 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -109,7 +109,9 @@ _Py_CheckSlotResult(PyObject *obj, const char *slot_name, int success) PyObject * PyObject_CallNoArgs(PyObject *func) { - return _PyObject_CallNoArgs(func); + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); + PyThreadState *tstate = _PyThreadState_GET(); + return _PyObject_VectorcallTstate(tstate, func, NULL, 0, NULL); } @@ -322,7 +324,7 @@ _PyObject_Call(PyThreadState *tstate, PyObject *callable, assert(!_PyErr_Occurred(tstate)); assert(PyTuple_Check(args)); assert(kwargs == NULL || PyDict_Check(kwargs)); - + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, callable); vectorcallfunc vector_func = _PyVectorcall_Function(callable); if (vector_func != NULL) { return _PyVectorcall_Call(tstate, vector_func, callable, args, kwargs); @@ -367,6 +369,7 @@ PyCFunction_Call(PyObject *callable, PyObject *args, PyObject *kwargs) PyObject * PyObject_CallOneArg(PyObject *func, PyObject *arg) { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); assert(arg != NULL); PyObject *_args[2]; PyObject **args = _args + 1; // For PY_VECTORCALL_ARGUMENTS_OFFSET @@ -389,6 +392,7 @@ _PyFunction_Vectorcall(PyObject *func, PyObject* const* stack, assert(nargs >= 0); PyThreadState *tstate = _PyThreadState_GET(); assert(nargs == 0 || stack != NULL); + EVAL_CALL_STAT_INC(EVAL_CALL_FUNCTION_VECTORCALL); if (((PyCodeObject *)f->func_code)->co_flags & CO_OPTIMIZED) { return _PyEval_Vector(tstate, f, NULL, stack, nargs, kwnames); } @@ -520,7 +524,7 @@ _PyObject_CallFunctionVa(PyThreadState *tstate, PyObject *callable, if (stack == NULL) { return NULL; } - + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, callable); if (nargs == 1 && PyTuple_Check(stack[0])) { /* Special cases for backward compatibility: - PyObject_CallFunction(func, "O", tuple) calls func(*tuple) @@ -815,6 +819,11 @@ object_vacall(PyThreadState *tstate, PyObject *base, stack[i] = va_arg(vargs, PyObject *); } +#ifdef Py_STATS + if (PyFunction_Check(callable)) { + EVAL_CALL_STAT_INC(EVAL_CALL_API); + } +#endif /* Call the function */ result = _PyObject_VectorcallTstate(tstate, callable, stack, nargs, NULL); @@ -852,6 +861,7 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, args++; nargsf--; } + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable); PyObject *result = _PyObject_VectorcallTstate(tstate, callable, args, nargsf, kwnames); Py_DECREF(callable); diff --git a/Objects/descrobject.c b/Objects/descrobject.c index c3c541bf3c3212..8e8a46ceca6b3a 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1677,6 +1677,7 @@ property_descr_set(PyObject *self, PyObject *obj, PyObject *value) res = PyObject_CallOneArg(func, obj); } else { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); PyObject *args[] = { obj, value }; res = PyObject_Vectorcall(func, args, 2, NULL); } diff --git a/Objects/genobject.c b/Objects/genobject.c index a88522abf414c6..2b45e28cbf16df 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -13,6 +13,7 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "structmember.h" // PyMemberDef #include "opcode.h" // SEND +#include "pystats.h" static PyObject *gen_close(PyGenObject *, PyObject *); static PyObject *async_gen_asend_new(PyAsyncGenObject *, PyObject *); @@ -218,6 +219,7 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult, } gen->gi_frame_state = FRAME_EXECUTING; + EVAL_CALL_STAT_INC(EVAL_CALL_GENERATOR); result = _PyEval_EvalFrame(tstate, frame, exc); if (gen->gi_frame_state == FRAME_EXECUTING) { gen->gi_frame_state = FRAME_COMPLETED; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ff5196c904eefb..37ac886f00446c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1653,6 +1653,7 @@ vectorcall_unbound(PyThreadState *tstate, int unbound, PyObject *func, args++; nargsf = nargsf - 1 + PY_VECTORCALL_ARGUMENTS_OFFSET; } + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_SLOT, func); return _PyObject_VectorcallTstate(tstate, func, args, nargsf, NULL); } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 072bf75bf8d697..6284bbdf97468e 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -198,6 +198,7 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, goto error; } PyThreadState *tstate = _PyThreadState_GET(); + EVAL_CALL_STAT_INC(EVAL_CALL_BUILD_CLASS); cell = _PyEval_Vector(tstate, (PyFunctionObject *)func, ns, NULL, 0, NULL); if (cell != NULL) { if (bases != orig_bases) { diff --git a/Python/ceval.c b/Python/ceval.c index 56b1017fd9298f..6158524999a84b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1207,6 +1207,7 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) if (func == NULL) { return NULL; } + EVAL_CALL_STAT_INC(EVAL_CALL_LEGACY); PyObject *res = _PyEval_Vector(tstate, func, locals, NULL, 0, NULL); Py_DECREF(func); return res; @@ -6432,6 +6433,7 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, if (frame == NULL) { return NULL; } + EVAL_CALL_STAT_INC(EVAL_CALL_VECTOR); PyObject *retval = _PyEval_EvalFrame(tstate, frame, 0); assert( _PyFrame_GetStackPointer(frame) == _PyFrame_Stackbase(frame) || @@ -6507,6 +6509,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, if (func == NULL) { goto fail; } + EVAL_CALL_STAT_INC(EVAL_CALL_LEGACY); res = _PyEval_Vector(tstate, func, locals, allargs, argcount, kwnames); @@ -7299,7 +7302,6 @@ do_call_core(PyThreadState *tstate, ) { PyObject *result; - if (PyCFunction_CheckExact(func) || PyCMethod_CheckExact(func)) { C_TRACE(result, PyObject_Call(func, callargs, kwdict)); return result; @@ -7329,6 +7331,7 @@ do_call_core(PyThreadState *tstate, return result; } } + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_FUNCTION_EX, func); return PyObject_Call(func, callargs, kwdict); } diff --git a/Python/specialize.c b/Python/specialize.c index 7148b5dd029607..a2fb460388055d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -176,6 +176,9 @@ print_call_stats(FILE *out, CallStats *stats) fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n", stats->inlined_py_calls); fprintf(out, "Frames pushed: %" PRIu64 "\n", stats->frames_pushed); fprintf(out, "Frame objects created: %" PRIu64 "\n", stats->frame_objects_created); + for (int i = 0; i < EVAL_CALL_KINDS; i++) { + fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", i, stats->eval_calls[i]); + } } static void @@ -904,7 +907,7 @@ typedef enum { MANAGED_DICT = 2, OFFSET_DICT = 3, NO_DICT = 4, - LAZY_DICT = 5, + LAZY_DICT = 5, } ObjectDictKind; // Please collect stats carefully before and after modifying. A subtle change diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 3d7479f261b4af..04d18790f0d792 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -108,13 +108,14 @@ def extract_opcode_stats(stats): opcode_stats[int(n)][rest.strip(".")] = value return opcode_stats -def parse_kinds(spec_src): +def parse_kinds(spec_src, prefix="SPEC_FAIL"): defines = collections.defaultdict(list) + start = "#define " + prefix + "_" for line in spec_src: line = line.strip() - if not line.startswith("#define SPEC_FAIL_"): + if not line.startswith(start): continue - line = line[len("#define SPEC_FAIL_"):] + line = line[len(start):] name, val = line.split() defines[int(val.strip())].append(name.strip()) return defines @@ -129,8 +130,6 @@ def kind_to_text(kind, defines, opname): opname = "ATTR" if opname.endswith("SUBSCR"): opname = "SUBSCR" - if opname.startswith("PRECALL"): - opname = "CALL" for name in defines[kind]: if name.startswith(opname): return pretty(name[len(opname)+1:]) @@ -254,6 +253,9 @@ def emit_specialization_overview(opcode_stats, total): )) def emit_call_stats(stats): + stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h") + with open(stats_path) as stats_src: + defines = parse_kinds(stats_src, prefix="EVAL_CALL") with Section("Call stats", summary="Inlined calls and frame stats"): total = 0 for key, value in stats.items(): @@ -263,6 +265,11 @@ def emit_call_stats(stats): for key, value in stats.items(): if "Calls to" in key: rows.append((key, value, f"{100*value/total:0.1f}%")) + elif key.startswith("Calls "): + name, index = key[:-1].split("[") + index = int(index) + label = name + " (" + pretty(defines[index][0]) + ")" + rows.append((label, value, f"{100*value/total:0.1f}%")) for key, value in stats.items(): if key.startswith("Frame"): rows.append((key, value, f"{100*value/total:0.1f}%"))