Skip to content

Commit

Permalink
Tier 2 cleanups and tweaks (#115534)
Browse files Browse the repository at this point in the history
* Rename `_testinternalcapi.get_{uop,counter}_optimizer` to `new_*_optimizer`
* Use `_PyUOpName()` instead of` _PyOpcode_uop_name[]`
* Add `target` to executor iterator items -- `list(ex)` now returns `(opcode, oparg, target, operand)` quadruples
* Add executor methods `get_opcode()` and `get_oparg()` to get `vmdata.opcode`, `vmdata.oparg`
* Define a helper for printing uops, and unify various places where they are printed
* Add a hack to summarize_stats.py to fix legacy uop names (e.g. `POP_TOP` -> `_POP_TOP`)
* Define helpers in `test_opt.py` for accessing the set or list of opnames of an executor
  • Loading branch information
gvanrossum committed Feb 20, 2024
1 parent 520403e commit 142502e
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 121 deletions.
157 changes: 85 additions & 72 deletions Lib/test/test_capi/test_opt.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Lib/test/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ class TestOptimizer(MonitoringTestBase, unittest.TestCase):
def setUp(self):
import _testinternalcapi
self.old_opt = _testinternalcapi.get_optimizer()
opt = _testinternalcapi.get_counter_optimizer()
opt = _testinternalcapi.new_counter_optimizer()
_testinternalcapi.set_optimizer(opt)
super(TestOptimizer, self).setUp()

Expand Down
8 changes: 4 additions & 4 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -960,13 +960,13 @@ iframe_getlasti(PyObject *self, PyObject *frame)
}

static PyObject *
get_counter_optimizer(PyObject *self, PyObject *arg)
new_counter_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewCounter();
}

static PyObject *
get_uop_optimizer(PyObject *self, PyObject *arg)
new_uop_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewUOpOptimizer();
}
Expand Down Expand Up @@ -1711,8 +1711,8 @@ static PyMethodDef module_functions[] = {
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
{"set_optimizer", set_optimizer, METH_O, NULL},
{"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL},
{"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
{"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL},
{"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL},
{"new_uop_optimizer", new_uop_optimizer, METH_NOARGS, NULL},
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
{"invalidate_executors", invalidate_executors, METH_O, NULL},
{"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),
Expand Down
61 changes: 40 additions & 21 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,10 @@ static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = {

extern const struct _PyCode_DEF(8) _Py_InitCleanup;

extern const char *_PyUOpName(int index);
#ifdef Py_DEBUG
extern void _PyUOpPrint(const _PyUOpInstruction *uop);
#endif


/* Disable unused label warnings. They are handy for debugging, even
if computed gotos aren't used. */
Expand Down Expand Up @@ -1006,14 +1009,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
for (;;) {
uopcode = next_uop->opcode;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
(int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace)),
_PyUOpName(uopcode),
next_uop->oparg,
next_uop->operand,
next_uop->target,
#ifdef Py_DEBUG
if (lltrace >= 3) {
printf("%4d uop: ", (int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace)));
_PyUOpPrint(next_uop);
printf(" stack_level=%d\n",
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
}
#endif
next_uop++;
OPT_STAT_INC(uops_executed);
UOP_STAT_INC(uopcode, execution_count);
Expand All @@ -1028,9 +1031,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
default:
#ifdef Py_DEBUG
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
next_uop[-1].opcode, next_uop[-1].oparg, next_uop[-1].operand,
(int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace) - 1));
printf("Unknown uop: ");
_PyUOpPrint(&next_uop[-1]);
printf(" @ %d\n", (int)(next_uop - current_executor->trace - 1));
Py_FatalError("Unknown uop");
}
#else
Expand Down Expand Up @@ -1058,10 +1061,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
DPRINTF(2, "Error: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1),
_PyOpcode_OpName[frame->instr_ptr->op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("Error: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(" @ %d -> %s]\n",
(int)(next_uop - current_executor->trace - 1),
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random
_PyFrame_SetStackPointer(frame, stack_pointer);
Expand All @@ -1072,9 +1080,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
// Jump here from DEOPT_IF()
deoptimize:
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
_PyOpcode_OpName[next_instr->op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("DEOPT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(" -> %s]\n",
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(uopcode, miss);
Py_DECREF(current_executor);
Expand All @@ -1088,9 +1101,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
uint32_t exit_index = next_uop[-1].exit_index;
assert(exit_index < current_executor->exit_count);
_PyExitData *exit = &current_executor->exits[exit_index];
DPRINTF(2, "SIDE EXIT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", exit %u, temp %d, target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, exit_index, exit->temperature,
exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit_index, exit->temperature, exit->target,
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
Py_INCREF(exit->executor);
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_TWO(exit->executor);
Expand Down
77 changes: 63 additions & 14 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,22 @@ is_valid(PyObject *self, PyObject *Py_UNUSED(ignored))
return PyBool_FromLong(((_PyExecutorObject *)self)->vm_data.valid);
}

static PyObject *
get_opcode(PyObject *self, PyObject *Py_UNUSED(ignored))
{
return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.opcode);
}

static PyObject *
get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
{
return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.oparg);
}

static PyMethodDef executor_methods[] = {
{ "is_valid", is_valid, METH_NOARGS, NULL },
{ "get_opcode", get_opcode, METH_NOARGS, NULL },
{ "get_oparg", get_oparg, METH_NOARGS, NULL },
{ NULL, NULL },
};

Expand All @@ -282,9 +296,30 @@ uop_dealloc(_PyExecutorObject *self) {
const char *
_PyUOpName(int index)
{
if (index < 0 || index > MAX_UOP_ID) {
return NULL;
}
return _PyOpcode_uop_name[index];
}

#ifdef Py_DEBUG
void
_PyUOpPrint(const _PyUOpInstruction *uop)
{
const char *name = _PyUOpName(uop->opcode);
if (name == NULL) {
printf("<uop %d>", uop->opcode);
}
else {
printf("%s", name);
}
printf(" (%d, target=%d, operand=%" PRIx64 ")",
uop->oparg,
uop->target,
(uint64_t)uop->operand);
}
#endif

static Py_ssize_t
uop_len(_PyExecutorObject *self)
{
Expand Down Expand Up @@ -312,14 +347,21 @@ uop_item(_PyExecutorObject *self, Py_ssize_t index)
Py_DECREF(oname);
return NULL;
}
PyObject *target = PyLong_FromUnsignedLong(self->trace[index].target);
if (oparg == NULL) {
Py_DECREF(oparg);
Py_DECREF(oname);
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
if (operand == NULL) {
Py_DECREF(target);
Py_DECREF(oparg);
Py_DECREF(oname);
return NULL;
}
PyObject *args[3] = { oname, oparg, operand };
return _PyTuple_FromArraySteal(args, 3);
PyObject *args[4] = { oname, oparg, target, operand };
return _PyTuple_FromArraySteal(args, 4);
}

PySequenceMethods uop_as_sequence = {
Expand Down Expand Up @@ -390,19 +432,29 @@ BRANCH_TO_GUARD[4][2] = {
#endif


// Beware: Macro arg order differs from struct member order
#ifdef Py_DEBUG
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
DPRINTF(2, \
" ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \
_PyUOpName(OPCODE), \
(OPARG), \
(uint64_t)(OPERAND), \
TARGET); \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
if (lltrace >= 2) { \
printf("%4d ADD_TO_TRACE: ", trace_length); \
_PyUOpPrint(&trace[trace_length]); \
printf("\n"); \
} \
trace_length++;
#else
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
trace_length++;
#endif

#define INSTR_IP(INSTR, CODE) \
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
Expand Down Expand Up @@ -890,12 +942,9 @@ make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *depende
if (lltrace >= 2) {
printf("Optimized executor (length %d):\n", length);
for (int i = 0; i < length; i++) {
printf("%4d %s(%d, %d, %" PRIu64 ")\n",
i,
_PyUOpName(executor->trace[i].opcode),
executor->trace[i].oparg,
executor->trace[i].target,
executor->trace[i].operand);
printf("%4d OPTIMIZED: ", i);
_PyUOpPrint(&executor->trace[i]);
printf("\n");
}
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)

#ifdef Py_DEBUG
extern const char *_PyUOpName(int index);
static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
static inline int get_lltrace(void) {
char *uop_debug = Py_GETENV(DEBUG_ENV);
Expand Down Expand Up @@ -632,7 +633,7 @@ uop_redundancy_eliminator(
_Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;

DPRINTF(3, "Abstract interpreting %s:%d ",
_PyOpcode_uop_name[opcode],
_PyUOpName(opcode),
oparg);
switch (opcode) {
#include "tier2_redundancy_eliminator_cases.c.h"
Expand Down
12 changes: 4 additions & 8 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <stdlib.h> // rand()

extern const char *_PyUOpName(int index);

/* For guidance on adding or extending families of instructions see
* ./adaptive.md
Expand Down Expand Up @@ -246,17 +247,12 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
stats->optimizer_failure_reason_no_memory);

const char* const* names;
for (int i = 0; i < 512; i++) {
if (i < 256) {
names = _PyOpcode_OpName;
} else {
names = _PyOpcode_uop_name;
}
for (int i = 0; i <= MAX_UOP_ID; i++) {
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
}
if (stats->opcode[i].miss) {
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", names[i], stats->opcode[i].miss);
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
}
}

Expand Down
4 changes: 4 additions & 0 deletions Tools/scripts/summarize_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def load_raw_data(input: Path) -> RawData:
file=sys.stderr,
)
continue
# Hack to handle older data files where some uops
# are missing an underscore prefix in their name
if key.startswith("uops[") and key[5:6] != "_":
key = "uops[_" + key[5:]
stats[key.strip()] += int(value)
stats["__nfiles__"] += 1

Expand Down

0 comments on commit 142502e

Please sign in to comment.