Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-111520: Integrate the Tier 2 interpreter in the Tier 1 interpreter #111428

Merged
merged 27 commits into main from mix-tiers
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
97984d3
Make all labels in _PyUopExecute end in _tier_two
gvanrossum Oct 27, 2023
157a450
Control lltrace via PYTHON_LLTRACE=N
gvanrossum Oct 27, 2023
d1b9c1b
Rename PYTHONUOPS to PYTHON_UOPS for consistency
gvanrossum Oct 27, 2023
d805312
Integrate Tier 2 into _PyEval_EvalFrameDefault
gvanrossum Oct 27, 2023
e0e60ce
DO NOT MERGE: Always use -Xuops
gvanrossum Aug 9, 2023
a720f1a
Merge branch 'main' into mix-tiers
gvanrossum Oct 30, 2023
b808f6d
Merge branch 'main' into mix-tiers
gvanrossum Oct 31, 2023
a0aed59
Get rid of separate executor.c file
gvanrossum Oct 31, 2023
75605c7
Most suggestions from Mark's code review
gvanrossum Oct 31, 2023
5e84476
Fix test_generated_cases.py by stripping preprocessor prefix/suffix
gvanrossum Oct 31, 2023
917b7a2
Eradicate executors.c from Windows build files
gvanrossum Nov 1, 2023
9067eb0
Rename deoptimize_tier_two back to deoptimize (for Justin)
gvanrossum Nov 1, 2023
6a4e495
Fix whitespace
gvanrossum Nov 1, 2023
81f1883
Revert "DO NOT MERGE: Always use -Xuops"
gvanrossum Nov 1, 2023
ee27e73
Add blurb
gvanrossum Nov 1, 2023
7ebc228
Add more color to the news blurb
gvanrossum Nov 1, 2023
a1d0108
Merge remote-tracking branch 'origin/main' into mix-tiers
gvanrossum Nov 1, 2023
a96ac7f
Eliminate 'operand' local variable
gvanrossum Nov 1, 2023
e02409d
Rename self -> current_executor (TODO: eliminate it?)
gvanrossum Nov 1, 2023
fdf1a2f
Move `_EXIT_TRACE` logic to a separate label
gvanrossum Nov 1, 2023
2a6450c
Limit infinite recursion in test_typing
gvanrossum Nov 1, 2023
4783de3
Limit infinite recursion in test_fileio
gvanrossum Nov 1, 2023
b9516a1
Limit infinite recursion in test_xml_etree
gvanrossum Nov 1, 2023
33c3fae
Limit infinite recursion in test_call
gvanrossum Nov 1, 2023
998e054
Fix test_call better: adjust Py_C_RECURSION_LIMIT in pystate.h
gvanrossum Nov 1, 2023
19d9d40
Revert unnecessary fixes to recursive tests
gvanrossum Nov 1, 2023
03de1bf
Even better fix -- increase stack space on Windows in debug mode
gvanrossum Nov 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 60 additions & 57 deletions Python/bytecodes.c

Large diffs are not rendered by default.

142 changes: 131 additions & 11 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,15 @@ maybe_lltrace_resume_frame(_PyInterpreterFrame *frame, _PyInterpreterFrame *skip
if (r < 0) {
return -1;
}
int lltrace = r;
int lltrace = r * 5; // Levels 1-4 only trace uops
if (!lltrace) {
// When tracing executed uops, also trace bytecode
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that
// Can also be controlled by environment variable
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
}
if (lltrace) {
if (lltrace >= 5) {
lltrace_resume_frame(frame);
}
return lltrace;
Expand Down Expand Up @@ -679,9 +679,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
#ifdef Py_STATS
int lastopcode = 0;
#endif
// opcode is an 8-bit value to improve the code generated by MSVC
// for the big switch below (in combination with the EXTRA_CASES macro).
uint8_t opcode; /* Current opcode */
int opcode; /* Current opcode */
int oparg; /* Current opcode argument, if any */
#ifdef LLTRACE
int lltrace = 0;
Expand Down Expand Up @@ -729,6 +727,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
goto resume_with_error;
}

/* State shared between Tier 1 and Tier 2 interpreter */
_PyUOpExecutorObject *self = NULL;

/* Local "register" variables.
* These are cached values from the frame and code object. */

Expand Down Expand Up @@ -765,7 +766,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
/* Start instructions */
#if !USE_COMPUTED_GOTOS
dispatch_opcode:
switch (opcode)
// Cast to an 8-bit value to improve the code generated by MSVC
// (in combination with the EXTRA_CASES macro).
switch ((uint8_t)opcode)
#endif
{

Expand Down Expand Up @@ -913,7 +916,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
/* Resume normal execution */
#ifdef LLTRACE
if (lltrace) {
if (lltrace >= 5) {
lltrace_resume_frame(frame);
}
#endif
Expand Down Expand Up @@ -942,6 +945,123 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
stack_pointer = _PyFrame_GetStackPointer(frame);
goto error;



// The Tier 2 interpreter is also here!
enter_tier_two:

#undef LOAD_IP
#define LOAD_IP(UNUSED) \
do { ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; } while (0)

#undef GOTO_ERROR
#define GOTO_ERROR(LABEL) goto LABEL ## _tier_two

#undef DEOPT_IF
#define DEOPT_IF(COND, INSTNAME) \
if ((COND)) { \
goto deoptimize_tier_two;\
}

#ifdef Py_STATS
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
#undef STAT_INC
#define STAT_INC(opname, name) ((void)0)
#undef STAT_DEC
#define STAT_DEC(opname, name) ((void)0)
#undef CALL_STAT_INC
#define CALL_STAT_INC(name) ((void)0)
#endif

#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0

#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
#else
#define DPRINTF(level, ...)
#endif

CHECK_EVAL_BREAKER();
gvanrossum marked this conversation as resolved.
Show resolved Hide resolved

OPT_STAT_INC(traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
gvanrossum marked this conversation as resolved.
Show resolved Hide resolved
_PyUOpInstruction *next_uop = self->trace;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (`next_uop`) should have been set by ENTER_EXECUTOR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes and no. It would mean that next_uop would have to be a local in the top-level function scope. I am trying to reduce the number of locals there. For now let me keep it this way. I expect that it will be fine even when we start stitching, because each trace can only be entered at the top and we need the executor (so that we can decref it upon exiting the trace). The initial next_uop is just the executor plus a fixed offset anyway.

uint64_t operand;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif

for (;;) {
opcode = next_uop->opcode;
oparg = next_uop->oparg;
operand = next_uop->operand;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", stack_level %d\n",
(int)(next_uop - self->trace),
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
oparg,
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
next_uop++;
OPT_STAT_INC(uops_executed);
UOP_EXE_INC(opcode);
#ifdef Py_STATS
trace_uop_execution_counter++;
#endif

switch (opcode) {

#undef TIER_ONE
#define TIER_TWO 2
#include "executor_cases.c.h"

default:
gvanrossum marked this conversation as resolved.
Show resolved Hide resolved
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 "\n",
opcode, oparg, operand);
Py_FatalError("Unknown uop");
}

}
}

unbound_local_error_tier_two:
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
goto error_tier_two;

pop_4_error_tier_two:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't this be shared with tier 1? Unwinding should work the same.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alas, not quite. The debug output is different, the stats collection is different, but most importantly, we need to DECREF(self) here before jumping to error.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also we need to set next_instr = frame->instr_ptr.

STACK_SHRINK(1);
pop_3_error_tier_two:
STACK_SHRINK(1);
pop_2_error_tier_two:
STACK_SHRINK(1);
pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
goto resume_with_error;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may not need to reset return_offset; we can also skip syncing stack_pointer; so we could make this slightly faster as follows (but it doesn't matter since errors are presumed to be on the slow path):

Suggested change
- frame->return_offset = 0; // Don't leave this random
- _PyFrame_SetStackPointer(frame, stack_pointer);
- Py_DECREF(self);
- goto resume_with_error;
+ Py_DECREF(self);
+ next_instr = frame->instr_ptr;
+ goto error;


deoptimize_tier_two:
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Dispatch to frame->instr_ptr
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
enter_tier_one:
next_instr = frame->instr_ptr;
goto resume_frame;

}
#if defined(__GNUC__)
# pragma GCC diagnostic pop
Expand Down
9 changes: 6 additions & 3 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@

/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef LLTRACE
#define PRE_DISPATCH_GOTO() if (lltrace) { \
#define PRE_DISPATCH_GOTO() if (lltrace >= 5) { \
lltrace_instruction(frame, stack_pointer, next_instr); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
Expand Down Expand Up @@ -116,11 +116,14 @@
goto start_frame; \
} while (0)

// Use this instead of 'goto error' so Tier 2 can go to a different label
#define GOTO_ERROR(LABEL) goto LABEL

#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
if (_Py_HandlePending(tstate) != 0) { \
goto error; \
GOTO_ERROR(error); \
} \
}

Expand Down Expand Up @@ -326,7 +329,7 @@ do { \
}\
else { \
result = PyFloat_FromDouble(dval); \
if ((result) == NULL) goto error; \
if ((result) == NULL) GOTO_ERROR(error); \
_Py_DECREF_NO_DEALLOC(left); \
_Py_DECREF_NO_DEALLOC(right); \
} \
Expand Down
40 changes: 22 additions & 18 deletions Python/executor.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
#define TIER_TWO 2
#include "ceval_macros.h"

#undef GOTO_ERROR
#define GOTO_ERROR(LABEL) goto LABEL ## _tier_two

#undef DEOPT_IF
#define DEOPT_IF(COND, INSTNAME) \
if ((COND)) { \
goto deoptimize; \
goto deoptimize_tier_two;\
}

#ifdef Py_STATS
Expand All @@ -45,11 +47,12 @@
_PyInterpreterFrame *
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
{
Py_FatalError("Tier 2 is now inlined into Tier 1");
#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
Expand All @@ -72,7 +75,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject

OPT_STAT_INC(traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
int pc = 0;
_PyUOpInstruction *next_uop = self->trace;
int opcode;
int oparg;
uint64_t operand;
Expand All @@ -81,17 +84,17 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
#endif

for (;;) {
opcode = self->trace[pc].opcode;
oparg = self->trace[pc].oparg;
operand = self->trace[pc].operand;
opcode = next_uop->opcode;
oparg = next_uop->oparg;
operand = next_uop->operand;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", stack_level %d\n",
pc,
(int)(next_uop - self->trace),
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
oparg,
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
pc++;
next_uop++;
OPT_STAT_INC(uops_executed);
UOP_EXE_INC(opcode);
#ifdef Py_STATS
Expand All @@ -111,22 +114,22 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
}
}

unbound_local_error:
unbound_local_error_tier_two:
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
goto error;
goto error_tier_two;

pop_4_error:
pop_4_error_tier_two:
STACK_SHRINK(1);
pop_3_error:
pop_3_error_tier_two:
STACK_SHRINK(1);
pop_2_error:
pop_2_error_tier_two:
STACK_SHRINK(1);
pop_1_error:
pop_1_error_tier_two:
STACK_SHRINK(1);
error:
error_tier_two:
// On ERROR_IF we return NULL as the frame.
// The caller recovers the frame from tstate->current_frame.
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
Expand All @@ -136,13 +139,14 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
Py_DECREF(self);
return NULL;

deoptimize:
deoptimize_tier_two:
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Dispatch to frame->instr_ptr
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
enter_tier_one:
return frame;
}
Loading
Loading