From a08909dc765156a81adc296457f146909c068102 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 14:27:54 -0800 Subject: [PATCH 01/21] Add executor_cases.c.h dependency for ceval.o --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 2174ec3ac56158..3d766425abba34 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1609,6 +1609,7 @@ Python/ceval.o: \ $(srcdir)/Python/ceval_macros.h \ $(srcdir)/Python/condvar.h \ $(srcdir)/Python/generated_cases.c.h \ + $(srcdir)/Python/executor_cases.c.h \ $(srcdir)/Python/opcode_targets.h Python/flowgraph.o: \ From 4c2914bff6bd7b133ac5016fb2f2342a21a8871e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:46:06 -0800 Subject: [PATCH 02/21] Clean up flags.py --- Tools/cases_generator/flags.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 0066c9e74512c3..808c9e82bbce07 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -53,7 +53,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: if "CALL_INTRINSIC" in instr.name: - return True; + return True tkns = iter(instr.tokens) for tkn in tkns: if tkn.kind != lx.IDENTIFIER: @@ -79,6 +79,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: return True return False + @dataclasses.dataclass class InstructionFlags: """Construct and manipulate instruction flags""" @@ -124,9 +125,7 @@ def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags": or variable_used(instr, "exception_unwind") or variable_used(instr, "resume_with_error") ), - HAS_ESCAPES_FLAG=( - makes_escaping_api_call(instr) - ), + HAS_ESCAPES_FLAG=makes_escaping_api_call(instr), ) @staticmethod From 053a0a22936e8885333751546a1d9e100b7b6486 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:47:50 -0800 Subject: [PATCH 03/21] Clean up parsing.py --- Tools/cases_generator/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 49459be68ae5e8..d36bd52b022ea9 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -105,7 +105,7 @@ class OpName(Node): @dataclass class InstHeader(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] @@ -114,7 +114,7 @@ class InstHeader(Node): @dataclass class InstDef(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] From b838435c80dd6eb33c0d3e83bb11f3cd14dab66a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:32:13 -0800 Subject: [PATCH 04/21] Add back printing optimized uops --- Python/optimizer.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index e14ad89bbe2921..5d1ef8a683c250 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -325,7 +325,8 @@ uop_dealloc(_PyUOpExecutorObject *self) { } static const char * -uop_name(int index) { +uop_name(int index) +{ if (index <= MAX_REAL_OPCODE) { return _PyOpcode_OpName[index]; } @@ -832,6 +833,24 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) assert(dest == -1); executor->base.execute = _PyUopExecute; _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); +#ifdef Py_DEBUG + char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); + int lltrace = 0; + if (python_lltrace != NULL && *python_lltrace >= '0') { + lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that + } + if (lltrace >= 2) { + printf("Optimized executor (length %d):\n", length); + for (int i = 0; i < length; i++) { + printf("%4d %s(%d, %d, %" PRIu64 ")\n", + i, + uop_name(executor->trace[i].opcode), + executor->trace[i].oparg, + executor->trace[i].target, + executor->trace[i].operand); + } + } +#endif return (_PyExecutorObject *)executor; } From b28effa9f7b43ed892cc31bb2872708d64ec0588 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:37:24 -0800 Subject: [PATCH 05/21] Hacky way to make FOR_ITER a viable uop --- Include/internal/pycore_opcode_metadata.h | 86 +++++++++++++---------- Python/abstract_interp_cases.c.h | 10 +++ Python/bytecodes.c | 29 +++++++- Python/executor_cases.c.h | 49 +++++++++++++ Python/optimizer.c | 1 + Tools/cases_generator/flags.py | 2 +- Tools/cases_generator/generate_cases.py | 2 +- Tools/cases_generator/instructions.py | 2 +- 8 files changed, 137 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4d98b23df5d927..1442350411c90a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -81,45 +81,46 @@ #define _IS_NONE 353 #define _SPECIALIZE_FOR_ITER 354 #define _FOR_ITER 355 -#define _ITER_CHECK_LIST 356 -#define _ITER_JUMP_LIST 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_TUPLE 361 -#define _GUARD_NOT_EXHAUSTED_TUPLE 362 -#define _ITER_NEXT_TUPLE 363 -#define _ITER_CHECK_RANGE 364 -#define _ITER_JUMP_RANGE 365 -#define _GUARD_NOT_EXHAUSTED_RANGE 366 -#define _ITER_NEXT_RANGE 367 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 -#define _GUARD_KEYS_VERSION 369 -#define _LOAD_ATTR_METHOD_WITH_VALUES 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 -#define _CHECK_ATTR_METHOD_LAZY_DICT 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _SPECIALIZE_CALL 376 -#define _CALL 377 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 -#define _CHECK_PEP_523 380 -#define _CHECK_FUNCTION_EXACT_ARGS 381 -#define _CHECK_STACK_SPACE 382 -#define _INIT_CALL_PY_EXACT_ARGS 383 -#define _PUSH_FRAME 384 -#define _SPECIALIZE_BINARY_OP 385 -#define _BINARY_OP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_IS_FALSE_POP 388 -#define _GUARD_IS_NONE_POP 389 -#define _GUARD_IS_NOT_NONE_POP 390 -#define _JUMP_TO_TOP 391 -#define _SAVE_RETURN_OFFSET 392 -#define _INSERT 393 -#define _CHECK_VALIDITY 394 +#define _FOR_ITER_TIER_TWO 356 +#define _ITER_CHECK_LIST 357 +#define _ITER_JUMP_LIST 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _ITER_NEXT_LIST 360 +#define _ITER_CHECK_TUPLE 361 +#define _ITER_JUMP_TUPLE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _ITER_NEXT_TUPLE 364 +#define _ITER_CHECK_RANGE 365 +#define _ITER_JUMP_RANGE 366 +#define _GUARD_NOT_EXHAUSTED_RANGE 367 +#define _ITER_NEXT_RANGE 368 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369 +#define _GUARD_KEYS_VERSION 370 +#define _LOAD_ATTR_METHOD_WITH_VALUES 371 +#define _LOAD_ATTR_METHOD_NO_DICT 372 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _CHECK_ATTR_METHOD_LAZY_DICT 375 +#define _LOAD_ATTR_METHOD_LAZY_DICT 376 +#define _SPECIALIZE_CALL 377 +#define _CALL 378 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380 +#define _CHECK_PEP_523 381 +#define _CHECK_FUNCTION_EXACT_ARGS 382 +#define _CHECK_STACK_SPACE 383 +#define _INIT_CALL_PY_EXACT_ARGS 384 +#define _PUSH_FRAME 385 +#define _SPECIALIZE_BINARY_OP 386 +#define _BINARY_OP 387 +#define _GUARD_IS_TRUE_POP 388 +#define _GUARD_IS_FALSE_POP 389 +#define _GUARD_IS_NONE_POP 390 +#define _GUARD_IS_NOT_NONE_POP 391 +#define _JUMP_TO_TOP 392 +#define _SAVE_RETURN_OFFSET 393 +#define _INSERT 394 +#define _CHECK_VALIDITY 395 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 1; + case _FOR_ITER_TIER_TWO: + return 1; case FOR_ITER: return 1; case INSTRUMENTED_FOR_ITER: @@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 2; + case _FOR_ITER_TIER_TWO: + return 2; case FOR_ITER: return 2; case INSTRUMENTED_FOR_ITER: @@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, @@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a2f6aa8def8f69..28338f53ea7fb9 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,6 +242,10 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + break; + } + case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); @@ -624,6 +628,12 @@ break; } + case _FOR_ITER_TIER_TWO: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case _ITER_CHECK_LIST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8a7dcb8416eb8c..da58cb75e607ed 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2368,7 +2368,7 @@ dummy_func( goto enter_tier_one; } - replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2512,7 +2512,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ next = (*Py_TYPE(iter)->tp_iternext)(iter); if (next == NULL) { @@ -2535,6 +2535,31 @@ dummy_func( // Common case: no jump, leave it to the code generator } + op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4e29fb9f0fa93d..4f2f73ee76d5ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,6 +866,24 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + PyObject *seq; + seq = stack_pointer[-1]; + uint16_t counter = (uint16_t)operand; + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + next_instr = this_instr; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + #endif /* ENABLE_SPECIALIZATION */ + (void)seq; + (void)counter; + break; + } + case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; @@ -2101,6 +2119,37 @@ break; } + case _FOR_ITER_TIER_TWO: { + PyObject *iter; + PyObject *next; + iter = stack_pointer[-1]; + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_LIST: { PyObject *iter; iter = stack_pointer[-1]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 5d1ef8a683c250..5c9f965aeefa85 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, + [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; static const uint16_t diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 808c9e82bbce07..bf76112159e38e 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -175,7 +175,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: tokens: list[lx.Token] = [] skipping = False for i, token in enumerate(node.tokens): - if token.kind == "MACRO": + if token.kind == "CMACRO": text = "".join(token.text.split()) # TODO: Handle nested #if if text == "#if": diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d1dbfeae8d74f6..ba45e3a625072e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -658,7 +658,7 @@ def write_macro_expansions( if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations: # This note just reminds us about macros that cannot # be expanded to Tier 2 uops. It is not an error. - # It is sometimes emitted for macros that have a + # Suppress it using 'replaced op(...)' for macros having # manual translation in translate_bytecode_to_trace() # in Python/optimizer.c. if len(parts) > 1 or part.instr.name != name: diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 9039ac5c6f127e..457221a0e15f75 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "FRAME" in self.name: + if self.name == "_FOR_ITER_TIER_TWO": dprint = print if self.name == "_EXIT_TRACE": From de8f199a17d2daec020fff97f0661e2a7c0f29ff Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:03:13 -0800 Subject: [PATCH 06/21] _SPECIALIZE_UNPACK_SEQUENCE is TIER_ONE_ONLY --- Python/abstract_interp_cases.c.h | 4 ---- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 18 ------------------ Python/generated_cases.c.h | 1 + 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 28338f53ea7fb9..0d7fbe8a39a5d4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,10 +242,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - break; - } - case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index da58cb75e607ed..64e511c7106f2a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1210,6 +1210,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4f2f73ee76d5ef..0652d7a3ab8b3f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,24 +866,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - PyObject *seq; - seq = stack_pointer[-1]; - uint16_t counter = (uint16_t)operand; - #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { - next_instr = this_instr; - _Py_Specialize_UnpackSequence(seq, next_instr, oparg); - DISPATCH_SAME_OPARG(); - } - STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); - #endif /* ENABLE_SPECIALIZATION */ - (void)seq; - (void)counter; - break; - } - case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fe0cbfe6330e51..a74529d88557be 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1731,6 +1731,7 @@ seq = stack_pointer[-1]; { uint16_t counter = read_u16(&this_instr[1].cache); + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; From 5c5d8bd5d71e285a950cd40f6c0cb0c3cc575fef Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:14:17 -0800 Subject: [PATCH 07/21] NEWS --- .../2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst new file mode 100644 index 00000000000000..b2a34ac735cdeb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst @@ -0,0 +1 @@ +Enable translating unspecialized ``FOR_ITER`` to Tier 2. From 36e9ada4d03af2207babd57bfbfd490ed0f9b5fb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 12:34:16 -0800 Subject: [PATCH 08/21] Double max trace length to 256 --- Include/internal/pycore_uops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 0ecbd2dfd1af73..8ab9aaf4108079 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_frame.h" // _PyInterpreterFrame -#define _Py_UOP_MAX_TRACE_LENGTH 128 +#define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { uint16_t opcode; From def1830fad23042b498b21263305a7051790991d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 15:24:02 -0800 Subject: [PATCH 09/21] Move stuff around to suit the JIT branch --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 6 ++---- Python/ceval.c | 3 +-- Python/ceval_macros.h | 2 ++ Python/executor_cases.c.h | 6 ++---- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1442350411c90a..d6e348ae951c55 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64e511c7106f2a..20afb1f9197d8c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2550,13 +2550,10 @@ dummy_func( Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator } @@ -4034,6 +4031,7 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index d684c72cc9e302..f1add9f8cf17a9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1067,7 +1067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_STAT_INC(opcode, miss); frame->return_offset = 0; // Dispatch to frame->instr_ptr _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); // Fall through // Jump here from ENTER_EXECUTOR @@ -1078,7 +1078,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 546adbe5f438d1..cd6edeb0734a11 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,3 +397,5 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; + +#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0652d7a3ab8b3f..ccc72d3427e7ac 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2118,13 +2118,10 @@ Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator STACK_GROW(1); @@ -3276,6 +3273,7 @@ case _EXIT_TRACE: { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; } From 70968183bf7dbc5c487d9a20d426ba997de0fc34 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 12:47:24 -0800 Subject: [PATCH 10/21] Clean up _FOR_ITER_TIER_TWO using DEOPT_IF(true) --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 7 ++----- Python/executor_cases.c.h | 7 ++----- Python/optimizer.c | 6 ++++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d6e348ae951c55..1a2c4956849011 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1681,7 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 20afb1f9197d8c..abc51d82e9409f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2549,11 +2549,8 @@ dummy_func( /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true); } // Common case: no jump, leave it to the code generator } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ccc72d3427e7ac..8d13c524794008 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2117,11 +2117,8 @@ /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true, _FOR_ITER_TIER_TWO); } // Common case: no jump, leave it to the code generator STACK_GROW(1); diff --git a/Python/optimizer.c b/Python/optimizer.c index 5c9f965aeefa85..2df1d266e67f95 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -621,6 +621,12 @@ translate_bytecode_to_trace( } if (_PyUop_Replacements[uop]) { uop = _PyUop_Replacements[uop]; + if (uop == _FOR_ITER_TIER_TWO) { + target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; + assert(word.op.code == END_FOR || + word.op.code == INSTRUMENTED_END_FOR); + } } break; case OPARG_CACHE_1: From 58521059780ff091c118f3b89789831fcf822937 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 13:16:47 -0800 Subject: [PATCH 11/21] Add test --- Lib/test/test_capi/test_misc.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe5c36c0c0dec9..21a5cd3326d707 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2808,6 +2808,36 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertIn("_GUARD_IS_FALSE_POP", uops) + def test_for_iter_tier_two(self): + class MyIter: + def __init__(self, n): + self.n = n + def __iter__(self): + return self + def __next__(self): + self.n -= 1 + if self.n < 0: + raise StopIteration + return self.n + + def testfunc(n, m): + x = 0 + for i in range(m): + for j in MyIter(n): + x += 1000*i + j + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + x = testfunc(10, 10) + + self.assertEqual(x, sum(range(10)) * 10010) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_FOR_ITER_TIER_TWO", uops) + if __name__ == "__main__": unittest.main() From 4ac68b3dfee0b632eaa2bfb62e3667799c095b6f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:06:17 -0800 Subject: [PATCH 12/21] Revert debug change to is_viable_uop() --- Tools/cases_generator/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 457221a0e15f75..9039ac5c6f127e 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if self.name == "_FOR_ITER_TIER_TWO": + if "FRAME" in self.name: dprint = print if self.name == "_EXIT_TRACE": From 95b1a01ea6f2b39b7bc96d2634b74f27cb153e00 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:07:03 -0800 Subject: [PATCH 13/21] Avoid debug-only local variable 'word' --- Python/optimizer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 2df1d266e67f95..4278da3598e4f0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -623,9 +623,8 @@ translate_bytecode_to_trace( uop = _PyUop_Replacements[uop]; if (uop == _FOR_ITER_TIER_TWO) { target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; - assert(word.op.code == END_FOR || - word.op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || + _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); } } break; From 4c720287aa9910c89a0e7ea4b3ae76224c0f9892 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:12:38 -0800 Subject: [PATCH 14/21] Revert changes to _EXIT_TRACE logic --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 1 - Python/ceval.c | 1 + Python/ceval_macros.h | 2 -- Python/executor_cases.c.h | 1 - 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1a2c4956849011..4e45725d393479 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index abc51d82e9409f..06baa85589b168 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4028,7 +4028,6 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index f1add9f8cf17a9..390de32f8e1b15 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1078,6 +1078,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index cd6edeb0734a11..546adbe5f438d1 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,5 +397,3 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; - -#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8d13c524794008..ae662b20e4403f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3270,7 +3270,6 @@ case _EXIT_TRACE: { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; } From 3f0df1ab36f0a47b1c1df9606540461dfd721caf Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 19 Nov 2023 10:24:24 -0800 Subject: [PATCH 15/21] Double max trace length to 512 --- Include/internal/pycore_uops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 8ab9aaf4108079..e2b94894681f44 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_frame.h" // _PyInterpreterFrame -#define _Py_UOP_MAX_TRACE_LENGTH 256 +#define _Py_UOP_MAX_TRACE_LENGTH 512 typedef struct { uint16_t opcode; From b11b8ea58d3baa873f847d1d26f0b0e790c933d2 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 19 Nov 2023 10:24:33 -0800 Subject: [PATCH 16/21] Faster uops: do away with 'operand' variable The code generator now generates code to access 'operand' as needed. (Alas, this needs to be changed for the JIT.) This speeds the spectral_norm benchmark up by 3-4% on my Intel Mac (but not using --enable-optimizations (PGO/LTO)). --- Python/ceval.c | 11 +++---- Python/executor_cases.c.h | 44 +++++++++++++-------------- Tools/cases_generator/instructions.py | 4 ++- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 9c192ce88f13f3..27cbaa99f6dbf1 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -994,7 +994,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int OPT_STAT_INC(traces_executed); _PyUOpInstruction *next_uop = current_executor->trace; - uint64_t operand; #ifdef Py_STATS uint64_t trace_uop_execution_counter = 0; #endif @@ -1002,13 +1001,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int for (;;) { opcode = next_uop->opcode; oparg = next_uop->oparg; - operand = next_uop->operand; DPRINTF(3, "%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n", (int)(next_uop - current_executor->trace), _PyUopName(opcode), oparg, - operand, + next_uop->operand, next_uop->target, (int)(stack_pointer - _PyFrame_Stackbase(frame))); next_uop++; @@ -1025,8 +1023,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int default: #ifdef Py_DEBUG { - fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 "\n", - opcode, oparg, operand); + fprintf(stderr, "Unknown uop %d, oparg %d\n", opcode, oparg); Py_FatalError("Unknown uop"); } #else @@ -1055,7 +1052,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int STACK_SHRINK(1); error_tier_two: DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", - opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target, + opcode, _PyUopName(opcode), oparg, next_uop[-1].operand, next_uop[-1].target, (int)(next_uop - current_executor->trace - 1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); frame->return_offset = 0; // Don't leave this random @@ -1068,7 +1065,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // On DEOPT_IF we just repeat the last instruction. // This presumes nothing was popped from the stack (nor pushed). DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", - opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target, + opcode, _PyUopName(opcode), oparg, next_uop[-1].operand, next_uop[-1].target, (int)(next_uop - current_executor->trace - 1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); UOP_STAT_INC(opcode, miss); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ae662b20e4403f..6d9447481fe6bc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -204,7 +204,7 @@ PyObject *value; PyObject *res; value = stack_pointer[-1]; - uint32_t version = (uint32_t)operand; + uint32_t version = (uint32_t)next_uop[-1].operand; // This one is a bit weird, because we expect *some* failures: assert(version); DEOPT_IF(Py_TYPE(value)->tp_version_tag != version, TO_BOOL); @@ -1115,7 +1115,7 @@ } case _GUARD_GLOBALS_VERSION: { - uint16_t version = (uint16_t)operand; + uint16_t version = (uint16_t)next_uop[-1].operand; PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_GLOBALS_VERSION); DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_GLOBALS_VERSION); @@ -1124,7 +1124,7 @@ } case _GUARD_BUILTINS_VERSION: { - uint16_t version = (uint16_t)operand; + uint16_t version = (uint16_t)next_uop[-1].operand; PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_BUILTINS_VERSION); DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_BUILTINS_VERSION); @@ -1135,7 +1135,7 @@ case _LOAD_GLOBAL_MODULE: { PyObject *res; PyObject *null = NULL; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; PyDictObject *dict = (PyDictObject *)GLOBALS(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); res = entries[index].me_value; @@ -1153,7 +1153,7 @@ case _LOAD_GLOBAL_BUILTINS: { PyObject *res; PyObject *null = NULL; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; PyDictObject *bdict = (PyDictObject *)BUILTINS(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); res = entries[index].me_value; @@ -1585,7 +1585,7 @@ case _GUARD_TYPE_VERSION: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)operand; + uint32_t type_version = (uint32_t)next_uop[-1].operand; PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, _GUARD_TYPE_VERSION); @@ -1607,7 +1607,7 @@ PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); attr = _PyDictOrValues_GetValues(dorv)->values[index]; DEOPT_IF(attr == NULL, _LOAD_ATTR_INSTANCE_VALUE); @@ -1624,7 +1624,7 @@ case _CHECK_ATTR_MODULE: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)operand; + uint32_t type_version = (uint32_t)next_uop[-1].operand; DEOPT_IF(!PyModule_CheckExact(owner), _CHECK_ATTR_MODULE); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; assert(dict != NULL); @@ -1637,7 +1637,7 @@ PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); assert(index < dict->ma_keys->dk_nentries); @@ -1671,7 +1671,7 @@ PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t hint = (uint16_t)operand; + uint16_t hint = (uint16_t)next_uop[-1].operand; PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, _LOAD_ATTR_WITH_HINT); @@ -1702,7 +1702,7 @@ PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; char *addr = (char *)owner + index; attr = *(PyObject **)addr; DEOPT_IF(attr == NULL, _LOAD_ATTR_SLOT); @@ -1719,7 +1719,7 @@ case _CHECK_ATTR_CLASS: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)operand; + uint32_t type_version = (uint32_t)next_uop[-1].operand; DEOPT_IF(!PyType_Check(owner), _CHECK_ATTR_CLASS); assert(type_version != 0); DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version, _CHECK_ATTR_CLASS); @@ -1731,7 +1731,7 @@ PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); attr = Py_NewRef(descr); @@ -1757,7 +1757,7 @@ PyObject *value; owner = stack_pointer[-1]; value = stack_pointer[-2]; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); STAT_INC(STORE_ATTR, hit); PyDictValues *values = _PyDictOrValues_GetValues(dorv); @@ -1779,7 +1779,7 @@ PyObject *value; owner = stack_pointer[-1]; value = stack_pointer[-2]; - uint16_t index = (uint16_t)operand; + uint16_t index = (uint16_t)next_uop[-1].operand; char *addr = (char *)owner + index; STAT_INC(STORE_ATTR, hit); PyObject *old_value = *(PyObject **)addr; @@ -2378,7 +2378,7 @@ case _GUARD_KEYS_VERSION: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t keys_version = (uint32_t)operand; + uint32_t keys_version = (uint32_t)next_uop[-1].operand; PyTypeObject *owner_cls = Py_TYPE(owner); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, _GUARD_KEYS_VERSION); @@ -2390,7 +2390,7 @@ PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; assert(oparg & 1); /* Cached method object */ STAT_INC(LOAD_ATTR, hit); @@ -2409,7 +2409,7 @@ PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; assert(oparg & 1); assert(Py_TYPE(owner)->tp_dictoffset == 0); STAT_INC(LOAD_ATTR, hit); @@ -2427,7 +2427,7 @@ PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; assert((oparg & 1) == 0); STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); @@ -2441,7 +2441,7 @@ PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; assert((oparg & 1) == 0); assert(Py_TYPE(owner)->tp_dictoffset == 0); STAT_INC(LOAD_ATTR, hit); @@ -2468,7 +2468,7 @@ PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)operand; + PyObject *descr = (PyObject *)next_uop[-1].operand; assert(oparg & 1); STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); @@ -2517,7 +2517,7 @@ PyObject *callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - uint32_t func_version = (uint32_t)operand; + uint32_t func_version = (uint32_t)next_uop[-1].operand; DEOPT_IF(!PyFunction_Check(callable), _CHECK_FUNCTION_EXACT_ARGS); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, _CHECK_FUNCTION_EXACT_ARGS); diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 9039ac5c6f127e..c2ed5ef778834d 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -166,7 +166,9 @@ def write_body( f"{func}(&this_instr[{active.offset + 1}].cache);" ) else: - out.emit(f"{typ}{ceffect.name} = ({typ.strip()})operand;") + # TODO: Use something else when generating JIT code + operand = "next_uop[-1].operand" + out.emit(f"{typ}{ceffect.name} = ({typ.strip()}){operand};") # Write the body, substituting a goto for ERROR_IF() and other stuff assert dedent <= 0 From a2c4f007157770c889d0abad4f30dead178f1abe Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 19 Nov 2023 11:17:59 -0800 Subject: [PATCH 17/21] Do the same for 'oparg' -- another 4% speedup --- Python/ceval.c | 7 +- Python/executor_cases.c.h | 91 +++++++++++++++++++++++++ Tools/cases_generator/generate_cases.py | 3 + 3 files changed, 97 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 27cbaa99f6dbf1..15f40c3a5a45ee 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1000,12 +1000,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int for (;;) { opcode = next_uop->opcode; - oparg = next_uop->oparg; DPRINTF(3, "%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n", (int)(next_uop - current_executor->trace), _PyUopName(opcode), - oparg, + next_uop->oparg, next_uop->operand, next_uop->target, (int)(stack_pointer - _PyFrame_Stackbase(frame))); @@ -1052,7 +1051,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int STACK_SHRINK(1); error_tier_two: DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", - opcode, _PyUopName(opcode), oparg, next_uop[-1].operand, next_uop[-1].target, + opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, (int)(next_uop - current_executor->trace - 1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); frame->return_offset = 0; // Don't leave this random @@ -1065,7 +1064,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // On DEOPT_IF we just repeat the last instruction. // This presumes nothing was popped from the stack (nor pushed). DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", - opcode, _PyUopName(opcode), oparg, next_uop[-1].operand, next_uop[-1].target, + opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, (int)(next_uop - current_executor->trace - 1)); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); UOP_STAT_INC(opcode, miss); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6d9447481fe6bc..7e34058a5da90c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -25,6 +25,7 @@ } case LOAD_FAST_CHECK: { + oparg = next_uop[-1].oparg; PyObject *value; value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error_tier_two; @@ -35,6 +36,7 @@ } case LOAD_FAST: { + oparg = next_uop[-1].oparg; PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); @@ -45,6 +47,7 @@ } case LOAD_FAST_AND_CLEAR: { + oparg = next_uop[-1].oparg; PyObject *value; value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value @@ -55,6 +58,7 @@ } case LOAD_CONST: { + oparg = next_uop[-1].oparg; PyObject *value; value = GETITEM(FRAME_CO_CONSTS, oparg); Py_INCREF(value); @@ -64,6 +68,7 @@ } case STORE_FAST: { + oparg = next_uop[-1].oparg; PyObject *value; value = stack_pointer[-1]; SETLOCAL(oparg, value); @@ -525,6 +530,7 @@ } case LIST_APPEND: { + oparg = next_uop[-1].oparg; PyObject *v; PyObject *list; v = stack_pointer[-1]; @@ -535,6 +541,7 @@ } case SET_ADD: { + oparg = next_uop[-1].oparg; PyObject *v; PyObject *set; v = stack_pointer[-1]; @@ -621,6 +628,7 @@ } case CALL_INTRINSIC_1: { + oparg = next_uop[-1].oparg; PyObject *value; PyObject *res; value = stack_pointer[-1]; @@ -633,6 +641,7 @@ } case CALL_INTRINSIC_2: { + oparg = next_uop[-1].oparg; PyObject *value1; PyObject *value2; PyObject *res; @@ -764,6 +773,7 @@ } case GET_AWAITABLE: { + oparg = next_uop[-1].oparg; PyObject *iterable; PyObject *iter; iterable = stack_pointer[-1]; @@ -825,6 +835,7 @@ } case STORE_NAME: { + oparg = next_uop[-1].oparg; PyObject *v; v = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -847,6 +858,7 @@ } case DELETE_NAME: { + oparg = next_uop[-1].oparg; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); PyObject *ns = LOCALS(); int err; @@ -867,6 +879,7 @@ } case _UNPACK_SEQUENCE: { + oparg = next_uop[-1].oparg; PyObject *seq; seq = stack_pointer[-1]; PyObject **top = stack_pointer + oparg - 1; @@ -879,6 +892,7 @@ } case UNPACK_SEQUENCE_TWO_TUPLE: { + oparg = next_uop[-1].oparg; PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -896,6 +910,7 @@ } case UNPACK_SEQUENCE_TUPLE: { + oparg = next_uop[-1].oparg; PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -914,6 +929,7 @@ } case UNPACK_SEQUENCE_LIST: { + oparg = next_uop[-1].oparg; PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -932,6 +948,7 @@ } case UNPACK_EX: { + oparg = next_uop[-1].oparg; PyObject *seq; seq = stack_pointer[-1]; int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); @@ -944,6 +961,7 @@ } case _STORE_ATTR: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *v; owner = stack_pointer[-1]; @@ -958,6 +976,7 @@ } case DELETE_ATTR: { + oparg = next_uop[-1].oparg; PyObject *owner; owner = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -969,6 +988,7 @@ } case STORE_GLOBAL: { + oparg = next_uop[-1].oparg; PyObject *v; v = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -980,6 +1000,7 @@ } case DELETE_GLOBAL: { + oparg = next_uop[-1].oparg; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); @@ -1009,6 +1030,7 @@ } case LOAD_FROM_DICT_OR_GLOBALS: { + oparg = next_uop[-1].oparg; PyObject *mod_or_class_dict; PyObject *v; mod_or_class_dict = stack_pointer[-1]; @@ -1038,6 +1060,7 @@ } case LOAD_NAME: { + oparg = next_uop[-1].oparg; PyObject *v; PyObject *mod_or_class_dict = LOCALS(); if (mod_or_class_dict == NULL) { @@ -1071,6 +1094,7 @@ } case _LOAD_GLOBAL: { + oparg = next_uop[-1].oparg; PyObject *res; PyObject *null = NULL; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); @@ -1133,6 +1157,7 @@ } case _LOAD_GLOBAL_MODULE: { + oparg = next_uop[-1].oparg; PyObject *res; PyObject *null = NULL; uint16_t index = (uint16_t)next_uop[-1].operand; @@ -1151,6 +1176,7 @@ } case _LOAD_GLOBAL_BUILTINS: { + oparg = next_uop[-1].oparg; PyObject *res; PyObject *null = NULL; uint16_t index = (uint16_t)next_uop[-1].operand; @@ -1169,6 +1195,7 @@ } case DELETE_FAST: { + oparg = next_uop[-1].oparg; PyObject *v = GETLOCAL(oparg); if (v == NULL) goto unbound_local_error_tier_two; SETLOCAL(oparg, NULL); @@ -1176,6 +1203,7 @@ } case MAKE_CELL: { + oparg = next_uop[-1].oparg; // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); @@ -1188,6 +1216,7 @@ } case DELETE_DEREF: { + oparg = next_uop[-1].oparg; PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); // Can't use ERROR_IF here. @@ -1202,6 +1231,7 @@ } case LOAD_FROM_DICT_OR_DEREF: { + oparg = next_uop[-1].oparg; PyObject *class_dict; PyObject *value; class_dict = stack_pointer[-1]; @@ -1227,6 +1257,7 @@ } case LOAD_DEREF: { + oparg = next_uop[-1].oparg; PyObject *value; PyObject *cell = GETLOCAL(oparg); value = PyCell_GET(cell); @@ -1241,6 +1272,7 @@ } case STORE_DEREF: { + oparg = next_uop[-1].oparg; PyObject *v; v = stack_pointer[-1]; PyObject *cell = GETLOCAL(oparg); @@ -1252,6 +1284,7 @@ } case COPY_FREE_VARS: { + oparg = next_uop[-1].oparg; /* Copy closure variables to free variables */ PyCodeObject *co = _PyFrame_GetCode(frame); assert(PyFunction_Check(frame->f_funcobj)); @@ -1266,6 +1299,7 @@ } case BUILD_STRING: { + oparg = next_uop[-1].oparg; PyObject **pieces; PyObject *str; pieces = stack_pointer - oparg; @@ -1281,6 +1315,7 @@ } case BUILD_TUPLE: { + oparg = next_uop[-1].oparg; PyObject **values; PyObject *tup; values = stack_pointer - oparg; @@ -1293,6 +1328,7 @@ } case BUILD_LIST: { + oparg = next_uop[-1].oparg; PyObject **values; PyObject *list; values = stack_pointer - oparg; @@ -1305,6 +1341,7 @@ } case LIST_EXTEND: { + oparg = next_uop[-1].oparg; PyObject *iterable; PyObject *list; iterable = stack_pointer[-1]; @@ -1329,6 +1366,7 @@ } case SET_UPDATE: { + oparg = next_uop[-1].oparg; PyObject *iterable; PyObject *set; iterable = stack_pointer[-1]; @@ -1341,6 +1379,7 @@ } case BUILD_SET: { + oparg = next_uop[-1].oparg; PyObject **values; PyObject *set; values = stack_pointer - oparg; @@ -1365,6 +1404,7 @@ } case BUILD_MAP: { + oparg = next_uop[-1].oparg; PyObject **values; PyObject *map; values = stack_pointer - oparg*2; @@ -1407,6 +1447,7 @@ } case BUILD_CONST_KEY_MAP: { + oparg = next_uop[-1].oparg; PyObject *keys; PyObject **values; PyObject *map; @@ -1432,6 +1473,7 @@ } case DICT_UPDATE: { + oparg = next_uop[-1].oparg; PyObject *update; PyObject *dict; update = stack_pointer[-1]; @@ -1451,6 +1493,7 @@ } case DICT_MERGE: { + oparg = next_uop[-1].oparg; PyObject *update; PyObject *dict; PyObject *callable; @@ -1468,6 +1511,7 @@ } case MAP_ADD: { + oparg = next_uop[-1].oparg; PyObject *value; PyObject *key; PyObject *dict; @@ -1483,6 +1527,7 @@ } case LOAD_SUPER_ATTR_ATTR: { + oparg = next_uop[-1].oparg; PyObject *self; PyObject *class; PyObject *global_super; @@ -1506,6 +1551,7 @@ } case LOAD_SUPER_ATTR_METHOD: { + oparg = next_uop[-1].oparg; PyObject *self; PyObject *class; PyObject *global_super; @@ -1542,6 +1588,7 @@ } case _LOAD_ATTR: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *self_or_null = NULL; @@ -1603,6 +1650,7 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *null = NULL; @@ -1633,6 +1681,7 @@ } case _LOAD_ATTR_MODULE: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *null = NULL; @@ -1667,6 +1716,7 @@ } case _LOAD_ATTR_WITH_HINT: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *null = NULL; @@ -1698,6 +1748,7 @@ } case _LOAD_ATTR_SLOT: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *null = NULL; @@ -1727,6 +1778,7 @@ } case _LOAD_ATTR_CLASS: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *null = NULL; @@ -1791,6 +1843,7 @@ } case _COMPARE_OP: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *res; @@ -1813,6 +1866,7 @@ } case COMPARE_OP_FLOAT: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *res; @@ -1835,6 +1889,7 @@ } case COMPARE_OP_INT: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *res; @@ -1861,6 +1916,7 @@ } case COMPARE_OP_STR: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *res; @@ -1884,6 +1940,7 @@ } case IS_OP: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *b; @@ -1899,6 +1956,7 @@ } case CONTAINS_OP: { + oparg = next_uop[-1].oparg; PyObject *right; PyObject *left; PyObject *b; @@ -1995,6 +2053,7 @@ } case MATCH_CLASS: { + oparg = next_uop[-1].oparg; PyObject *names; PyObject *type; PyObject *subject; @@ -2386,6 +2445,7 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *self; @@ -2405,6 +2465,7 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *self; @@ -2424,6 +2485,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; @@ -2438,6 +2500,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; @@ -2464,6 +2527,7 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { + oparg = next_uop[-1].oparg; PyObject *owner; PyObject *attr; PyObject *self; @@ -2482,6 +2546,7 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + oparg = next_uop[-1].oparg; PyObject *null; PyObject *callable; null = stack_pointer[-1 - oparg]; @@ -2492,6 +2557,7 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + oparg = next_uop[-1].oparg; PyObject *callable; PyObject *func; PyObject *self; @@ -2513,6 +2579,7 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { + oparg = next_uop[-1].oparg; PyObject *self_or_null; PyObject *callable; self_or_null = stack_pointer[-1 - oparg]; @@ -2527,6 +2594,7 @@ } case _CHECK_STACK_SPACE: { + oparg = next_uop[-1].oparg; PyObject *callable; callable = stack_pointer[-2 - oparg]; PyFunctionObject *func = (PyFunctionObject *)callable; @@ -2537,6 +2605,7 @@ } case _INIT_CALL_PY_EXACT_ARGS: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2585,6 +2654,7 @@ } case CALL_TYPE_1: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *null; PyObject *callable; @@ -2607,6 +2677,7 @@ } case CALL_STR_1: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *null; PyObject *callable; @@ -2631,6 +2702,7 @@ } case CALL_TUPLE_1: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *null; PyObject *callable; @@ -2669,6 +2741,7 @@ } case CALL_BUILTIN_CLASS: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2700,6 +2773,7 @@ } case CALL_BUILTIN_O: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2739,6 +2813,7 @@ } case CALL_BUILTIN_FAST: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2782,6 +2857,7 @@ } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2819,6 +2895,7 @@ } case CALL_LEN: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2854,6 +2931,7 @@ } case CALL_ISINSTANCE: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2891,6 +2969,7 @@ } case CALL_METHOD_DESCRIPTOR_O: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2933,6 +3012,7 @@ } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2973,6 +3053,7 @@ } case CALL_METHOD_DESCRIPTOR_NOARGS: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3014,6 +3095,7 @@ } case CALL_METHOD_DESCRIPTOR_FAST: { + oparg = next_uop[-1].oparg; PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3073,6 +3155,7 @@ } case SET_FUNCTION_ATTRIBUTE: { + oparg = next_uop[-1].oparg; PyObject *func; PyObject *attr; func = stack_pointer[-1]; @@ -3107,6 +3190,7 @@ } case BUILD_SLICE: { + oparg = next_uop[-1].oparg; PyObject *step = NULL; PyObject *stop; PyObject *start; @@ -3126,6 +3210,7 @@ } case CONVERT_VALUE: { + oparg = next_uop[-1].oparg; PyObject *value; PyObject *result; value = stack_pointer[-1]; @@ -3173,6 +3258,7 @@ } case COPY: { + oparg = next_uop[-1].oparg; PyObject *bottom; PyObject *top; bottom = stack_pointer[-1 - (oparg-1)]; @@ -3184,6 +3270,7 @@ } case _BINARY_OP: { + oparg = next_uop[-1].oparg; PyObject *rhs; PyObject *lhs; PyObject *res; @@ -3200,6 +3287,7 @@ } case SWAP: { + oparg = next_uop[-1].oparg; PyObject *top; PyObject *bottom; top = stack_pointer[-1]; @@ -3252,6 +3340,7 @@ } case _SET_IP: { + oparg = next_uop[-1].oparg; TIER_TWO_ONLY // TODO: Put the code pointer in `operand` to avoid indirection via `frame` frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; @@ -3259,6 +3348,7 @@ } case _SAVE_RETURN_OFFSET: { + oparg = next_uop[-1].oparg; #if TIER_ONE frame->return_offset = (uint16_t)(next_instr - this_instr); #endif @@ -3275,6 +3365,7 @@ } case _INSERT: { + oparg = next_uop[-1].oparg; PyObject *top; top = stack_pointer[-1]; // Inserts TOS at position specified by oparg; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 149558e1640364..9b92bf4425eb6c 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -810,6 +810,9 @@ def write_executor_instructions( n_uops += 1 self.out.emit("") with self.out.block(f"case {instr.name}:"): + if instr.instr_flags.HAS_ARG_FLAG: + # TODO: Skip this when generating JIT code + self.out.emit("oparg = next_uop[-1].oparg;") stacking.write_single_instr(instr, self.out, tier=TIER_TWO) if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") From ddba5ed2be9f52c4fc5b72fc5124f0be8c380699 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 20 Nov 2023 10:09:49 -0800 Subject: [PATCH 18/21] Use CURRENT_OPARG() and CURRENT_OPERAND() macros This prepares us for the JIT template, hopefully. --- Python/ceval_macros.h | 4 + Python/executor_cases.c.h | 226 ++++++++++++------------ Tools/cases_generator/generate_cases.py | 3 +- Tools/cases_generator/instructions.py | 4 +- 4 files changed, 119 insertions(+), 118 deletions(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 546adbe5f438d1..b0cb7c8926338c 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,3 +397,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; + +#define CURRENT_OPARG() (next_uop[-1].oparg) + +#define CURRENT_OPERAND() (next_uop[-1].operand) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7e34058a5da90c..547be6f13237dd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -25,7 +25,7 @@ } case LOAD_FAST_CHECK: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error_tier_two; @@ -36,7 +36,7 @@ } case LOAD_FAST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); @@ -47,7 +47,7 @@ } case LOAD_FAST_AND_CLEAR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value @@ -58,7 +58,7 @@ } case LOAD_CONST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; value = GETITEM(FRAME_CO_CONSTS, oparg); Py_INCREF(value); @@ -68,7 +68,7 @@ } case STORE_FAST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; value = stack_pointer[-1]; SETLOCAL(oparg, value); @@ -209,7 +209,7 @@ PyObject *value; PyObject *res; value = stack_pointer[-1]; - uint32_t version = (uint32_t)next_uop[-1].operand; + uint32_t version = (uint32_t)CURRENT_OPERAND(); // This one is a bit weird, because we expect *some* failures: assert(version); DEOPT_IF(Py_TYPE(value)->tp_version_tag != version, TO_BOOL); @@ -530,7 +530,7 @@ } case LIST_APPEND: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; PyObject *list; v = stack_pointer[-1]; @@ -541,7 +541,7 @@ } case SET_ADD: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; PyObject *set; v = stack_pointer[-1]; @@ -628,7 +628,7 @@ } case CALL_INTRINSIC_1: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; PyObject *res; value = stack_pointer[-1]; @@ -641,7 +641,7 @@ } case CALL_INTRINSIC_2: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value1; PyObject *value2; PyObject *res; @@ -773,7 +773,7 @@ } case GET_AWAITABLE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *iterable; PyObject *iter; iterable = stack_pointer[-1]; @@ -835,7 +835,7 @@ } case STORE_NAME: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; v = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -858,7 +858,7 @@ } case DELETE_NAME: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); PyObject *ns = LOCALS(); int err; @@ -879,7 +879,7 @@ } case _UNPACK_SEQUENCE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *seq; seq = stack_pointer[-1]; PyObject **top = stack_pointer + oparg - 1; @@ -892,7 +892,7 @@ } case UNPACK_SEQUENCE_TWO_TUPLE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -910,7 +910,7 @@ } case UNPACK_SEQUENCE_TUPLE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -929,7 +929,7 @@ } case UNPACK_SEQUENCE_LIST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *seq; PyObject **values; seq = stack_pointer[-1]; @@ -948,7 +948,7 @@ } case UNPACK_EX: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *seq; seq = stack_pointer[-1]; int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); @@ -961,7 +961,7 @@ } case _STORE_ATTR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *v; owner = stack_pointer[-1]; @@ -976,7 +976,7 @@ } case DELETE_ATTR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; owner = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -988,7 +988,7 @@ } case STORE_GLOBAL: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; v = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -1000,7 +1000,7 @@ } case DELETE_GLOBAL: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); @@ -1030,7 +1030,7 @@ } case LOAD_FROM_DICT_OR_GLOBALS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *mod_or_class_dict; PyObject *v; mod_or_class_dict = stack_pointer[-1]; @@ -1060,7 +1060,7 @@ } case LOAD_NAME: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; PyObject *mod_or_class_dict = LOCALS(); if (mod_or_class_dict == NULL) { @@ -1094,7 +1094,7 @@ } case _LOAD_GLOBAL: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *res; PyObject *null = NULL; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); @@ -1139,7 +1139,7 @@ } case _GUARD_GLOBALS_VERSION: { - uint16_t version = (uint16_t)next_uop[-1].operand; + uint16_t version = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_GLOBALS_VERSION); DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_GLOBALS_VERSION); @@ -1148,7 +1148,7 @@ } case _GUARD_BUILTINS_VERSION: { - uint16_t version = (uint16_t)next_uop[-1].operand; + uint16_t version = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), _GUARD_BUILTINS_VERSION); DEOPT_IF(dict->ma_keys->dk_version != version, _GUARD_BUILTINS_VERSION); @@ -1157,10 +1157,10 @@ } case _LOAD_GLOBAL_MODULE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *res; PyObject *null = NULL; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)GLOBALS(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); res = entries[index].me_value; @@ -1176,10 +1176,10 @@ } case _LOAD_GLOBAL_BUILTINS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *res; PyObject *null = NULL; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictObject *bdict = (PyDictObject *)BUILTINS(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); res = entries[index].me_value; @@ -1195,7 +1195,7 @@ } case DELETE_FAST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v = GETLOCAL(oparg); if (v == NULL) goto unbound_local_error_tier_two; SETLOCAL(oparg, NULL); @@ -1203,7 +1203,7 @@ } case MAKE_CELL: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); @@ -1216,7 +1216,7 @@ } case DELETE_DEREF: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); // Can't use ERROR_IF here. @@ -1231,7 +1231,7 @@ } case LOAD_FROM_DICT_OR_DEREF: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *class_dict; PyObject *value; class_dict = stack_pointer[-1]; @@ -1257,7 +1257,7 @@ } case LOAD_DEREF: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; PyObject *cell = GETLOCAL(oparg); value = PyCell_GET(cell); @@ -1272,7 +1272,7 @@ } case STORE_DEREF: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *v; v = stack_pointer[-1]; PyObject *cell = GETLOCAL(oparg); @@ -1284,7 +1284,7 @@ } case COPY_FREE_VARS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); /* Copy closure variables to free variables */ PyCodeObject *co = _PyFrame_GetCode(frame); assert(PyFunction_Check(frame->f_funcobj)); @@ -1299,7 +1299,7 @@ } case BUILD_STRING: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **pieces; PyObject *str; pieces = stack_pointer - oparg; @@ -1315,7 +1315,7 @@ } case BUILD_TUPLE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **values; PyObject *tup; values = stack_pointer - oparg; @@ -1328,7 +1328,7 @@ } case BUILD_LIST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **values; PyObject *list; values = stack_pointer - oparg; @@ -1341,7 +1341,7 @@ } case LIST_EXTEND: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *iterable; PyObject *list; iterable = stack_pointer[-1]; @@ -1366,7 +1366,7 @@ } case SET_UPDATE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *iterable; PyObject *set; iterable = stack_pointer[-1]; @@ -1379,7 +1379,7 @@ } case BUILD_SET: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **values; PyObject *set; values = stack_pointer - oparg; @@ -1404,7 +1404,7 @@ } case BUILD_MAP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **values; PyObject *map; values = stack_pointer - oparg*2; @@ -1447,7 +1447,7 @@ } case BUILD_CONST_KEY_MAP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *keys; PyObject **values; PyObject *map; @@ -1473,7 +1473,7 @@ } case DICT_UPDATE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *update; PyObject *dict; update = stack_pointer[-1]; @@ -1493,7 +1493,7 @@ } case DICT_MERGE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *update; PyObject *dict; PyObject *callable; @@ -1511,7 +1511,7 @@ } case MAP_ADD: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; PyObject *key; PyObject *dict; @@ -1527,7 +1527,7 @@ } case LOAD_SUPER_ATTR_ATTR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *self; PyObject *class; PyObject *global_super; @@ -1551,7 +1551,7 @@ } case LOAD_SUPER_ATTR_METHOD: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *self; PyObject *class; PyObject *global_super; @@ -1588,7 +1588,7 @@ } case _LOAD_ATTR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *self_or_null = NULL; @@ -1632,7 +1632,7 @@ case _GUARD_TYPE_VERSION: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)next_uop[-1].operand; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, _GUARD_TYPE_VERSION); @@ -1650,12 +1650,12 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); attr = _PyDictOrValues_GetValues(dorv)->values[index]; DEOPT_IF(attr == NULL, _LOAD_ATTR_INSTANCE_VALUE); @@ -1672,7 +1672,7 @@ case _CHECK_ATTR_MODULE: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)next_uop[-1].operand; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); DEOPT_IF(!PyModule_CheckExact(owner), _CHECK_ATTR_MODULE); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; assert(dict != NULL); @@ -1681,12 +1681,12 @@ } case _LOAD_ATTR_MODULE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); assert(index < dict->ma_keys->dk_nentries); @@ -1716,12 +1716,12 @@ } case _LOAD_ATTR_WITH_HINT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t hint = (uint16_t)next_uop[-1].operand; + uint16_t hint = (uint16_t)CURRENT_OPERAND(); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, _LOAD_ATTR_WITH_HINT); @@ -1748,12 +1748,12 @@ } case _LOAD_ATTR_SLOT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); char *addr = (char *)owner + index; attr = *(PyObject **)addr; DEOPT_IF(attr == NULL, _LOAD_ATTR_SLOT); @@ -1770,7 +1770,7 @@ case _CHECK_ATTR_CLASS: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t type_version = (uint32_t)next_uop[-1].operand; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); DEOPT_IF(!PyType_Check(owner), _CHECK_ATTR_CLASS); assert(type_version != 0); DEOPT_IF(((PyTypeObject *)owner)->tp_version_tag != type_version, _CHECK_ATTR_CLASS); @@ -1778,12 +1778,12 @@ } case _LOAD_ATTR_CLASS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *null = NULL; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); attr = Py_NewRef(descr); @@ -1809,7 +1809,7 @@ PyObject *value; owner = stack_pointer[-1]; value = stack_pointer[-2]; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); STAT_INC(STORE_ATTR, hit); PyDictValues *values = _PyDictOrValues_GetValues(dorv); @@ -1831,7 +1831,7 @@ PyObject *value; owner = stack_pointer[-1]; value = stack_pointer[-2]; - uint16_t index = (uint16_t)next_uop[-1].operand; + uint16_t index = (uint16_t)CURRENT_OPERAND(); char *addr = (char *)owner + index; STAT_INC(STORE_ATTR, hit); PyObject *old_value = *(PyObject **)addr; @@ -1843,7 +1843,7 @@ } case _COMPARE_OP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *res; @@ -1866,7 +1866,7 @@ } case COMPARE_OP_FLOAT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *res; @@ -1889,7 +1889,7 @@ } case COMPARE_OP_INT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *res; @@ -1916,7 +1916,7 @@ } case COMPARE_OP_STR: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *res; @@ -1940,7 +1940,7 @@ } case IS_OP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *b; @@ -1956,7 +1956,7 @@ } case CONTAINS_OP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *right; PyObject *left; PyObject *b; @@ -2053,7 +2053,7 @@ } case MATCH_CLASS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *names; PyObject *type; PyObject *subject; @@ -2437,7 +2437,7 @@ case _GUARD_KEYS_VERSION: { PyObject *owner; owner = stack_pointer[-1]; - uint32_t keys_version = (uint32_t)next_uop[-1].operand; + uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); PyTypeObject *owner_cls = Py_TYPE(owner); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, _GUARD_KEYS_VERSION); @@ -2445,12 +2445,12 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); assert(oparg & 1); /* Cached method object */ STAT_INC(LOAD_ATTR, hit); @@ -2465,12 +2465,12 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); assert(oparg & 1); assert(Py_TYPE(owner)->tp_dictoffset == 0); STAT_INC(LOAD_ATTR, hit); @@ -2485,11 +2485,11 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); assert((oparg & 1) == 0); STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); @@ -2500,11 +2500,11 @@ } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); assert((oparg & 1) == 0); assert(Py_TYPE(owner)->tp_dictoffset == 0); STAT_INC(LOAD_ATTR, hit); @@ -2527,12 +2527,12 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *owner; PyObject *attr; PyObject *self; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)next_uop[-1].operand; + PyObject *descr = (PyObject *)CURRENT_OPERAND(); assert(oparg & 1); STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); @@ -2546,7 +2546,7 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *null; PyObject *callable; null = stack_pointer[-1 - oparg]; @@ -2557,7 +2557,7 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *callable; PyObject *func; PyObject *self; @@ -2579,12 +2579,12 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *self_or_null; PyObject *callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - uint32_t func_version = (uint32_t)next_uop[-1].operand; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); DEOPT_IF(!PyFunction_Check(callable), _CHECK_FUNCTION_EXACT_ARGS); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, _CHECK_FUNCTION_EXACT_ARGS); @@ -2594,7 +2594,7 @@ } case _CHECK_STACK_SPACE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *callable; callable = stack_pointer[-2 - oparg]; PyFunctionObject *func = (PyFunctionObject *)callable; @@ -2605,7 +2605,7 @@ } case _INIT_CALL_PY_EXACT_ARGS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2654,7 +2654,7 @@ } case CALL_TYPE_1: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *null; PyObject *callable; @@ -2677,7 +2677,7 @@ } case CALL_STR_1: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *null; PyObject *callable; @@ -2702,7 +2702,7 @@ } case CALL_TUPLE_1: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *null; PyObject *callable; @@ -2741,7 +2741,7 @@ } case CALL_BUILTIN_CLASS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2773,7 +2773,7 @@ } case CALL_BUILTIN_O: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2813,7 +2813,7 @@ } case CALL_BUILTIN_FAST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2857,7 +2857,7 @@ } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2895,7 +2895,7 @@ } case CALL_LEN: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2931,7 +2931,7 @@ } case CALL_ISINSTANCE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -2969,7 +2969,7 @@ } case CALL_METHOD_DESCRIPTOR_O: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3012,7 +3012,7 @@ } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3053,7 +3053,7 @@ } case CALL_METHOD_DESCRIPTOR_NOARGS: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3095,7 +3095,7 @@ } case CALL_METHOD_DESCRIPTOR_FAST: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject **args; PyObject *self_or_null; PyObject *callable; @@ -3155,7 +3155,7 @@ } case SET_FUNCTION_ATTRIBUTE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *func; PyObject *attr; func = stack_pointer[-1]; @@ -3190,7 +3190,7 @@ } case BUILD_SLICE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *step = NULL; PyObject *stop; PyObject *start; @@ -3210,7 +3210,7 @@ } case CONVERT_VALUE: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *value; PyObject *result; value = stack_pointer[-1]; @@ -3258,7 +3258,7 @@ } case COPY: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *bottom; PyObject *top; bottom = stack_pointer[-1 - (oparg-1)]; @@ -3270,7 +3270,7 @@ } case _BINARY_OP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *rhs; PyObject *lhs; PyObject *res; @@ -3287,7 +3287,7 @@ } case SWAP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *top; PyObject *bottom; top = stack_pointer[-1]; @@ -3340,7 +3340,7 @@ } case _SET_IP: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); TIER_TWO_ONLY // TODO: Put the code pointer in `operand` to avoid indirection via `frame` frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; @@ -3348,7 +3348,7 @@ } case _SAVE_RETURN_OFFSET: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); #if TIER_ONE frame->return_offset = (uint16_t)(next_instr - this_instr); #endif @@ -3365,7 +3365,7 @@ } case _INSERT: { - oparg = next_uop[-1].oparg; + oparg = CURRENT_OPARG(); PyObject *top; top = stack_pointer[-1]; // Inserts TOS at position specified by oparg; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 9b92bf4425eb6c..851bd2f53879e5 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -811,8 +811,7 @@ def write_executor_instructions( self.out.emit("") with self.out.block(f"case {instr.name}:"): if instr.instr_flags.HAS_ARG_FLAG: - # TODO: Skip this when generating JIT code - self.out.emit("oparg = next_uop[-1].oparg;") + self.out.emit("oparg = CURRENT_OPARG();") stacking.write_single_instr(instr, self.out, tier=TIER_TWO) if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index c2ed5ef778834d..149a08810e4ae5 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -166,9 +166,7 @@ def write_body( f"{func}(&this_instr[{active.offset + 1}].cache);" ) else: - # TODO: Use something else when generating JIT code - operand = "next_uop[-1].operand" - out.emit(f"{typ}{ceffect.name} = ({typ.strip()}){operand};") + out.emit(f"{typ}{ceffect.name} = ({typ.strip()})CURRENT_OPERAND();") # Write the body, substituting a goto for ERROR_IF() and other stuff assert dedent <= 0 From 0fae4e64f291d2c547426322d152b0d3ba8d8da8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 20 Nov 2023 10:36:05 -0800 Subject: [PATCH 19/21] (Theoretically) improve 'Unknown uop' message --- Python/ceval.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 15f40c3a5a45ee..7479e88db75fd9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1022,7 +1022,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int default: #ifdef Py_DEBUG { - fprintf(stderr, "Unknown uop %d, oparg %d\n", opcode, oparg); + fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n", + opcode, next_uop[-1].oparg, next_uop[-1].operand, + (int)(next_uop - current_executor->trace - 1)); Py_FatalError("Unknown uop"); } #else From 7c1f6d47a5a8c03935d9add02e222d1f70880cd3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 20 Nov 2023 10:42:11 -0800 Subject: [PATCH 20/21] Add news --- .../2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst new file mode 100644 index 00000000000000..c9723f58f1197f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst @@ -0,0 +1,3 @@ +Slightly optimize the Tier 2 (uop) interpreter by only loading `oparg` and +`operand` when needed. Also double the trace size limit again, to 512 this +time. From 1d1f146d45586deedb2ce6170ecbf613976ba8af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 20 Nov 2023 10:43:20 -0800 Subject: [PATCH 21/21] Fix backticks (as always) --- .../2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst index c9723f58f1197f..3f31a0f55ca44e 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-10-40-40.gh-issue-112287.15gWAK.rst @@ -1,3 +1,3 @@ -Slightly optimize the Tier 2 (uop) interpreter by only loading `oparg` and -`operand` when needed. Also double the trace size limit again, to 512 this +Slightly optimize the Tier 2 (uop) interpreter by only loading ``oparg`` and +``operand`` when needed. Also double the trace size limit again, to 512 this time.