From ac92527c08d917dffdb9c0a218d06f21114614a2 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 13 Jan 2024 01:30:27 +0800 Subject: [PATCH] gh-113710: Add types to the interpreter DSL (#113711) Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Include/internal/pycore_opcode_metadata.h | 24 +++--- Include/internal/pycore_uop_ids.h | 9 +-- Include/internal/pycore_uop_metadata.h | 78 +++++++++---------- Lib/test/test_generated_cases.py | 25 +++--- ...-01-09-23-01-00.gh-issue-113710.pe3flY.rst | 1 + Python/bytecodes.c | 57 +++++++------- Python/executor_cases.c.h | 10 --- Tools/cases_generator/analyzer.py | 48 ++++++++++-- Tools/cases_generator/generators_common.py | 8 +- .../cases_generator/interpreter_definition.md | 37 ++++++++- Tools/cases_generator/lexer.py | 12 ++- .../opcode_metadata_generator.py | 2 + Tools/cases_generator/parsing.py | 36 +++++++-- Tools/cases_generator/stack.py | 18 ++--- 14 files changed, 227 insertions(+), 138 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-09-23-01-00.gh-issue-113710.pe3flY.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index a9d698da25a1db..fbb448f663369a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -909,6 +909,8 @@ enum InstructionFormat { #define HAS_DEOPT_FLAG (128) #define HAS_ERROR_FLAG (256) #define HAS_ESCAPES_FLAG (512) +#define HAS_PURE_FLAG (1024) +#define HAS_PASSTHROUGH_FLAG (2048) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -919,6 +921,8 @@ enum InstructionFormat { #define OPCODE_HAS_DEOPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_DEOPT_FLAG)) #define 
OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG)) #define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG)) +#define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG)) +#define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 @@ -996,7 +1000,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CONTAINS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONVERT_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, - [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DELETE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1007,8 +1011,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [END_FOR] = { true, INSTR_FMT_IX, 0 }, - [END_SEND] = { true, INSTR_FMT_IX, 0 }, + [END_FOR] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [END_SEND] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG }, [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1067,9 +1071,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [LOAD_ATTR_SLOT] = { true, 
INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, + [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1096,9 +1100,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_TOP] = { true, INSTR_FMT_IX, 0 }, + [POP_TOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, - [PUSH_NULL] = { true, INSTR_FMT_IX, 0 }, + [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, @@ -1127,7 +1131,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, 
[STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, @@ -1137,7 +1141,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNARY_NOT] = { true, INSTR_FMT_IX, 0 }, + [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, @@ -1147,7 +1151,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b3b36327c480cc..8ee90d79a13c2f 100644 --- 
a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -229,11 +229,10 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 376 #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 -#define _INSERT 379 -#define _CHECK_VALIDITY 380 -#define _LOAD_CONST_INLINE_BORROW 381 -#define _INTERNAL_INCREMENT_OPT_COUNTER 382 -#define MAX_UOP_ID 382 +#define _CHECK_VALIDITY 379 +#define _LOAD_CONST_INLINE_BORROW 380 +#define _INTERNAL_INCREMENT_OPT_COUNTER 381 +#define MAX_UOP_ID 381 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ab498e9cefde22..3b251d3814b1da 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -19,36 +19,36 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_NOP] = 0, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = 0, - [_PUSH_NULL] = 0, - [_END_SEND] = 0, + [_POP_TOP] = HAS_PURE_FLAG, + [_PUSH_NULL] = HAS_PURE_FLAG, + [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_UNARY_NOT] = 0, + [_UNARY_NOT] = HAS_PURE_FLAG, [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG, + [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_TO_BOOL_INT] = HAS_DEOPT_FLAG, [_TO_BOOL_LIST] = HAS_DEOPT_FLAG, [_TO_BOOL_NONE] = HAS_DEOPT_FLAG, [_TO_BOOL_STR] = HAS_DEOPT_FLAG, [_TO_BOOL_ALWAYS_TRUE] = 
HAS_DEOPT_FLAG, [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG, - [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG, - [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG, - [_BINARY_OP_MULTIPLY_FLOAT] = 0, - [_BINARY_OP_ADD_FLOAT] = 0, - [_BINARY_OP_SUBTRACT_FLOAT] = 0, - [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG, - [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, + [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -112,17 +112,17 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG, - [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG, + [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG, + [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | 
HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG | HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_SLOT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG, + [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_CLASS] = HAS_ARG_FLAG, - [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG, + [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, [_STORE_ATTR_SLOT] = HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -142,33 +142,33 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GET_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_YIELD_FROM_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FOR_ITER_TIER_TWO] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_ITER_CHECK_LIST] = HAS_DEOPT_FLAG, - [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG, + [_ITER_CHECK_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_ITER_NEXT_LIST] = 0, - [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG, - [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG, + [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_ITER_NEXT_TUPLE] = 0, - [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG, - [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG, + [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_ASYNC_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_EXC_INFO] = 0, - 
[_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG, - [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG, + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_METHOD_WITH_VALUES] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_METHOD_NO_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = HAS_ARG_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = HAS_ARG_FLAG, - [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG, + [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_LOAD_ATTR_METHOD_LAZY_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, - [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, [_CHECK_PEP_523] = HAS_DEOPT_FLAG, - [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = 0, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -190,9 +190,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_COPY] = HAS_ARG_FLAG, + [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG, - [_SWAP] = HAS_ARG_FLAG, + [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG, 
[_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, @@ -201,7 +201,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, - [_INSERT] = HAS_ARG_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, @@ -304,7 +303,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", - [_INSERT] = "_INSERT", [_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER", [_IS_NONE] = "_IS_NONE", [_IS_OP] = "_IS_OP", diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 3b2f579be684b7..ca1228ee7008a9 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -9,7 +9,7 @@ def skip_if_different_mount_drives(): - if sys.platform != 'win32': + if sys.platform != "win32": return ROOT = os.path.dirname(os.path.dirname(__file__)) root_drive = os.path.splitroot(ROOT)[0] @@ -22,11 +22,13 @@ def skip_if_different_mount_drives(): f"directory have different mount drives " f"({cwd_drive} and {root_drive})" ) + + skip_if_different_mount_drives() -test_tools.skip_if_missing('cases_generator') -with test_tools.imports_under_tool('cases_generator'): +test_tools.skip_if_missing("cases_generator") +with test_tools.imports_under_tool("cases_generator"): from analyzer import StackItem import parser from stack import Stack @@ -39,13 +41,14 @@ def handle_stderr(): else: return support.captured_stderr() + class TestEffects(unittest.TestCase): def test_effect_sizes(self): stack = Stack() inputs = [ - x:= StackItem("x", None, "", "1"), - y:= StackItem("y", None, "", "oparg"), - z:= StackItem("z", None, "", "oparg*2"), + x := StackItem("x", None, "", "1"), + y := 
StackItem("y", None, "", "oparg"), + z := StackItem("z", None, "", "oparg*2"), ] outputs = [ StackItem("x", None, "", "1"), @@ -96,9 +99,7 @@ def run_cases_test(self, input: str, expected: str): with handle_stderr(): tier1_generator.generate_tier1_from_files( - [self.temp_input_filename], - self.temp_output_filename, - False + [self.temp_input_filename], self.temp_output_filename, False ) with open(self.temp_output_filename) as temp_output: @@ -750,7 +751,7 @@ def test_override_op(self): def test_annotated_inst(self): input = """ - guard inst(OP, (--)) { + pure inst(OP, (--)) { ham(); } """ @@ -767,7 +768,7 @@ def test_annotated_inst(self): def test_annotated_op(self): input = """ - guard op(OP, (--)) { + pure op(OP, (--)) { spam(); } macro(M) = OP; @@ -784,7 +785,7 @@ def test_annotated_op(self): self.run_cases_test(input, output) input = """ - guard register specializing op(OP, (--)) { + pure register specializing op(OP, (--)) { spam(); } macro(M) = OP; diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-09-23-01-00.gh-issue-113710.pe3flY.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-09-23-01-00.gh-issue-113710.pe3flY.rst new file mode 100644 index 00000000000000..cffc48bfa13639 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-09-23-01-00.gh-issue-113710.pe3flY.rst @@ -0,0 +1 @@ +Add typed stack effects to the interpreter DSL, along with various instruction annotations. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b346fe73f76fe4..6df99d6465347f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -208,7 +208,7 @@ dummy_func( Py_INCREF(value); } - inst(LOAD_FAST, (-- value)) { + pure inst(LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -229,7 +229,7 @@ dummy_func( Py_INCREF(value2); } - inst(LOAD_CONST, (-- value)) { + pure inst(LOAD_CONST, (-- value)) { value = GETITEM(FRAME_CO_CONSTS, oparg); Py_INCREF(value); } @@ -257,11 +257,11 @@ dummy_func( SETLOCAL(oparg2, value2); } - inst(POP_TOP, (value --)) { + pure inst(POP_TOP, (value --)) { DECREF_INPUTS(); } - inst(PUSH_NULL, (-- res)) { + pure inst(PUSH_NULL, (-- res)) { res = NULL; } @@ -281,7 +281,7 @@ dummy_func( DECREF_INPUTS(); } - inst(END_SEND, (receiver, value -- value)) { + pure inst(END_SEND, (receiver, value -- value)) { Py_DECREF(receiver); } @@ -303,7 +303,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - inst(UNARY_NOT, (value -- res)) { + pure inst(UNARY_NOT, (value -- res)) { assert(PyBool_Check(value)); res = Py_IsFalse(value) ? Py_True : Py_False; } @@ -411,12 +411,12 @@ dummy_func( // BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode. 
}; - op(_GUARD_BOTH_INT, (left, right -- left, right)) { + op(_GUARD_BOTH_INT, (left, right -- left: &PYLONG_TYPE, right: &PYLONG_TYPE)) { DEOPT_IF(!PyLong_CheckExact(left)); DEOPT_IF(!PyLong_CheckExact(right)); } - op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res: &PYLONG_TYPE)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -424,7 +424,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - op(_BINARY_OP_ADD_INT, (left, right -- res)) { + pure op(_BINARY_OP_ADD_INT, (left, right -- res: &PYLONG_TYPE)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -432,7 +432,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res: &PYLONG_TYPE)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -447,12 +447,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_INT) = _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; - op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { + op(_GUARD_BOTH_FLOAT, (left, right -- left: &PYFLOAT_TYPE, right: &PYFLOAT_TYPE)) { DEOPT_IF(!PyFloat_CheckExact(left)); DEOPT_IF(!PyFloat_CheckExact(right)); } - op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval * @@ -460,7 +460,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval + 
@@ -468,7 +468,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval - @@ -483,12 +483,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; - op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { + op(_GUARD_BOTH_UNICODE, (left, right -- left: &PYUNICODE_TYPE, right: &PYUNICODE_TYPE)) { DEOPT_IF(!PyUnicode_CheckExact(left)); DEOPT_IF(!PyUnicode_CheckExact(right)); } - op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res: &PYUNICODE_TYPE)) { STAT_INC(BINARY_OP, hit); res = PyUnicode_Concat(left, right); _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); @@ -1900,7 +1900,7 @@ dummy_func( LOAD_ATTR, }; - op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner: &(GUARD_TYPE_VERSION_TYPE + type_version))) { PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version); @@ -2081,7 +2081,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - op(_GUARD_DORV_VALUES, (owner -- owner)) { + op(_GUARD_DORV_VALUES, (owner -- owner: &GUARD_DORV_VALUES_TYPE)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(dorv)); @@ -2721,7 +2721,7 @@ dummy_func( DEOPT_IF(r->len <= 0); } - op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + op(_ITER_NEXT_RANGE, (iter -- iter, next: &PYLONG_TYPE)) { _PyRangeIterObject *r = (_PyRangeIterObject *)iter; assert(Py_TYPE(r) == &PyRangeIter_Type); assert(r->len > 0); @@ -2879,13 +2879,13 @@ dummy_func( exc_info->exc_value = Py_NewRef(new_exc); } - op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) { + 
op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)); } - op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) { + op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner: &(GUARD_KEYS_VERSION_TYPE + keys_version))) { PyTypeObject *owner_cls = Py_TYPE(owner); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version); @@ -3100,7 +3100,7 @@ dummy_func( macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL; - op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { + op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable: &PYMETHOD_TYPE, null: &NULL_TYPE, unused[oparg])) { DEOPT_IF(null != NULL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type); } @@ -3118,7 +3118,7 @@ dummy_func( DEOPT_IF(tstate->interp->eval_frame); } - op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable: &(PYFUNCTION_TYPE_VERSION_TYPE + func_version), self_or_null, unused[oparg])) { DEOPT_IF(!PyFunction_Check(callable)); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version); @@ -3133,7 +3133,7 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { int argcount = oparg; if (self_or_null != NULL) { args--; @@ -3877,7 +3877,7 @@ 
dummy_func( ERROR_IF(res == NULL, error); } - inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = Py_NewRef(bottom); } @@ -3906,7 +3906,7 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + _BINARY_OP; - inst(SWAP, (bottom, unused[oparg-2], top -- + pure inst(SWAP, (bottom, unused[oparg-2], top -- top, unused[oparg-2], bottom)) { assert(oparg >= 2); } @@ -4056,11 +4056,6 @@ dummy_func( DEOPT_IF(1); } - op(_INSERT, (unused[oparg], top -- top, unused[oparg])) { - // Inserts TOS at position specified by oparg; - memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); - } - op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3ffe4161b0124e..6060bebca9afa5 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3381,16 +3381,6 @@ break; } - case _INSERT: { - PyObject *top; - oparg = CURRENT_OPARG(); - top = stack_pointer[-1]; - // Inserts TOS at position specified by oparg; - memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); - stack_pointer[-1 - oparg] = top; - break; - } - case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 82ef8888bfcee5..7ed3b57136554f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field import lexer import parser from typing import Optional @@ -22,6 +22,10 @@ class Properties: uses_locals: bool has_free: bool + pure: bool + passthrough: bool + guard: bool + def dump(self, indent: str) -> None: print(indent, end="") text = ", ".join([f"{key}: {value}" for (key, 
value) in self.__dict__.items()]) @@ -45,6 +49,9 @@ def from_list(properties: list["Properties"]) -> "Properties": uses_co_names=any(p.uses_co_names for p in properties), uses_locals=any(p.uses_locals for p in properties), has_free=any(p.has_free for p in properties), + pure=all(p.pure for p in properties), + passthrough=all(p.passthrough for p in properties), + guard=all(p.guard for p in properties), ) @@ -64,6 +71,9 @@ def from_list(properties: list["Properties"]) -> "Properties": uses_co_names=False, uses_locals=False, has_free=False, + pure=False, + passthrough=False, + guard=False, ) @@ -88,6 +98,9 @@ class StackItem: condition: str | None size: str peek: bool = False + type_prop: None | tuple[str, None | str] = field( + default_factory=lambda: None, init=True, compare=False, hash=False + ) def __str__(self) -> str: cond = f" if ({self.condition})" if self.condition else "" @@ -259,7 +272,9 @@ def override_error( def convert_stack_item(item: parser.StackEffect) -> StackItem: - return StackItem(item.name, item.type, item.cond, (item.size or "1")) + return StackItem( + item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop + ) def analyze_stack(op: parser.InstDef) -> StackEffect: @@ -377,7 +392,6 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool: return False - EXITS = { "DISPATCH", "GO_TO_INSTRUCTION", @@ -417,16 +431,33 @@ def always_exits(op: parser.InstDef) -> bool: return False +def stack_effect_only_peeks(instr: parser.InstDef) -> bool: + stack_inputs = [s for s in instr.inputs if not isinstance(s, parser.CacheEffect)] + if len(stack_inputs) != len(instr.outputs): + return False + if len(stack_inputs) == 0: + return False + if any(s.cond for s in stack_inputs) or any(s.cond for s in instr.outputs): + return False + return all( + (s.name == other.name and s.type == other.type and s.size == other.size) + for s, other in zip(stack_inputs, instr.outputs) + ) + + def compute_properties(op: parser.InstDef) -> Properties: 
has_free = ( variable_used(op, "PyCell_New") or variable_used(op, "PyCell_GET") or variable_used(op, "PyCell_SET") ) + infallible = is_infallible(op) + deopts = variable_used(op, "DEOPT_IF") + passthrough = stack_effect_only_peeks(op) and infallible return Properties( escapes=makes_escaping_api_call(op), - infallible=is_infallible(op), - deopts=variable_used(op, "DEOPT_IF"), + infallible=infallible, + deopts=deopts, oparg=variable_used(op, "oparg"), jumps=variable_used(op, "JUMPBY"), eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"), @@ -440,6 +471,9 @@ def compute_properties(op: parser.InstDef) -> Properties: uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL")) and not has_free, has_free=has_free, + pure="pure" in op.annotations, + passthrough=passthrough, + guard=passthrough and deopts, ) @@ -686,9 +720,7 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: inst = instructions["BINARY_OP_INPLACE_ADD_UNICODE"] inst.family = families["BINARY_OP"] families["BINARY_OP"].members.append(inst) - opmap, first_arg, min_instrumented = assign_opcodes( - instructions, families, pseudos - ) + opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos) return Analysis( instructions, uops, families, pseudos, opmap, first_arg, min_instrumented ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 5a42a05c5c2ef2..c6c602c7122b41 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -26,7 +26,9 @@ def root_relative_path(filename: str) -> str: return filename -def write_header(generator: str, sources: list[str], outfile: TextIO, comment: str = "//") -> None: +def write_header( + generator: str, sources: list[str], outfile: TextIO, comment: str = "//" +) -> None: outfile.write( f"""{comment} This file is generated by {root_relative_path(generator)} {comment} from: @@ -209,6 +211,10 @@ def cflags(p: Properties) -> 
str: flags.append("HAS_ERROR_FLAG") if p.escapes: flags.append("HAS_ESCAPES_FLAG") + if p.pure: + flags.append("HAS_PURE_FLAG") + if p.passthrough: + flags.append("HAS_PASSTHROUGH_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 5c4238756748a7..e5a48999e962cb 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -15,6 +15,7 @@ These tools would be used to: * Generate the tier 2 interpreter * Generate documentation for instructions * Generate metadata about instructions, such as stack use (done). +* Generate the tier 2 optimizer's abstract interpreter. Having a single definition file ensures that there is a single source of truth for bytecode semantics. @@ -108,7 +109,10 @@ and a piece of C code describing its semantics:: NAME [":" type] [ "if" "(" C-expression ")" ] type: - NAME ["*"] + NAME ["*"] | type_prop + + type_prop: + "&" "(" NAME ["+" NAME] ")" stream: NAME "/" size @@ -138,7 +142,27 @@ The following definitions may occur: The optional `type` in an `object` is the C type. It defaults to `PyObject *`. The objects before the "--" are the objects on top of the stack at the start of the instruction. Those after the "--" are the objects on top of the stack at the -end of the instruction. +end of the instruction. When prefixed by a `&`, the `type` production rule follows the +`type_prop` production rule. This indicates the type of the value is of that specific type +after the operation. In this case, the type may also contain 64-bit refinement information +that is fetched from a previously defined operand in the instruction header, such as +a type version tag. This follows the format `type + refinement`. The list of possible types +and their refinements are below. 
They obey the following predicates: + + +* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type` +* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type` +* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PyUnicode_Type` +* `NULL_TYPE`: `val == NULL` +* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxiliary` +* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)` +* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`: + `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)` +* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxiliary` +* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type` +* `PYFUNCTION_TYPE_VERSION_TYPE`: + `PyFunction_Check(callable) && func->func_version == auxiliary && code->co_argcount == oparg + (self_or_null != NULL)` + An `inst` without `stack_effect` is a transitional form to allow the original C code definitions to be copied. It lacks information to generate anything other than the @@ -158,6 +182,15 @@ By convention cache effects (`stream`) must precede the input effects. The name `oparg` is pre-defined as a 32 bit value fetched from the instruction stream. +### Special instruction annotations + +Instruction headers may be prefixed by one or more annotations. The non-exhaustive +list of annotations and their meanings is as follows: + +* `override`. For external use by other interpreter definitions to override the current + instruction definition. +* `pure`. This instruction has no side effects. 
+ ### Special functions/macros The C code may include special functions that are understood by the tools as diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index c3c2954a42083f..4f8d01c5492f51 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -216,7 +216,13 @@ def choice(*opts: str) -> str: keywords = {name.lower(): name for name in kwds} ANNOTATION = "ANNOTATION" -annotations = {"specializing", "guard", "override", "register", "replaced"} +annotations = { + "specializing", + "override", + "register", + "replaced", + "pure", +} __all__ = [] __all__.extend(kwds) @@ -324,7 +330,9 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]: else: begin = line, start - linestart if kind != "\n": - yield Token(filename, kind, text, begin, (line, start - linestart + len(text))) + yield Token( + filename, kind, text, begin, (line, start - linestart + len(text)) + ) def to_text(tkns: list[Token], dedent: int = 0) -> str: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 9b7df9a54c7b3b..1826a0b645c3b8 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -50,6 +50,8 @@ "DEOPT", "ERROR", "ESCAPES", + "PURE", + "PASSTHROUGH", ] diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 60c185dcef58e9..307919cb37ce1e 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -75,6 +75,11 @@ class StackEffect(Node): size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond + # Optional `(type, refinement)` + type_prop: None | tuple[str, None | str] = field( + default_factory=lambda: None, init=True, compare=False, hash=False + ) + def __repr__(self) -> str: items = [self.name, self.type, self.cond, self.size] while items and items[-1] == "": @@ -138,11 +143,13 @@ class 
Family(Node): @dataclass class Pseudo(Node): name: str - flags: list[str] # instr flags to set on the pseudo instruction - targets: list[str] # opcodes this can be replaced by + flags: list[str] # instr flags to set on the pseudo instruction + targets: list[str] # opcodes this can be replaced by + AstNode = InstDef | Macro | Pseudo | Family + class Parser(PLexer): @contextual def definition(self) -> AstNode | None: @@ -253,14 +260,25 @@ def cache_effect(self) -> CacheEffect | None: @contextual def stack_effect(self) -> StackEffect | None: - # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] + # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): type_text = "" + type_prop = None if self.expect(lx.COLON): - type_text = self.require(lx.IDENTIFIER).text.strip() - if self.expect(lx.TIMES): - type_text += " *" + if i := self.expect(lx.IDENTIFIER): + type_text = i.text.strip() + if self.expect(lx.TIMES): + type_text += " *" + if self.expect(lx.AND): + consumed_bracket = self.expect(lx.LPAREN) is not None + type_prop_text = self.require(lx.IDENTIFIER).text.strip() + refinement = None + if self.expect(lx.PLUS): + refinement = self.require(lx.IDENTIFIER).text.strip() + type_prop = (type_prop_text, refinement) + if consumed_bracket: + self.require(lx.RPAREN) cond_text = "" if self.expect(lx.IF): self.require(lx.LPAREN) @@ -277,7 +295,7 @@ def stack_effect(self) -> StackEffect | None: self.require(lx.RBRACKET) type_text = "PyObject **" size_text = size.text.strip() - return StackEffect(tkn.text, type_text, cond_text, size_text) + return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop) return None @contextual @@ -364,7 +382,9 @@ def family_def(self) -> Family | None: if self.expect(lx.COMMA): if not (size := self.expect(lx.IDENTIFIER)): if not (size := self.expect(lx.NUMBER)): - raise self.make_syntax_error("Expected 
identifier or number") + raise self.make_syntax_error( + "Expected identifier or number" + ) if self.expect(lx.RPAREN): if self.expect(lx.EQUALS): if not self.expect(lx.LBRACE): diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index d351037a663ca2..6633950aada002 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -3,6 +3,8 @@ from dataclasses import dataclass from cwriter import CWriter +UNUSED = {"unused"} + def maybe_parenthesize(sym: str) -> str: """Add parentheses around a string if it contains an operator @@ -29,6 +31,7 @@ def var_size(var: StackItem) -> str: else: return var.size + @dataclass class StackOffset: "The stack offset of the virtual base of the stack from the physical stack pointer" @@ -47,10 +50,7 @@ def push(self, item: StackItem) -> None: self.pushed.append(var_size(item)) def __sub__(self, other: "StackOffset") -> "StackOffset": - return StackOffset( - self.popped + other.pushed, - self.pushed + other.popped - ) + return StackOffset(self.popped + other.pushed, self.pushed + other.popped) def __neg__(self) -> "StackOffset": return StackOffset(self.pushed, self.popped) @@ -134,18 +134,18 @@ def pop(self, var: StackItem) -> str: ) if popped.name == var.name: return "" - elif popped.name == "unused": + elif popped.name in UNUSED: self.defined.add(var.name) return ( f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n" ) - elif var.name == "unused": + elif var.name in UNUSED: return "" else: self.defined.add(var.name) return f"{var.name} = {popped.name};\n" self.base_offset.pop(var) - if var.name == "unused": + if var.name in UNUSED: return "" else: self.defined.add(var.name) @@ -159,7 +159,7 @@ def pop(self, var: StackItem) -> str: def push(self, var: StackItem) -> str: self.variables.append(var) - if var.is_array() and var.name not in self.defined and var.name != "unused": + if var.is_array() and var.name not in self.defined and var.name not in UNUSED: c_offset = 
self.top_offset.to_c() self.top_offset.push(var) self.defined.add(var.name) @@ -172,7 +172,7 @@ def flush(self, out: CWriter) -> None: for var in self.variables: if not var.peek: cast = "(PyObject *)" if var.type else "" - if var.name != "unused" and not var.is_array(): + if var.name not in UNUSED and not var.is_array(): if var.condition: out.emit(f"if ({var.condition}) ") out.emit(