diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-26-09-31-12.gh-issue-93678.W8vvgT.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-26-09-31-12.gh-issue-93678.W8vvgT.rst new file mode 100644 index 00000000000000..6ff816a172cc27 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-26-09-31-12.gh-issue-93678.W8vvgT.rst @@ -0,0 +1 @@ +Add cfg_builder struct and refactor the relevant code so that a cfg can be constructed without an instance of the compiler struct. diff --git a/Python/compile.c b/Python/compile.c index 94ce8669a1fd46..975efa7e23ecdf 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -325,6 +325,17 @@ enum { COMPILER_SCOPE_COMPREHENSION, }; +typedef struct cfg_builder_ { + /* The entryblock, at which control flow begins. All blocks of the + CFG are reachable through the b_next links */ + basicblock *cfg_entryblock; + /* Pointer to the most recently allocated block. By following + b_list links, you can reach all allocated blocks. */ + basicblock *block_list; + /* pointer to the block currently being constructed */ + basicblock *curblock; +} cfg_builder; + /* The following items change on entry and exit of code blocks. They must be saved and restored when returning to a block. */ @@ -350,10 +361,8 @@ struct compiler_unit { Py_ssize_t u_argcount; /* number of arguments for block */ Py_ssize_t u_posonlyargcount; /* number of positional only arguments for block */ Py_ssize_t u_kwonlyargcount; /* number of keyword only arguments for block */ - /* Pointer to the most recently allocated block. By following b_list - members, you can reach all early allocated blocks. */ - basicblock *u_blocks; - basicblock *u_curblock; /* pointer to current block */ + + cfg_builder u_cfg_builder; /* The control flow graph */ int u_nfblocks; struct fblockinfo u_fblock[CO_MAXBLOCKS]; @@ -390,6 +399,9 @@ struct compiler { PyArena *c_arena; /* pointer to memory allocation arena */ }; +#define CFG_BUILDER(c) (&((c)->u->u_cfg_builder)) +#define COMPILER_LOC(c) ((c)->u->u_loc) + typedef struct { // A list of strings corresponding to name captures. It is used to track: // - Repeated name assignments in the same pattern. @@ -417,12 +429,9 @@ typedef struct { static int basicblock_next_instr(basicblock *); -static int compiler_enter_scope(struct compiler *, identifier, int, void *, int); +static int cfg_builder_addop_i(cfg_builder *g, int opcode, Py_ssize_t oparg, struct location loc); + static void compiler_free(struct compiler *); -static basicblock *compiler_new_block(struct compiler *); -static int compiler_addop(struct compiler *, int, bool); -static int compiler_addop_i(struct compiler *, int, Py_ssize_t, bool); -static int compiler_addop_j(struct compiler *, int, basicblock *, bool); static int compiler_error(struct compiler *, const char *, ...); static int compiler_warn(struct compiler *, const char *, ...); static int compiler_nameop(struct compiler *, identifier, expr_context_ty); @@ -442,7 +451,6 @@ static int are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t); static int compiler_with(struct compiler *, stmt_ty, int); static int compiler_async_with(struct compiler *, stmt_ty, int); static int compiler_async_for(struct compiler *, stmt_ty); -static int validate_keywords(struct compiler *c, asdl_keyword_seq *keywords); static int compiler_call_simple_kw_helper(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t nkwelts); @@ -714,10 +722,9 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) } static void -compiler_unit_check(struct compiler_unit *u) +cfg_builder_check(cfg_builder *g) { - basicblock *block; - for (block = u->u_blocks; block != NULL; block = block->b_list) { + for (basicblock *block = g->block_list; block != NULL; block = block->b_list) { assert(!_PyMem_IsPtrFreed(block)); if (block->b_instr != NULL) { assert(block->b_ialloc > 0); @@ -732,19 +739,24 @@ compiler_unit_check(struct compiler_unit *u) } static void -compiler_unit_free(struct compiler_unit *u) +cfg_builder_free(cfg_builder* g) { - basicblock *b, *next; - - compiler_unit_check(u); - b = u->u_blocks; + cfg_builder_check(g); + basicblock *b = g->block_list; while (b != NULL) { - if (b->b_instr) + if (b->b_instr) { PyObject_Free((void *)b->b_instr); - next = b->b_list; + } + basicblock *next = b->b_list; PyObject_Free((void *)b); b = next; } +} + +static void +compiler_unit_free(struct compiler_unit *u) +{ + cfg_builder_free(&u->u_cfg_builder); Py_CLEAR(u->u_ste); Py_CLEAR(u->u_name); Py_CLEAR(u->u_qualname); @@ -832,59 +844,48 @@ compiler_set_qualname(struct compiler *c) Returns NULL on error. */ static basicblock * -new_basicblock() +cfg_builder_new_block(cfg_builder *g) { basicblock *b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock)); if (b == NULL) { PyErr_NoMemory(); return NULL; } + /* Extend the singly linked list of blocks with new block. */ + b->b_list = g->block_list; + g->block_list = b; return b; } static basicblock * -compiler_new_block(struct compiler *c) +cfg_builder_use_next_block(cfg_builder *g, basicblock *block) { - basicblock *b = new_basicblock(); - if (b == NULL) { - return NULL; - } - /* Extend the singly linked list of blocks with new block. */ - struct compiler_unit *u = c->u; - b->b_list = u->u_blocks; - u->u_blocks = b; - return b; + assert(block != NULL); + g->curblock->b_next = block; + g->curblock = block; + return block; } static basicblock * -compiler_use_next_block(struct compiler *c, basicblock *block) +compiler_new_block(struct compiler *c) { - assert(block != NULL); - c->u->u_curblock->b_next = block; - c->u->u_curblock = block; - return block; + return cfg_builder_new_block(CFG_BUILDER(c)); } static basicblock * -basicblock_new_b_list_successor(basicblock *prev) +compiler_use_next_block(struct compiler *c, basicblock *block) { - basicblock *result = new_basicblock(); - if (result == NULL) { - return NULL; - } - result->b_list = prev->b_list; - prev->b_list = result; - return result; + return cfg_builder_use_next_block(CFG_BUILDER(c), block); } static basicblock * -copy_basicblock(basicblock *block) +copy_basicblock(cfg_builder *g, basicblock *block) { /* Cannot copy a block if it has a fallthrough, since * a block can only have one fallthrough predecessor. */ assert(BB_NO_FALLTHROUGH(block)); - basicblock *result = basicblock_new_b_list_successor(block); + basicblock *result = cfg_builder_new_block(g); if (result == NULL) { return NULL; } @@ -1247,28 +1248,13 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg) return stack_effect(opcode, oparg, -1); } -static int -compiler_use_new_implicit_block_if_needed(struct compiler *c) -{ - basicblock *b = c->u->u_curblock; - struct instr *last = basicblock_last_instr(b); - if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) { - basicblock *b = compiler_new_block(c); - if (b == NULL) { - return -1; - } - compiler_use_next_block(c, b); - } - return 0; -} - /* Add an opcode with no argument. Returns 0 on failure, 1 on success. */ static int basicblock_addop(basicblock *b, int opcode, int oparg, - basicblock *target, const struct location *loc) + basicblock *target, struct location loc) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); @@ -1287,25 +1273,35 @@ basicblock_addop(basicblock *b, int opcode, int oparg, i->i_opcode = opcode; i->i_oparg = oparg; i->i_target = target; - i->i_loc = loc ? *loc : NO_LOCATION; + i->i_loc = loc; return 1; } static int -compiler_addop(struct compiler *c, int opcode, bool line) +cfg_builder_addop(cfg_builder *g, int opcode, int oparg, basicblock *target, + struct location loc) { - assert(!HAS_ARG(opcode)); - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; + struct instr *last = basicblock_last_instr(g->curblock); + if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) { + basicblock *b = cfg_builder_new_block(g); + if (b == NULL) { + return -1; + } + cfg_builder_use_next_block(g, b); } + return basicblock_addop(g->curblock, opcode, oparg, target, loc); +} - const struct location *loc = line ? &c->u->u_loc : NULL; - return basicblock_addop(c->u->u_curblock, opcode, 0, NULL, loc); +static int +cfg_builder_addop_noarg(cfg_builder *g, int opcode, struct location loc) +{ + assert(!HAS_ARG(opcode)); + return cfg_builder_addop(g, opcode, 0, NULL, loc); } static Py_ssize_t -compiler_add_o(PyObject *dict, PyObject *o) +dict_add_o(PyObject *dict, PyObject *o) { PyObject *v; Py_ssize_t arg; @@ -1450,7 +1446,7 @@ compiler_add_const(struct compiler *c, PyObject *o) return -1; } - Py_ssize_t arg = compiler_add_o(c->u->u_consts, key); + Py_ssize_t arg = dict_add_o(c->u->u_consts, key); Py_DECREF(key); return arg; } @@ -1461,17 +1457,17 @@ compiler_addop_load_const(struct compiler *c, PyObject *o) Py_ssize_t arg = compiler_add_const(c, o); if (arg < 0) return 0; - return compiler_addop_i(c, LOAD_CONST, arg, true); + return cfg_builder_addop_i(CFG_BUILDER(c), LOAD_CONST, arg, COMPILER_LOC(c)); } static int compiler_addop_o(struct compiler *c, int opcode, PyObject *dict, PyObject *o) { - Py_ssize_t arg = compiler_add_o(dict, o); + Py_ssize_t arg = dict_add_o(dict, o); if (arg < 0) return 0; - return compiler_addop_i(c, opcode, arg, true); + return cfg_builder_addop_i(CFG_BUILDER(c), opcode, arg, COMPILER_LOC(c)); } static int @@ -1483,7 +1479,7 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, PyObject *mangled = _Py_Mangle(c->u->u_private, o); if (!mangled) return 0; - arg = compiler_add_o(dict, mangled); + arg = dict_add_o(dict, mangled); Py_DECREF(mangled); if (arg < 0) return 0; @@ -1495,18 +1491,15 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, arg <<= 1; arg |= 1; } - return compiler_addop_i(c, opcode, arg, true); + return cfg_builder_addop_i(CFG_BUILDER(c), opcode, arg, COMPILER_LOC(c)); } /* Add an opcode with an integer argument. Returns 0 on failure, 1 on success. */ static int -compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) +cfg_builder_addop_i(cfg_builder *g, int opcode, Py_ssize_t oparg, struct location loc) { - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } /* oparg value is unsigned, but a signed C int is usually used to store it in the C code (like Python/ceval.c). @@ -1516,35 +1509,30 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */ int oparg_ = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - - const struct location *loc = line ? &c->u->u_loc : NULL; - return basicblock_addop(c->u->u_curblock, opcode, oparg_, NULL, loc); + return cfg_builder_addop(g, opcode, oparg_, NULL, loc); } static int -compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) +cfg_builder_addop_j(cfg_builder *g, int opcode, basicblock *target, struct location loc) { - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } - const struct location *loc = line ? &c->u->u_loc : NULL; assert(target != NULL); assert(IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); - return basicblock_addop(c->u->u_curblock, opcode, 0, target, loc); + return cfg_builder_addop(g, opcode, 0, target, loc); } + #define ADDOP(C, OP) { \ - if (!compiler_addop((C), (OP), true)) \ + if (!cfg_builder_addop_noarg(CFG_BUILDER(C), (OP), COMPILER_LOC(C))) \ return 0; \ } #define ADDOP_NOLINE(C, OP) { \ - if (!compiler_addop((C), (OP), false)) \ + if (!cfg_builder_addop_noarg(CFG_BUILDER(C), (OP), NO_LOCATION)) \ return 0; \ } #define ADDOP_IN_SCOPE(C, OP) { \ - if (!compiler_addop((C), (OP), true)) { \ + if (!cfg_builder_addop_noarg(CFG_BUILDER(C), (OP), COMPILER_LOC(C))) { \ compiler_exit_scope(c); \ return 0; \ } \ @@ -1583,17 +1571,17 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) } #define ADDOP_I(C, OP, O) { \ - if (!compiler_addop_i((C), (OP), (O), true)) \ + if (!cfg_builder_addop_i(CFG_BUILDER(C), (OP), (O), COMPILER_LOC(C))) \ return 0; \ } #define ADDOP_I_NOLINE(C, OP, O) { \ - if (!compiler_addop_i((C), (OP), (O), false)) \ + if (!cfg_builder_addop_i(CFG_BUILDER(C), (OP), (O), NO_LOCATION)) \ return 0; \ } #define ADDOP_JUMP(C, OP, O) { \ - if (!compiler_addop_j((C), (OP), (O), true)) \ + if (!cfg_builder_addop_j(CFG_BUILDER(C), (OP), (O), COMPILER_LOC(C))) \ return 0; \ } @@ -1601,7 +1589,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) * Used for artificial jumps that have no corresponding * token in the source code. */ #define ADDOP_JUMP_NOLINE(C, OP, O) { \ - if (!compiler_addop_j((C), (OP), (O), false)) \ + if (!cfg_builder_addop_j(CFG_BUILDER(C), (OP), (O), NO_LOCATION)) \ return 0; \ } @@ -1718,7 +1706,6 @@ compiler_enter_scope(struct compiler *c, identifier name, return 0; } - u->u_blocks = NULL; u->u_nfblocks = 0; u->u_firstlineno = lineno; u->u_loc = LOCATION(lineno, lineno, 0, 0); @@ -1751,10 +1738,12 @@ compiler_enter_scope(struct compiler *c, identifier name, c->c_nestlevel++; - block = compiler_new_block(c); + cfg_builder *g = CFG_BUILDER(c); + g->block_list = NULL; + block = cfg_builder_new_block(g); if (block == NULL) return 0; - c->u->u_curblock = block; + g->curblock = g->cfg_entryblock = block; if (u->u_scope_type == COMPILER_SCOPE_MODULE) { c->u->u_loc.lineno = 0; @@ -1791,7 +1780,7 @@ compiler_exit_scope(struct compiler *c) _PyErr_WriteUnraisableMsg("on removing the last compiler " "stack item", NULL); } - compiler_unit_check(c->u); + cfg_builder_check(CFG_BUILDER(c)); } else { c->u = NULL; @@ -4240,7 +4229,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) } assert(op); - arg = compiler_add_o(dict, mangled); + arg = dict_add_o(dict, mangled); Py_DECREF(mangled); if (arg < 0) { return 0; @@ -4248,7 +4237,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) if (op == LOAD_GLOBAL) { arg <<= 1; } - return compiler_addop_i(c, op, arg, true); + return cfg_builder_addop_i(CFG_BUILDER(c), op, arg, COMPILER_LOC(c)); } static int @@ -6291,7 +6280,7 @@ emit_and_reset_fail_pop(struct compiler *c, pattern_context *pc) } while (--pc->fail_pop_size) { compiler_use_next_block(c, pc->fail_pop[pc->fail_pop_size]); - if (!compiler_addop(c, POP_TOP, true)) { + if (!cfg_builder_addop_noarg(CFG_BUILDER(c), POP_TOP, COMPILER_LOC(c))) { pc->fail_pop_size = 0; PyObject_Free(pc->fail_pop); pc->fail_pop = NULL; @@ -6725,7 +6714,8 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) pc->fail_pop = NULL; pc->fail_pop_size = 0; pc->on_top = 0; - if (!compiler_addop_i(c, COPY, 1, true) || !compiler_pattern(c, alt, pc)) { + if (!cfg_builder_addop_i(CFG_BUILDER(c), COPY, 1, COMPILER_LOC(c)) || + !compiler_pattern(c, alt, pc)) { goto error; } // Success! @@ -6788,7 +6778,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) } } assert(control); - if (!compiler_addop_j(c, JUMP, end, true) || + if (!cfg_builder_addop_j(CFG_BUILDER(c), JUMP, end, COMPILER_LOC(c)) || !emit_and_reset_fail_pop(c, pc)) { goto error; @@ -6800,7 +6790,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) // Need to NULL this for the PyObject_Free call in the error block. old_pc.fail_pop = NULL; // No match. Pop the remaining copy of the subject and fail: - if (!compiler_addop(c, POP_TOP, true) || !jump_to_fail_pop(c, pc, JUMP)) { + if (!cfg_builder_addop_noarg(CFG_BUILDER(c), POP_TOP, COMPILER_LOC(c)) || !jump_to_fail_pop(c, pc, JUMP)) { goto error; } compiler_use_next_block(c, end); @@ -7430,7 +7420,8 @@ mark_cold(basicblock *entryblock) { } static int -push_cold_blocks_to_end(basicblock *entryblock, int code_flags) { +push_cold_blocks_to_end(cfg_builder *g, int code_flags) { + basicblock *entryblock = g->cfg_entryblock; if (entryblock->b_next == NULL) { /* single basicblock, no need to reorder */ return 0; @@ -7443,11 +7434,11 @@ push_cold_blocks_to_end(basicblock *entryblock, int code_flags) { /* an explicit jump instead of fallthrough */ for (basicblock *b = entryblock; b != NULL; b = b->b_next) { if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) { - basicblock *explicit_jump = basicblock_new_b_list_successor(b); + basicblock *explicit_jump = cfg_builder_new_block(g); if (explicit_jump == NULL) { return -1; } - basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &NO_LOCATION); + basicblock_addop(explicit_jump, JUMP, 0, b->b_next, NO_LOCATION); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; @@ -8024,7 +8015,7 @@ consts_dict_keys_inorder(PyObject *dict) while (PyDict_Next(dict, &pos, &k, &v)) { i = PyLong_AS_LONG(v); /* The keys of the dictionary can be tuples wrapping a constant. - * (see compiler_add_o and _PyCode_ConstantKey). In that case + * (see dict_add_o and _PyCode_ConstantKey). In that case * the object we want is always second. */ if (PyTuple_CheckExact(k)) { k = PyTuple_GET_ITEM(k, 1); @@ -8296,7 +8287,7 @@ trim_unused_consts(basicblock *entryblock, PyObject *consts); /* Duplicates exit BBs, so that line numbers can be propagated to them */ static int -duplicate_exits_without_lineno(basicblock *entryblock); +duplicate_exits_without_lineno(cfg_builder *g); static int extend_block(basicblock *bb); @@ -8547,7 +8538,7 @@ assemble(struct compiler *c, int addNone) } /* Make sure every block that falls off the end returns None. */ - if (!basicblock_returns(c->u->u_curblock)) { + if (!basicblock_returns(CFG_BUILDER(c)->curblock)) { UNSET_LOC(c); if (addNone) ADDOP_LOAD_CONST(c, Py_None); @@ -8569,17 +8560,18 @@ assemble(struct compiler *c, int addNone) } int nblocks = 0; - basicblock *entryblock = NULL; - for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + for (basicblock *b = CFG_BUILDER(c)->block_list; b != NULL; b = b->b_list) { nblocks++; - entryblock = b; } - assert(entryblock != NULL); if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) { PyErr_NoMemory(); goto error; } + cfg_builder *g = CFG_BUILDER(c); + basicblock *entryblock = g->cfg_entryblock; + assert(entryblock != NULL); + /* Set firstlineno if it wasn't explicitly set. */ if (!c->u->u_firstlineno) { if (entryblock->b_instr && entryblock->b_instr->i_loc.lineno) { @@ -8614,7 +8606,7 @@ assemble(struct compiler *c, int addNone) if (trim_unused_consts(entryblock, consts)) { goto error; } - if (duplicate_exits_without_lineno(entryblock)) { + if (duplicate_exits_without_lineno(g)) { return NULL; } propagate_line_numbers(entryblock); @@ -8631,7 +8623,7 @@ assemble(struct compiler *c, int addNone) } convert_exception_handlers_to_nops(entryblock); - if (push_cold_blocks_to_end(entryblock, code_flags) < 0) { + if (push_cold_blocks_to_end(g, code_flags) < 0) { goto error; } @@ -9550,15 +9542,16 @@ is_exit_without_lineno(basicblock *b) { * copy the line number from the sole predecessor block. */ static int -duplicate_exits_without_lineno(basicblock *entryblock) +duplicate_exits_without_lineno(cfg_builder *g) { /* Copy all exit blocks without line number that are targets of a jump. */ + basicblock *entryblock = g->cfg_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { if (b->b_iused > 0 && is_jump(&b->b_instr[b->b_iused-1])) { basicblock *target = b->b_instr[b->b_iused-1].i_target; if (is_exit_without_lineno(target) && target->b_predecessors > 1) { - basicblock *new_target = copy_basicblock(target); + basicblock *new_target = copy_basicblock(g, target); if (new_target == NULL) { return -1; }