diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index 06a6082cddae6a..1a628a08ca4ebf 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -38,6 +38,11 @@ extern int _PyAST_Optimize( struct _arena *arena, _PyASTOptimizeState *state); +/* Access compiler internals for unit testing */ +PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg( + PyObject *instructions, + PyObject *consts); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index aada220395023d..c736bfecd077fd 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -298,6 +298,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(code) STRUCT_FOR_ID(command) STRUCT_FOR_ID(comment_factory) + STRUCT_FOR_ID(consts) STRUCT_FOR_ID(context) STRUCT_FOR_ID(cookie) STRUCT_FOR_ID(copy) @@ -407,6 +408,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(input) STRUCT_FOR_ID(insert_comments) STRUCT_FOR_ID(insert_pis) + STRUCT_FOR_ID(instructions) STRUCT_FOR_ID(intern) STRUCT_FOR_ID(intersection) STRUCT_FOR_ID(isatty) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 09890cd812015b..58d9e934b96c19 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -807,6 +807,7 @@ extern "C" { INIT_ID(code), \ INIT_ID(command), \ INIT_ID(comment_factory), \ + INIT_ID(consts), \ INIT_ID(context), \ INIT_ID(cookie), \ INIT_ID(copy), \ @@ -916,6 +917,7 @@ extern "C" { INIT_ID(input), \ INIT_ID(insert_comments), \ INIT_ID(insert_pis), \ + INIT_ID(instructions), \ INIT_ID(intern), \ INIT_ID(intersection), \ INIT_ID(isatty), \ @@ -1916,6 +1918,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(comment_factory); PyUnicode_InternInPlace(&string); + string = &_Py_ID(consts); + PyUnicode_InternInPlace(&string); string = &_Py_ID(context); PyUnicode_InternInPlace(&string); string = &_Py_ID(cookie); @@ -2134,6 +2138,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(insert_pis); PyUnicode_InternInPlace(&string); + string = &_Py_ID(instructions); + PyUnicode_InternInPlace(&string); string = &_Py_ID(intern); PyUnicode_InternInPlace(&string); string = &_Py_ID(intersection); @@ -5755,6 +5761,10 @@ _PyStaticObjects_CheckRefcnt(void) { _PyObject_Dump((PyObject *)&_Py_ID(comment_factory)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); }; + if (Py_REFCNT((PyObject *)&_Py_ID(consts)) < _PyObject_IMMORTAL_REFCNT) { + _PyObject_Dump((PyObject *)&_Py_ID(consts)); + Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); + }; if (Py_REFCNT((PyObject *)&_Py_ID(context)) < _PyObject_IMMORTAL_REFCNT) { _PyObject_Dump((PyObject *)&_Py_ID(context)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); @@ -6191,6 +6201,10 @@ _PyStaticObjects_CheckRefcnt(void) { _PyObject_Dump((PyObject *)&_Py_ID(insert_pis)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); }; + if (Py_REFCNT((PyObject *)&_Py_ID(instructions)) < _PyObject_IMMORTAL_REFCNT) { + _PyObject_Dump((PyObject *)&_Py_ID(instructions)); + Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); + }; if (Py_REFCNT((PyObject *)&_Py_ID(intern)) < _PyObject_IMMORTAL_REFCNT) { _PyObject_Dump((PyObject *)&_Py_ID(intern)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py index 471d4a68f915aa..05b54911e3f25a 100644 --- a/Lib/test/support/bytecode_helper.py +++ b/Lib/test/support/bytecode_helper.py @@ -3,6 +3,7 @@ import unittest import dis import io +from _testinternalcapi import optimize_cfg _UNSPECIFIED = object() @@ -40,3 +41,95 @@ def assertNotInBytecode(self, x, opname, argval=_UNSPECIFIED): msg = '(%s,%r) occurs in bytecode:\n%s' msg = msg % (opname, argval, disassembly) self.fail(msg) + + +class CfgOptimizationTestCase(unittest.TestCase): + + HAS_ARG = set(dis.hasarg) + HAS_TARGET = set(dis.hasjrel + dis.hasjabs + dis.hasexc) + HAS_ARG_OR_TARGET = HAS_ARG.union(HAS_TARGET) + + def setUp(self): + self.last_label = 0 + + def Label(self): + self.last_label += 1 + return self.last_label + + def complete_insts_info(self, insts): + # fill in omitted fields in location, and oparg 0 for ops with no arg. + instructions = [] + for item in insts: + if isinstance(item, int): + instructions.append(item) + else: + assert isinstance(item, tuple) + inst = list(reversed(item)) + opcode = dis.opmap[inst.pop()] + oparg = inst.pop() if opcode in self.HAS_ARG_OR_TARGET else 0 + loc = inst + [-1] * (4 - len(inst)) + instructions.append((opcode, oparg, *loc)) + return instructions + + def normalize_insts(self, insts): + """ Map labels to instruction index. + Remove labels which are not used as jump targets. + """ + labels_map = {} + targets = set() + idx = 1 + for item in insts: + assert isinstance(item, (int, tuple)) + if isinstance(item, tuple): + opcode, oparg, *_ = item + if dis.opmap.get(opcode, opcode) in self.HAS_TARGET: + targets.add(oparg) + idx += 1 + elif isinstance(item, int): + assert item not in labels_map, "label reused" + labels_map[item] = idx + + res = [] + for item in insts: + if isinstance(item, int) and item in targets: + if not res or labels_map[item] != res[-1]: + res.append(labels_map[item]) + elif isinstance(item, tuple): + opcode, oparg, *loc = item + opcode = dis.opmap.get(opcode, opcode) + if opcode in self.HAS_TARGET: + arg = labels_map[oparg] + else: + arg = oparg if opcode in self.HAS_TARGET else None + opcode = dis.opname[opcode] + res.append((opcode, arg, *loc)) + return res + + def get_optimized(self, insts, consts): + insts = self.complete_insts_info(insts) + insts = optimize_cfg(insts, consts) + return insts, consts + + def compareInstructions(self, actual_, expected_): + # get two lists where each entry is a label or + # an instruction tuple. Compare them, while mapping + # each actual label to a corresponding expected label + # based on their locations. + + self.assertIsInstance(actual_, list) + self.assertIsInstance(expected_, list) + + actual = self.normalize_insts(actual_) + expected = self.normalize_insts(expected_) + self.assertEqual(len(actual), len(expected)) + + # compare instructions + for act, exp in zip(actual, expected): + if isinstance(act, int): + self.assertEqual(exp, act) + continue + self.assertIsInstance(exp, tuple) + self.assertIsInstance(act, tuple) + # pad exp with -1's (if location info is incomplete) + exp += (-1,) * (len(act) - len(exp)) + self.assertEqual(exp, act) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index e03c42c2f823dc..7ece468363be58 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -4,7 +4,7 @@ import textwrap import unittest -from test.support.bytecode_helper import BytecodeTestCase +from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase def compile_pattern_with_fast_locals(pattern): @@ -864,5 +864,81 @@ def trace(frame, event, arg): self.assertNotInBytecode(f, "LOAD_FAST_CHECK") +class DirectiCfgOptimizerTests(CfgOptimizationTestCase): + + def cfg_optimization_test(self, insts, expected_insts, + consts=None, expected_consts=None): + if expected_consts is None: + expected_consts = consts + opt_insts, opt_consts = self.get_optimized(insts, consts) + self.compareInstructions(opt_insts, expected_insts) + self.assertEqual(opt_consts, expected_consts) + + def test_conditional_jump_forward_non_const_condition(self): + insts = [ + ('LOAD_NAME', 1, 11), + ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12), + ('LOAD_CONST', 2, 13), + lbl, + ('LOAD_CONST', 3, 14), + ] + expected = [ + ('LOAD_NAME', '1', 11), + ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12), + ('LOAD_CONST', '2', 13), + lbl, + ('LOAD_CONST', '3', 14) + ] + self.cfg_optimization_test(insts, expected, consts=list(range(5))) + + def test_conditional_jump_forward_const_condition(self): + # The unreachable branch of the jump is removed + + insts = [ + ('LOAD_CONST', 3, 11), + ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12), + ('LOAD_CONST', 2, 13), + lbl, + ('LOAD_CONST', 3, 14), + ] + expected = [ + ('NOP', None, 11), + ('JUMP', lbl := self.Label(), 12), + lbl, + ('LOAD_CONST', '3', 14) + ] + self.cfg_optimization_test(insts, expected, consts=list(range(5))) + + def test_conditional_jump_backward_non_const_condition(self): + insts = [ + lbl1 := self.Label(), + ('LOAD_NAME', 1, 11), + ('POP_JUMP_IF_TRUE', lbl1, 12), + ('LOAD_CONST', 2, 13), + ] + expected = [ + lbl := self.Label(), + ('LOAD_NAME', '1', 11), + ('POP_JUMP_IF_TRUE', lbl, 12), + ('LOAD_CONST', '2', 13) + ] + self.cfg_optimization_test(insts, expected, consts=list(range(5))) + + def test_conditional_jump_backward_const_condition(self): + # The unreachable branch of the jump is removed + insts = [ + lbl1 := self.Label(), + ('LOAD_CONST', 1, 11), + ('POP_JUMP_IF_TRUE', lbl1, 12), + ('LOAD_CONST', 2, 13), + ] + expected = [ + lbl := self.Label(), + ('NOP', None, 11), + ('JUMP', lbl, 12) + ] + self.cfg_optimization_test(insts, expected, consts=list(range(5))) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst new file mode 100644 index 00000000000000..9e2b90ba07a454 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst @@ -0,0 +1 @@ +Added test a harness for direct unit tests of the compiler's optimization stage. The ``_testinternalcapi.optimize_cfg()`` function runs the optimiser on a sequence of instructions. The ``CfgOptimizationTestCase`` class in ``test.support`` has utilities for invoking the optimizer and checking the output. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 238de749fffc5d..9d92b076387f61 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,6 +14,7 @@ #include "Python.h" #include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_bitutils.h" // _Py_bswap32() +#include "pycore_compile.h" // _PyCompile_OptimizeCfg() #include "pycore_fileutils.h" // _Py_normpath #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_gc.h" // PyGC_Head @@ -25,7 +26,12 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "osdefs.h" // MAXPATHLEN +#include "clinic/_testinternalcapi.c.h" +/*[clinic input] +module _testinternalcapi +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/ static PyObject * get_configs(PyObject *self, PyObject *Py_UNUSED(args)) { @@ -525,6 +531,25 @@ set_eval_frame_record(PyObject *self, PyObject *list) } +/*[clinic input] + +_testinternalcapi.optimize_cfg -> object + + instructions: object + consts: object + +Apply compiler optimizations to an instruction list. +[clinic start generated code]*/ + +static PyObject * +_testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions, + PyObject *consts) +/*[clinic end generated code: output=5412aeafca683c8b input=7e8a3de86ebdd0f9]*/ +{ + return _PyCompile_OptimizeCfg(instructions, consts); +} + + static PyMethodDef TestMethods[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -543,6 +568,7 @@ static PyMethodDef TestMethods[] = { {"DecodeLocaleEx", decode_locale_ex, METH_VARARGS}, {"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL}, {"set_eval_frame_record", set_eval_frame_record, METH_O, NULL}, + _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_testinternalcapi.c.h b/Modules/clinic/_testinternalcapi.c.h new file mode 100644 index 00000000000000..8113fff37997af --- /dev/null +++ b/Modules/clinic/_testinternalcapi.c.h @@ -0,0 +1,68 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif + + +PyDoc_STRVAR(_testinternalcapi_optimize_cfg__doc__, +"optimize_cfg($module, /, instructions, consts)\n" +"--\n" +"\n" +"Apply compiler optimizations to an instruction list."); + +#define _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF \ + {"optimize_cfg", _PyCFunction_CAST(_testinternalcapi_optimize_cfg), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_optimize_cfg__doc__}, + +static PyObject * +_testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions, + PyObject *consts); + +static PyObject * +_testinternalcapi_optimize_cfg(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(instructions), &_Py_ID(consts), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"instructions", "consts", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "optimize_cfg", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *instructions; + PyObject *consts; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + instructions = args[0]; + consts = args[1]; + return_value = _testinternalcapi_optimize_cfg_impl(module, instructions, consts); + +exit: + return return_value; +} +/*[clinic end generated code: output=3b1fd713290f68a9 input=a9049054013a1b77]*/ diff --git a/Python/compile.c b/Python/compile.c index 339e0e792be416..e5ac162ccc0a5b 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -457,6 +457,7 @@ typedef struct { static int basicblock_next_instr(basicblock *); +static basicblock *cfg_builder_new_block(cfg_builder *g); static int cfg_builder_maybe_start_new_block(cfg_builder *g); static int cfg_builder_addop_i(cfg_builder *g, int opcode, Py_ssize_t oparg, struct location loc); @@ -767,8 +768,20 @@ cfg_builder_check(cfg_builder *g) } } +static int +cfg_builder_init(cfg_builder *g) +{ + g->g_block_list = NULL; + basicblock *block = cfg_builder_new_block(g); + if (block == NULL) + return 0; + g->g_curblock = g->g_entryblock = block; + g->g_current_label = NO_LABEL; + return 1; +} + static void -cfg_builder_free(cfg_builder* g) +cfg_builder_fini(cfg_builder* g) { cfg_builder_check(g); basicblock *b = g->g_block_list; @@ -785,7 +798,7 @@ cfg_builder_free(cfg_builder* g) static void compiler_unit_free(struct compiler_unit *u) { - cfg_builder_free(&u->u_cfg_builder); + cfg_builder_fini(&u->u_cfg_builder); Py_CLEAR(u->u_ste); Py_CLEAR(u->u_name); Py_CLEAR(u->u_qualname); @@ -1708,7 +1721,6 @@ compiler_enter_scope(struct compiler *c, identifier name, int scope_type, void *key, int lineno) { struct compiler_unit *u; - basicblock *block; u = (struct compiler_unit *)PyObject_Calloc(1, sizeof( struct compiler_unit)); @@ -1786,12 +1798,9 @@ compiler_enter_scope(struct compiler *c, identifier name, c->c_nestlevel++; cfg_builder *g = CFG_BUILDER(c); - g->g_block_list = NULL; - block = cfg_builder_new_block(g); - if (block == NULL) + if (!cfg_builder_init(g)) { return 0; - g->g_curblock = g->g_entryblock = block; - g->g_current_label = NO_LABEL; + } if (u->u_scope_type == COMPILER_SCOPE_MODULE) { c->u->u_loc.lineno = 0; @@ -8220,7 +8229,7 @@ dump_instr(struct instr *i) sprintf(arg, "arg: %d ", i->i_oparg); } if (HAS_TARGET(i->i_opcode)) { - sprintf(arg, "target: %p ", i->i_target); + sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg); } fprintf(stderr, "line: %d, opcode: %d %s%s%s\n", i->i_loc.lineno, i->i_opcode, arg, jabs, jrel); @@ -8251,7 +8260,7 @@ static int calculate_jump_targets(basicblock *entryblock); static int -optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache); +optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache); static int trim_unused_consts(basicblock *entryblock, PyObject *consts); @@ -8465,18 +8474,18 @@ static void propagate_line_numbers(basicblock *entryblock); static void -eliminate_empty_basic_blocks(basicblock *entryblock); +eliminate_empty_basic_blocks(cfg_builder *g); static int -remove_redundant_jumps(basicblock *entryblock) { +remove_redundant_jumps(cfg_builder *g) { /* If a non-empty block ends with a jump instruction, check if the next * non-empty block reached through normal flow control is the target * of that jump. If it is, then the jump instruction is redundant and * can be deleted. */ int removed = 0; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (b->b_iused > 0) { struct instr *b_last_instr = &b->b_instr[b->b_iused - 1]; assert(!IS_ASSEMBLER_OPCODE(b_last_instr->i_opcode)); @@ -8495,7 +8504,7 @@ remove_redundant_jumps(basicblock *entryblock) { } } if (removed) { - eliminate_empty_basic_blocks(entryblock); + eliminate_empty_basic_blocks(g); } return 0; } @@ -8545,13 +8554,12 @@ assemble(struct compiler *c, int addNone) } cfg_builder *g = CFG_BUILDER(c); - basicblock *entryblock = g->g_entryblock; - assert(entryblock != NULL); + assert(g->g_entryblock != NULL); /* Set firstlineno if it wasn't explicitly set. */ if (!c->u->u_firstlineno) { - if (entryblock->b_instr && entryblock->b_instr->i_loc.lineno) { - c->u->u_firstlineno = entryblock->b_instr->i_loc.lineno; + if (g->g_entryblock->b_instr && g->g_entryblock->b_instr->i_loc.lineno) { + c->u->u_firstlineno = g->g_entryblock->b_instr->i_loc.lineno; } else { c->u->u_firstlineno = 1; @@ -8559,11 +8567,11 @@ assemble(struct compiler *c, int addNone) } // This must be called before fix_cell_offsets(). - if (insert_prefix_instructions(c, entryblock, cellfixedoffsets, nfreevars, code_flags)) { + if (insert_prefix_instructions(c, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) { goto error; } - int numdropped = fix_cell_offsets(c, entryblock, cellfixedoffsets); + int numdropped = fix_cell_offsets(c, g->g_entryblock, cellfixedoffsets); PyMem_Free(cellfixedoffsets); // At this point we're done with it. cellfixedoffsets = NULL; if (numdropped < 0) { @@ -8575,52 +8583,52 @@ assemble(struct compiler *c, int addNone) if (consts == NULL) { goto error; } - if (calculate_jump_targets(entryblock)) { + if (calculate_jump_targets(g->g_entryblock)) { goto error; } - if (optimize_cfg(entryblock, consts, c->c_const_cache)) { + if (optimize_cfg(g, consts, c->c_const_cache)) { goto error; } - if (trim_unused_consts(entryblock, consts)) { + if (trim_unused_consts(g->g_entryblock, consts)) { goto error; } if (duplicate_exits_without_lineno(g)) { return NULL; } - propagate_line_numbers(entryblock); - guarantee_lineno_for_exits(entryblock, c->u->u_firstlineno); + propagate_line_numbers(g->g_entryblock); + guarantee_lineno_for_exits(g->g_entryblock, c->u->u_firstlineno); - int maxdepth = stackdepth(entryblock, code_flags); + int maxdepth = stackdepth(g->g_entryblock, code_flags); if (maxdepth < 0) { goto error; } /* TO DO -- For 3.12, make sure that `maxdepth <= MAX_ALLOWED_STACK_USE` */ - if (label_exception_targets(entryblock)) { + if (label_exception_targets(g->g_entryblock)) { goto error; } - convert_exception_handlers_to_nops(entryblock); + convert_exception_handlers_to_nops(g->g_entryblock); if (push_cold_blocks_to_end(g, code_flags) < 0) { goto error; } - if (remove_redundant_jumps(entryblock) < 0) { + if (remove_redundant_jumps(g) < 0) { goto error; } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { clean_basic_block(b); } /* Order of basic blocks must have been determined by now */ - normalize_jumps(entryblock); + normalize_jumps(g->g_entryblock); - if (add_checks_for_loads_of_unknown_variables(entryblock, c) < 0) { + if (add_checks_for_loads_of_unknown_variables(g->g_entryblock, c) < 0) { goto error; } /* Can't modify the bytecode after computing jump offsets. */ - assemble_jump_offsets(entryblock); + assemble_jump_offsets(g->g_entryblock); /* Create assembler */ @@ -8628,7 +8636,7 @@ assemble(struct compiler *c, int addNone) goto error; /* Emit code. */ - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { for (int j = 0; j < b->b_iused; j++) if (!assemble_emit(&a, &b->b_instr[j])) goto error; @@ -8636,13 +8644,13 @@ assemble(struct compiler *c, int addNone) /* Emit location info */ a.a_lineno = c->u->u_firstlineno; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { for (int j = 0; j < b->b_iused; j++) if (!assemble_emit_location(&a, &b->b_instr[j])) goto error; } - if (!assemble_exception_table(&a, entryblock)) { + if (!assemble_exception_table(&a, g->g_entryblock)) { goto error; } if (_PyBytes_Resize(&a.a_except_table, a.a_except_table_off) < 0) { @@ -9352,16 +9360,19 @@ mark_reachable(basicblock *entryblock) { } static void -eliminate_empty_basic_blocks(basicblock *entryblock) { +eliminate_empty_basic_blocks(cfg_builder *g) { /* Eliminate empty blocks */ - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { basicblock *next = b->b_next; while (next && next->b_iused == 0) { next = next->b_next; } b->b_next = next; } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + while(g->g_entryblock && g->g_entryblock->b_iused == 0) { + g->g_entryblock = g->g_entryblock->b_next; + } + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { assert(b->b_iused > 0); for (int i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; @@ -9467,42 +9478,42 @@ calculate_jump_targets(basicblock *entryblock) */ static int -optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache) +optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) { assert(PyDict_CheckExact(const_cache)); - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (normalize_basic_block(b)) { return -1; } } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (extend_block(b)) { return -1; } } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (optimize_basic_block(const_cache, b, consts)) { return -1; } clean_basic_block(b); assert(b->b_predecessors == 0); } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (extend_block(b)) { return -1; } } - if (mark_reachable(entryblock)) { + if (mark_reachable(g->g_entryblock)) { return -1; } /* Delete unreachable instructions */ - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (b->b_predecessors == 0) { b->b_iused = 0; } } - eliminate_empty_basic_blocks(entryblock); - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + eliminate_empty_basic_blocks(g); + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { clean_basic_block(b); } return 0; @@ -9601,6 +9612,157 @@ duplicate_exits_without_lineno(cfg_builder *g) } +/* Access to compiler optimizations for unit tests. + * + * _PyCompile_OptimizeCfg takes an instruction list, constructs + * a CFG, optimizes it and converts back to an instruction list. + * + * An instruction list is a PyList where each item is either + * a tuple describing a single instruction: + * (opcode, oparg, lineno, end_lineno, col, end_col), or + * a jump target label marking the beginning of a basic block. + */ + +static int +instructions_to_cfg(PyObject *instructions, cfg_builder *g) +{ + assert(PyList_Check(instructions)); + + Py_ssize_t instr_size = PyList_GET_SIZE(instructions); + for (Py_ssize_t i = 0; i < instr_size; i++) { + PyObject *item = PyList_GET_ITEM(instructions, i); + if (PyLong_Check(item)) { + int lbl_id = PyLong_AsLong(item); + if (PyErr_Occurred()) { + return -1; + } + if (lbl_id <= 0 || lbl_id > instr_size) { + /* expect label in a reasonable range */ + PyErr_SetString(PyExc_ValueError, "label out of range"); + return -1; + } + jump_target_label lbl = {lbl_id}; + if (cfg_builder_use_label(g, lbl) < 0) { + return -1; + } + } + else { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 6) { + PyErr_SetString(PyExc_ValueError, "expected a 6-tuple"); + return -1; + } + int opcode = PyLong_AsLong(PyTuple_GET_ITEM(item, 0)); + if (PyErr_Occurred()) { + return -1; + } + int oparg = PyLong_AsLong(PyTuple_GET_ITEM(item, 1)); + if (PyErr_Occurred()) { + return -1; + } + struct location loc; + loc.lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 2)); + if (PyErr_Occurred()) { + return -1; + } + loc.end_lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 3)); + if (PyErr_Occurred()) { + return -1; + } + loc.col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 4)); + if (PyErr_Occurred()) { + return -1; + } + loc.end_col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 5)); + if (PyErr_Occurred()) { + return -1; + } + if (!cfg_builder_addop(g, opcode, oparg, loc)) { + return -1; + } + } + } + return 0; +} + +static PyObject * +cfg_to_instructions(cfg_builder *g) +{ + PyObject *instructions = PyList_New(0); + if (instructions == NULL) { + return NULL; + } + int lbl = 1; + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + b->b_label = lbl++; + } + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + PyObject *lbl = PyLong_FromLong(b->b_label); + if (lbl == NULL) { + goto error; + } + if (PyList_Append(instructions, lbl) != 0) { + Py_DECREF(lbl); + goto error; + } + Py_DECREF(lbl); + for (int i = 0; i < b->b_iused; i++) { + struct instr *instr = &b->b_instr[i]; + struct location loc = instr->i_loc; + int arg = HAS_TARGET(instr->i_opcode) ? instr->i_target->b_label : instr->i_oparg; + PyObject *inst_tuple = Py_BuildValue( + "(iiiiii)", instr->i_opcode, arg, + loc.lineno, loc.end_lineno, + loc.col_offset, loc.end_col_offset); + if (inst_tuple == NULL) { + goto error; + } + + if (PyList_Append(instructions, inst_tuple) != 0) { + Py_DECREF(inst_tuple); + goto error; + } + Py_DECREF(inst_tuple); + } + } + + return instructions; +error: + Py_DECREF(instructions); + return NULL; +} + + +PyObject * +_PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts) +{ + PyObject *res = NULL; + PyObject *const_cache = NULL; + cfg_builder g; + memset(&g, 0, sizeof(cfg_builder)); + if (cfg_builder_init(&g) < 0) { + goto error; + } + if (instructions_to_cfg(instructions, &g) < 0) { + goto error; + } + const_cache = PyDict_New(); + if (const_cache == NULL) { + goto error; + } + if (calculate_jump_targets(g.g_entryblock)) { + goto error; + } + if (optimize_cfg(&g, consts, const_cache) < 0) { + goto error; + } + res = cfg_to_instructions(&g); +error: + Py_XDECREF(const_cache); + cfg_builder_fini(&g); + return res; +} + + /* Retained for API compatibility. * Optimization is now done in optimize_cfg */