Skip to content

Commit

Permalink
gh-99254: remove all unused consts from code objects (GH-99255)
Browse files Browse the repository at this point in the history
  • Loading branch information
iritkatriel committed Nov 11, 2022
1 parent faf7dfa commit 3dd6ee2
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 33 deletions.
3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Expand Up @@ -425,6 +425,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.12a1 3509 (Conditional jumps only jump forward)
# Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack)
# Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction)
# Python 3.12a1 3512 (Remove all unused consts from code objects)

# Python 3.13 will start with 3550

Expand All @@ -437,7 +438,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3511).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n'

_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

Expand Down
38 changes: 36 additions & 2 deletions Lib/test/test_compile.py
Expand Up @@ -670,7 +670,7 @@ def test_merge_code_attrs(self):
self.assertIs(f1.__code__.co_linetable, f2.__code__.co_linetable)

@support.cpython_only
def test_strip_unused_consts(self):
def test_remove_unused_consts(self):
def f():
"docstring"
if True:
Expand All @@ -679,7 +679,41 @@ def f():
return "unused"

self.assertEqual(f.__code__.co_consts,
("docstring", True, "used"))
("docstring", "used"))

@support.cpython_only
def test_remove_unused_consts_no_docstring(self):
# the first item (None for no docstring in this case) is
# always retained.
def f():
if True:
return "used"
else:
return "unused"

self.assertEqual(f.__code__.co_consts,
(None, "used"))

@support.cpython_only
def test_remove_unused_consts_extended_args(self):
N = 1000
code = ["def f():\n"]
code.append("\ts = ''\n")
code.append("\tfor i in range(1):\n")
for i in range(N):
code.append(f"\t\tif True: s += 't{i}'\n")
code.append(f"\t\tif False: s += 'f{i}'\n")
code.append("\treturn s\n")

code = "".join(code)
g = {}
eval(compile(code, "file.py", "exec"), g)
exec(code, g)
f = g['f']
expected = tuple([None, '', 1] + [f't{i}' for i in range(N)])
self.assertEqual(f.__code__.co_consts, expected)
expected = "".join(expected[3:])
self.assertEqual(expected, f())

# Stripping unused constants is not a strict requirement for the
# Python semantics, it's a more an implementation detail.
Expand Down
22 changes: 11 additions & 11 deletions Lib/test/test_dis.py
Expand Up @@ -168,13 +168,13 @@ def bug1333982(x=[]):
%3d RESUME 0
%3d LOAD_ASSERTION_ERROR
LOAD_CONST 2 (<code object <listcomp> at 0x..., file "%s", line %d>)
LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>)
MAKE_FUNCTION 0
LOAD_FAST 0 (x)
GET_ITER
CALL 0
%3d LOAD_CONST 3 (1)
%3d LOAD_CONST 2 (1)
%3d BINARY_OP 0 (+)
CALL 0
Expand Down Expand Up @@ -1446,9 +1446,9 @@ def jumpy():
# End fodder for opinfo generation tests
expected_outer_line = 1
_line_offset = outer.__code__.co_firstlineno - 1
code_object_f = outer.__code__.co_consts[3]
code_object_f = outer.__code__.co_consts[1]
expected_f_line = code_object_f.co_firstlineno - _line_offset
code_object_inner = code_object_f.co_consts[3]
code_object_inner = code_object_f.co_consts[1]
expected_inner_line = code_object_inner.co_firstlineno - _line_offset
expected_jumpy_line = 1

Expand Down Expand Up @@ -1485,21 +1485,21 @@ def _prepare_test_cases():
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=4, starts_line=1, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=7, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=8, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=12, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=20, starts_line=7, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=32, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=34, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=40, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_MAP', opcode=105, arg=0, argval=0, argrepr='', offset=42, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='CALL', opcode=171, arg=7, argval=7, argrepr='', offset=46, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=56, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='f', argrepr='f', offset=58, starts_line=8, is_jump_target=False, positions=None),
Expand All @@ -1511,13 +1511,13 @@ def _prepare_test_cases():
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=4, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=12, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=18, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=22, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=24, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=26, starts_line=5, is_jump_target=False, positions=None),
Expand Down
@@ -0,0 +1 @@
The compiler now removes all unused constants from code objects (except the first one, which may be a docstring).
111 changes: 92 additions & 19 deletions Python/compile.c
Expand Up @@ -8472,7 +8472,7 @@ static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);

static int
trim_unused_consts(basicblock *entryblock, PyObject *consts);
remove_unused_consts(basicblock *entryblock, PyObject *consts);

/* Duplicates exit BBs, so that line numbers can be propagated to them */
static int
Expand Down Expand Up @@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone)
if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) {
goto error;
}
if (remove_unused_consts(g->g_entryblock, consts)) {
goto error;
}

/** line numbers (TODO: move this before optimization stage) */
if (duplicate_exits_without_lineno(g) < 0) {
Expand Down Expand Up @@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone)
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(g->g_entryblock);

if (trim_unused_consts(g->g_entryblock, consts)) {
goto error;
}

/* Create assembler */
if (!assemble_init(&a, c->u->u_firstlineno))
goto error;
Expand Down Expand Up @@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
return 0;
}

// Remove trailing unused constants.

static int
trim_unused_consts(basicblock *entryblock, PyObject *consts)
remove_unused_consts(basicblock *entryblock, PyObject *consts)
{
assert(PyList_CheckExact(consts));
Py_ssize_t nconsts = PyList_GET_SIZE(consts);
if (nconsts == 0) {
return 0; /* nothing to do */
}

Py_ssize_t *index_map = NULL;
Py_ssize_t *reverse_index_map = NULL;
int err = 1;

index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 1; i < nconsts; i++) {
index_map[i] = -1;
}
// The first constant may be docstring; keep it always.
int max_const_index = 0;
index_map[0] = 0;

/* mark used consts */
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if ((b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) &&
b->b_instr[i].i_oparg > max_const_index) {
max_const_index = b->b_instr[i].i_oparg;
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {

int index = b->b_instr[i].i_oparg;
index_map[index] = index;
}
}
}
if (max_const_index+1 < PyList_GET_SIZE(consts)) {
//fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
// max_const_index, (int)PyList_GET_SIZE(consts));
if (PyList_SetSlice(consts, max_const_index+1,
PyList_GET_SIZE(consts), NULL) < 0) {
return 1;
/* now index_map[i] == i if consts[i] is used, -1 otherwise */

/* condense consts */
Py_ssize_t n_used_consts = 0;
for (int i = 0; i < nconsts; i++) {
if (index_map[i] != -1) {
assert(index_map[i] == i);
index_map[n_used_consts++] = index_map[i];
}
}
return 0;
if (n_used_consts == nconsts) {
/* nothing to do */
err = 0;
goto end;
}

/* move all used consts to the beginning of the consts list */
assert(n_used_consts < nconsts);
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
Py_ssize_t old_index = index_map[i];
assert(i <= old_index && old_index < nconsts);
if (i != old_index) {
PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
assert(value != NULL);
PyList_SetItem(consts, i, Py_NewRef(value));
}
}

/* truncate the consts list at its new size */
if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
goto end;
}

/* adjust const indices in the bytecode */
reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (reverse_index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 0; i < nconsts; i++) {
reverse_index_map[i] = -1;
}
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
assert(index_map[i] != -1);
assert(reverse_index_map[index_map[i]] == -1);
reverse_index_map[index_map[i]] = i;
}

for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {

int index = b->b_instr[i].i_oparg;
assert(reverse_index_map[index] >= 0);
assert(reverse_index_map[index] < n_used_consts);
b->b_instr[i].i_oparg = (int)reverse_index_map[index];
}
}
}

err = 0;
end:
PyMem_Free(index_map);
PyMem_Free(reverse_index_map);
return err;
}

static inline int
Expand Down

0 comments on commit 3dd6ee2

Please sign in to comment.