Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-112962: in dis module, put cache information in the Instruction instead of creating fake Instructions to represent it #113016

Merged
merged 10 commits into from
Dec 13, 2023
17 changes: 14 additions & 3 deletions Doc/library/dis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,16 @@ operation is being performed, so the intermediate analysis object isn't useful:
source line information (if any) is taken directly from the disassembled code
object.

The *show_caches* and *adaptive* parameters work as they do in :func:`dis`.
The *adaptive* parameter works as it does in :func:`dis`.

.. versionadded:: 3.4

.. versionchanged:: 3.11
Added the *show_caches* and *adaptive* parameters.

.. versionchanged:: 3.13
The *show_caches* parameter is deprecated and has no effect.
iritkatriel marked this conversation as resolved.
Show resolved Hide resolved


.. function:: findlinestarts(code)

Expand Down Expand Up @@ -482,6 +485,14 @@ details of bytecode instructions as :class:`Instruction` instances:
:class:`dis.Positions` object holding the
start and end locations that are covered by this instruction.

.. data::cache_info

Information about the cache entries of this instruction, as
triplets of the form ``(name, size, data)``, where the ``name``
and ``size`` describe the cache format and data is the contents
of the cache. It is ``None`` if the instruction does not have
caches.
iritkatriel marked this conversation as resolved.
Show resolved Hide resolved

.. versionadded:: 3.4

.. versionchanged:: 3.11
Expand All @@ -493,8 +504,8 @@ details of bytecode instructions as :class:`Instruction` instances:
Changed field ``starts_line``.

Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and
``line_number``.
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``,
``line_number`` and ``cache_info``.


.. class:: Positions
Expand Down
74 changes: 44 additions & 30 deletions Lib/dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,10 @@ def show_code(co, *, file=None):
'starts_line',
'line_number',
'label',
'positions'
'positions',
'cache_info',
],
defaults=[None, None]
defaults=[None, None, None]
)

_Instruction.opname.__doc__ = "Human readable name for operation"
Expand All @@ -286,6 +287,7 @@ def show_code(co, *, file=None):
_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction"

_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
"start end target depth lasti")
Expand Down Expand Up @@ -334,6 +336,8 @@ class Instruction(_Instruction):
label - A label if this instruction is a jump target, otherwise None
positions - Optional dis.Positions object holding the span of source code
covered by this instruction
cache_info - information about the format and content of the instruction's cache
entries (if any)
"""

@property
Expand Down Expand Up @@ -570,10 +574,10 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
linestarts=linestarts,
line_offset=line_offset,
co_positions=co.co_positions(),
show_caches=show_caches,
original_code=original_code,
arg_resolver=arg_resolver)


iritkatriel marked this conversation as resolved.
Show resolved Hide resolved
def _get_const_value(op, arg, co_consts):
"""Helper to get the value of the const in a hasconst op.

Expand Down Expand Up @@ -645,8 +649,7 @@ def _is_backward_jump(op):
'ENTER_EXECUTOR')

def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
show_caches=False, original_code=None, labels_map=None,
arg_resolver=None):
original_code=None, labels_map=None, arg_resolver=None):
"""Iterate over the instructions in a bytecode string.

Generates a sequence of Instruction namedtuples giving the details of each
Expand Down Expand Up @@ -682,32 +685,28 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
else:
argval, argrepr = arg, repr(arg)

instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
offset, start_offset, starts_line, line_number,
labels_map.get(offset, None), positions)

caches = _get_cache_size(_all_opname[deop])
# Advance the co_positions iterator:
for _ in range(caches):
next(co_positions, ())

if caches:
cache_info = []
for name, size in _cache_format[opname[deop]].items():
data = code[offset + 2: offset + 2 + 2 * size]
cache_info.append((name, size, data))
else:
cache_info = None

yield Instruction(_all_opname[op], op, arg, argval, argrepr,
offset, start_offset, starts_line, line_number,
labels_map.get(offset, None), positions)
labels_map.get(offset, None), positions, cache_info)


caches = _get_cache_size(_all_opname[deop])
if not caches:
continue
if not show_caches:
# We still need to advance the co_positions iterator:
for _ in range(caches):
next(co_positions, ())
continue
for name, size in _cache_format[opname[deop]].items():
for i in range(size):
offset += 2
# Only show the fancy argrepr for a CACHE instruction when it's
# the first entry for a particular cache value:
if i == 0:
data = code[offset: offset + 2 * size]
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
else:
argrepr = ""
yield Instruction(
"CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None,
Positions(*next(co_positions, ()))
)

def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
show_offsets=False):
Expand Down Expand Up @@ -787,7 +786,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
instrs = _get_instructions_bytes(code, linestarts=linestarts,
line_offset=line_offset,
co_positions=co_positions,
show_caches=show_caches,
original_code=original_code,
labels_map=labels_map,
arg_resolver=arg_resolver)
Expand All @@ -805,6 +803,23 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False,
is_current_instr = instr.offset <= lasti \
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
formatter.print_instruction(instr, is_current_instr)
deop = _deoptop(instr.opcode)
if show_caches and instr.cache_info:
offset = instr.offset
for name, size, data in instr.cache_info:
for i in range(size):
offset += 2
# Only show the fancy argrepr for a CACHE instruction when it's
# the first entry for a particular cache value:
if i == 0:
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
else:
argrepr = ""
formatter.print_instruction(
Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
False, None, None, instr.positions),
is_current_instr)
iritkatriel marked this conversation as resolved.
Show resolved Hide resolved

formatter.print_exception_table(exception_entries)

def _disassemble_str(source, **kwargs):
Expand Down Expand Up @@ -952,7 +967,6 @@ def __iter__(self):
linestarts=self._linestarts,
line_offset=self._line_offset,
co_positions=co.co_positions(),
show_caches=self.show_caches,
original_code=original_code,
labels_map=labels_map,
arg_resolver=arg_resolver)
Expand Down
11 changes: 11 additions & 0 deletions Lib/test/support/bytecode_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@

_UNSPECIFIED = object()

def instructions_with_positions(instrs, co_positions):
# Return (instr, positions) pairs from the instrs list and co_positions
# iterator. The latter contains items for cache lines and the former
# doesn't, so those need to be skipped.

co_positions = co_positions or iter(())
for instr in instrs:
yield instr, next(co_positions, ())
for _ in (instr.cache_info or ()):
next(co_positions, ())

class BytecodeTestCase(unittest.TestCase):
"""Custom assertion methods for inspecting bytecode."""

Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@
gc_collect)
from test.support.script_helper import assert_python_ok
from test.support import threading_helper
from test.support.bytecode_helper import (BytecodeTestCase,
instructions_with_positions)
from opcode import opmap, opname
COPY_FREE_VARS = opmap['COPY_FREE_VARS']

Expand Down Expand Up @@ -384,10 +386,8 @@ def test_co_positions_artificial_instructions(self):
code = traceback.tb_frame.f_code

artificial_instructions = []
for instr, positions in zip(
dis.get_instructions(code, show_caches=True),
code.co_positions(),
strict=True
for instr, positions in instructions_with_positions(
dis.get_instructions(code), code.co_positions()
):
# If any of the positions is None, then all have to
# be None as well for the case above. There are still
Expand Down
5 changes: 3 additions & 2 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from test import support
from test.support import (script_helper, requires_debug_ranges,
requires_specialization, Py_C_RECURSION_LIMIT)
from test.support.bytecode_helper import instructions_with_positions
from test.support.os_helper import FakePath

class TestSpecifics(unittest.TestCase):
Expand Down Expand Up @@ -1346,8 +1347,8 @@ def generic_visit(self, node):
def assertOpcodeSourcePositionIs(self, code, opcode,
line, end_line, column, end_column, occurrence=1):

for instr, position in zip(
dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True
for instr, position in instructions_with_positions(
dis.Bytecode(code), code.co_positions()
):
if instr.opname == opcode:
occurrence -= 1
Expand Down
41 changes: 34 additions & 7 deletions Lib/test/test_dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import opcode

CACHE = dis.opmap["CACHE"]

def get_tb():
def _error():
Expand Down Expand Up @@ -1227,9 +1228,9 @@ def f():
else:
# "copy" the code to un-quicken it:
f.__code__ = f.__code__.replace()
for instruction in dis.get_instructions(
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
f, show_caches=True, adaptive=adaptive
):
), show_caches=True):
if instruction.opname == "CACHE":
yield instruction.argrepr

Expand Down Expand Up @@ -1262,7 +1263,8 @@ def f():
# However, this might change in the future. So we explicitly try to find
# a CACHE entry in the instructions. If we can't do that, fail the test

for inst in dis.get_instructions(f, show_caches=True):
for inst in _unroll_caches_as_Instructions(
dis.get_instructions(f, show_caches=True), show_caches=True):
if inst.opname == "CACHE":
op_offset = inst.offset - 2
cache_offset = inst.offset
Expand Down Expand Up @@ -1775,8 +1777,8 @@ def simple(): pass
class InstructionTestCase(BytecodeTestCase):

def assertInstructionsEqual(self, instrs_1, instrs_2, /):
instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1]
instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2]
instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1]
instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2]
self.assertEqual(instrs_1, instrs_2)

class InstructionTests(InstructionTestCase):
Expand Down Expand Up @@ -1890,9 +1892,9 @@ def roots(a, b, c):
instruction.positions.col_offset,
instruction.positions.end_col_offset,
)
for instruction in dis.get_instructions(
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
code, adaptive=adaptive, show_caches=show_caches
)
), show_caches=show_caches)
]
self.assertEqual(co_positions, dis_positions)

Expand Down Expand Up @@ -2233,6 +2235,31 @@ def get_disassembly(self, tb):
dis.distb(tb, file=output)
return output.getvalue()

def _unroll_caches_as_Instructions(instrs, show_caches=False):
# Cache entries are no longer reported by dis as fake instructions,
# but some tests assume that do. We should rewrite the tests to assume
# the new API, but it will be clearer to keep the tests working as
# before and do that in a separate PR.

for instr in instrs:
yield instr
if not show_caches:
continue

offset = instr.offset
for name, size, data in (instr.cache_info or ()):
for i in range(size):
offset += 2
# Only show the fancy argrepr for a CACHE instruction when it's
# the first entry for a particular cache value:
if i == 0:
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
else:
argrepr = ""

yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
False, None, None, instr.positions)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`dis` module functions add cache information to the
:class:`~dis.Instruction` instance rather than creating fake
:class:`~dis.Instruction` instances to represent the cache entries.