Skip to content

Commit

Permalink
gh-102676: Add more convenience properties to dis.Instruction (#103969
Browse files Browse the repository at this point in the history
)

Adds start_offset, cache_offset, end_offset, baseopcode,
baseopname, jump_target and oparg to dis.Instruction.

Also slightly improves the disassembly output by allowing
opnames to overflow into the space reserved for opargs.
  • Loading branch information
tomasr8 committed Jun 11, 2023
1 parent 845e593 commit 18d16e9
Show file tree
Hide file tree
Showing 4 changed files with 435 additions and 196 deletions.
42 changes: 42 additions & 0 deletions Doc/library/dis.rst
Expand Up @@ -342,10 +342,25 @@ details of bytecode instructions as :class:`Instruction` instances:
human readable name for operation


.. data:: baseopcode

numeric code for the base operation if operation is specialized;
otherwise equal to :data:`opcode`


.. data:: baseopname

human readable name for the base operation if operation is specialized;
otherwise equal to :data:`opname`


.. data:: arg

numeric argument to operation (if any), otherwise ``None``

.. data:: oparg

alias for :data:`arg`

.. data:: argval

Expand All @@ -363,6 +378,22 @@ details of bytecode instructions as :class:`Instruction` instances:
start index of operation within bytecode sequence


.. data:: start_offset

start index of operation within bytecode sequence, including prefixed
``EXTENDED_ARG`` operations if present; otherwise equal to :data:`offset`


.. data:: cache_offset

start index of the cache entries following the operation


.. data:: end_offset

end index of the cache entries following the operation


.. data:: starts_line

line started by this opcode (if any), otherwise ``None``
Expand All @@ -373,6 +404,12 @@ details of bytecode instructions as :class:`Instruction` instances:
``True`` if other code jumps to here, otherwise ``False``


.. data:: jump_target

bytecode index of the jump target if this is a jump operation,
otherwise ``None``


.. data:: positions

:class:`dis.Positions` object holding the
Expand All @@ -384,6 +421,11 @@ details of bytecode instructions as :class:`Instruction` instances:

Field ``positions`` is added.

.. versionchanged:: 3.13

Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
``baseopname``, ``baseopcode``, ``jump_target`` and ``oparg``.


.. class:: Positions

Expand Down
110 changes: 90 additions & 20 deletions Lib/dis.py
Expand Up @@ -265,6 +265,7 @@ def show_code(co, *, file=None):
'argval',
'argrepr',
'offset',
'start_offset',
'starts_line',
'is_jump_target',
'positions'
Expand All @@ -278,6 +279,10 @@ def show_code(co, *, file=None):
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.start_offset.__doc__ = (
"Start index of operation within bytecode sequence, including extended args if present; "
"otherwise equal to Instruction.offset"
)
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
Expand All @@ -288,8 +293,26 @@ def show_code(co, *, file=None):
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5

def _get_jump_target(op, arg, offset):
    """Return the bytecode offset an instruction jumps to, or None.

    *op* may be a specialized opcode; it is de-specialized before being
    classified.  Relative jumps are resolved against the end of the
    instruction's inline cache entries; absolute jumps are simply scaled
    from code units to bytes.  Non-jump instructions yield None.
    """
    base_op = _deoptop(op)
    # Looked up unconditionally, exactly as callers have always relied on.
    cache_count = _inline_cache_entries[base_op]
    if base_op in hasjrel:
        # Backward jumps store a positive distance; flip the sign.
        delta = -arg if _is_backward_jump(base_op) else arg
        # Skip this 2-byte instruction, apply the jump, then skip its caches.
        return offset + 2 + delta*2 + 2 * cache_count
    if base_op in hasjabs:
        return arg*2
    return None

class Instruction(_Instruction):
"""Details for a bytecode operation
"""Details for a bytecode operation.
Defined fields:
opname - human readable name for operation
Expand All @@ -298,14 +321,55 @@ class Instruction(_Instruction):
argval - resolved arg value (if known), otherwise same as arg
argrepr - human readable description of operation argument
offset - start index of operation within bytecode sequence
start_offset - start index of operation within bytecode sequence including extended args if present;
otherwise equal to Instruction.offset
starts_line - line started by this opcode (if any), otherwise None
is_jump_target - True if other code jumps to here, otherwise False
positions - Optional dis.Positions object holding the span of source code
covered by this instruction
"""

@property
def oparg(self):
    """Numeric argument of the operation; a plain alias of ``arg``."""
    return self.arg

@property
def baseopcode(self):
    """Numeric code of the base (de-specialized) operation.

    Computed by passing ``opcode`` through ``_deoptop``.
    """
    base_code = _deoptop(self.opcode)
    return base_code

@property
def baseopname(self):
    """Human readable name of the base (de-specialized) operation.

    Looked up in the ``opname`` table using ``baseopcode``.
    """
    base_code = self.baseopcode
    return opname[base_code]

@property
def cache_offset(self):
    """Offset at which this operation's inline cache entries begin.

    This is the offset immediately after the 2-byte instruction itself.
    """
    instruction_size = 2
    return self.offset + instruction_size

@property
def end_offset(self):
    """End index of the cache entries following the operation.

    Equal to ``cache_offset`` plus two bytes per inline cache entry.
    """
    # Index the cache table with the de-specialized opcode, the same way
    # _get_jump_target and _get_instructions_bytes do; indexing with a
    # specialized opcode can report the wrong number of cache entries.
    cache_count = _inline_cache_entries[_deoptop(self.opcode)]
    return self.cache_offset + cache_count*2

@property
def jump_target(self):
    """Bytecode offset this instruction jumps to, or None.

    Delegates to ``_get_jump_target`` using this instruction's opcode,
    argument and offset.
    """
    target = _get_jump_target(self.opcode, self.arg, self.offset)
    return target

def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
"""Format instruction details for inclusion in disassembly output
"""Format instruction details for inclusion in disassembly output.
*lineno_width* sets the width of the line number field (0 omits it)
*mark_as_current* inserts a '-->' marker arrow as part of the line
Expand Down Expand Up @@ -335,12 +399,19 @@ def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
fields.append(self.opname.ljust(_OPNAME_WIDTH))
# Column: Opcode argument
if self.arg is not None:
fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
arg = repr(self.arg)
# If opname is longer than _OPNAME_WIDTH, we allow it to overflow into
# the space reserved for oparg. This results in fewer misaligned opargs
# in the disassembly output.
opname_excess = max(0, len(self.opname) - _OPNAME_WIDTH)
fields.append(repr(self.arg).rjust(_OPARG_WIDTH - opname_excess))
# Column: Opcode argument details
if self.argrepr:
fields.append('(' + self.argrepr + ')')
return ' '.join(fields).rstrip()

def __str__(self):
    """Return the disassembly line for this instruction, using default widths."""
    formatted = self._disassemble()
    return formatted

def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
"""Iterator for the opcodes in methods, functions or code
Expand Down Expand Up @@ -454,7 +525,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
for i in range(start, end):
labels.add(target)
starts_line = None
for offset, op, arg in _unpack_opargs(code):
for offset, start_offset, op, arg in _unpack_opargs(code):
if linestarts is not None:
starts_line = linestarts.get(offset, None)
if starts_line is not None:
Expand Down Expand Up @@ -526,7 +597,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
argrepr = _intrinsic_2_descs[arg]
yield Instruction(_all_opname[op], op,
arg, argval, argrepr,
offset, starts_line, is_jump_target, positions)
offset, start_offset, starts_line, is_jump_target, positions)
caches = _inline_cache_entries[deop]
if not caches:
continue
Expand All @@ -546,7 +617,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
else:
argrepr = ""
yield Instruction(
"CACHE", CACHE, 0, None, argrepr, offset, None, False,
"CACHE", CACHE, 0, None, argrepr, offset, offset, None, False,
Positions(*next(co_positions, ()))
)

Expand Down Expand Up @@ -632,6 +703,7 @@ def _disassemble_str(source, **kwargs):

def _unpack_opargs(code):
extended_arg = 0
extended_args_offset = 0 # Number of EXTENDED_ARG instructions preceding the current instruction
caches = 0
for i in range(0, len(code), 2):
# Skip inline CACHE entries:
Expand All @@ -652,7 +724,13 @@ def _unpack_opargs(code):
else:
arg = None
extended_arg = 0
yield (i, op, arg)
if deop == EXTENDED_ARG:
extended_args_offset += 1
yield (i, i, op, arg)
else:
start_offset = i - extended_args_offset*2
yield (i, start_offset, op, arg)
extended_args_offset = 0

def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
Expand All @@ -661,18 +739,10 @@ def findlabels(code):
"""
labels = []
for offset, op, arg in _unpack_opargs(code):
for offset, _, op, arg in _unpack_opargs(code):
if arg is not None:
deop = _deoptop(op)
caches = _inline_cache_entries[deop]
if deop in hasjrel:
if _is_backward_jump(deop):
arg = -arg
label = offset + 2 + arg*2
label += 2 * caches
elif deop in hasjabs:
label = arg*2
else:
label = _get_jump_target(op, arg, offset)
if label is None:
continue
if label not in labels:
labels.append(label)
Expand Down Expand Up @@ -701,7 +771,7 @@ def _find_imports(co):

consts = co.co_consts
names = co.co_names
opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code)
if op != EXTENDED_ARG]
for i, (op, oparg) in enumerate(opargs):
if op == IMPORT_NAME and i >= 2:
Expand All @@ -723,7 +793,7 @@ def _find_store_names(co):
}

names = co.co_names
for _, op, arg in _unpack_opargs(co.co_code):
for _, _, op, arg in _unpack_opargs(co.co_code):
if op in STORE_OPS:
yield names[arg]

Expand Down

0 comments on commit 18d16e9

Please sign in to comment.