From 8fbbb284b0725e2aceeee07058a1206aeaa54888 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Tue, 16 Jan 2024 07:51:46 -0500 Subject: [PATCH] wip: sysmon for branches --- coverage/sysmon.py | 226 ++++++++++++++++++++++++++++++--------------- lab/run_sysmon.py | 69 +++++++++----- 2 files changed, 200 insertions(+), 95 deletions(-) diff --git a/coverage/sysmon.py b/coverage/sysmon.py index 65c5b6e77..9a19827b1 100644 --- a/coverage/sysmon.py +++ b/coverage/sysmon.py @@ -5,12 +5,12 @@ from __future__ import annotations +import collections import functools import inspect +import itertools import os -import os.path import sys -import threading import traceback from dataclasses import dataclass @@ -23,7 +23,7 @@ cast, ) -from coverage.debug import short_filename, short_stack +from coverage.debug import log, short_filename, short_stack from coverage.types import ( AnyCallable, TArc, @@ -37,7 +37,8 @@ # pylint: disable=unused-argument -LOG = False +# $set_env.py: COVERAGE_LOG_SYSMON - Trace sys.monitoring calls +LOG = bool(int(os.getenv("COVERAGE_LOG_SYSMON", "0"))) # This module will be imported in all versions of Python, but only used in 3.12+ # It will be type-checked for 3.12, but not for earlier versions. @@ -61,7 +62,11 @@ def __init__(self, wrapped: Any, namespace: str) -> None: def __getattr__(self, name: str) -> Callable[..., Any]: def _wrapped(*args: Any, **kwargs: Any) -> Any: - log(f"{self.namespace}.{name}{args}{kwargs}") + self_prefix = "" + caller_self = inspect.stack()[1][0].f_locals.get("self") + if caller_self is not None: + self_prefix = f"{id(caller_self):#x}: " + log(f"{self_prefix}{self.namespace}.{name}{args}{kwargs}") return getattr(self.wrapped, name)(*args, **kwargs) return _wrapped @@ -70,30 +75,11 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any: assert sys_monitoring is not None short_stack = functools.partial( - short_stack, full=True, short_filenames=True, frame_ids=True, + short_stack, + full=True, + short_filenames=True, + frame_ids=True, ) - seen_threads: set[int] = set() - - def log(msg: str) -> None: - """Write a message to our detailed debugging log(s).""" - # Thread ids are reused across processes? - # Make a shorter number more likely to be unique. - pid = os.getpid() - tid = cast(int, threading.current_thread().ident) - tslug = f"{(pid * tid) % 9_999_991:07d}" - if tid not in seen_threads: - seen_threads.add(tid) - log(f"New thread {tid} {tslug}:\n{short_stack()}") - # log_seq = int(os.getenv("PANSEQ", "0")) - # root = f"/tmp/pan.{log_seq:03d}" - for filename in [ - "/tmp/foo.out", - # f"{root}.out", - # f"{root}-{pid}.out", - # f"{root}-{pid}-{tslug}.out", - ]: - with open(filename, "a") as f: - print(f"{pid}:{tslug}: {msg}", file=f, flush=True) def arg_repr(arg: Any) -> str: """Make a customized repr for logged values.""" @@ -114,17 +100,29 @@ def _wrapped(self: Any, *args: Any) -> Any: try: # log(f"{method.__name__}() stack:\n{short_stack()}") args_reprs = [] + code = None for name, arg in zip(names, args): if name is None: continue + if name == "code": + code = arg args_reprs.append(f"{name}={arg_repr(arg)}") + if name.endswith("@") and code is not None: + try: + line = bytes_to_lines(code)[arg] + except LookupError: + pass + else: + args_reprs[-1] += f"#{line}" log(f"{id(self):#x}:{method.__name__}({', '.join(args_reprs)})") ret = method(self, *args) # log(f" end {id(self):#x}:{method.__name__}({', '.join(args_reprs)})") return ret except Exception as exc: log(f"!!{exc.__class__.__name__}: {exc}") - log("".join(traceback.format_exception(exc))) # pylint: disable=[no-value-for-parameter] + # fmt: off + log("".join(traceback.format_exception(exc))) # pylint: disable=[no-value-for-parameter] + # fmt: on try: assert sys_monitoring is not None sys_monitoring.set_events(sys.monitoring.COVERAGE_ID, 0) @@ -139,7 +137,9 @@ def _wrapped(self: Any, *args: Any) -> Any: else: - def log(msg: str) -> None: + def log( # pylint: disable=function-redefined + msg: str, stack: bool = False + ) -> None: """Write a message to our detailed debugging log(s), but not really.""" def panopticon(*names: str | None) -> AnyCallable: @@ -156,9 +156,9 @@ class CodeInfo: """The information we want about each code object.""" tracing: bool - file_data: TTraceFileData | None - # TODO: what is byte_to_line for? - byte_to_line: dict[int, int] | None + file_data: TTraceFileData + byte_to_line: dict[int, int] + branch_dests: dict[int, set[int]] def bytes_to_lines(code: CodeType) -> dict[int, int]: @@ -174,9 +174,14 @@ def bytes_to_lines(code: CodeType) -> dict[int, int]: class SysMonitor(TracerCore): """Python implementation of the raw data tracer for PEP669 implementations.""" + serial = itertools.count() + # One of these will be used across threads. Be careful. def __init__(self, tool_id: int) -> None: + self.number = next(self.serial) + log(f"SysMonitor() #{self.number}: {id(self):#x}") + # Attributes set from the collector: self.data: TTraceData self.trace_arcs = False @@ -201,17 +206,16 @@ def __init__(self, tool_id: int) -> None: self.local_event_codes: dict[int, CodeType] = {} self.sysmon_on = False - self.stats = { - "starts": 0, - } - self.stopped = False self._activity = False def __repr__(self) -> str: points = sum(len(v) for v in self.data.values()) files = len(self.data) - return f"" + return ( + f"" + ) @panopticon() def start(self) -> None: @@ -221,17 +225,28 @@ def start(self) -> None: assert sys_monitoring is not None sys_monitoring.use_tool_id(self.myid, "coverage.py") register = functools.partial(sys_monitoring.register_callback, self.myid) - events = sys_monitoring.events + events = sys.monitoring.events if self.trace_arcs: sys_monitoring.set_events( self.myid, - events.PY_START | events.PY_UNWIND, + events.PY_START + | events.PY_UNWIND + | events.EXCEPTION_HANDLED + | events.RAISE + | events.RERAISE + | events.PY_THROW, ) - register(events.PY_START, self.sysmon_py_start) - register(events.PY_RESUME, self.sysmon_py_resume_arcs) - register(events.PY_RETURN, self.sysmon_py_return_arcs) - register(events.PY_UNWIND, self.sysmon_py_unwind_arcs) + register(events.BRANCH, self.sysmon_branch) + register(events.EXCEPTION_HANDLED, self.sysmon_exception_handled) + register(events.JUMP, self.sysmon_jump) register(events.LINE, self.sysmon_line_arcs) + register(events.PY_RESUME, self.sysmon_py_resume) + register(events.PY_RETURN, self.sysmon_py_return) + register(events.PY_START, self.sysmon_py_start) + register(events.PY_THROW, self.sysmon_py_throw) + register(events.PY_UNWIND, self.sysmon_py_unwind) + register(events.RAISE, self.sysmon_raise) + register(events.RERAISE, self.sysmon_reraise) else: sys_monitoring.set_events(self.myid, events.PY_START) register(events.PY_START, self.sysmon_py_start) @@ -252,6 +267,7 @@ def stop(self) -> None: for code in self.local_event_codes.values(): sys_monitoring.set_local_events(self.myid, code, 0) self.local_event_codes = {} + sys_monitoring.free_tool_id(self.myid) @panopticon() @@ -291,7 +307,6 @@ def sysmon_py_start(self, code: CodeType, instruction_offset: int) -> MonitorRet """Handle sys.monitoring.events.PY_START events.""" # Entering a new frame. Decide if we should trace in this file. self._activity = True - self.stats["starts"] += 1 code_info = self.code_infos.get(id(code)) tracing_code: bool | None = None @@ -320,13 +335,15 @@ def sysmon_py_start(self, code: CodeType, instruction_offset: int) -> MonitorRet file_data = self.data[tracename] b2l = bytes_to_lines(code) else: - file_data = None - b2l = None + # These won't be used, but empties simplify the type checking. + file_data = set() + b2l = {} self.code_infos[id(code)] = CodeInfo( tracing=tracing_code, file_data=file_data, byte_to_line=b2l, + branch_dests=collections.defaultdict(set), ) self.code_objects.append(code) @@ -334,17 +351,10 @@ def sysmon_py_start(self, code: CodeType, instruction_offset: int) -> MonitorRet events = sys.monitoring.events if self.sysmon_on: assert sys_monitoring is not None - sys_monitoring.set_local_events( - self.myid, - code, - events.PY_RETURN - # - | events.PY_RESUME - # | events.PY_YIELD - | events.LINE, - # | events.BRANCH - # | events.JUMP - ) + my_events = events.PY_RETURN | events.PY_RESUME | events.LINE + if self.trace_arcs: + my_events |= events.BRANCH | events.JUMP + sys_monitoring.set_local_events(self.myid, code, my_events) self.local_event_codes[id(code)] = code if tracing_code and self.trace_arcs: @@ -355,16 +365,21 @@ def sysmon_py_start(self, code: CodeType, instruction_offset: int) -> MonitorRet return sys.monitoring.DISABLE @panopticon("code", "@") - def sysmon_py_resume_arcs( - self, code: CodeType, instruction_offset: int, + def sysmon_py_resume( + self, + code: CodeType, + instruction_offset: int, ) -> MonitorReturn: """Handle sys.monitoring.events.PY_RESUME events for branch coverage.""" frame = self.callers_frame() self.last_lines[frame] = frame.f_lineno @panopticon("code", "@", None) - def sysmon_py_return_arcs( - self, code: CodeType, instruction_offset: int, retval: object, + def sysmon_py_return( + self, + code: CodeType, + instruction_offset: int, + retval: object, ) -> MonitorReturn: """Handle sys.monitoring.events.PY_RETURN events for branch coverage.""" frame = self.callers_frame() @@ -373,15 +388,18 @@ def sysmon_py_return_arcs( last_line = self.last_lines.get(frame) if last_line is not None: arc = (last_line, -code.co_firstlineno) - # log(f"adding {arc=}") + log(f"adding {arc=}") cast(Set[TArc], code_info.file_data).add(arc) # Leaving this function, no need for the frame any more. self.last_lines.pop(frame, None) @panopticon("code", "@", "exc") - def sysmon_py_unwind_arcs( - self, code: CodeType, instruction_offset: int, exception: BaseException, + def sysmon_py_unwind( + self, + code: CodeType, + instruction_offset: int, + exception: BaseException, ) -> MonitorReturn: """Handle sys.monitoring.events.PY_UNWIND events for branch coverage.""" frame = self.callers_frame() @@ -394,30 +412,94 @@ def sysmon_py_unwind_arcs( if code_info is not None and code_info.file_data is not None: if last_line is not None: arc = (last_line, -code.co_firstlineno) - # log(f"adding {arc=}") + log(f"adding {arc=}") cast(Set[TArc], code_info.file_data).add(arc) - @panopticon("code", "line") def sysmon_line_lines(self, code: CodeType, line_number: int) -> MonitorReturn: """Handle sys.monitoring.events.LINE events for line coverage.""" code_info = self.code_infos[id(code)] if code_info.file_data is not None: + log(f"adding {line_number=}") cast(Set[TLineNo], code_info.file_data).add(line_number) - # log(f"adding {line_number=}") return sys.monitoring.DISABLE @panopticon("code", "line") def sysmon_line_arcs(self, code: CodeType, line_number: int) -> MonitorReturn: """Handle sys.monitoring.events.LINE events for branch coverage.""" code_info = self.code_infos[id(code)] - ret = None if code_info.file_data is not None: frame = self.callers_frame() last_line = self.last_lines.get(frame) - if last_line is not None: + if last_line is not None and last_line != line_number: arc = (last_line, line_number) + log(f"adding {arc=}") cast(Set[TArc], code_info.file_data).add(arc) - # log(f"adding {arc=}") self.last_lines[frame] = line_number - return ret + return sys.monitoring.DISABLE + + @panopticon("code", "src@", "dst@") + def sysmon_jump( + self, code: CodeType, instruction_offset: int, destination_offset: int + ) -> MonitorReturn: + """Handle sys.monitoring.events.JUMP events.""" + code_info = self.code_infos[id(code)] + assert code_info.file_data is not None + src_line = code_info.byte_to_line[instruction_offset] + dst_line = code_info.byte_to_line[destination_offset] + if src_line != dst_line: + arc = (src_line, dst_line) + log(f"adding {arc=}") + cast(Set[TArc], code_info.file_data).add(arc) + self.last_lines[self.callers_frame()] = dst_line + return sys.monitoring.DISABLE + + @panopticon("code", "src@", "dst@") + def sysmon_branch( + self, code: CodeType, instruction_offset: int, destination_offset: int + ) -> MonitorReturn: + """Handle sys.monitoring.events.BRANCH events.""" + code_info = self.code_infos[id(code)] + assert code_info.file_data is not None + dests = code_info.branch_dests[instruction_offset] + if destination_offset not in dests: + src_line = code_info.byte_to_line[instruction_offset] + dst_line = code_info.byte_to_line[destination_offset] + if src_line != dst_line: + arc = (src_line, dst_line) + log(f"adding {arc=}") + cast(Set[TArc], code_info.file_data).add(arc) + dests.add(destination_offset) + self.last_lines[self.callers_frame()] = dst_line + # A BRANCH event can only have two possible destinations. Once we've + # seen both, we can disable the event. + if len(dests) == 2: + return sys.monitoring.DISABLE + else: + return None + + @panopticon("code", "@", "exc") + def sysmon_exception_handled( + self, code: CodeType, instruction_offset: int, exception: BaseException + ) -> MonitorReturn: + """Handle sys.monitoring.events.EXCEPTION_HANDLED events.""" + + @panopticon("code", "@", "exc") + def sysmon_raise( + self, code: CodeType, instruction_offset: int, exception: BaseException + ) -> MonitorReturn: + """Handle sys.monitoring.events.RAISE events.""" + frame = self.callers_frame() + self.last_lines[frame] = frame.f_lineno + + @panopticon("code", "@", "exc") + def sysmon_reraise( + self, code: CodeType, instruction_offset: int, exception: BaseException + ) -> MonitorReturn: + """Handle sys.monitoring.events.RERAISE events.""" + + @panopticon("code", "@", "exc") + def sysmon_py_throw( + self, code: CodeType, instruction_offset: int, exception: BaseException + ) -> MonitorReturn: + """Handle sys.monitoring.events.PY_THROW events.""" diff --git a/lab/run_sysmon.py b/lab/run_sysmon.py index a52453cc1..d98a76a78 100644 --- a/lab/run_sysmon.py +++ b/lab/run_sysmon.py @@ -6,6 +6,10 @@ import functools import sys +from types import CodeType, FrameType + +from coverage.debug import short_filename, short_stack + print(sys.version) the_program = sys.argv[1] @@ -27,8 +31,40 @@ def bytes_to_lines(code): return b2l +def arg_repr(arg): + """Make a customized repr for logged values.""" + if isinstance(arg, CodeType): + return ( + f"" + ) + return repr(arg) + + +def handler(*names): + def _decorator(func): + @functools.wraps(func) + def _wrapped(*args): + args_reprs = [] + code = None + for name, arg in zip(names, args): + if name: + if name == "code": + code = arg + args_reprs.append(f"{name}={arg_repr(arg)}") + if name.endswith("@") and code is not None: + line = bytes_to_lines(code)[arg] + args_reprs[-1] += f"#{line}" + name = func.__name__.removeprefix("sysmon_") + print(f"{name}({', '.join(args_reprs)})") + return func(*args) + return _wrapped + return _decorator + + +@handler("code", "@") def sysmon_py_start(code, instruction_offset): - print(f"PY_START: {code.co_filename}@{instruction_offset}") sys.monitoring.set_local_events( my_id, code, @@ -36,41 +72,28 @@ def sysmon_py_start(code, instruction_offset): ) +@handler("code", "@") def sysmon_py_resume(code, instruction_offset): - b2l = bytes_to_lines(code) - print( - f"PY_RESUME: {code.co_filename}@{instruction_offset}, " - + f"{b2l[instruction_offset]}" - ) - + ... +@handler("code", "@", None) def sysmon_py_return(code, instruction_offset, retval): - b2l = bytes_to_lines(code) - print( - f"PY_RETURN: {code.co_filename}@{instruction_offset}, " - + f"{b2l[instruction_offset]}" - ) + return sys.monitoring.DISABLE +@handler("code", "#") def sysmon_line(code, line_number): - print(f"LINE: {code.co_filename}@{line_number}") return sys.monitoring.DISABLE +@handler("code", "src@", "dst@") def sysmon_branch(code, instruction_offset, destination_offset): - b2l = bytes_to_lines(code) - print( - f"BRANCH: {code.co_filename}@{instruction_offset}->{destination_offset}, " - + f"{b2l[instruction_offset]}->{b2l[destination_offset]}" - ) + ... #return sys.monitoring.DISABLE +@handler("code", "src@", "dst@") def sysmon_jump(code, instruction_offset, destination_offset): - b2l = bytes_to_lines(code) - print( - f"JUMP: {code.co_filename}@{instruction_offset}->{destination_offset}, " - + f"{b2l[instruction_offset]}->{b2l[destination_offset]}" - ) + return sys.monitoring.DISABLE sys.monitoring.set_events(