diff --git a/scalene/scalene_json.py b/scalene/scalene_json.py index 48266e6ac..109875d25 100644 --- a/scalene/scalene_json.py +++ b/scalene/scalene_json.py @@ -293,6 +293,13 @@ def output_profiles( this_stk.extend(stk) stks.append((this_stk, stats.stacks[stk])) + # Convert native stacks into a representation suitable for JSON dumping. + native_stks = [] + for stk in stats.native_stacks.keys(): + this_stk: List[str] = [] + this_stk.extend(stk) + native_stks.append((this_stk, stats.native_stacks[stk])) + output: Dict[str, Any] = { "program": program, "entrypoint_dir": entrypoint_dir, @@ -313,6 +320,7 @@ def output_profiles( "memory": profile_memory, "samples": stats.memory_footprint_samples, "stacks": stks, + "native_stacks": native_stks } # Build a list of files we will actually report on. diff --git a/scalene/scalene_profiler.py b/scalene/scalene_profiler.py index 54144bb21..b24d00773 100644 --- a/scalene/scalene_profiler.py +++ b/scalene/scalene_profiler.py @@ -612,6 +612,12 @@ def enable_signals() -> None: Scalene.__signals.cpu_timer_signal, Scalene.__args.cpu_sampling_rate, ) + if Scalene.__args.stacks: + try: + # Experimental native traceback support + signal.enable_native_traceback(Scalene.__signals.cpu_signal) + except AttributeError: + pass def __init__( self, @@ -1019,6 +1025,7 @@ def process_cpu_sample( main_thread_frame = new_frames[0][0] if Scalene.__args.stacks: + add_native_stack(Scalene.__signals.cpu_signal, Scalene.should_trace, Scalene.__stats.native_stacks) add_stack( main_thread_frame, Scalene.should_trace, Scalene.__stats.stacks ) diff --git a/scalene/scalene_statistics.py b/scalene/scalene_statistics.py index 056f15735..44de3ad59 100644 --- a/scalene/scalene_statistics.py +++ b/scalene/scalene_statistics.py @@ -42,6 +42,9 @@ def __init__(self) -> None: # full stacks taken during CPU samples, together with number of hits self.stacks: Dict[Tuple[Any], int] = defaultdict(int) + # native stacks taken during CPU samples, together with number of hits 
def add_native_stack(
    sig: int,
    should_trace: Callable[[Filename, str], bool],
    native_stacks: Dict[Any, int],
) -> None:
    """Record one hit for the native stack captured for signal ``sig``.

    The native traceback is fetched with ``signal.get_native_traceback``,
    an experimental API that stock interpreters do not provide. When the
    attribute is missing the call is a silent no-op, so callers may invoke
    this unconditionally.

    Args:
        sig: Signal number whose native traceback should be fetched.
        should_trace: Accepted so the signature parallels ``add_stack``;
            it is not consulted here, so no frames are filtered out.
        native_stacks: Mapping from a stack (a tuple of
            ``(filename, name)`` pairs) to its hit count. It is updated in
            place; a plain ``dict`` works as well as a ``defaultdict``.
    """
    try:
        tb = signal.get_native_traceback(sig)
    except AttributeError:
        # Native traceback support is not available in this interpreter.
        return
    # Store the frames in the reverse of the order the traceback yields
    # them. Building the list and reversing it once avoids the quadratic
    # cost of repeatedly inserting at index 0.
    frames: List[Tuple[str, str]] = [(f.filename, f.name) for f in tb]
    frames.reverse()
    key = tuple(frames)
    # Use .get() so a mapping without a default factory does not raise
    # KeyError the first time a stack is seen.
    native_stacks[key] = native_stacks.get(key, 0) + 1