From fb8983c267d545f35a26509033c8c7cbdb45359c Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 9 Sep 2025 17:04:13 +0100 Subject: [PATCH 01/15] gh-138709: Implement CPU time profiling in profiling.sample --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Lib/profiling/sampling/collector.py | 16 +- Lib/profiling/sampling/pstats_collector.py | 5 +- Lib/profiling/sampling/sample.py | 30 ++- Lib/profiling/sampling/stack_collector.py | 9 +- Lib/test/test_external_inspection.py | 109 +++++++++ .../test_profiling/test_sampling_profiler.py | 146 +++++++++++ Modules/_remote_debugging_module.c | 229 ++++++++++++++++-- Modules/clinic/_remote_debugging_module.c.h | 29 ++- PCbuild/_remote_debugging.vcxproj | 5 + 13 files changed, 548 insertions(+), 37 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 63888eab7b4481..56e6c06f880227 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -896,6 +896,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(coro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cpu_time)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ctx)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index b863a7c970e3d4..3a46fbaf8cd253 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -387,6 +387,7 @@ struct _Py_global_strings { 
STRUCT_FOR_ID(coro) STRUCT_FOR_ID(count) STRUCT_FOR_ID(covariant) + STRUCT_FOR_ID(cpu_time) STRUCT_FOR_ID(ctx) STRUCT_FOR_ID(cwd) STRUCT_FOR_ID(d_parameter_type) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 3ce7200ffeb6a4..00de62b9e3b1d8 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -894,6 +894,7 @@ extern "C" { INIT_ID(coro), \ INIT_ID(count), \ INIT_ID(covariant), \ + INIT_ID(cpu_time), \ INIT_ID(ctx), \ INIT_ID(cwd), \ INIT_ID(d_parameter_type), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index e76e603230a5db..f4176b98ad999d 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1336,6 +1336,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(cpu_time); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ctx); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 112d3071a1148c..3333e7bc99d177 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,5 +1,17 @@ from abc import ABC, abstractmethod +# Enums are slow +THREAD_STATE_RUNNING = 0 +THREAD_STATE_IDLE = 1 +THREAD_STATE_GIL_WAIT = 2 +THREAD_STATE_UNKNOWN = 3 + +STATUS = { + THREAD_STATE_RUNNING: "running", + THREAD_STATE_IDLE: "idle", + THREAD_STATE_GIL_WAIT: "gil_wait", + THREAD_STATE_UNKNOWN: "unknown", +} class Collector(ABC): @abstractmethod @@ -10,10 +22,12 @@ def collect(self, 
stack_frames): def export(self, filename): """Export collected data to a file.""" - def _iter_all_frames(self, stack_frames): + def _iter_all_frames(self, stack_frames, skip_idle=False): """Iterate over all frame stacks from all interpreters and threads.""" for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: + if skip_idle and thread_info.status != THREAD_STATE_RUNNING: + continue frames = thread_info.frame_info if frames: yield frames diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index d492c15bb2aaf8..dec81b60659c53 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -5,7 +5,7 @@ class PstatsCollector(Collector): - def __init__(self, sample_interval_usec): + def __init__(self, sample_interval_usec, *, skip_idle=False): self.result = collections.defaultdict( lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) ) @@ -14,6 +14,7 @@ def __init__(self, sample_interval_usec): self.callers = collections.defaultdict( lambda: collections.defaultdict(int) ) + self.skip_idle = skip_idle def _process_frames(self, frames): """Process a single thread's frame stack.""" @@ -40,7 +41,7 @@ def _process_frames(self, frames): self.callers[callee][caller] += 1 def collect(self, stack_frames): - for frames in self._iter_all_frames(stack_frames): + for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): self._process_frames(frames) def export(self, filename): diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 8a65f312234730..b901a0605c395e 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -120,18 +120,18 @@ def _run_with_sync(original_cmd): class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads): + def __init__(self, pid, sample_interval_usec, all_threads, *, cpu_time=False): self.pid = pid 
self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads + self.pid, all_threads=self.all_threads, cpu_time=cpu_time ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, only_active_thread=only_active_threads + self.pid, only_active_thread=only_active_threads, cpu_time=cpu_time ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -596,21 +596,22 @@ def sample( show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False, ): profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads + pid, sample_interval_usec, all_threads=all_threads, cpu_time=skip_idle ) profiler.realtime_stats = realtime_stats collector = None match output_format: case "pstats": - collector = PstatsCollector(sample_interval_usec) + collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle) case "collapsed": - collector = CollapsedStackCollector() + collector = CollapsedStackCollector(skip_idle=skip_idle) filename = filename or f"collapsed.{pid}.txt" case "flamegraph": - collector = FlamegraphCollector() + collector = FlamegraphCollector(skip_idle=skip_idle) filename = filename or f"flamegraph.{pid}.html" case _: raise ValueError(f"Invalid output format: {output_format}") @@ -660,6 +661,7 @@ def wait_for_process_and_sample(pid, sort_value, args): filename = args.outfile if not filename and args.format == "collapsed": filename = f"collapsed.{pid}.txt" + skip_idle = True if args.mode == "cpu" else False sample( pid, @@ -672,6 +674,7 @@ def wait_for_process_and_sample(pid, sort_value, args): show_summary=not args.no_summary, output_format=args.format, realtime_stats=args.realtime_stats, + skip_idle=skip_idle, ) @@ -726,6 +729,15 @@ def main(): help="Print real-time sampling statistics (Hz, mean, min, max, 
stdev) during profiling", ) + # Mode options + mode_group = parser.add_argument_group("Mode options") + mode_group.add_argument( + "--mode", + choices=["wall", "cpu"], + default="wall-time", + help="Sampling mode: wall-time (default, skip_idle=False) or cpu-time (skip_idle=True)", + ) + # Output format selection output_group = parser.add_argument_group("Output options") output_format = output_group.add_mutually_exclusive_group() @@ -850,6 +862,9 @@ def main(): elif target_count > 1: parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") + # Set skip_idle based on mode + skip_idle = True if args.mode == "cpu" else False + if args.pid: sample( args.pid, @@ -862,6 +877,7 @@ def main(): show_summary=not args.no_summary, output_format=args.format, realtime_stats=args.realtime_stats, + skip_idle=skip_idle, ) elif args.module or args.args: if args.module: diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 25539640b8de40..5738d21b0ee674 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -10,9 +10,10 @@ class StackTraceCollector(Collector): - def __init__(self): + def __init__(self, *, skip_idle=False): self.call_trees = [] self.function_samples = collections.defaultdict(int) + self.skip_idle = skip_idle def _process_frames(self, frames): """Process a single thread's frame stack.""" @@ -28,7 +29,7 @@ def _process_frames(self, frames): self.function_samples[frame] += 1 def collect(self, stack_frames): - for frames in self._iter_all_frames(stack_frames): + for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): self._process_frames(frames) @@ -49,8 +50,8 @@ def export(self, filename): class FlamegraphCollector(StackTraceCollector): - def __init__(self): - super().__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.stats = {} def set_stats(self, sample_interval_usec, 
duration_sec, sample_rate, error_rate=None): diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 262e472da7eac3..62596ca1ba0649 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1670,6 +1670,115 @@ def test_unsupported_platform_error(self): str(cm.exception) ) +class TestDetectionOfThreadStatus(unittest.TestCase): + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on unsupported platforms (not Linux, macOS, or Windows)", + ) + @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + def test_thread_status_detection(self): + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import time, sys, socket, threading + import os + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + + def sleeper(): + tid = threading.get_native_id() + sock.sendall(f'ready:sleeper:{{tid}}\\n'.encode()) + time.sleep(10000) + + def busy(): + tid = threading.get_native_id() + sock.sendall(f'ready:busy:{{tid}}\\n'.encode()) + x = 0 + while True: + x = x + 1 + time.sleep(0.5) + + t1 = threading.Thread(target=sleeper) + t2 = threading.Thread(target=busy) + t1.start() + t2.start() + sock.sendall(b'ready:main\\n') + t1.join() + t2.join() + sock.close() + """ + ) + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + + script_name = _make_test_script(script_dir, "thread_status_script", script) + client_socket = None + try: + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = b"" + sleeper_tid = 
None + busy_tid = None + while True: + chunk = client_socket.recv(1024) + response += chunk + if b"ready:main" in response and b"ready:sleeper" in response and b"ready:busy" in response: + # Parse TIDs from the response + for line in response.split(b"\n"): + if line.startswith(b"ready:sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except Exception: + pass + elif line.startswith(b"ready:busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except Exception: + pass + break + + attempts = 10 + try: + unwinder = RemoteUnwinder(p.pid, all_threads=True, cpu_time=True) + for _ in range(attempts): + traces = unwinder.get_stack_trace() + # Check if any thread is running + if any(thread_info.status == 0 for interpreter_info in traces + for thread_info in interpreter_info.threads): + break + time.sleep(0.5) # Give a bit of time to let threads settle + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + + # Find threads and their statuses + statuses = {} + for interpreter_info in traces: + for thread_info in interpreter_info.threads: + statuses[thread_info.thread_id] = thread_info.status + + self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") + self.assertIsNotNone(busy_tid, "Busy thread id not received") + self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") + self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") + self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)") + self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + + finally: + if client_socket is not None: + client_socket.close() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 84339d46d02f73..1d376bf6044831 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ 
b/Lib/test/test_profiling/test_sampling_profiler.py @@ -1996,6 +1996,7 @@ def test_cli_module_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2023,6 +2024,7 @@ def test_cli_module_with_arguments(self): show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2050,6 +2052,7 @@ def test_cli_script_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2149,6 +2152,7 @@ def test_cli_module_with_profiler_options(self): show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2182,6 +2186,7 @@ def test_cli_script_with_profiler_options(self): show_summary=True, output_format="collapsed", realtime_stats=False, + skip_idle=False ) def test_cli_empty_module_name(self): @@ -2393,6 +2398,7 @@ def test_argument_parsing_basic(self): show_summary=True, output_format="pstats", realtime_stats=False, + skip_idle=False ) def test_sort_options(self): @@ -2423,5 +2429,145 @@ def test_sort_options(self): mock_sample.reset_mock() +class TestCpuModeFiltering(unittest.TestCase): + """Test CPU mode filtering functionality (--mode=cpu).""" + + def test_mode_validation(self): + """Test that CLI validates mode choices correctly.""" + # Invalid mode choice should raise SystemExit + test_args = ["profiling.sampling.sample", "--mode", "invalid", "-p", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + profiling.sampling.sample.main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() 
+ self.assertIn("invalid choice", error_msg) + + def test_frames_filtered_with_skip_idle(self): + """Test that frames are actually filtered when skip_idle=True.""" + # Create mock frames with different thread statuses + class MockThreadInfoWithStatus: + def __init__(self, thread_id, frame_info, status): + self.thread_id = thread_id + self.frame_info = frame_info + self.status = status + + # Create test data: running thread, idle thread, and another running thread + test_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0), # RUNNING + MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1), # IDLE + MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0), # RUNNING + ]) + ] + + # Test with skip_idle=True - should only process running threads + collector_skip = PstatsCollector(sample_interval_usec=1000, skip_idle=True) + collector_skip.collect(test_frames) + + # Should only have functions from running threads (status 0) + active1_key = ("active1.py", 10, "active_func1") + active2_key = ("active2.py", 30, "active_func2") + idle_key = ("idle.py", 20, "idle_func") + + self.assertIn(active1_key, collector_skip.result) + self.assertIn(active2_key, collector_skip.result) + self.assertNotIn(idle_key, collector_skip.result) # Idle thread should be filtered out + + # Test with skip_idle=False - should process all threads + collector_no_skip = PstatsCollector(sample_interval_usec=1000, skip_idle=False) + collector_no_skip.collect(test_frames) + + # Should have functions from all threads + self.assertIn(active1_key, collector_no_skip.result) + self.assertIn(active2_key, collector_no_skip.result) + self.assertIn(idle_key, collector_no_skip.result) # Idle thread should be included + + @requires_subprocess() + def test_cpu_mode_integration_filtering(self): + """Integration test: CPU mode should only capture active threads, not idle ones.""" + # Script with one 
mostly-idle thread and one CPU-active thread + cpu_vs_idle_script = ''' +import time +import threading + +def idle_worker(): + time.sleep(999999) + +def cpu_active_worker(): + x = 1 + while True: + x += 1 + +def main(): +# Start both threads + idle_thread = threading.Thread(target=idle_worker) + cpu_thread = threading.Thread(target=cpu_active_worker) + idle_thread.start() + cpu_thread.start() + idle_thread.join() + cpu_thread.join() + +main() + +''' + with test_subprocess(cpu_vs_idle_script) as proc: + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profiling.sampling.sample.sample( + proc.pid, + duration_sec=0.5, + sample_interval_usec=5000, + skip_idle=True, # CPU mode + show_summary=False, + all_threads=True, + ) + except (PermissionError, RuntimeError) as e: + self.skipTest("Insufficient permissions for remote profiling") + + cpu_mode_output = captured_output.getvalue() + + # Test wall-clock mode (skip_idle=False) - should capture both functions + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profiling.sampling.sample.sample( + proc.pid, + duration_sec=0.5, + sample_interval_usec=5000, + skip_idle=False, # Wall-clock mode + show_summary=False, + all_threads=True, + ) + except (PermissionError, RuntimeError) as e: + self.skipTest("Insufficient permissions for remote profiling") + + wall_mode_output = captured_output.getvalue() + + # Verify both modes captured samples + self.assertIn("Captured", cpu_mode_output) + self.assertIn("samples", cpu_mode_output) + self.assertIn("Captured", wall_mode_output) + self.assertIn("samples", wall_mode_output) + + # CPU mode should strongly favor cpu_active_worker over mostly_idle_worker + self.assertIn("cpu_active_worker", cpu_mode_output) + self.assertNotIn("idle_worker", cpu_mode_output) + + # Wall-clock mode should capture both types of work + self.assertIn("cpu_active_worker", wall_mode_output) + 
self.assertIn("idle_worker", wall_mode_output) + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index c306143ee73b18..f383076b6d57bd 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -34,6 +34,31 @@ # define HAVE_PROCESS_VM_READV 0 #endif +// Returns thread status using proc_pidinfo, caches thread_id_offset on first use (macOS only) +#ifdef __APPLE__ +#include <libproc.h> +#include <sys/types.h> +#define MAX_NATIVE_THREADS 4096 +#endif + +#ifdef MS_WINDOWS +#include <windows.h> +#include <winternl.h> +// ntstatus.h conflicts with windows.h so we have to define the NTSTATUS values we need +#define STATUS_SUCCESS ((NTSTATUS)0x00000000L) +#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L) +typedef enum _WIN32_THREADSTATE { + WIN32_THREADSTATE_INITIALIZED = 0, // Recognized by the kernel + WIN32_THREADSTATE_READY = 1, // Prepared to run on the next available processor + WIN32_THREADSTATE_RUNNING = 2, // Currently executing + WIN32_THREADSTATE_STANDBY = 3, // About to run, only one thread may be in this state at a time + WIN32_THREADSTATE_TERMINATED = 4, // Finished executing + WIN32_THREADSTATE_WAITING = 5, // Not ready for the processor, when ready, it will be rescheduled + WIN32_THREADSTATE_TRANSITION = 6, // Waiting for resources other than the processor + WIN32_THREADSTATE_UNKNOWN = 7 // Thread state is unknown +} WIN32_THREADSTATE; +#endif + /* ============================================================================ * TYPE DEFINITIONS AND STRUCTURES * ============================================================================ */ @@ -153,6 +178,7 @@ static PyStructSequence_Desc CoroInfo_desc = { // ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) static PyStructSequence_Field ThreadInfo_fields[] = { {"thread_id", "Thread ID"}, + {"status", "Thread status"}, {"frame_info", "Frame information"}, {NULL} }; @@ -211,6 +237,13 @@ typedef struct { PyTypeObject 
*AwaitedInfo_Type; } RemoteDebuggingState; +enum _ThreadState { + THREAD_STATE_RUNNING, + THREAD_STATE_IDLE, + THREAD_STATE_GIL_WAIT, + THREAD_STATE_UNKNOWN +}; + typedef struct { PyObject_HEAD proc_handle_t handle; @@ -224,12 +257,20 @@ typedef struct { _Py_hashtable_t *code_object_cache; int debug; int only_active_thread; + int cpu_time; RemoteDebuggingState *cached_state; // Cached module state #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking uint32_t tlbc_generation; // Track TLBC index pool changes _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address #endif +#ifdef __APPLE__ + uint64_t thread_id_offset; +#endif +#ifdef MS_WINDOWS + PVOID win_process_buffer; + ULONG win_process_buffer_size; +#endif } RemoteUnwinderObject; #define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) @@ -2453,10 +2494,127 @@ process_frame_chain( return 0; } +static int +get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) { +#ifdef __APPLE__ + if (unwinder->thread_id_offset == 0) { + uint64_t *tids = (uint64_t *)PyMem_Malloc(MAX_NATIVE_THREADS * sizeof(uint64_t)); + if (!tids) { + PyErr_NoMemory(); + return -1; + } + int n = proc_pidinfo(unwinder->handle.pid, PROC_PIDLISTTHREADS, 0, tids, MAX_NATIVE_THREADS * sizeof(uint64_t)) / sizeof(uint64_t); + if (n <= 0) { + PyMem_Free(tids); + return THREAD_STATE_UNKNOWN; + } + uint64_t min_offset = UINT64_MAX; + for (int i = 0; i < n; i++) { + uint64_t offset = tids[i] - pthread_id; + if (offset < min_offset) { + min_offset = offset; + } + } + unwinder->thread_id_offset = min_offset; + PyMem_Free(tids); + } + struct proc_threadinfo ti; + uint64_t tid_with_offset = pthread_id + unwinder->thread_id_offset; + if (proc_pidinfo(unwinder->handle.pid, PROC_PIDTHREADINFO, tid_with_offset, &ti, sizeof(ti)) != sizeof(ti)) { + return THREAD_STATE_UNKNOWN; + } + if (ti.pth_run_state == TH_STATE_RUNNING) { + return THREAD_STATE_RUNNING; + } + return THREAD_STATE_IDLE; +#elif 
defined(__linux__) + char stat_path[256]; + char buffer[2048] = ""; + + snprintf(stat_path, sizeof(stat_path), "/proc/%d/task/%lu/stat", unwinder->handle.pid, tid); + + int fd = open(stat_path, O_RDONLY); + if (fd == -1) { + return THREAD_STATE_UNKNOWN; + } + + if (read(fd, buffer, 2047) == 0) { + close(fd); + return THREAD_STATE_UNKNOWN; + } + close(fd); + + char *p = strchr(buffer, ')'); + if (!p) { + return THREAD_STATE_UNKNOWN; + } + + p += 2; // Skip ") " + if (*p == ' ') { + p++; + } + + switch (*p) { + case 'R': // Running + return THREAD_STATE_RUNNING; + case 'S': // Interruptible sleep + case 'D': // Uninterruptible sleep + case 'T': // Stopped + case 'Z': // Zombie + case 'I': // Idle kernel thread + return THREAD_STATE_IDLE; + default: + return THREAD_STATE_UNKNOWN; + } +#elif defined(MS_WINDOWS) + ULONG n; + NTSTATUS status = NtQuerySystemInformation( + SystemProcessInformation, + unwinder->win_process_buffer, + unwinder->win_process_buffer_size, + &n + ); + if (status == STATUS_INFO_LENGTH_MISMATCH) { + // Buffer was too small so we reallocate a larger one and try again. 
+ unwinder->win_process_buffer_size = n; + PVOID new_buffer = PyMem_Realloc(unwinder->win_process_buffer, n); + if (!new_buffer) { + return -1; + } + unwinder->win_process_buffer = new_buffer; + return get_thread_status(unwinder, tid, pthread_id); + } + if (status != STATUS_SUCCESS) { + return -1; + } + + SYSTEM_PROCESS_INFORMATION *pi = (SYSTEM_PROCESS_INFORMATION *)unwinder->win_process_buffer; + while ((ULONG)(ULONG_PTR)pi->UniqueProcessId != unwinder->handle.pid) { + if (pi->NextEntryOffset == 0) { + // We didn't find the process + return -1; + } + pi = (SYSTEM_PROCESS_INFORMATION *)(((BYTE *)pi) + pi->NextEntryOffset); + } + + SYSTEM_THREAD_INFORMATION *ti = (SYSTEM_THREAD_INFORMATION *)((char *)pi + sizeof(SYSTEM_PROCESS_INFORMATION)); + for (Py_ssize_t i = 0; i < pi->NumberOfThreads; i++, ti++) { + if (ti->ClientId.UniqueThread == (HANDLE)tid) { + return ti->ThreadState != WIN32_THREADSTATE_RUNNING ? THREAD_STATE_IDLE : THREAD_STATE_RUNNING; + } + } + + return -1; +#else + return THREAD_STATE_UNKNOWN; +#endif +} + static PyObject* unwind_stack_for_thread( RemoteUnwinderObject *unwinder, - uintptr_t *current_tstate + uintptr_t *current_tstate, + uintptr_t gil_holder_tstate ) { PyObject *frame_info = NULL; PyObject *thread_id = NULL; @@ -2484,6 +2642,20 @@ unwind_stack_for_thread( goto error; } + long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); + int status = THREAD_STATE_UNKNOWN; + if (unwinder->cpu_time == 1) { + long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); + status = get_thread_status(unwinder, tid, pthread_id); + if (status == -1) { + PyErr_Print(); + PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status"); + goto error; + } + } else { + status = (*current_tstate == gil_holder_tstate) ? 
THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; + } + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; @@ -2491,8 +2663,7 @@ unwind_stack_for_thread( *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); - thread_id = PyLong_FromLongLong( - GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); + thread_id = PyLong_FromLongLong(tid); if (thread_id == NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); goto error; @@ -2505,8 +2676,16 @@ unwind_stack_for_thread( goto error; } - PyStructSequence_SetItem(result, 0, thread_id); // Steals reference - PyStructSequence_SetItem(result, 1, frame_info); // Steals reference + PyObject *py_status = PyLong_FromLong(status); + if (py_status == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status"); + goto error; + } + PyErr_Print(); + + PyStructSequence_SetItem(result, 0, thread_id); + PyStructSequence_SetItem(result, 1, py_status); // Steals reference + PyStructSequence_SetItem(result, 2, frame_info); // Steals reference cleanup_stack_chunks(&chunks); return result; @@ -2537,6 +2716,7 @@ _remote_debugging.RemoteUnwinder.__init__ * all_threads: bool = False only_active_thread: bool = False + cpu_time: bool = False debug: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -2546,6 +2726,7 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. all_threads: If True, initialize state for all threads in the process. If False, only initialize for the main thread. only_active_thread: If True, only sample the thread holding the GIL. + cpu_time: If True, enable CPU time tracking for unwinder operations. Cannot be used together with all_threads=True. 
debug: If True, chain exceptions to explain the sequence of events that lead to the exception. @@ -2564,8 +2745,8 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int debug) -/*[clinic end generated code: output=13ba77598ecdcbe1 input=cfc21663fbe263c4]*/ + int cpu_time, int debug) +/*[clinic end generated code: output=2598ce54f6335ac7 input=0cf2038cc304c165]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -2584,6 +2765,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->debug = debug; self->only_active_thread = only_active_thread; + self->cpu_time = cpu_time; self->cached_state = NULL; if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); @@ -2656,6 +2838,15 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, } #endif +#if defined(__APPLE__) + self->thread_id_offset = 0; +#endif + +#ifdef MS_WINDOWS + self->win_process_buffer = NULL; + self->win_process_buffer_size = 0; +#endif + return 0; } @@ -2761,21 +2952,25 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self goto exit; } + // Get the GIL holder for this interpreter (needed for GIL_WAIT logic) + uintptr_t gil_holder_tstate = 0; + int gil_locked = GET_MEMBER(int, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_locked); + if (gil_locked) { + gil_holder_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_holder); + } + uintptr_t current_tstate; if (self->only_active_thread) { // Find the GIL holder for THIS interpreter - int gil_locked = GET_MEMBER(int, interp_state_buffer, - self->debug_offsets.interpreter_state.gil_runtime_state_locked); - if (!gil_locked) { // This 
interpreter's GIL is not locked, skip it Py_DECREF(interpreter_threads); goto next_interpreter; } - // Get the GIL holder for this interpreter - current_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer, - self->debug_offsets.interpreter_state.gil_runtime_state_holder); + current_tstate = gil_holder_tstate; } else if (self->tstate_addr == 0) { // Get all threads for this interpreter current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, @@ -2786,7 +2981,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self } while (current_tstate != 0) { - PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate); + PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, gil_holder_tstate); if (!frame_info) { Py_DECREF(interpreter_threads); set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); @@ -3038,6 +3233,12 @@ RemoteUnwinder_dealloc(PyObject *op) if (self->code_object_cache) { _Py_hashtable_destroy(self->code_object_cache); } +#ifdef MS_WINDOWS + if(self->win_process_buffer != NULL) { + PyMem_Free(self->win_process_buffer); + } +#endif + #ifdef Py_GIL_DISABLED if (self->tlbc_cache) { _Py_hashtable_destroy(self->tlbc_cache); diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h index 9bfcdde407fe3c..6f483014e3e589 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -11,7 +11,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" -" debug=False)\n" +" cpu_time=False, debug=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -21,6 +21,7 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " all_threads: If True, initialize state for all threads in the process.\n" " If False, only initialize for the main thread.\n" " 
only_active_thread: If True, only sample the thread holding the GIL.\n" +" cpu_time: If True, enable CPU time tracking for unwinder operations.\n" " Cannot be used together with all_threads=True.\n" " debug: If True, chain exceptions to explain the sequence of events that\n" " lead to the exception.\n" @@ -38,7 +39,7 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int debug); + int cpu_time, int debug); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -46,7 +47,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -55,7 +56,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(debug), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(cpu_time), &_Py_ID(debug), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -64,20 +65,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "debug", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "cpu_time", "debug", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs 
= nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; int pid; int all_threads = 0; int only_active_thread = 0; + int cpu_time = 0; int debug = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, @@ -110,12 +112,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - debug = PyObject_IsTrue(fastargs[3]); + if (fastargs[3]) { + cpu_time = PyObject_IsTrue(fastargs[3]); + if (cpu_time < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + debug = PyObject_IsTrue(fastargs[4]); if (debug < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, debug); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, cpu_time, debug); exit: return return_value; @@ -297,4 +308,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=2ba15411abf82c33 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8a265335c972dc31 input=a9049054013a1b77]*/ diff --git a/PCbuild/_remote_debugging.vcxproj b/PCbuild/_remote_debugging.vcxproj index c55f2908e03d33..a01905fdf2f437 100644 --- a/PCbuild/_remote_debugging.vcxproj +++ b/PCbuild/_remote_debugging.vcxproj @@ -92,6 +92,11 @@ <_ProjectFileVersion>10.0.30319.1 + + + ntdll.lib;%(AdditionalDependencies) + + From 8859bf8bdb3f198b5a286152b14f1f2dbe6bf0bd Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 10:45:41 +0100 Subject: [PATCH 02/15] Implement GIL mode and filtering --- Lib/profiling/sampling/sample.py | 44 +++++++++---- Modules/_remote_debugging_module.c | 71 +++++++++++++++------ Modules/clinic/_remote_debugging_module.c.h | 20 +++--- 3 files changed, 92 insertions(+), 43 deletions(-) diff --git 
a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index b901a0605c395e..20437481a0af98 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -15,6 +15,21 @@ from .stack_collector import CollapsedStackCollector, FlamegraphCollector _FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None + +# Profiling mode constants +PROFILING_MODE_WALL = 0 +PROFILING_MODE_CPU = 1 +PROFILING_MODE_GIL = 2 + + +def _parse_mode(mode_string): + """Convert mode string to mode constant.""" + mode_map = { + "wall": PROFILING_MODE_WALL, + "cpu": PROFILING_MODE_CPU, + "gil": PROFILING_MODE_GIL, + } + return mode_map[mode_string] _HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. Supports the following target modes: - -p PID: Profile an existing process by PID @@ -120,18 +135,18 @@ def _run_with_sync(original_cmd): class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, cpu_time=False): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads, cpu_time=cpu_time + self.pid, all_threads=self.all_threads, mode=mode ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, only_active_thread=only_active_threads, cpu_time=cpu_time + self.pid, only_active_thread=only_active_threads, mode=mode ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -596,13 +611,16 @@ def sample( show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False, + mode=PROFILING_MODE_WALL, ): profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, cpu_time=skip_idle + pid, sample_interval_usec, 
all_threads=all_threads, mode=mode ) profiler.realtime_stats = realtime_stats + # Determine skip_idle for collector compatibility + skip_idle = mode != PROFILING_MODE_WALL + collector = None match output_format: case "pstats": @@ -661,7 +679,8 @@ def wait_for_process_and_sample(pid, sort_value, args): filename = args.outfile if not filename and args.format == "collapsed": filename = f"collapsed.{pid}.txt" - skip_idle = True if args.mode == "cpu" else False + + mode = _parse_mode(args.mode) sample( pid, @@ -674,7 +693,7 @@ def wait_for_process_and_sample(pid, sort_value, args): show_summary=not args.no_summary, output_format=args.format, realtime_stats=args.realtime_stats, - skip_idle=skip_idle, + mode=mode, ) @@ -733,9 +752,9 @@ def main(): mode_group = parser.add_argument_group("Mode options") mode_group.add_argument( "--mode", - choices=["wall", "cpu"], - default="wall-time", - help="Sampling mode: wall-time (default, skip_idle=False) or cpu-time (skip_idle=True)", + choices=["wall", "cpu", "gil"], + default="wall", + help="Sampling mode: wall (all threads), cpu (only CPU-running threads), gil (only GIL-holding threads)", ) # Output format selection @@ -862,8 +881,7 @@ def main(): elif target_count > 1: parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") - # Set skip_idle based on mode - skip_idle = True if args.mode == "cpu" else False + mode = _parse_mode(args.mode) if args.pid: sample( @@ -877,7 +895,7 @@ def main(): show_summary=not args.no_summary, output_format=args.format, realtime_stats=args.realtime_stats, - skip_idle=skip_idle, + mode=mode, ) elif args.module or args.args: if args.module: diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index f383076b6d57bd..2e4fea0e0352e5 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -244,6 +244,12 @@ enum _ThreadState { THREAD_STATE_UNKNOWN }; +enum _ProfilingMode { + PROFILING_MODE_WALL = 0, + 
PROFILING_MODE_CPU = 1, + PROFILING_MODE_GIL = 2 +}; + typedef struct { PyObject_HEAD proc_handle_t handle; @@ -257,7 +263,7 @@ typedef struct { _Py_hashtable_t *code_object_cache; int debug; int only_active_thread; - int cpu_time; + int mode; // Use enum _ProfilingMode values RemoteDebuggingState *cached_state; // Cached module state #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking @@ -2629,6 +2635,39 @@ unwind_stack_for_thread( goto error; } + long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); + + // Calculate thread status based on mode + int status = THREAD_STATE_UNKNOWN; + if (unwinder->mode == PROFILING_MODE_CPU) { + long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); + status = get_thread_status(unwinder, tid, pthread_id); + if (status == -1) { + PyErr_Print(); + PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status"); + goto error; + } + } else if (unwinder->mode == PROFILING_MODE_GIL) { + status = (*current_tstate == gil_holder_tstate) ? 
THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; + } else { + // PROFILING_MODE_WALL - all threads are considered running + status = THREAD_STATE_RUNNING; + } + + // Check if we should skip this thread based on mode + int should_skip = 0; + if (unwinder->mode == PROFILING_MODE_CPU && status != THREAD_STATE_RUNNING) { + should_skip = 1; + } else if (unwinder->mode == PROFILING_MODE_GIL && status != THREAD_STATE_RUNNING) { + should_skip = 1; + } + + if (should_skip) { + // Advance to next thread and return NULL to skip processing + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); + return NULL; + } + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); frame_info = PyList_New(0); @@ -2642,20 +2681,6 @@ unwind_stack_for_thread( goto error; } - long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); - int status = THREAD_STATE_UNKNOWN; - if (unwinder->cpu_time == 1) { - long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); - status = get_thread_status(unwinder, tid, pthread_id); - if (status == -1) { - PyErr_Print(); - PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status"); - goto error; - } - } else { - status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; - } - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; @@ -2716,7 +2741,7 @@ _remote_debugging.RemoteUnwinder.__init__ * all_threads: bool = False only_active_thread: bool = False - cpu_time: bool = False + mode: int = 0 debug: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -2726,7 +2751,7 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. all_threads: If True, initialize state for all threads in the process. 
If False, only initialize for the main thread. only_active_thread: If True, only sample the thread holding the GIL. - cpu_time: If True, enable CPU time tracking for unwinder operations. + mode: Profiling mode: 0=WALL (wall-time), 1=CPU (cpu-time), 2=GIL (gil-time). Cannot be used together with all_threads=True. debug: If True, chain exceptions to explain the sequence of events that lead to the exception. @@ -2745,8 +2770,8 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int cpu_time, int debug) -/*[clinic end generated code: output=2598ce54f6335ac7 input=0cf2038cc304c165]*/ + int mode, int debug) +/*[clinic end generated code: output=784e9990115aa569 input=d082d792d2ba9924]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -2765,7 +2790,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->debug = debug; self->only_active_thread = only_active_thread; - self->cpu_time = cpu_time; + self->mode = mode; self->cached_state = NULL; if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); @@ -2983,6 +3008,12 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self while (current_tstate != 0) { PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, gil_holder_tstate); if (!frame_info) { + // Check if this was an intentional skip due to mode-based filtering + if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) { + // Thread was skipped due to mode filtering, continue to next thread + continue; + } + // This was an actual error Py_DECREF(interpreter_threads); set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); Py_CLEAR(result); diff --git a/Modules/clinic/_remote_debugging_module.c.h
b/Modules/clinic/_remote_debugging_module.c.h index 6f483014e3e589..8d06e70ccc70c5 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -11,7 +11,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" -" cpu_time=False, debug=False)\n" +" mode=0, debug=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -21,7 +21,7 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " all_threads: If True, initialize state for all threads in the process.\n" " If False, only initialize for the main thread.\n" " only_active_thread: If True, only sample the thread holding the GIL.\n" -" cpu_time: If True, enable CPU time tracking for unwinder operations.\n" +" mode: Profiling mode: 0=WALL (wall-time), 1=CPU (cpu-time), 2=GIL (gil-time).\n" " Cannot be used together with all_threads=True.\n" " debug: If True, chain exceptions to explain the sequence of events that\n" " lead to the exception.\n" @@ -39,7 +39,7 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int cpu_time, int debug); + int mode, int debug); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -56,7 +56,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(cpu_time), &_Py_ID(debug), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -65,7 +65,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, 
PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "cpu_time", "debug", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", @@ -79,7 +79,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int pid; int all_threads = 0; int only_active_thread = 0; - int cpu_time = 0; + int mode = 0; int debug = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, @@ -113,8 +113,8 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } } if (fastargs[3]) { - cpu_time = PyObject_IsTrue(fastargs[3]); - if (cpu_time < 0) { + mode = PyLong_AsInt(fastargs[3]); + if (mode == -1 && PyErr_Occurred()) { goto exit; } if (!--noptargs) { @@ -126,7 +126,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, cpu_time, debug); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug); exit: return return_value; @@ -308,4 +308,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=8a265335c972dc31 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=47ff0bccf5c45828 input=a9049054013a1b77]*/ From ad433347113b1411075c335f510857b357fe9825 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 10:49:37 +0100 Subject: [PATCH 03/15] Implement GIL mode and filtering --- .../test_profiling/test_sampling_profiler.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 1d376bf6044831..a5b0ff38196484 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -1996,7 +1996,7 @@ def test_cli_module_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False + mode=0 ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2024,7 +2024,7 @@ def test_cli_module_with_arguments(self): show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False + mode=0 ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2052,7 +2052,7 @@ def test_cli_script_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False + mode=0 ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2152,7 +2152,7 @@ def test_cli_module_with_profiler_options(self): show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False + mode=0 ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2186,7 +2186,7 @@ def test_cli_script_with_profiler_options(self): show_summary=True, output_format="collapsed", realtime_stats=False, - skip_idle=False + mode=0 ) def test_cli_empty_module_name(self): @@ -2398,7 +2398,7 @@ def test_argument_parsing_basic(self): show_summary=True, output_format="pstats", realtime_stats=False, - skip_idle=False + mode=0 ) def test_sort_options(self): @@ -2526,7 +2526,7 @@ def main(): proc.pid, duration_sec=0.5, sample_interval_usec=5000, - skip_idle=True, # CPU mode + mode=1, # CPU mode show_summary=False, all_threads=True, ) @@ -2535,7 +2535,7 @@ def main(): cpu_mode_output = captured_output.getvalue() - # Test wall-clock mode (skip_idle=False) - should capture both functions + # Test wall-clock mode (mode=0) - should capture both functions 
with ( io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), @@ -2545,7 +2545,7 @@ def main(): proc.pid, duration_sec=0.5, sample_interval_usec=5000, - skip_idle=False, # Wall-clock mode + mode=0, # Wall-clock mode show_summary=False, all_threads=True, ) From 1767b39d05f01f50b30e5980be7c72ef9c3e76a5 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 10:54:37 +0100 Subject: [PATCH 04/15] More tests --- .../test_profiling/test_sampling_profiler.py | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index a5b0ff38196484..eea61ed07069ad 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -2569,5 +2569,220 @@ def main(): self.assertIn("idle_worker", wall_mode_output) +class TestGilModeFiltering(unittest.TestCase): + """Test GIL mode filtering functionality (--mode=gil).""" + + def test_gil_mode_validation(self): + """Test that CLI accepts gil mode choice correctly.""" + test_args = ["profiling.sampling.sample", "--mode", "gil", "-p", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.sample.sample") as mock_sample, + ): + try: + profiling.sampling.sample.main() + except SystemExit: + pass # Expected due to invalid PID + + # Should have attempted to call sample with mode=2 (GIL mode) + mock_sample.assert_called_once() + call_args = mock_sample.call_args[1] + self.assertEqual(call_args["mode"], 2) # PROFILING_MODE_GIL + + def test_gil_mode_sample_function_call(self): + """Test that sample() function correctly uses GIL mode.""" + with ( + mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, + mock.patch("profiling.sampling.sample.PstatsCollector") as mock_collector, + ): + # Mock the profiler instance + mock_instance = mock.Mock() + mock_profiler.return_value = mock_instance + + # Mock 
the collector instance + mock_collector_instance = mock.Mock() + mock_collector.return_value = mock_collector_instance + + # Call sample with GIL mode and a filename to avoid pstats creation + profiling.sampling.sample.sample( + 12345, + mode=2, # PROFILING_MODE_GIL + duration_sec=1, + sample_interval_usec=1000, + filename="test_output.txt", + ) + + # Verify SampleProfiler was created with correct mode + mock_profiler.assert_called_once() + call_args = mock_profiler.call_args + self.assertEqual(call_args[1]['mode'], 2) # mode parameter + + # Verify profiler.sample was called + mock_instance.sample.assert_called_once() + + # Verify collector.export was called since we provided a filename + mock_collector_instance.export.assert_called_once_with("test_output.txt") + + def test_gil_mode_collector_configuration(self): + """Test that collectors are configured correctly for GIL mode.""" + with ( + mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, + mock.patch("profiling.sampling.sample.PstatsCollector") as mock_collector, + ): + # Mock the profiler instance + mock_instance = mock.Mock() + mock_profiler.return_value = mock_instance + + # Call sample with GIL mode + profiling.sampling.sample.sample( + 12345, + mode=2, # PROFILING_MODE_GIL + output_format="pstats", + ) + + # Verify collector was created with skip_idle=True (since mode != WALL) + mock_collector.assert_called_once() + call_args = mock_collector.call_args[1] + self.assertTrue(call_args['skip_idle']) + + def test_gil_mode_with_collapsed_format(self): + """Test GIL mode with collapsed stack format.""" + with ( + mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, + mock.patch("profiling.sampling.sample.CollapsedStackCollector") as mock_collector, + ): + # Mock the profiler instance + mock_instance = mock.Mock() + mock_profiler.return_value = mock_instance + + # Call sample with GIL mode and collapsed format + profiling.sampling.sample.sample( + 12345, + mode=2, # 
PROFILING_MODE_GIL + output_format="collapsed", + filename="test_output.txt", + ) + + # Verify collector was created with skip_idle=True + mock_collector.assert_called_once() + call_args = mock_collector.call_args[1] + self.assertTrue(call_args['skip_idle']) + + def test_gil_mode_cli_argument_parsing(self): + """Test CLI argument parsing for GIL mode with various options.""" + test_args = [ + "profiling.sampling.sample", + "--mode", "gil", + "--interval", "500", + "--duration", "5", + "-p", "12345" + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.sample.sample") as mock_sample, + ): + try: + profiling.sampling.sample.main() + except SystemExit: + pass # Expected due to invalid PID + + # Verify all arguments were parsed correctly + mock_sample.assert_called_once() + call_args = mock_sample.call_args[1] + self.assertEqual(call_args["mode"], 2) # GIL mode + self.assertEqual(call_args["sample_interval_usec"], 500) + self.assertEqual(call_args["duration_sec"], 5) + + @requires_subprocess() + def test_gil_mode_integration_behavior(self): + """Integration test: GIL mode should capture GIL-holding threads.""" + # Create a test script with GIL-releasing operations + gil_test_script = ''' +import time +import threading + +def gil_releasing_work(): + time.sleep(999999) + +def gil_holding_work(): + x = 1 + while True: + x += 1 + +def main(): +# Start both threads + idle_thread = threading.Thread(target=gil_releasing_work) + cpu_thread = threading.Thread(target=gil_holding_work) + idle_thread.start() + cpu_thread.start() + idle_thread.join() + cpu_thread.join() + +main() +''' + with test_subprocess(gil_test_script) as proc: + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profiling.sampling.sample.sample( + proc.pid, + duration_sec=0.5, + sample_interval_usec=5000, + mode=2, # GIL mode + show_summary=False, + all_threads=True, + ) + except (PermissionError, RuntimeError) as e: +
self.skipTest("Insufficient permissions for remote profiling") + + gil_mode_output = captured_output.getvalue() + + # Test wall-clock mode for comparison + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profiling.sampling.sample.sample( + proc.pid, + duration_sec=0.5, + sample_interval_usec=5000, + mode=0, # Wall-clock mode + show_summary=False, + all_threads=True, + ) + except (PermissionError, RuntimeError) as e: + self.skipTest("Insufficient permissions for remote profiling") + + wall_mode_output = captured_output.getvalue() + + # GIL mode should primarily capture GIL-holding work + # (Note: actual behavior depends on threading implementation) + self.assertIn("gil_holding_work", gil_mode_output) + + # Wall-clock mode should capture both types of work + self.assertIn("gil_holding_work", wall_mode_output) + + def test_mode_constants_are_defined(self): + """Test that all profiling mode constants are properly defined.""" + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_WALL, 0) + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_CPU, 1) + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_GIL, 2) + + def test_parse_mode_function(self): + """Test the _parse_mode function with all valid modes.""" + self.assertEqual(profiling.sampling.sample._parse_mode("wall"), 0) + self.assertEqual(profiling.sampling.sample._parse_mode("cpu"), 1) + self.assertEqual(profiling.sampling.sample._parse_mode("gil"), 2) + + # Test invalid mode raises KeyError + with self.assertRaises(KeyError): + profiling.sampling.sample._parse_mode("invalid") + + if __name__ == "__main__": unittest.main() From 9685f2d2f1b9d7ed428b36f0f8fad7a33506319b Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 11:04:23 +0100 Subject: [PATCH 05/15] fixup! 
Merge remote-tracking branch 'upstream/main' into cputime --- Lib/profiling/sampling/stack_collector.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 5ba7c14304ebbc..0958cb8711e642 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -12,15 +12,13 @@ class StackTraceCollector(Collector): def __init__(self, *, skip_idle=False): - self.call_trees = [] - self.function_samples = collections.defaultdict(int) self.skip_idle = skip_idle def collect(self, stack_frames, skip_idle=None): - if skip_idle == None: + if skip_idle is None: skip_idle = self.skip_idle - for frames in self._iter_all_frames(stack_frames): + for frames in self._iter_all_frames(stack_frames, skip_idle=skip_idle): if not frames: continue self.process_frames(frames) From cb6a43dc827909146df008a0dbe85bb8a303d16d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 11:06:02 +0100 Subject: [PATCH 06/15] fixup! fixup! 
Merge remote-tracking branch 'upstream/main' into cputime --- Lib/profiling/sampling/stack_collector.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 0958cb8711e642..8f42749fe69b64 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -14,10 +14,7 @@ class StackTraceCollector(Collector): def __init__(self, *, skip_idle=False): self.skip_idle = skip_idle - def collect(self, stack_frames, skip_idle=None): - if skip_idle is None: - skip_idle = self.skip_idle - + def collect(self, stack_frames, skip_idle=False): for frames in self._iter_all_frames(stack_frames, skip_idle=skip_idle): if not frames: continue From f6ac8cc1b730cac36b8a3311314b4f9dcc0a36be Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 11:06:36 +0100 Subject: [PATCH 07/15] fixup! fixup! fixup! Merge remote-tracking branch 'upstream/main' into cputime --- Lib/profiling/sampling/stack_collector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 8f42749fe69b64..6983be70ee0440 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -25,7 +25,8 @@ def process_frames(self, frames): class CollapsedStackCollector(StackTraceCollector): - def __init__(self): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.stack_counter = collections.Counter() def process_frames(self, frames): From 457ca998e0303618ec9ee796ad2ccfddefff4c62 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 13:25:52 +0100 Subject: [PATCH 08/15] More tests --- Lib/test/test_external_inspection.py | 120 +++++++++++++++++++- Modules/_remote_debugging_module.c | 22 ++-- Modules/clinic/_remote_debugging_module.c.h | 33 ++++-- 3 files changed, 157 insertions(+), 18 
deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 62596ca1ba0649..2f8f5f0e169339 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -19,6 +19,11 @@ import subprocess +# Profiling mode constants +PROFILING_MODE_WALL = 0 +PROFILING_MODE_CPU = 1 +PROFILING_MODE_GIL = 2 + try: from concurrent import interpreters except ImportError: @@ -1747,7 +1752,8 @@ def busy(): attempts = 10 try: - unwinder = RemoteUnwinder(p.pid, all_threads=True, cpu_time=True) + unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_CPU, + skip_non_matching_threads=False) for _ in range(attempts): traces = unwinder.get_stack_trace() # Check if any thread is running @@ -1780,5 +1786,117 @@ def busy(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on Linux, macOS, or Windows", + ) + @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + def test_thread_status_gil_detection(self): + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import time, sys, socket, threading + import os + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + + def sleeper(): + tid = threading.get_native_id() + sock.sendall(f'ready:sleeper:{{tid}}\\n'.encode()) + time.sleep(10000) + + def busy(): + tid = threading.get_native_id() + sock.sendall(f'ready:busy:{{tid}}\\n'.encode()) + x = 0 + while True: + x = x + 1 + time.sleep(0.5) + + t1 = threading.Thread(target=sleeper) + t2 = threading.Thread(target=busy) + t1.start() + t2.start() + sock.sendall(b'ready:main\\n') + t1.join() + t2.join() + sock.close() + """ + ) + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + + script_name = _make_test_script(script_dir, "thread_status_script", script) + client_socket = None + try: + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = b"" + sleeper_tid = None + busy_tid = None + while True: + chunk = client_socket.recv(1024) + response += chunk + if b"ready:main" in response and b"ready:sleeper" in response and b"ready:busy" in response: + # Parse TIDs from the response + for line in response.split(b"\n"): + if line.startswith(b"ready:sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except Exception: + pass + elif line.startswith(b"ready:busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except Exception: + pass + break + + attempts = 10 + try: + unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_GIL, + skip_non_matching_threads=False) + for _ in range(attempts): + traces = unwinder.get_stack_trace() + # Check if any thread is running + if any(thread_info.status == 0 for interpreter_info in traces + for thread_info in interpreter_info.threads): + break + time.sleep(0.5) # Give a bit of time to let threads settle + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + + # Find threads and their statuses + statuses = {} + for interpreter_info in traces: + for thread_info in interpreter_info.threads: + statuses[thread_info.thread_id] = thread_info.status + + self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") + self.assertIsNotNone(busy_tid, "Busy thread id not received") + self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") + self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") + self.assertEqual(statuses[sleeper_tid], 2, "Sleeper 
thread should be idle (1)") + self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + + finally: + if client_socket is not None: + client_socket.close() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) + + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 2e4fea0e0352e5..4ee02d31112e08 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -264,6 +264,7 @@ typedef struct { int debug; int only_active_thread; int mode; // Use enum _ProfilingMode values + int skip_non_matching_threads; // New option to skip threads that don't match mode RemoteDebuggingState *cached_state; // Cached module state #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking @@ -2654,12 +2655,14 @@ unwind_stack_for_thread( status = THREAD_STATE_RUNNING; } - // Check if we should skip this thread based on mode + // Check if we should skip this thread based on mode and the new option int should_skip = 0; - if (unwinder->mode == PROFILING_MODE_CPU && status != THREAD_STATE_RUNNING) { - should_skip = 1; - } else if (unwinder->mode == PROFILING_MODE_GIL && status != THREAD_STATE_RUNNING) { - should_skip = 1; + if (unwinder->skip_non_matching_threads) { + if (unwinder->mode == PROFILING_MODE_CPU && status != THREAD_STATE_RUNNING) { + should_skip = 1; + } else if (unwinder->mode == PROFILING_MODE_GIL && status != THREAD_STATE_RUNNING) { + should_skip = 1; + } } if (should_skip) { @@ -2743,6 +2746,7 @@ _remote_debugging.RemoteUnwinder.__init__ only_active_thread: bool = False mode: int = 0 debug: bool = False + skip_non_matching_threads: bool = True Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -2755,6 +2759,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. Cannot be used together with all_threads=True. 
debug: If True, chain exceptions to explain the sequence of events that lead to the exception. + skip_non_matching_threads: If True, skip threads that don't match the selected mode. + If False, include all threads regardless of mode. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. @@ -2770,8 +2776,9 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int mode, int debug) -/*[clinic end generated code: output=784e9990115aa569 input=d082d792d2ba9924]*/ + int mode, int debug, + int skip_non_matching_threads) +/*[clinic end generated code: output=abf5ea5cd58bcb36 input=08fb6ace023ec3b5]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -2791,6 +2798,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->debug = debug; self->only_active_thread = only_active_thread; self->mode = mode; + self->skip_non_matching_threads = skip_non_matching_threads; self->cached_state = NULL; if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h index 8d06e70ccc70c5..7dd54e3124887b 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -11,7 +11,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" -" mode=0, debug=False)\n" +" mode=0, debug=False, skip_non_matching_threads=True)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -25,6 +25,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, 
" Cannot be used together with all_threads=True.\n" " debug: If True, chain exceptions to explain the sequence of events that\n" " lead to the exception.\n" +" skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n" +" If False, include all threads regardless of mode.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -39,7 +41,8 @@ static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, - int mode, int debug); + int mode, int debug, + int skip_non_matching_threads); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -47,7 +50,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 5 + #define NUM_KEYWORDS 6 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -65,14 +68,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", 
"skip_non_matching_threads", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[5]; + PyObject *argsbuf[6]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; @@ -81,6 +84,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int only_active_thread = 0; int mode = 0; int debug = 0; + int skip_non_matching_threads = 1; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -121,12 +125,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - debug = PyObject_IsTrue(fastargs[4]); - if (debug < 0) { + if (fastargs[4]) { + debug = PyObject_IsTrue(fastargs[4]); + if (debug < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + skip_non_matching_threads = PyObject_IsTrue(fastargs[5]); + if (skip_non_matching_threads < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads); exit: return return_value; @@ -308,4 +321,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=47ff0bccf5c45828 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2caefeddf7683d32 input=a9049054013a1b77]*/ From 84c47e48193ffb12a9c49d61240064f5b4620e62 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 13:29:57 +0100 Subject: [PATCH 09/15] More tests --- 
Modules/_remote_debugging_module.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 4ee02d31112e08..3f137733d83f81 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2655,14 +2655,11 @@ unwind_stack_for_thread( status = THREAD_STATE_RUNNING; } - // Check if we should skip this thread based on mode and the new option + // Check if we should skip this thread based on mode int should_skip = 0; - if (unwinder->skip_non_matching_threads) { - if (unwinder->mode == PROFILING_MODE_CPU && status != THREAD_STATE_RUNNING) { - should_skip = 1; - } else if (unwinder->mode == PROFILING_MODE_GIL && status != THREAD_STATE_RUNNING) { - should_skip = 1; - } + if ((unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING) && + (unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_GIL)) { + should_skip = 1; } if (should_skip) { From 0cc0b5a924a53890bc26149349e5be509b7b0f45 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 14:06:35 +0100 Subject: [PATCH 10/15] More tests --- Modules/_remote_debugging_module.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 3f137733d83f81..5d8615df54d4e3 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2617,6 +2617,18 @@ get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread #endif } +typedef struct { + unsigned int initialized:1; + unsigned int bound:1; + unsigned int unbound:1; + unsigned int bound_gilstate:1; + unsigned int active:1; + unsigned int finalizing:1; + unsigned int cleared:1; + unsigned int finalized:1; + unsigned int :24; +} _thread_status; + static PyObject* unwind_stack_for_thread( RemoteUnwinderObject *unwinder, @@ -2649,7 +2661,13 @@ 
unwind_stack_for_thread( goto error; } } else if (unwinder->mode == PROFILING_MODE_GIL) { +#ifdef Py_GIL_DISABLED + // All threads are considered running in free threading builds if they have a thread state attached + int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; + status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; +#else status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; +#endif } else { // PROFILING_MODE_WALL - all threads are considered running status = THREAD_STATE_RUNNING; @@ -2657,7 +2675,7 @@ unwind_stack_for_thread( // Check if we should skip this thread based on mode int should_skip = 0; - if ((unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING) && + if (unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING && (unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_GIL)) { should_skip = 1; } From 7e2d94f09c11a5a4648437dba74e4a57cc1418e1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 14:17:09 +0100 Subject: [PATCH 11/15] More tests --- .../internal/pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_global_strings.h | 2 +- Include/internal/pycore_runtime_init_generated.h | 2 +- Include/internal/pycore_unicodeobject_generated.h | 8 ++++---- Programs/test_frozenmain.h | 12 ++++++------ 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 56e6c06f880227..63bcbcee912c0f 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -896,7 +896,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(coro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(covariant)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cpu_time)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ctx)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type)); @@ -1262,6 +1261,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(size)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sizehint)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(skip_file_prefixes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(skip_non_matching_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sleep)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sock)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sort)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3a46fbaf8cd253..681d41cbde32a2 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -387,7 +387,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(coro) STRUCT_FOR_ID(count) STRUCT_FOR_ID(covariant) - STRUCT_FOR_ID(cpu_time) STRUCT_FOR_ID(ctx) STRUCT_FOR_ID(cwd) STRUCT_FOR_ID(d_parameter_type) @@ -753,6 +752,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(size) STRUCT_FOR_ID(sizehint) STRUCT_FOR_ID(skip_file_prefixes) + STRUCT_FOR_ID(skip_non_matching_threads) STRUCT_FOR_ID(sleep) STRUCT_FOR_ID(sock) STRUCT_FOR_ID(sort) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 00de62b9e3b1d8..840f6940ca273e 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -894,7 +894,6 @@ extern "C" { INIT_ID(coro), \ INIT_ID(count), \ INIT_ID(covariant), \ - INIT_ID(cpu_time), \ INIT_ID(ctx), \ INIT_ID(cwd), \ INIT_ID(d_parameter_type), \ @@ -1260,6 +1259,7 @@ extern "C" { INIT_ID(size), \ INIT_ID(sizehint), \ INIT_ID(skip_file_prefixes), \ + 
INIT_ID(skip_non_matching_threads), \ INIT_ID(sleep), \ INIT_ID(sock), \ INIT_ID(sort), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index f4176b98ad999d..3bd14f958da839 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1336,10 +1336,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(cpu_time); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ctx); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2800,6 +2796,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(skip_non_matching_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(sleep); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index dbeedb7ffe0ce6..b9bf4134b59718 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -13,10 +13,10 @@ unsigned char M_test_frozenmain[] = { 82,5,93,6,12,0,82,6,93,5,93,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,50,4,52,1,0,0, 0,0,0,0,31,0,75,26,0,0,9,0,30,0,82,1, - 35,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, - 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, + 35,0,41,8,233,0,0,0,0,78,218,18,70,114,111,122, + 101,110,32,72,101,108,108,111,32,87,111,114,108,100,218,8, 
115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, - 122,7,99,111,110,102,105,103,32,122,2,58,32,41,5,218, + 218,7,99,111,110,102,105,103,32,218,2,58,32,41,5,218, 12,112,114,111,103,114,97,109,95,110,97,109,101,218,10,101, 120,101,99,117,116,97,98,108,101,218,15,117,115,101,95,101, 110,118,105,114,111,110,109,101,110,116,218,17,99,111,110,102, @@ -25,15 +25,15 @@ unsigned char M_test_frozenmain[] = { 3,115,121,115,218,17,95,116,101,115,116,105,110,116,101,114, 110,97,108,99,97,112,105,218,5,112,114,105,110,116,218,4, 97,114,103,118,218,11,103,101,116,95,99,111,110,102,105,103, - 115,114,3,0,0,0,218,3,107,101,121,169,0,243,0,0, + 115,114,5,0,0,0,218,3,107,101,121,169,0,243,0,0, 0,0,218,18,116,101,115,116,95,102,114,111,122,101,110,109, 97,105,110,46,112,121,218,8,60,109,111,100,117,108,101,62, - 114,18,0,0,0,1,0,0,0,115,94,0,0,0,240,3, + 114,22,0,0,0,1,0,0,0,115,94,0,0,0,240,3, 1,1,1,243,8,0,1,11,219,0,24,225,0,5,208,6, 26,212,0,27,217,0,5,128,106,144,35,151,40,145,40,212, 0,27,216,9,26,215,9,38,210,9,38,211,9,40,168,24, 213,9,50,128,6,243,2,6,12,2,128,67,241,14,0,5, 10,136,71,144,67,144,53,152,2,152,54,160,35,157,59,152, - 45,208,10,40,214,4,41,243,15,6,12,2,114,16,0,0, + 45,208,10,40,214,4,41,243,15,6,12,2,114,20,0,0, 0, }; From a95bdafc56dd1916042807af7ded6857978b1154 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Sep 2025 16:02:44 +0100 Subject: [PATCH 12/15] Fix Programs/test_frozenmain.h --- Modules/_remote_debugging_module.c | 2 +- Programs/test_frozenmain.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 5d8615df54d4e3..32710fafc12c3a 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -35,7 +35,7 @@ #endif // Returns thread status using proc_pidinfo, caches thread_id_offset on first use (macOS only) -#ifdef __APPLE__ +#if defined(__APPLE__) && TARGET_OS_OSX #include #include #define 
MAX_NATIVE_THREADS 4096 diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index b9bf4134b59718..dbeedb7ffe0ce6 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -13,10 +13,10 @@ unsigned char M_test_frozenmain[] = { 82,5,93,6,12,0,82,6,93,5,93,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,50,4,52,1,0,0, 0,0,0,0,31,0,75,26,0,0,9,0,30,0,82,1, - 35,0,41,8,233,0,0,0,0,78,218,18,70,114,111,122, - 101,110,32,72,101,108,108,111,32,87,111,114,108,100,218,8, + 35,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, + 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, - 218,7,99,111,110,102,105,103,32,218,2,58,32,41,5,218, + 122,7,99,111,110,102,105,103,32,122,2,58,32,41,5,218, 12,112,114,111,103,114,97,109,95,110,97,109,101,218,10,101, 120,101,99,117,116,97,98,108,101,218,15,117,115,101,95,101, 110,118,105,114,111,110,109,101,110,116,218,17,99,111,110,102, @@ -25,15 +25,15 @@ unsigned char M_test_frozenmain[] = { 3,115,121,115,218,17,95,116,101,115,116,105,110,116,101,114, 110,97,108,99,97,112,105,218,5,112,114,105,110,116,218,4, 97,114,103,118,218,11,103,101,116,95,99,111,110,102,105,103, - 115,114,5,0,0,0,218,3,107,101,121,169,0,243,0,0, + 115,114,3,0,0,0,218,3,107,101,121,169,0,243,0,0, 0,0,218,18,116,101,115,116,95,102,114,111,122,101,110,109, 97,105,110,46,112,121,218,8,60,109,111,100,117,108,101,62, - 114,22,0,0,0,1,0,0,0,115,94,0,0,0,240,3, + 114,18,0,0,0,1,0,0,0,115,94,0,0,0,240,3, 1,1,1,243,8,0,1,11,219,0,24,225,0,5,208,6, 26,212,0,27,217,0,5,128,106,144,35,151,40,145,40,212, 0,27,216,9,26,215,9,38,210,9,38,211,9,40,168,24, 213,9,50,128,6,243,2,6,12,2,128,67,241,14,0,5, 10,136,71,144,67,144,53,152,2,152,54,160,35,157,59,152, - 45,208,10,40,214,4,41,243,15,6,12,2,114,20,0,0, + 45,208,10,40,214,4,41,243,15,6,12,2,114,16,0,0, 0, }; From 1766992127edfde370f4ff2d96b1da8215fa3bb5 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 16 Sep 2025 10:00:49 +0100 Subject: [PATCH 
13/15] Fix test --- Lib/test/test_profiling/test_sampling_profiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 77e4cdaa70f7b3..fd6b1862230288 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -2716,8 +2716,8 @@ def gil_holding_work(): def main(): # Start both threads - idle_thread = threading.Thread(target=idle_worker) - cpu_thread = threading.Thread(target=cpu_active_worker) + idle_thread = threading.Thread(target=gil_releasing_work) + cpu_thread = threading.Thread(target=gil_holding_work) idle_thread.start() cpu_thread.start() idle_thread.join() From 5fae57e7e3ad598f649e6e81c89c37741a5018b9 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Wed, 17 Sep 2025 11:21:19 +0100 Subject: [PATCH 14/15] fixup! Fix test --- Modules/_remote_debugging_module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 32710fafc12c3a..d56f53aebb8717 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2503,7 +2503,7 @@ process_frame_chain( static int get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) { -#ifdef __APPLE__ +#if defined(__APPLE__) && TARGET_OS_OSX if (unwinder->thread_id_offset == 0) { uint64_t *tids = (uint64_t *)PyMem_Malloc(MAX_NATIVE_THREADS * sizeof(uint64_t)); if (!tids) { @@ -2886,7 +2886,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, } #endif -#if defined(__APPLE__) +#if defined(__APPLE__) self->thread_id_offset = 0; #endif From 2328c847899e2c2a253dde7b2144efedf72d12a8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 19 Sep 2025 11:06:39 +0100 Subject: [PATCH 15/15] Update Modules/_remote_debugging_module.c --- 
Modules/_remote_debugging_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index d56f53aebb8717..701f4b0eabdb15 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2886,7 +2886,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, } #endif -#if defined(__APPLE__) +#if defined(__APPLE__) self->thread_id_offset = 0; #endif