Skip to content

Commit fb8983c

Browse files
committed
gh-138709: Implement CPU time profiling in profiling.sample
1 parent 8b5ce31 commit fb8983c

13 files changed

+548
-37
lines changed

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ struct _Py_global_strings {
387387
STRUCT_FOR_ID(coro)
388388
STRUCT_FOR_ID(count)
389389
STRUCT_FOR_ID(covariant)
390+
STRUCT_FOR_ID(cpu_time)
390391
STRUCT_FOR_ID(ctx)
391392
STRUCT_FOR_ID(cwd)
392393
STRUCT_FOR_ID(d_parameter_type)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/profiling/sampling/collector.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
from abc import ABC, abstractmethod
22

# Thread states are bare module-level ints rather than an Enum because
# enums are slow.
(
    THREAD_STATE_RUNNING,
    THREAD_STATE_IDLE,
    THREAD_STATE_GIL_WAIT,
    THREAD_STATE_UNKNOWN,
) = range(4)

# Human-readable label for each thread-state constant.
STATUS = dict(
    (
        (THREAD_STATE_RUNNING, "running"),
        (THREAD_STATE_IDLE, "idle"),
        (THREAD_STATE_GIL_WAIT, "gil_wait"),
        (THREAD_STATE_UNKNOWN, "unknown"),
    )
)
315

416
class Collector(ABC):
517
@abstractmethod
@@ -10,10 +22,12 @@ def collect(self, stack_frames):
1022
def export(self, filename):
1123
"""Export collected data to a file."""
1224

def _iter_all_frames(self, stack_frames, skip_idle=False):
    """Yield the non-empty frame stack of every sampled thread.

    *stack_frames* is iterated as a sequence of interpreter records, each
    exposing a ``threads`` collection whose items carry ``status`` and
    ``frame_info``.  When *skip_idle* is true, only threads whose status
    equals ``THREAD_STATE_RUNNING`` are considered.
    """
    for interp in stack_frames:
        for thread in interp.threads:
            is_running = thread.status == THREAD_STATE_RUNNING
            if skip_idle and not is_running:
                continue
            stack = thread.frame_info
            if not stack:
                # Threads without any frames contribute nothing.
                continue
            yield stack

Lib/profiling/sampling/pstats_collector.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
class PstatsCollector(Collector):
8-
def __init__(self, sample_interval_usec):
8+
def __init__(self, sample_interval_usec, *, skip_idle=False):
99
self.result = collections.defaultdict(
1010
lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0)
1111
)
@@ -14,6 +14,7 @@ def __init__(self, sample_interval_usec):
1414
self.callers = collections.defaultdict(
1515
lambda: collections.defaultdict(int)
1616
)
17+
self.skip_idle = skip_idle
1718

1819
def _process_frames(self, frames):
1920
"""Process a single thread's frame stack."""
@@ -40,7 +41,7 @@ def _process_frames(self, frames):
4041
self.callers[callee][caller] += 1
4142

4243
def collect(self, stack_frames):
43-
for frames in self._iter_all_frames(stack_frames):
44+
for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
4445
self._process_frames(frames)
4546

4647
def export(self, filename):

Lib/profiling/sampling/sample.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,18 +120,18 @@ def _run_with_sync(original_cmd):
120120

121121

122122
class SampleProfiler:
123-
def __init__(self, pid, sample_interval_usec, all_threads):
123+
def __init__(self, pid, sample_interval_usec, all_threads, *, cpu_time=False):
124124
self.pid = pid
125125
self.sample_interval_usec = sample_interval_usec
126126
self.all_threads = all_threads
127127
if _FREE_THREADED_BUILD:
128128
self.unwinder = _remote_debugging.RemoteUnwinder(
129-
self.pid, all_threads=self.all_threads
129+
self.pid, all_threads=self.all_threads, cpu_time=cpu_time
130130
)
131131
else:
132132
only_active_threads = bool(self.all_threads)
133133
self.unwinder = _remote_debugging.RemoteUnwinder(
134-
self.pid, only_active_thread=only_active_threads
134+
self.pid, only_active_thread=only_active_threads, cpu_time=cpu_time
135135
)
136136
# Track sample intervals and total sample count
137137
self.sample_intervals = deque(maxlen=100)
@@ -596,21 +596,22 @@ def sample(
596596
show_summary=True,
597597
output_format="pstats",
598598
realtime_stats=False,
599+
skip_idle=False,
599600
):
600601
profiler = SampleProfiler(
601-
pid, sample_interval_usec, all_threads=all_threads
602+
pid, sample_interval_usec, all_threads=all_threads, cpu_time=skip_idle
602603
)
603604
profiler.realtime_stats = realtime_stats
604605

605606
collector = None
606607
match output_format:
607608
case "pstats":
608-
collector = PstatsCollector(sample_interval_usec)
609+
collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle)
609610
case "collapsed":
610-
collector = CollapsedStackCollector()
611+
collector = CollapsedStackCollector(skip_idle=skip_idle)
611612
filename = filename or f"collapsed.{pid}.txt"
612613
case "flamegraph":
613-
collector = FlamegraphCollector()
614+
collector = FlamegraphCollector(skip_idle=skip_idle)
614615
filename = filename or f"flamegraph.{pid}.html"
615616
case _:
616617
raise ValueError(f"Invalid output format: {output_format}")
@@ -660,6 +661,7 @@ def wait_for_process_and_sample(pid, sort_value, args):
660661
filename = args.outfile
661662
if not filename and args.format == "collapsed":
662663
filename = f"collapsed.{pid}.txt"
664+
skip_idle = True if args.mode == "cpu" else False
663665

664666
sample(
665667
pid,
@@ -672,6 +674,7 @@ def wait_for_process_and_sample(pid, sort_value, args):
672674
show_summary=not args.no_summary,
673675
output_format=args.format,
674676
realtime_stats=args.realtime_stats,
677+
skip_idle=skip_idle,
675678
)
676679

677680

@@ -726,6 +729,15 @@ def main():
726729
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
727730
)
728731

# Mode options
# The default must be one of ``choices``: argparse does not validate
# defaults against ``choices``, so a mismatched default would silently give
# ``args.mode`` a value that can never be passed on the command line.
mode_group = parser.add_argument_group("Mode options")
mode_group.add_argument(
    "--mode",
    choices=["wall", "cpu"],
    default="wall",
    help="Sampling mode: wall (default, wall-clock time, skip_idle=False) "
    "or cpu (CPU time, skip_idle=True)",
)
740+
729741
# Output format selection
730742
output_group = parser.add_argument_group("Output options")
731743
output_format = output_group.add_mutually_exclusive_group()
@@ -850,6 +862,9 @@ def main():
850862
elif target_count > 1:
851863
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
852864

865+
# Set skip_idle based on mode
866+
skip_idle = True if args.mode == "cpu" else False
867+
853868
if args.pid:
854869
sample(
855870
args.pid,
@@ -862,6 +877,7 @@ def main():
862877
show_summary=not args.no_summary,
863878
output_format=args.format,
864879
realtime_stats=args.realtime_stats,
880+
skip_idle=skip_idle,
865881
)
866882
elif args.module or args.args:
867883
if args.module:

Lib/profiling/sampling/stack_collector.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010

1111

1212
class StackTraceCollector(Collector):
13-
def __init__(self):
13+
def __init__(self, *, skip_idle=False):
1414
self.call_trees = []
1515
self.function_samples = collections.defaultdict(int)
16+
self.skip_idle = skip_idle
1617

1718
def _process_frames(self, frames):
1819
"""Process a single thread's frame stack."""
@@ -28,7 +29,7 @@ def _process_frames(self, frames):
2829
self.function_samples[frame] += 1
2930

3031
def collect(self, stack_frames):
31-
for frames in self._iter_all_frames(stack_frames):
32+
for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
3233
self._process_frames(frames)
3334

3435

@@ -49,8 +50,8 @@ def export(self, filename):
4950

5051

5152
class FlamegraphCollector(StackTraceCollector):
52-
def __init__(self):
53-
super().__init__()
53+
def __init__(self, *args, **kwargs):
54+
super().__init__(*args, **kwargs)
5455
self.stats = {}
5556

5657
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None):

Lib/test/test_external_inspection.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,115 @@ def test_unsupported_platform_error(self):
16701670
str(cm.exception)
16711671
)
16721672

class TestDetectionOfThreadStatus(unittest.TestCase):
    """Check the per-thread status reported by RemoteUnwinder(cpu_time=True)."""

    def _read_ready_tids(self, client_socket):
        """Read the child's ``ready:*`` lines and return (sleeper_tid, busy_tid).

        Blocks until the main thread, the sleeper thread and the busy thread
        have each sent a complete, newline-terminated ``ready`` message.  A
        TID that cannot be parsed is returned as ``None``.  Fails the test if
        the child closes the connection first.
        """
        response = b""
        while True:
            seen_main = False
            tids = {}
            # Only inspect newline-terminated lines so a TID split across
            # two recv() calls is never parsed half-way.
            for line in response.split(b"\n")[:-1]:
                if line == b"ready:main":
                    seen_main = True
                elif line.startswith((b"ready:sleeper:", b"ready:busy:")):
                    _, role, raw_tid = line.split(b":", 2)
                    try:
                        tids[role] = int(raw_tid)
                    except ValueError:
                        # Reported later by the assertIsNotNone checks.
                        tids[role] = None
            if seen_main and b"sleeper" in tids and b"busy" in tids:
                return tids[b"sleeper"], tids[b"busy"]

            chunk = client_socket.recv(1024)
            if not chunk:
                # An empty read means the peer closed the connection; without
                # this check the loop would spin forever.
                self.fail(
                    "Target process closed the socket before all threads "
                    "reported ready"
                )
            response += chunk

    @unittest.skipIf(
        sys.platform not in ("linux", "darwin", "win32"),
        "Test only runs on Linux, macOS, or Windows",
    )
    @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception")
    def test_thread_status_detection(self):
        """A sleeping thread is reported idle (1) and a spinning thread running (0)."""
        port = find_unused_port()
        # The child starts one thread that sleeps and one that spins forever;
        # each reports its native thread id over the socket before doing so.
        script = textwrap.dedent(
            f"""\
            import time, sys, socket, threading
            import os

            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect(('localhost', {port}))

            def sleeper():
                tid = threading.get_native_id()
                sock.sendall(f'ready:sleeper:{{tid}}\\n'.encode())
                time.sleep(10000)

            def busy():
                tid = threading.get_native_id()
                sock.sendall(f'ready:busy:{{tid}}\\n'.encode())
                x = 0
                while True:
                    x = x + 1
                time.sleep(0.5)

            t1 = threading.Thread(target=sleeper)
            t2 = threading.Thread(target=busy)
            t1.start()
            t2.start()
            sock.sendall(b'ready:main\\n')
            t1.join()
            t2.join()
            sock.close()
            """
        )
        with os_helper.temp_dir() as work_dir:
            script_dir = os.path.join(work_dir, "script_pkg")
            os.mkdir(script_dir)
            script_name = _make_test_script(script_dir, "thread_status_script", script)

            server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # Initialised up front so the finally block never touches an
            # unbound name if an earlier step raises.
            p = None
            client_socket = None
            try:
                server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                server_socket.bind(("localhost", port))
                server_socket.settimeout(SHORT_TIMEOUT)
                server_socket.listen(1)

                p = subprocess.Popen([sys.executable, script_name])
                client_socket, _ = server_socket.accept()
                # Set the timeout explicitly on the accepted socket so a hung
                # child cannot block recv() forever.
                client_socket.settimeout(SHORT_TIMEOUT)
                sleeper_tid, busy_tid = self._read_ready_tids(client_socket)

                attempts = 10
                try:
                    unwinder = RemoteUnwinder(p.pid, all_threads=True, cpu_time=True)
                    for _ in range(attempts):
                        traces = unwinder.get_stack_trace()
                        # Check if any thread is running
                        if any(
                            thread_info.status == 0
                            for interpreter_info in traces
                            for thread_info in interpreter_info.threads
                        ):
                            break
                        time.sleep(0.5)  # Give a bit of time to let threads settle
                except PermissionError:
                    self.skipTest(
                        "Insufficient permissions to read the stack trace"
                    )

                # Find threads and their statuses
                statuses = {
                    thread_info.thread_id: thread_info.status
                    for interpreter_info in traces
                    for thread_info in interpreter_info.threads
                }

                self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
                self.assertIsNotNone(busy_tid, "Busy thread id not received")
                self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
                self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
                self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)")
                self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
            finally:
                server_socket.close()
                if client_socket is not None:
                    client_socket.close()
                if p is not None:
                    p.terminate()
                    p.wait(timeout=SHORT_TIMEOUT)
16731782

16741783
if __name__ == "__main__":
16751784
unittest.main()

0 commit comments

Comments
 (0)