diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index a2b21da2970064..7882ca5ce35a07 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -64,6 +64,9 @@ function resolveStringIndices(node) { if (typeof resolved.funcname === 'number') { resolved.funcname = resolveString(resolved.funcname); } + if (typeof resolved.module_name === 'number') { + resolved.module_name = resolveString(resolved.module_name); + } if (Array.isArray(resolved.source)) { resolved.source = resolved.source.map(index => @@ -78,6 +81,11 @@ function resolveStringIndices(node) { return resolved; } +// Escape HTML special characters +function escapeHtml(str) { + return str.replace(/&/g, "&").replace(//g, ">"); +} + // ============================================================================ // Theme & UI Controls // ============================================================================ @@ -201,6 +209,7 @@ function setupLogos() { function updateStatusBar(nodeData, rootValue) { const funcname = resolveString(nodeData.funcname) || resolveString(nodeData.name) || "--"; const filename = resolveString(nodeData.filename) || ""; + const moduleName = resolveString(nodeData.module_name) || ""; const lineno = nodeData.lineno; const timeMs = (nodeData.value / 1000).toFixed(2); const percent = rootValue > 0 ? ((nodeData.value / rootValue) * 100).toFixed(1) : "0.0"; @@ -222,8 +231,7 @@ function updateStatusBar(nodeData, rootValue) { const fileEl = document.getElementById('status-file'); if (fileEl && filename && filename !== "~") { - const basename = filename.split('/').pop(); - fileEl.textContent = lineno ? `${basename}:${lineno}` : basename; + fileEl.textContent = lineno ? `${moduleName}:${lineno}` : moduleName; } const funcEl = document.getElementById('status-func'); @@ -272,6 +280,7 @@ function createPythonTooltip(data) { const funcname = resolveString(d.data.funcname) || resolveString(d.data.name); const filename = resolveString(d.data.filename) || ""; + const moduleName = escapeHtml(resolveString(d.data.module_name) || ""); const isSpecialFrame = filename === "~"; // Build source section @@ -280,7 +289,7 @@ function createPythonTooltip(data) { const sourceLines = source .map((line) => { const isCurrent = line.startsWith("→"); - const escaped = line.replace(/&/g, "&").replace(//g, ">"); + const escaped = escapeHtml(line); return `
${escaped}
`; }) .join(""); @@ -340,7 +349,7 @@ function createPythonTooltip(data) { } const fileLocationHTML = isSpecialFrame ? "" : ` -
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
`; +
${moduleName}${d.data.lineno ? ":" + d.data.lineno : ""}
`; const tooltipHTML = `
@@ -509,24 +518,24 @@ function updateSearchHighlight(searchTerm, searchInput) { const name = resolveString(d.data.name) || ""; const funcname = resolveString(d.data.funcname) || ""; const filename = resolveString(d.data.filename) || ""; + const moduleName = resolveString(d.data.module_name) || ""; const lineno = d.data.lineno; const term = searchTerm.toLowerCase(); - // Check if search term looks like file:line pattern + // Check if search term looks like module:line pattern const fileLineMatch = term.match(/^(.+):(\d+)$/); let matches = false; if (fileLineMatch) { - // Exact file:line matching const searchFile = fileLineMatch[1]; const searchLine = parseInt(fileLineMatch[2], 10); - const basename = filename.split('/').pop().toLowerCase(); - matches = basename.includes(searchFile) && lineno === searchLine; + matches = moduleName.toLowerCase().includes(searchFile) && lineno === searchLine; } else { // Regular substring search matches = name.toLowerCase().includes(term) || funcname.toLowerCase().includes(term) || + moduleName.toLowerCase().includes(term) || filename.toLowerCase().includes(term); } @@ -894,6 +903,7 @@ function populateStats(data) { let filename = resolveString(node.filename); let funcname = resolveString(node.funcname); + let moduleName = resolveString(node.module_name); if (!filename || !funcname) { const nameStr = resolveString(node.name); @@ -908,6 +918,7 @@ function populateStats(data) { filename = filename || 'unknown'; funcname = funcname || 'unknown'; + moduleName = moduleName || 'unknown'; if (filename !== 'unknown' && funcname !== 'unknown' && node.value > 0) { let childrenValue = 0; @@ -924,12 +935,14 @@ function populateStats(data) { existing.directPercent = (existing.directSamples / totalSamples) * 100; if (directSamples > existing.maxSingleSamples) { existing.filename = filename; + existing.module_name = moduleName; existing.lineno = node.lineno || '?'; existing.maxSingleSamples = directSamples; } } else { functionMap.set(funcKey, { filename: filename, + module_name: moduleName, lineno: node.lineno || '?', funcname: funcname, directSamples, @@ -964,6 +977,7 @@ function populateStats(data) { const h = hotSpots[i]; const filename = h.filename || 'unknown'; const lineno = h.lineno ?? '?'; + const moduleName = h.module_name || 'unknown'; const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?'); let funcDisplay = h.funcname || 'unknown'; @@ -974,8 +988,7 @@ function populateStats(data) { if (isSpecialFrame) { fileEl.textContent = '--'; } else { - const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown'; - fileEl.textContent = `${basename}:${lineno}`; + fileEl.textContent = `${moduleName}:${lineno}`; } } if (percentEl) percentEl.textContent = `${h.directPercent.toFixed(1)}%`; @@ -991,8 +1004,9 @@ function populateStats(data) { if (card) { if (i < hotSpots.length && hotSpots[i]) { const h = hotSpots[i]; - const basename = h.filename !== 'unknown' ? h.filename.split('/').pop() : ''; - const searchTerm = basename && h.lineno !== '?' ? `${basename}:${h.lineno}` : h.funcname; + const moduleName = h.module_name || 'unknown'; + const hasValidLocation = moduleName !== 'unknown' && h.lineno !== '?'; + const searchTerm = hasValidLocation ? `${moduleName}:${h.lineno}` : h.funcname; card.dataset.searchterm = searchTerm; card.onclick = () => searchForHotspot(searchTerm); card.style.cursor = 'pointer'; @@ -1147,6 +1161,7 @@ function accumulateInvertedNode(parent, stackFrame, leaf) { value: 0, children: {}, filename: stackFrame.filename, + module_name: stackFrame.module_name, lineno: stackFrame.lineno, funcname: stackFrame.funcname, source: stackFrame.source, diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py index ea1beec70d39f8..5c36d78f5535e7 100644 --- a/Lib/profiling/sampling/heatmap_collector.py +++ b/Lib/profiling/sampling/heatmap_collector.py @@ -20,6 +20,7 @@ from .collector import normalize_location, extract_lineno from .opcode_utils import get_opcode_info, format_opcode from .stack_collector import StackTraceCollector +from .module_utils import extract_module_name, get_python_path_info # ============================================================================ @@ -49,126 +50,6 @@ class TreeNode: children: Dict[str, 'TreeNode'] = field(default_factory=dict) -# ============================================================================ -# Module Path Analysis -# ============================================================================ - -def get_python_path_info(): - """Get information about Python installation paths for module extraction. - - Returns: - dict: Dictionary containing stdlib path, site-packages paths, and sys.path entries. - """ - info = { - 'stdlib': None, - 'site_packages': [], - 'sys_path': [] - } - - # Get standard library path from os module location - try: - if hasattr(os, '__file__') and os.__file__: - info['stdlib'] = Path(os.__file__).parent - except (AttributeError, OSError): - pass # Silently continue if we can't determine stdlib path - - # Get site-packages directories - site_packages = [] - try: - site_packages.extend(Path(p) for p in site.getsitepackages()) - except (AttributeError, OSError): - pass # Continue without site packages if unavailable - - # Get user site-packages - try: - user_site = site.getusersitepackages() - if user_site and Path(user_site).exists(): - site_packages.append(Path(user_site)) - except (AttributeError, OSError): - pass # Continue without user site packages - - info['site_packages'] = site_packages - info['sys_path'] = [Path(p) for p in sys.path if p] - - return info - - -def extract_module_name(filename, path_info): - """Extract Python module name and type from file path. - - Args: - filename: Path to the Python file - path_info: Dictionary from get_python_path_info() - - Returns: - tuple: (module_name, module_type) where module_type is one of: - 'stdlib', 'site-packages', 'project', or 'other' - """ - if not filename: - return ('unknown', 'other') - - try: - file_path = Path(filename) - except (ValueError, OSError): - return (str(filename), 'other') - - # Check if it's in stdlib - if path_info['stdlib'] and _is_subpath(file_path, path_info['stdlib']): - try: - rel_path = file_path.relative_to(path_info['stdlib']) - return (_path_to_module(rel_path), 'stdlib') - except ValueError: - pass - - # Check site-packages - for site_pkg in path_info['site_packages']: - if _is_subpath(file_path, site_pkg): - try: - rel_path = file_path.relative_to(site_pkg) - return (_path_to_module(rel_path), 'site-packages') - except ValueError: - continue - - # Check other sys.path entries (project files) - if not str(file_path).startswith(('<', '[')): # Skip special files - for path_entry in path_info['sys_path']: - if _is_subpath(file_path, path_entry): - try: - rel_path = file_path.relative_to(path_entry) - return (_path_to_module(rel_path), 'project') - except ValueError: - continue - - # Fallback: just use the filename - return (_path_to_module(file_path), 'other') - - -def _is_subpath(file_path, parent_path): - try: - file_path.relative_to(parent_path) - return True - except (ValueError, OSError): - return False - - -def _path_to_module(path): - if isinstance(path, str): - path = Path(path) - - # Remove .py extension - if path.suffix == '.py': - path = path.with_suffix('') - - # Convert path separators to dots - parts = path.parts - - # Handle __init__ files - they represent the package itself - if parts and parts[-1] == '__init__': - parts = parts[:-1] - - return '.'.join(parts) if parts else path.stem - - # ============================================================================ # Helper Classes # ============================================================================ diff --git a/Lib/profiling/sampling/module_utils.py b/Lib/profiling/sampling/module_utils.py new file mode 100644 index 00000000000000..6d838d411902af --- /dev/null +++ b/Lib/profiling/sampling/module_utils.py @@ -0,0 +1,122 @@ +"""Utilities for extracting module names from file paths.""" + +import os +import site +import sys +from pathlib import Path + + +def get_python_path_info(): + """Get information about Python's search paths. + + Returns: + dict: Dictionary containing stdlib path, site-packages paths, and sys.path entries. + """ + info = { + 'stdlib': None, + 'site_packages': [], + 'sys_path': [] + } + + # Get standard library path from os module location + try: + if hasattr(os, '__file__') and os.__file__: + info['stdlib'] = Path(os.__file__).parent + except (AttributeError, OSError): + pass # Silently continue if we can't determine stdlib path + + # Get site-packages directories + site_packages = [] + try: + site_packages.extend(Path(p) for p in site.getsitepackages()) + except (AttributeError, OSError): + pass # Continue without site packages if unavailable + + # Get user site-packages + try: + user_site = site.getusersitepackages() + if user_site and Path(user_site).exists(): + site_packages.append(Path(user_site)) + except (AttributeError, OSError): + pass # Continue without user site packages + + info['site_packages'] = site_packages + info['sys_path'] = [Path(p) for p in sys.path if p] + + return info + + +def extract_module_name(filename, path_info): + """Extract Python module name and type from file path. + + Args: + filename: Path to the Python file + path_info: Dictionary from get_python_path_info() + + Returns: + tuple: (module_name, module_type) where module_type is one of: + 'stdlib', 'site-packages', 'project', or 'other' + """ + if not filename: + return ('unknown', 'other') + + try: + file_path = Path(filename) + except (ValueError, OSError): + return (str(filename), 'other') + + # Check if it's in stdlib + if path_info['stdlib'] and _is_subpath(file_path, path_info['stdlib']): + try: + rel_path = file_path.relative_to(path_info['stdlib']) + return (_path_to_module(rel_path), 'stdlib') + except ValueError: + pass + + # Check site-packages + for site_pkg in path_info['site_packages']: + if _is_subpath(file_path, site_pkg): + try: + rel_path = file_path.relative_to(site_pkg) + return (_path_to_module(rel_path), 'site-packages') + except ValueError: + continue + + # Check other sys.path entries (project files) + if not str(file_path).startswith(('<', '[')): # Skip special files + for path_entry in path_info['sys_path']: + if _is_subpath(file_path, path_entry): + try: + rel_path = file_path.relative_to(path_entry) + return (_path_to_module(rel_path), 'project') + except ValueError: + continue + + # Fallback: just use the filename + return (_path_to_module(file_path), 'other') + + +def _is_subpath(file_path, parent_path): + try: + file_path.relative_to(parent_path) + return True + except (ValueError, OSError): + return False + + +def _path_to_module(path): + if isinstance(path, str): + path = Path(path) + + # Remove .py extension + if path.suffix == '.py': + path = path.with_suffix('') + + # Convert path separators to dots + parts = path.parts + + # Handle __init__ files - they represent the package itself + if parts and parts[-1] == '__init__': + parts = parts[:-1] + + return '.'.join(parts) if parts else path.stem diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 931bc2c487b55b..41f82721dd2943 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -12,6 +12,7 @@ from .collector import Collector, extract_lineno from .opcode_utils import get_opcode_mapping from .string_table import StringTable +from .module_utils import extract_module_name, get_python_path_info class StackTraceCollector(Collector): @@ -72,6 +73,7 @@ def __init__(self, *args, **kwargs): self._sample_count = 0 # Track actual number of samples (not thread traces) self._func_intern = {} self._string_table = StringTable() + self._module_cache = {} self._all_threads = set() # Thread status statistics (similar to LiveStatsCollector) @@ -168,19 +170,21 @@ def export(self, filename): @staticmethod @functools.lru_cache(maxsize=None) - def _format_function_name(func): + def _format_function_name(func, module_name): filename, lineno, funcname = func # Special frames like and should not show file:line if filename == "~" and lineno == 0: return funcname - if len(filename) > 50: - parts = filename.split("/") - if len(parts) > 2: - filename = f".../{'/'.join(parts[-2:])}" + return f"{funcname} ({module_name}:{lineno})" - return f"{funcname} ({filename}:{lineno})" + def _get_module_name(self, filename, path_info): + module_name = self._module_cache.get(filename) + if module_name is None: + module_name, _ = extract_module_name(filename, path_info) + self._module_cache[filename] = module_name + return module_name def _convert_to_flamegraph_format(self): if self._total_samples == 0: @@ -192,7 +196,7 @@ def _convert_to_flamegraph_format(self): "strings": self._string_table.get_strings() } - def convert_children(children, min_samples): + def convert_children(children, min_samples, path_info): out = [] for func, node in children.items(): samples = node["samples"] @@ -202,13 +206,17 @@ def convert_children(children, min_samples): # Intern all string components for maximum efficiency filename_idx = self._string_table.intern(func[0]) funcname_idx = self._string_table.intern(func[2]) - name_idx = self._string_table.intern(self._format_function_name(func)) + module_name = self._get_module_name(func[0], path_info) + + module_name_idx = self._string_table.intern(module_name) + name_idx = self._string_table.intern(self._format_function_name(func, module_name)) child_entry = { "name": name_idx, "value": samples, "children": [], "filename": filename_idx, + "module_name": module_name_idx, "lineno": func[1], "funcname": funcname_idx, "threads": sorted(list(node.get("threads", set()))), @@ -227,7 +235,7 @@ def convert_children(children, min_samples): # Recurse child_entry["children"] = convert_children( - node["children"], min_samples + node["children"], min_samples, path_info ) out.append(child_entry) @@ -238,8 +246,9 @@ def convert_children(children, min_samples): # Filter out very small functions (less than 0.1% of total samples) total_samples = self._total_samples min_samples = max(1, int(total_samples * 0.001)) + path_info = get_python_path_info() - root_children = convert_children(self._root["children"], min_samples) + root_children = convert_children(self._root["children"], min_samples, path_info) if not root_children: return { "name": self._string_table.intern("No significant data"), diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 8e6afa91e89daf..b1359d947f16a4 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -430,7 +430,7 @@ def test_flamegraph_collector_basic(self): ) self.assertIsInstance(name, str) self.assertTrue(name.startswith("Program Root: ")) - self.assertIn("func2 (file.py:20)", name) # formatted name + self.assertIn("func2 (file:20)", name) children = data.get("children", []) self.assertEqual(len(children), 1) child = children[0] @@ -441,7 +441,7 @@ def test_flamegraph_collector_basic(self): and 0 <= child_name_index < len(strings) else str(child_name_index) ) - self.assertIn("func1 (file.py:10)", child_name) # formatted name + self.assertIn("func1 (file:10)", child_name) self.assertEqual(child["value"], 1) def test_flamegraph_collector_export(self):