From d355f1a28bdab073dbdd084871608aeedf4e5f94 Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 13:03:41 +0200 Subject: [PATCH 1/8] fix: improve module identity resolution heuristics - canonicalise sys.path roots and normalise empty entries to the cwd so path-based lookups are stable - add project-marker heuristics to derive dotted names when sys.modules misses a match and extend coverage to enforce behaviour - prefer sys.modules names over filesystem-shaped fallbacks and let trace filters choose the most specific project root --- .../src/module_identity.rs | 286 +++++++++++++++++- .../src/runtime/tracer/runtime_tracer.rs | 6 +- .../src/trace_filter/engine.rs | 30 +- 3 files changed, 302 insertions(+), 20 deletions(-) diff --git a/codetracer-python-recorder/src/module_identity.rs b/codetracer-python-recorder/src/module_identity.rs index 84c382a..4d6ba7f 100644 --- a/codetracer-python-recorder/src/module_identity.rs +++ b/codetracer-python-recorder/src/module_identity.rs @@ -1,6 +1,8 @@ //! Shared helpers for deriving Python module names from filenames and module metadata. use std::borrow::Cow; +use std::cmp::Ordering; +use std::env; use std::path::{Component, Path}; use std::sync::Arc; @@ -27,8 +29,9 @@ impl ModuleIdentityResolver { /// Construct a resolver from an explicit list of module roots. Visible for tests. pub fn from_roots(roots: Vec) -> Self { + let module_roots = canonicalise_module_roots(roots); Self { - module_roots: Arc::from(roots), + module_roots, cache: DashMap::new(), } } @@ -38,9 +41,24 @@ impl ModuleIdentityResolver { if let Some(entry) = self.cache.get(absolute) { return entry.clone(); } - let resolved = module_name_from_roots(self.module_roots(), absolute) - .or_else(|| lookup_module_name(py, absolute)); - self.cache.insert(absolute.to_string(), resolved.clone()); + let mut path_candidate = module_name_from_roots(self.module_roots(), absolute); + if path_candidate + .as_ref() + .map(|name| is_filesystem_shaped_name(name, absolute)) + .unwrap_or(true) + { + if let Some(heuristic) = module_name_from_heuristics(absolute) { + path_candidate = Some(heuristic); + } + } + let sys_candidate = lookup_module_name(py, absolute); + let (resolved, cacheable) = match (sys_candidate, path_candidate) { + (Some(preferred), _) => (Some(preferred), true), + (None, candidate) => (candidate, false), + }; + if cacheable { + self.cache.insert(absolute.to_string(), resolved.clone()); + } resolved } @@ -84,6 +102,7 @@ impl ModuleIdentityCache { return entry.clone(); } + let globals_name = hints.globals_name.and_then(sanitise_module_name); let mut resolved = hints .preferred .and_then(sanitise_module_name) @@ -97,7 +116,18 @@ impl ModuleIdentityCache { .absolute_path .and_then(|absolute| self.resolver.resolve_absolute(py, absolute)) }) - .or_else(|| hints.globals_name.and_then(sanitise_module_name)); + .or_else(|| globals_name.clone()); + + if let Some(globals) = globals_name.as_ref() { + if globals == "__main__" + && resolved + .as_deref() + .map(|candidate| candidate != "__main__") + .unwrap_or(true) + { + resolved = Some(globals.clone()); + } + } if resolved.is_none() && hints.absolute_path.is_none() { if let Ok(filename) = code.filename(py) { @@ -153,11 +183,20 @@ impl<'a> ModuleNameHints<'a> { fn collect_module_roots(py: Python<'_>) -> Vec { let mut roots = Vec::new(); + let cwd = env::current_dir() + .ok() + .and_then(|dir| normalise_to_posix(dir.as_path())); if let Ok(sys) = py.import("sys") { if let Ok(path_obj) = sys.getattr("path") { if let Ok(path_list) = path_obj.downcast_into::() { for entry in path_list.iter() { if let Ok(raw) = entry.extract::() { + if raw.is_empty() { + if let Some(dir) = cwd.as_ref() { + roots.push(dir.clone()); + } + continue; + } if let Some(normalized) = normalise_to_posix(Path::new(&raw)) { roots.push(normalized); } @@ -169,6 +208,38 @@ fn collect_module_roots(py: Python<'_>) -> Vec { roots } +fn canonicalise_module_roots(roots: Vec) -> Arc<[String]> { + let mut canonical: Vec = roots.into_iter().map(canonicalise_root).collect(); + canonical.sort_by(|a, b| compare_roots(a, b)); + canonical.dedup(); + Arc::from(canonical) +} + +fn canonicalise_root(mut root: String) -> String { + if root.is_empty() { + if let Ok(cwd) = std::env::current_dir() { + if let Some(normalized) = normalise_to_posix(cwd.as_path()) { + return normalized; + } + } + return "/".to_string(); + } + while root.len() > 1 && root.ends_with('/') { + root.pop(); + } + root +} + +fn compare_roots(a: &str, b: &str) -> Ordering { + let len_a = a.len(); + let len_b = b.len(); + if len_a == len_b { + a.cmp(b) + } else { + len_b.cmp(&len_a) + } +} + pub(crate) fn module_name_from_roots(roots: &[String], absolute: &str) -> Option { for base in roots { if let Some(relative) = strip_posix_prefix(absolute, base) { @@ -180,6 +251,14 @@ pub(crate) fn module_name_from_roots(roots: &[String], absolute: &str) -> Option None } +fn module_name_from_heuristics(absolute: &str) -> Option { + let roots = heuristic_roots_for_absolute(absolute); + if roots.is_empty() { + return None; + } + module_name_from_roots(&roots, absolute) +} + fn lookup_module_name(py: Python<'_>, absolute: &str) -> Option { let sys = py.import("sys").ok()?; let modules_obj = sys.getattr("modules").ok()?; @@ -364,6 +443,69 @@ fn strip_posix_prefix<'a>(path: &'a str, base: &str) -> Option<&'a str> { } } +fn module_from_absolute(absolute: &str) -> Option { + let without_root = absolute.trim_start_matches('/'); + let trimmed = trim_drive_prefix(without_root); + if trimmed.is_empty() { + return None; + } + module_from_relative(trimmed) +} + +fn trim_drive_prefix(path: &str) -> &str { + if let Some((prefix, remainder)) = path.split_once('/') { + if prefix.ends_with(':') { + return remainder; + } + } + path +} + +fn is_filesystem_shaped_name(candidate: &str, absolute: &str) -> bool { + module_from_absolute(absolute) + .as_deref() + .map(|path_like| path_like == candidate) + .unwrap_or(false) +} + +fn heuristic_roots_for_absolute(absolute: &str) -> Vec { + if let Some(project_root) = find_nearest_project_root(absolute) { + vec![project_root] + } else if let Some(parent) = Path::new(absolute) + .parent() + .and_then(|dir| normalise_to_posix(dir)) + { + vec![parent] + } else { + Vec::new() + } +} + +fn find_nearest_project_root(absolute: &str) -> Option { + let mut current = Path::new(absolute).parent(); + while let Some(dir) = current { + if has_project_marker(dir) { + return normalise_to_posix(dir); + } + current = dir.parent(); + } + None +} + +fn has_project_marker(dir: &Path) -> bool { + const PROJECT_MARKER_FILES: &[&str] = &["pyproject.toml", "setup.cfg", "setup.py"]; + const PROJECT_MARKER_DIRS: &[&str] = &[".git", ".hg", ".svn"]; + + PROJECT_MARKER_DIRS + .iter() + .map(|marker| dir.join(marker)) + .any(|marker_dir| marker_dir.exists()) + || PROJECT_MARKER_FILES + .iter() + .map(|marker| dir.join(marker)) + .any(|marker_file| marker_file.exists()) +} + /// Normalise a filesystem path to a POSIX-style string used by trace filters. pub fn normalise_to_posix(path: &Path) -> Option { if path.as_os_str().is_empty() { @@ -486,4 +628,138 @@ mod tests { Some("pkg.module.sub") ); } + + #[test] + fn module_name_from_roots_prefers_specific_root_over_catch_all() { + Python::with_gil(|py| { + let tmp = tempfile::tempdir().expect("tempdir"); + let module_dir = tmp.path().join("pkg"); + std::fs::create_dir_all(&module_dir).expect("mkdir"); + let module_path = module_dir.join("mod.py"); + std::fs::write(&module_path, "def foo():\n return 1\n").expect("write"); + + let project_root = normalise_to_posix(tmp.path()).expect("normalize root"); + let resolver = + ModuleIdentityResolver::from_roots(vec!["/".to_string(), project_root.clone()]); + let absolute_norm = + normalise_to_posix(module_path.as_path()).expect("normalize absolute"); + let derived = module_name_from_roots(resolver.module_roots(), absolute_norm.as_str()); + + assert_eq!(derived.as_deref(), Some("pkg.mod")); + + // suppress unused warnings + let _ = py; + }); + } + + #[test] + fn resolve_absolute_prefers_sys_modules_name_over_path_fallback() { + Python::with_gil(|py| { + let tmp = tempfile::tempdir().expect("tempdir"); + let module_dir = tmp.path().join("pkg"); + std::fs::create_dir_all(&module_dir).expect("mkdir"); + let module_path = module_dir.join("mod.py"); + std::fs::write(&module_path, "def foo():\n return 42\n").expect("write"); + + let module_path_str = module_path.to_string_lossy().to_string(); + let module = load_module( + py, + "pkg.mod", + module_path_str.as_str(), + "def foo():\n return 42\n", + ) + .expect("load module"); + let _code = get_code(&module, "foo"); + + let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); + let absolute_norm = + normalise_to_posix(module_path.as_path()).expect("normalize absolute"); + let resolved = resolver.resolve_absolute(py, absolute_norm.as_str()); + assert_eq!(resolved.as_deref(), Some("pkg.mod")); + + // clean up sys.modules to avoid cross-test contamination + if let Ok(sys) = py.import("sys") { + if let Ok(modules) = sys.getattr("modules") { + let _ = modules.del_item("pkg.mod"); + } + } + }); + } + + #[test] + fn resolve_absolute_uses_project_marker_root() { + Python::with_gil(|py| { + let tmp = tempfile::tempdir().expect("tempdir"); + let project_dir = tmp.path().join("project"); + let tests_dir = project_dir.join("tests"); + std::fs::create_dir_all(&tests_dir).expect("mkdir tests"); + std::fs::create_dir(project_dir.join(".git")).expect("mkdir git"); + let module_path = tests_dir.join("test_mod.py"); + std::fs::write(&module_path, "def sample():\n return 7\n").expect("write"); + + let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); + let absolute_norm = + normalise_to_posix(module_path.as_path()).expect("normalize absolute"); + let resolved = resolver.resolve_absolute(py, absolute_norm.as_str()); + + assert_eq!(resolved.as_deref(), Some("tests.test_mod")); + }); + } + + #[test] + fn resolve_absolute_returns_main_for_runpy_module() { + Python::with_gil(|py| { + let tmp = tempfile::tempdir().expect("tempdir"); + let script_path = tmp.path().join("cli.py"); + std::fs::write( + &script_path, + "def entrypoint():\n return 0\n\nif __name__ == '__main__':\n entrypoint()\n", + ) + .expect("write script"); + + let script_norm = + normalise_to_posix(script_path.as_path()).expect("normalize absolute path"); + + let sys = py.import("sys").expect("import sys"); + let modules = sys.getattr("modules").expect("sys.modules"); + let original_main = modules + .get_item("__main__") + .ok() + .map(|obj| obj.clone().unbind()); + + let module = PyModule::new(py, "__main__").expect("create module"); + module + .setattr("__file__", script_path.to_string_lossy().as_ref()) + .expect("set __file__"); + module + .setattr("__name__", "__main__") + .expect("set __name__"); + module + .setattr("__package__", py.None()) + .expect("set __package__"); + module + .setattr("__spec__", py.None()) + .expect("set __spec__"); + modules + .set_item("__main__", module) + .expect("register __main__"); + + let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); + let resolved = resolver.resolve_absolute(py, script_norm.as_str()); + assert_eq!(resolved.as_deref(), Some("__main__")); + + match original_main { + Some(previous) => { + modules + .set_item("__main__", previous) + .expect("restore __main__"); + } + None => { + modules + .del_item("__main__") + .expect("remove temporary __main__"); + } + } + }); + } } diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index b85de47..ba66d09 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -1388,7 +1388,11 @@ dropper() "expected no variables captured, found {:?}", variable_names ); - assert_eq!(return_values.len(), 1, "return event should remain balanced"); + assert_eq!( + return_values.len(), + 1, + "return event should remain balanced" + ); match &return_values[0] { ValueRecord::Error { msg, .. } => assert_eq!(msg, ""), other => panic!("expected dropped sentinel return value, got {other:?}"), diff --git a/codetracer-python-recorder/src/trace_filter/engine.rs b/codetracer-python-recorder/src/trace_filter/engine.rs index 2230bd6..0ee25c4 100644 --- a/codetracer-python-recorder/src/trace_filter/engine.rs +++ b/codetracer-python-recorder/src/trace_filter/engine.rs @@ -243,21 +243,23 @@ impl TraceFilterEngine { let mut context = ScopeContext::derive(filename, self.config.sources()); context.refresh_object_name(qualname); - let needs_module_name = context - .module_name - .as_ref() - .map(|name| !is_valid_module_name(name)) - .unwrap_or(true); - if needs_module_name { - if let Some(absolute) = context.absolute_path.clone() { - if let Some(module) = self.resolve_module_name(py, &absolute) { + if let Some(absolute) = context.absolute_path.clone() { + if let Some(module) = self.resolve_module_name(py, &absolute) { + let replace = match context.module_name.as_deref() { + None => true, + Some(existing) => { + !is_valid_module_name(existing) + || (module == "__main__" && !existing.contains('.')) + } + }; + if replace { context.update_module_name(module, qualname); - } else if context.module_name.is_none() { - log::debug!( - "[TraceFilter] unable to derive module name for '{}'; package selectors may not match", - absolute - ); } + } else if context.module_name.is_none() { + log::debug!( + "[TraceFilter] unable to derive module name for '{}'; package selectors may not match", + absolute + ); } } @@ -426,7 +428,7 @@ impl ScopeContext { let stripped_owned = stripped.to_path_buf(); let better = match &best_match { Some((_, current)) => { - stripped_owned.components().count() >= current.components().count() + stripped_owned.components().count() < current.components().count() } None => true, }; From 0701527e22392b3e0cdf566d150f3fee4fac1ebd Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 14:39:00 +0200 Subject: [PATCH 2/8] design: Module-name resolution simplification design-docs/adr/0016-module-name-resolution-via-globals-name.md: design-docs/module-name-resolution-deep-review.md: design-docs/module-name-resolution-globals-implementation-plan.md: Signed-off-by: Tzanko Matev --- ...module-name-resolution-via-globals-name.md | 49 +++++++++++ .../module-name-resolution-deep-review.md | 82 +++++++++++++++++++ ...-resolution-globals-implementation-plan.md | 64 +++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 design-docs/adr/0016-module-name-resolution-via-globals-name.md create mode 100644 design-docs/module-name-resolution-deep-review.md create mode 100644 design-docs/module-name-resolution-globals-implementation-plan.md diff --git a/design-docs/adr/0016-module-name-resolution-via-globals-name.md b/design-docs/adr/0016-module-name-resolution-via-globals-name.md new file mode 100644 index 0000000..807c622 --- /dev/null +++ b/design-docs/adr/0016-module-name-resolution-via-globals-name.md @@ -0,0 +1,49 @@ +# ADR 0016 – Module Name Resolution via `__name__` + +- **Status:** Proposed +- **Date:** 2025-03-18 +- **Stakeholders:** Runtime team, Trace Filter maintainers +- **Related Decisions:** ADR 0013 (Reliable Module Name Derivation), ADR 0014 (Module Call Event Naming) + +## Context + +Today both the runtime tracer and the trace filter engine depend on `ModuleIdentityResolver` to derive dotted module names from code-object filenames. The resolver walks `sys.path`, searches `sys.modules`, and applies filesystem heuristics to map absolute paths to module identifiers (`codetracer-python-recorder/src/module_identity.rs:18`). While this produces stable names, it adds complexity to hot code paths (`runtime_tracer.rs:280`) and creates divergence from conventions used elsewhere in the Python ecosystem. + +Python’s logging framework, import system, and standard introspection APIs all rely on the module object’s `__name__` attribute. During module execution the `py_start` callback already runs with the module frame active, meaning we can read `frame.f_globals["__name__"]` before we emit events or evaluate filters. This naturally aligns with user expectations and removes the need for filesystem heuristics. + +## Decision + +We will source module identities directly from `__name__` when a `py_start` callback observes a code object whose qualified name is ``. The runtime tracer will pass that value to both the trace filter coordinator and the event encoder, ensuring call records emit `<{__name__}>` labels and filters match the same string. + +Key points: + +1. `py_start` inspects the `CodeObjectWrapper` and, for `` qualnames, reads `frame.f_globals.get("__name__")`. If present and non-empty, this name becomes the module identifier for the current scope. +2. Filters always evaluate using the `__name__` value gathered at `py_start`; they no longer attempt to strip file paths or enumerate `sys.modules`. +3. We keep recording the absolute and relative file paths already emitted elsewhere (`TraceFilterEngine::ScopeContext` still normalises filenames for telemetry), but module-name derivation no longer depends on them. +4. We delete `ModuleIdentityResolver`, `ModuleIdentityCache`, and associated heuristics once all call sites switch to the new flow. + +## Consequences + +**Positive** + +- Simplifies hot-path logic, removing `DashMap` lookups and `sys.modules` scans. +- Harmonises trace filtering semantics with Python logging (same strings users already recognise). +- Eliminates filesystem heuristics that were fragile on mixed-case filesystems or when `sys.path` mutated mid-run. + +**Negative / Risks** + +- Direct scripts (`python my_tool.py`) continue to report `__name__ == "__main__"`. Filters must explicitly target `__main__` in those scenarios, whereas the current resolver maps to `package.module`. We will document the behaviour change and offer guidance for users relying on path-based selectors. +- Synthetic modules created via `runpy.run_path` or dynamic loaders may use ad-hoc `__name__` values. We rely on importers to supply meaningful identifiers, matching Python logging expectations. +- Tests and documentation referencing filesystem-based names must be updated. + +**Mitigations** + +- Provide a compatibility flag during rollout so filter configurations can opt into the new behaviour incrementally. +- Emit a debug log when `__name__` is missing or empty, falling back to `` exactly as today. +- Preserve path metadata in filter resolutions so existing file-based selectors continue to work. + +## Rollout Notes + +- Update existing ADR 0013/0014 statuses once this ADR is accepted and the code lands. +- Communicate the behavioural change to downstream teams who consume `` events or rely on path-derived module names. + diff --git a/design-docs/module-name-resolution-deep-review.md b/design-docs/module-name-resolution-deep-review.md new file mode 100644 index 0000000..14461d0 --- /dev/null +++ b/design-docs/module-name-resolution-deep-review.md @@ -0,0 +1,82 @@ +# Module Name Resolution Deep Review + +This document walks through how CodeTracer derives dotted module names from running Python code, why the problem matters, and where the current implementation may need refinement. The goal is to make the topic approachable for readers who only have basic familiarity with Python modules. + +## 1. What Problem Are We Solving? + +Python reports the top-level body of any module with the generic qualified name ``. When CodeTracer records execution, every package entry point therefore looks identical unless we derive the real dotted module name ourselves (for example, turning `` into ``). Without that mapping: + +- trace visualisations lose context because multiple files collapse to `` +- trace filters cannot match package-level rules (e.g., `pkg:glob:mypkg.*`) +- downstream tooling struggles to correlate events back to the source tree + +Our recorder must therefore infer the right module name from runtime metadata, file paths, and user configuration, even in tricky situations such as site-packages code, editable installs, or scripts executed directly. + +## 2. Python Module Background (Minimal Primer) + +The current design assumes the reader understands the following basics: + +- **Modules and packages** – every `.py` file is a module; a directory with an `__init__.py` is treated as a package whose submodules use dotted names like `package.module`. +- **`sys.path`** – Python searches each entry (directories or zip files) when importing modules. Joining a path entry with the relative file path yields the dotted name. +- **`sys.modules`** – a dictionary of module objects keyed by dotted module name. Each module typically exposes `__spec__`, `__name__`, and `__file__`, which reveal how and from where it was loaded. +- **Code objects** – functions and module bodies have code objects whose `co_filename` stores the source path and whose `co_qualname` stores the qualified name (`` for top-level bodies). +- **Imports are idempotent** – when Python imports a module it first creates and registers the module object in `sys.modules`, then executes its body. That guarantees CodeTracer can query `sys.modules` while tracing the import. + +## 3. Why CodeTracer Resolves Module Names + +- **Trace event labelling** – `RuntimeTracer::function_name` converts `` frames into `` so traces remain readable (`codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs:280`). +- **Trace filter decisions** – package selectors rely on module names, and file selectors reuse the same normalised paths. The filter engine caches per-code-object decisions together with the resolved module identity (`codetracer-python-recorder/src/trace_filter/engine.rs:183` and `:232`). +- **File path normalisation** – both subsystems need consistent POSIX-style paths for telemetry, redaction policies, and for emitting metadata in trace files. +- **Cross-component hints** – filter resolutions capture the module name, project-relative path, and absolute path and hand them to the runtime tracer so both parts agree on naming (`codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs:292`). + +## 4. Where the Logic Lives + +- `codetracer-python-recorder/src/module_identity.rs:18` – core resolver, caching, heuristics, and helpers (`normalise_to_posix`, `module_from_relative`, etc.). +- `codetracer-python-recorder/src/trace_filter/engine.rs:183` – owns a `ModuleIdentityResolver` instance, builds `ScopeContext`, and stores resolved module names inside `ScopeResolution`. +- `codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs:280` – turns `` frames into `<{module}>` labels using `ModuleIdentityCache`. +- `codetracer-python-recorder/src/runtime/tracer/filtering.rs:19` – captures filter outcomes and pipes their module hints back to the tracer. +- Tests exercising the behaviour: Rust unit tests in `codetracer-python-recorder/src/module_identity.rs:551` and Python integration coverage in `codetracer-python-recorder/tests/python/test_monitoring_events.py:199`. + +The pure-Python recorder does not perform advanced module-name derivation; all of the reusable logic lives in the Rust-backed module. + +## 5. How the Algorithm Works End to End + +### 5.1 Building the resolver +1. `ModuleIdentityResolver::new` snapshots `sys.path` under the GIL, normalises each entry to POSIX form, removes duplicates, and sorts by descending length so more specific roots win (`module_identity.rs:24`–`:227`). +2. Each entry stays cached for the duration of the recorder session; mutations to `sys.path` after startup are not observed automatically. + +### 5.2 Resolving an absolute filename +Given a normalised absolute path (e.g., `/home/app/pkg/service.py`): + +1. **Path-based attempt** – `module_name_from_roots` strips each known root and converts the remainder into a dotted form (`pkg/service.py → pkg.service`). This is fast and succeeds for project code and site-packages that live under a `sys.path` entry (`module_identity.rs:229`–`:238`). +2. **Heuristics** – if the first guess looks like a raw filesystem echo (meaning we probably matched a catch-all root like `/`), the resolver searches upward for project markers (`pyproject.toml`, `.git`, etc.) and retries with that directory as the root. Failing that, it uses the immediate parent directory (`module_identity.rs:240`–`:468`). +3. **`sys.modules` sweep** – the resolver iterates through loaded modules, comparing `__spec__.origin`, `__file__`, or `__cached__` paths (normalised to POSIX) against the target filename, accounting for `.py` vs `.pyc` differences. Any valid dotted name wins over heuristic guesses (`module_identity.rs:248`–`:335`). +4. The winning name (or lack thereof) is cached by absolute path in a `DashMap` so future lookups avoid repeated sys.modules scans (`module_identity.rs:54`–`:60`). + +### 5.3 Mapping code objects to module names + +`ModuleIdentityCache::resolve_for_code` accepts optional hints: + +1. **Preferred hint** – e.g., the filter engine’s stored module name. It is accepted only if it is a valid dotted identifier (`module_identity.rs:103`–`:116`). +2. **Relative path** – converted via `module_from_relative` when supplied (`module_identity.rs:107`–`:110`). +3. **Absolute path** – triggers the resolver described above (`module_identity.rs:112`–`:115`). +4. **Globals-based hint** – a last resort using `frame.f_globals["__name__"]` when available (`module_identity.rs:116`). + +If no hints contain an absolute path, the cache will read `co_filename`, normalise it, and resolve it once (`module_identity.rs:118`–`:127`). Results (including failures) are memoised per `code_id`. + +### 5.4 Feeding results into the rest of the system + +1. The trace filter builds a `ScopeContext` for every new code object. It records project-relative paths and module names derived from configuration roots, then calls back into the resolver if the preliminary name is missing or invalid (`trace_filter/engine.rs:420`–`:471`). +2. The resulting `ScopeResolution` is cached and exposed to the runtime tracer via `FilterCoordinator`, providing rich hints (`runtime/tracer/filtering.rs:43`). +3. During execution, `RuntimeTracer::function_name` reaches into the shared cache to turn `` qualnames into `` labels. If every heuristic fails, it safely falls back to the original `` string (`runtime_tracer.rs:280`–`:305`). +4. Both subsystems reuse the same `ModuleIdentityResolver`, ensuring trace files and filtering decisions stay consistent. + +## 6. Bugs, Risks, and Observations + +- **Prefix matching ignores path boundaries** – `strip_posix_prefix` checks `path.starts_with(base)` without verifying the next character is a separator. A root like `/opt/app` therefore incorrectly matches `/opt/application/module.py`, yielding the bogus module `lication.module`. When `sys.modules` lacks the correct entry (e.g., resolving a file before import), the resolver will cache this wrong answer (`module_identity.rs:410`–`:426`). +- **Case sensitivity on Windows** – normalisation preserves whatever casing the OS returns. If `co_filename` and `sys.modules` report the same path with different casing, `equivalent_posix_paths` will not treat them as equal, causing the fallback to miss (`module_identity.rs:317`–`:324`). Consider lowercasing drive prefixes or using `Path::eq` semantics behind the GIL. +- **`sys.path` mutations after startup** – the resolver snapshots roots once. If tooling modifies `sys.path` later (common in virtualenv activation scripts), we will never see the new prefix, so we fall back to heuristics or `sys.modules`. Documenting this behaviour or exposing a method to refresh the roots may avoid surprises. +- **Project-marker heuristics hit the filesystem** – `has_project_marker` calls `exists()` for every parent directory when the fast path fails (`module_identity.rs:470`–`:493`). Because results are cached per file this is usually acceptable, but tracing thousands of unique `site-packages` files on network storage could still become expensive. + +Addressing the first two items would materially improve correctness; the latter two are design trade-offs worth monitoring. + diff --git a/design-docs/module-name-resolution-globals-implementation-plan.md b/design-docs/module-name-resolution-globals-implementation-plan.md new file mode 100644 index 0000000..7bed4e9 --- /dev/null +++ b/design-docs/module-name-resolution-globals-implementation-plan.md @@ -0,0 +1,64 @@ +# Module Name Resolution via `__name__` – Implementation Plan + +This plan delivers ADR 0016 by retooling module-name derivation around `frame.f_globals["__name__"]` and retiring the existing filesystem-based resolver. + +## Goals +- Use `__name__` as the single source of truth for module identifiers during tracing and filtering. +- Remove the `ModuleIdentityResolver`/`ModuleIdentityCache` complex and associated heuristics. +- Preserve relative/absolute path metadata for selectors and telemetry. +- Ship the change behind a feature flag so we can roll out gradually. + +## Non-goals +- Changing how file selectors work (they should keep matching on normalised paths). +- Replacing hints elsewhere that already rely on `__name__` (e.g., value redaction logic). +- Revisiting logging or module instrumentation beyond the tracer/filter flow. + +## Work Breakdown + +### Stage 0 – Feature Flag and Compatibility Layer +- Add a recorder policy flag `module_name_from_globals` defaulting to `false`. +- Plumb the flag through CLI/env configuration and expose it via the Python bindings (mirroring other policy toggles). +- Update integration tests to parameterise expectations for both modes. + +### Stage 1 – Capture `__name__` at `py_start` +- In `runtime/tracer/events.rs`, augment the `on_py_start` handler to detect `` code objects. +- Fetch `frame.f_globals.get("__name__")`, validate it is a non-empty string, and store it in the scope resolution cache (likely inside `FilterCoordinator`). +- Thread the captured value into `FilterCoordinator::resolve` so the filter engine obtains the module name without invoking the resolver. +- Add unit tests that simulate modules with various `__name__` values (`__main__`, aliased imports, missing globals) to ensure fallbacks log appropriately. + +### Stage 2 – Simplify Filter Engine +- Remove the `module_resolver` field from `TraceFilterEngine` and delete calls to `ModuleIdentityResolver::resolve_absolute` (`codetracer-python-recorder/src/trace_filter/engine.rs:183`). +- Adjust `ScopeContext` to accept the module name supplied by the coordinator and skip path-derived module inference. +- Keep relative and absolute path normalisation for file selectors. +- Update Rust tests that previously expected filesystem-derived names to assert the new globals-based behaviour. + +### Stage 3 – Replace Runtime Function Naming +- Eliminate `ModuleIdentityCache` from `RuntimeTracer` and switch `function_name` to prefer the cached globals-derived module name (`runtime_tracer.rs:280`). +- Remove the `ModuleNameHints` plumbing and any resolver invocations. +- Update the Python integration test `test_module_imports_record_package_names` to expect `` coming directly from `__name__`. + +### Stage 4 – Remove Legacy Resolver Module +- Delete `src/module_identity.rs` and tidy references (Cargo module list, tests, exports). +- Drop resolver-specific tests and replace them with focused coverage around globals-based extraction. +- Update documentation and changelog to describe the new semantics. Reference ADR 0016 and mark ADR 0013 as superseded where appropriate. + +### Stage 5 – Flip the Feature Flag +- After validating in CI and canary environments, change the default for `module_name_from_globals` to `true`. +- Remove the compatibility flag once usage data confirms no regressions. + +## Testing Strategy +- Rust unit tests covering `on_py_start` logic, especially fallback to `` when `__name__` is absent. +- Python integration tests for direct scripts (`__main__`), package imports, and dynamic `runpy` execution to capture expected names. +- Regression test ensuring filters still match file-based selectors when module names change. +- Performance check to confirm hot-path allocations decreased after removing resolver lookups. + +## Risks & Mitigations +- **Filter regressions for direct scripts:** Document the `"__main__"` behaviour and add guardrails in tests; optionally add helper patterns in builtin filters. +- **Third-party loaders with non-string `__name__`:** Validate type and fall back gracefully when extraction fails. +- **Hidden dependencies on old naming:** Continue exposing absolute/relative paths in `ScopeResolution` so downstream tooling relying on paths keeps functioning. + +## Rollout Checklist +- ADR 0016 updated to “Accepted” once the feature flag defaults to on. +- Release notes highlight the new module-naming semantics and the opt-out flag during transition. +- Telemetry or logging confirms we do not hit the fallback path excessively in real workloads. + From b19c1ede895d46e424457defa72e745ccbf6dfed Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 14:44:45 +0200 Subject: [PATCH 3/8] Stage 0 --- .../codetracer_python_recorder/cli.py | 10 +++++++ codetracer-python-recorder/src/policy.rs | 9 ++++++- codetracer-python-recorder/src/policy/env.rs | 9 +++++++ codetracer-python-recorder/src/policy/ffi.rs | 27 +++++++++++++++++-- .../src/policy/model.rs | 6 +++++ .../src/runtime/tracer/runtime_tracer.rs | 6 +++++ .../tests/python/unit/test_cli.py | 16 +++++++++++ .../python/unit/test_policy_configuration.py | 8 ++++++ ...tion-globals-implementation-plan.status.md | 21 +++++++++++++++ 9 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 design-docs/module-name-resolution-globals-implementation-plan.status.md diff --git a/codetracer-python-recorder/codetracer_python_recorder/cli.py b/codetracer-python-recorder/codetracer_python_recorder/cli.py index 105e427..865f837 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/cli.py +++ b/codetracer-python-recorder/codetracer_python_recorder/cli.py @@ -120,6 +120,14 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: "'proxies+fd' also mirrors raw file-descriptor writes." ), ) + parser.add_argument( + "--module-name-from-globals", + action="store_true", + help=( + "Derive module names from the Python frame's __name__ attribute. " + "Intended for early opt-in while the feature flag is experimental." + ), + ) known, remainder = parser.parse_known_args(argv) pending: list[str] = list(remainder) @@ -181,6 +189,8 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: policy["io_capture_fd_fallback"] = True case other: # pragma: no cover - argparse choices block this parser.error(f"unsupported io-capture mode '{other}'") + if known.module_name_from_globals: + policy["module_name_from_globals"] = True return RecorderCLIConfig( trace_dir=trace_dir, diff --git a/codetracer-python-recorder/src/policy.rs b/codetracer-python-recorder/src/policy.rs index c26f81b..6c57f34 100644 --- a/codetracer-python-recorder/src/policy.rs +++ b/codetracer-python-recorder/src/policy.rs @@ -7,7 +7,8 @@ mod model; #[allow(unused_imports)] pub use env::{ configure_policy_from_env, ENV_CAPTURE_IO, ENV_JSON_ERRORS, ENV_KEEP_PARTIAL_TRACE, - ENV_LOG_FILE, ENV_LOG_LEVEL, ENV_ON_RECORDER_ERROR, ENV_REQUIRE_TRACE, + ENV_LOG_FILE, ENV_LOG_LEVEL, ENV_MODULE_NAME_FROM_GLOBALS, ENV_ON_RECORDER_ERROR, + ENV_REQUIRE_TRACE, }; #[allow(unused_imports)] pub use ffi::{configure_policy_py, py_configure_policy_from_env, py_policy_snapshot}; @@ -41,6 +42,7 @@ mod tests { assert!(snap.log_file.is_none()); assert!(snap.io_capture.line_proxies); assert!(!snap.io_capture.fd_fallback); + assert!(!snap.module_name_from_globals); } #[test] @@ -55,6 +57,7 @@ mod tests { update.json_errors = Some(true); update.io_capture_line_proxies = Some(true); update.io_capture_fd_fallback = Some(true); + update.module_name_from_globals = Some(true); apply_policy_update(update); @@ -67,6 +70,7 @@ mod tests { assert!(snap.json_errors); assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); + assert!(snap.module_name_from_globals); reset_policy(); } @@ -81,6 +85,7 @@ mod tests { std::env::set_var(ENV_LOG_FILE, "/tmp/out.log"); std::env::set_var(ENV_JSON_ERRORS, "yes"); std::env::set_var(ENV_CAPTURE_IO, "proxies,fd"); + std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "true"); configure_policy_from_env().expect("configure from env"); @@ -95,6 +100,7 @@ mod tests { assert!(snap.json_errors); assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); + assert!(snap.module_name_from_globals); reset_policy(); } @@ -156,6 +162,7 @@ mod tests { ENV_LOG_FILE, ENV_JSON_ERRORS, ENV_CAPTURE_IO, + ENV_MODULE_NAME_FROM_GLOBALS, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/env.rs b/codetracer-python-recorder/src/policy/env.rs index 2392b7a..c706dc5 100644 --- a/codetracer-python-recorder/src/policy/env.rs +++ b/codetracer-python-recorder/src/policy/env.rs @@ -19,6 +19,8 @@ pub const ENV_LOG_FILE: &str = "CODETRACER_LOG_FILE"; pub const ENV_JSON_ERRORS: &str = "CODETRACER_JSON_ERRORS"; /// Environment variable toggling IO capture strategies. pub const ENV_CAPTURE_IO: &str = "CODETRACER_CAPTURE_IO"; +/// Environment variable toggling globals-based module name resolution. +pub const ENV_MODULE_NAME_FROM_GLOBALS: &str = "CODETRACER_MODULE_NAME_FROM_GLOBALS"; /// Load policy overrides from environment variables. pub fn configure_policy_from_env() -> RecorderResult<()> { @@ -60,6 +62,10 @@ pub fn configure_policy_from_env() -> RecorderResult<()> { update.io_capture_fd_fallback = Some(fd_fallback); } + if let Ok(value) = env::var(ENV_MODULE_NAME_FROM_GLOBALS) { + update.module_name_from_globals = Some(parse_bool(&value)?); + } + apply_policy_update(update); Ok(()) } @@ -141,6 +147,7 @@ mod tests { std::env::set_var(ENV_LOG_FILE, "/tmp/out.log"); std::env::set_var(ENV_JSON_ERRORS, "yes"); std::env::set_var(ENV_CAPTURE_IO, "proxies,fd"); + std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "true"); configure_policy_from_env().expect("configure from env"); let snap = policy_snapshot(); @@ -155,6 +162,7 @@ mod tests { assert!(snap.json_errors); assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); + assert!(snap.module_name_from_globals); } #[test] @@ -182,6 +190,7 @@ mod tests { ENV_LOG_FILE, ENV_JSON_ERRORS, ENV_CAPTURE_IO, + ENV_MODULE_NAME_FROM_GLOBALS, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/ffi.rs b/codetracer-python-recorder/src/policy/ffi.rs index f2588df..dc6e6a3 100644 --- a/codetracer-python-recorder/src/policy/ffi.rs +++ b/codetracer-python-recorder/src/policy/ffi.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; use std::str::FromStr; #[pyfunction(name = "configure_policy")] -#[pyo3(signature = (on_recorder_error=None, require_trace=None, keep_partial_trace=None, log_level=None, log_file=None, json_errors=None, io_capture_line_proxies=None, io_capture_fd_fallback=None))] +#[pyo3(signature = (on_recorder_error=None, require_trace=None, keep_partial_trace=None, log_level=None, log_file=None, json_errors=None, io_capture_line_proxies=None, io_capture_fd_fallback=None, module_name_from_globals=None))] pub fn configure_policy_py( on_recorder_error: Option<&str>, require_trace: Option, @@ -21,6 +21,7 @@ pub fn configure_policy_py( json_errors: Option, io_capture_line_proxies: Option, io_capture_fd_fallback: Option, + module_name_from_globals: Option, ) -> PyResult<()> { let mut update = PolicyUpdate::default(); @@ -64,6 +65,10 @@ pub fn configure_policy_py( update.io_capture_fd_fallback = Some(value); } + if let Some(value) = module_name_from_globals { + update.module_name_from_globals = Some(value); + } + apply_policy_update(update); Ok(()) } @@ -97,6 +102,10 @@ pub fn py_policy_snapshot(py: Python<'_>) -> PyResult { dict.set_item("log_file", py.None())?; } dict.set_item("json_errors", snapshot.json_errors)?; + dict.set_item( + "module_name_from_globals", + snapshot.module_name_from_globals, + )?; let io_dict = PyDict::new(py); io_dict.set_item("line_proxies", snapshot.io_capture.line_proxies)?; @@ -123,6 +132,7 @@ mod tests { Some(true), Some(true), Some(true), + Some(true), ) .expect("configure policy via PyO3 facade"); @@ -141,13 +151,24 @@ mod tests { assert!(snap.json_errors); assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); + assert!(snap.module_name_from_globals); reset_policy_for_tests(); } #[test] fn configure_policy_py_rejects_invalid_on_recorder_error() { reset_policy_for_tests(); - let err = configure_policy_py(Some("unknown"), None, None, None, None, None, None, None) + let err = configure_policy_py( + Some("unknown"), + None, + None, + None, + None, + None, + None, + None, + None, + ) .expect_err("invalid variant should error"); // Ensure the error maps through map_recorder_error by checking the display text. let message = Python::with_gil(|py| err.value(py).to_string()); @@ -186,6 +207,7 @@ mod tests { Some(true), Some(false), Some(false), + Some(false), ) .expect("configure policy"); @@ -218,6 +240,7 @@ mod tests { super::super::env::ENV_LOG_FILE, super::super::env::ENV_JSON_ERRORS, super::super::env::ENV_CAPTURE_IO, + super::super::env::ENV_MODULE_NAME_FROM_GLOBALS, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/model.rs b/codetracer-python-recorder/src/policy/model.rs index 6b296b0..d8ab569 100644 --- a/codetracer-python-recorder/src/policy/model.rs +++ b/codetracer-python-recorder/src/policy/model.rs @@ -71,6 +71,7 @@ pub struct RecorderPolicy { pub log_file: Option, pub json_errors: bool, pub io_capture: IoCapturePolicy, + pub module_name_from_globals: bool, } impl Default for RecorderPolicy { @@ -83,6 +84,7 @@ impl Default for RecorderPolicy { log_file: None, json_errors: false, io_capture: IoCapturePolicy::default(), + module_name_from_globals: false, } } } @@ -123,6 +125,9 @@ impl RecorderPolicy { // fd fallback requires proxies to be on. self.io_capture.fd_fallback = fd_fallback && self.io_capture.line_proxies; } + if let Some(module_name_from_globals) = update.module_name_from_globals { + self.module_name_from_globals = module_name_from_globals; + } } } @@ -144,6 +149,7 @@ pub(crate) struct PolicyUpdate { pub(crate) json_errors: Option, pub(crate) io_capture_line_proxies: Option, pub(crate) io_capture_fd_fallback: Option, + pub(crate) module_name_from_globals: Option, } /// Snapshot the current policy. diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index ba66d09..7def7de 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -370,6 +370,7 @@ mod tests { Some(false), None, None, + Some(false), ) .expect("reset recorder policy"); } @@ -593,6 +594,7 @@ result = compute()\n" Some(false), Some(true), Some(false), + Some(false), ) .expect("enable io capture proxies"); @@ -682,6 +684,7 @@ result = compute()\n" Some(false), Some(true), Some(true), + Some(false), ) .expect("enable io capture with fd fallback"); @@ -788,6 +791,7 @@ result = compute()\n" Some(false), Some(true), Some(false), + Some(false), ) .expect("enable proxies without fd fallback"); @@ -2142,6 +2146,7 @@ snapshot() Some(false), None, None, + Some(false), ) .expect("enable require_trace policy"); @@ -2219,6 +2224,7 @@ snapshot() Some(false), None, None, + Some(false), ) .expect("enable keep_partial policy"); diff --git a/codetracer-python-recorder/tests/python/unit/test_cli.py b/codetracer-python-recorder/tests/python/unit/test_cli.py index 333dc57..c424efd 100644 --- a/codetracer-python-recorder/tests/python/unit/test_cli.py +++ b/codetracer-python-recorder/tests/python/unit/test_cli.py @@ -156,3 +156,19 @@ def test_parse_args_enables_io_capture_fd_mirroring(tmp_path: Path) -> None: "io_capture_line_proxies": True, "io_capture_fd_fallback": True, } + + +def test_parse_args_enables_module_name_from_globals(tmp_path: Path) -> None: + script = tmp_path / "entry.py" + _write_script(script) + + config = _parse_args( + [ + "--module-name-from-globals", + str(script), + ] + ) + + assert config.policy_overrides == { + "module_name_from_globals": True, + } diff --git a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py index 3e62961..9b77b02 100644 --- a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py +++ b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py @@ -18,6 +18,7 @@ def reset_policy() -> None: log_level="", log_file="", json_errors=False, + module_name_from_globals=False, ) yield codetracer.configure_policy( @@ -27,6 +28,7 @@ def reset_policy() -> None: log_level="", log_file="", json_errors=False, + module_name_from_globals=False, ) @@ -39,6 +41,7 @@ def test_configure_policy_overrides_values(tmp_path: Path) -> None: log_level="info", log_file=str(target_log), json_errors=True, + module_name_from_globals=True, ) snapshot = codetracer.policy_snapshot() @@ -48,6 +51,7 @@ def test_configure_policy_overrides_values(tmp_path: Path) -> None: assert snapshot["log_level"] == "info" assert snapshot["log_file"] == str(target_log) assert snapshot["json_errors"] is True + assert snapshot["module_name_from_globals"] is True def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: @@ -58,6 +62,7 @@ def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Pat log_file = tmp_path / "policy.log" monkeypatch.setenv("CODETRACER_LOG_FILE", str(log_file)) monkeypatch.setenv("CODETRACER_JSON_ERRORS", "yes") + monkeypatch.setenv("CODETRACER_MODULE_NAME_FROM_GLOBALS", "true") codetracer.configure_policy_from_env() @@ -68,6 +73,7 @@ def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Pat assert snapshot["log_level"] == "debug" assert snapshot["log_file"] == str(log_file) assert snapshot["json_errors"] is True + assert snapshot["module_name_from_globals"] is True def test_clearing_log_configuration(tmp_path: Path) -> None: @@ -83,6 +89,7 @@ def test_session_start_applies_policy_overrides(tmp_path: Path) -> None: "on_recorder_error": "disable", "log_file": tmp_path / "policy.log", "json_errors": True, + "module_name_from_globals": True, } session = session_api.start(tmp_path, policy=policy, apply_env_policy=False) @@ -91,6 +98,7 @@ def test_session_start_applies_policy_overrides(tmp_path: Path) -> None: assert snapshot["on_recorder_error"] == "disable" assert snapshot["log_file"] == str(tmp_path / "policy.log") assert snapshot["json_errors"] is True + assert snapshot["module_name_from_globals"] is True finally: session.stop() diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md new file mode 100644 index 0000000..274ec9e --- /dev/null +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -0,0 +1,21 @@ +# Module Name Resolution via `__name__` – Status + +## Relevant Design Docs +- `design-docs/adr/0016-module-name-resolution-via-globals-name.md` +- `design-docs/module-name-resolution-globals-implementation-plan.md` + +## Key Source Files +- `codetracer-python-recorder/src/policy/model.rs` +- `codetracer-python-recorder/src/policy/env.rs` +- `codetracer-python-recorder/src/policy/ffi.rs` +- `codetracer-python-recorder/src/policy.rs` +- `codetracer-python-recorder/codetracer_python_recorder/cli.py` +- `codetracer-python-recorder/codetracer_python_recorder/session.py` +- `codetracer-python-recorder/tests/python/test_policy_configuration.py` +- `codetracer-python-recorder/tests/python/unit/test_cli.py` + +## Workstream Progress + +### Stage 0 – Feature Flag and Compatibility Layer +- **Status:** Completed + Added the `module_name_from_globals` policy flag with CLI flag, Python bindings, and the `CODETRACER_MODULE_NAME_FROM_GLOBALS` env hook. Regression tests cover CLI parsing, policy snapshots, and environment configuration. From aa3f613fa0816e211d4b5e4dcf1a0a3b6a445cdc Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 15:30:30 +0200 Subject: [PATCH 4/8] Stage 1 --- .../src/module_identity.rs | 4 +- .../src/monitoring/api.rs | 6 +- .../src/monitoring/mod.rs | 7 +-- codetracer-python-recorder/src/policy/ffi.rs | 2 +- .../src/runtime/tracer/events.rs | 33 +++++++++-- .../src/runtime/tracer/filtering.rs | 29 ++++++++++ .../src/runtime/tracer/lifecycle.rs | 5 +- .../src/runtime/tracer/runtime_tracer.rs | 57 ++++++++++++++++--- codetracer-python-recorder/src/session.rs | 6 +- .../src/trace_filter/engine.rs | 14 +++++ .../tests/python/test_monitoring_events.py | 20 +++++++ ...tion-globals-implementation-plan.status.md | 4 ++ 12 files changed, 154 insertions(+), 33 deletions(-) diff --git a/codetracer-python-recorder/src/module_identity.rs b/codetracer-python-recorder/src/module_identity.rs index 4d6ba7f..7602128 100644 --- a/codetracer-python-recorder/src/module_identity.rs +++ b/codetracer-python-recorder/src/module_identity.rs @@ -737,9 +737,7 @@ mod tests { module .setattr("__package__", py.None()) .expect("set __package__"); - module - .setattr("__spec__", py.None()) - .expect("set __spec__"); + module.setattr("__spec__", py.None()).expect("set __spec__"); modules .set_item("__main__", module) .expect("register __main__"); diff --git a/codetracer-python-recorder/src/monitoring/api.rs b/codetracer-python-recorder/src/monitoring/api.rs index fee4283..9ceccf4 100644 --- a/codetracer-python-recorder/src/monitoring/api.rs +++ b/codetracer-python-recorder/src/monitoring/api.rs @@ -108,11 +108,7 @@ pub trait Tracer: Send + Any { } /// Provide the process exit status ahead of tracer teardown. - fn set_exit_status( - &mut self, - _py: Python<'_>, - _exit_code: Option, - ) -> PyResult<()> { + fn set_exit_status(&mut self, _py: Python<'_>, _exit_code: Option) -> PyResult<()> { Ok(()) } diff --git a/codetracer-python-recorder/src/monitoring/mod.rs b/codetracer-python-recorder/src/monitoring/mod.rs index a582262..a4440ab 100644 --- a/codetracer-python-recorder/src/monitoring/mod.rs +++ b/codetracer-python-recorder/src/monitoring/mod.rs @@ -10,12 +10,7 @@ pub(crate) mod install; pub mod tracer; pub use api::Tracer; -pub use install::{ - flush_installed_tracer, - install_tracer, - uninstall_tracer, - update_exit_status, -}; +pub use install::{flush_installed_tracer, install_tracer, uninstall_tracer, update_exit_status}; const MONITORING_TOOL_NAME: &str = "codetracer"; diff --git a/codetracer-python-recorder/src/policy/ffi.rs b/codetracer-python-recorder/src/policy/ffi.rs index dc6e6a3..3ec2328 100644 --- a/codetracer-python-recorder/src/policy/ffi.rs +++ b/codetracer-python-recorder/src/policy/ffi.rs @@ -169,7 +169,7 @@ mod tests { None, None, ) - .expect_err("invalid variant should error"); + .expect_err("invalid variant should error"); // Ensure the error maps through map_recorder_error by checking the display text. let message = Python::with_gil(|py| err.value(py).to_string()); assert!( diff --git a/codetracer-python-recorder/src/runtime/tracer/events.rs b/codetracer-python-recorder/src/runtime/tracer/events.rs index dfcaea9..7079a13 100644 --- a/codetracer-python-recorder/src/runtime/tracer/events.rs +++ b/codetracer-python-recorder/src/runtime/tracer/events.rs @@ -176,6 +176,29 @@ impl Tracer for RuntimeTracer { code: &CodeObjectWrapper, _offset: i32, ) -> CallbackResult { + if self.module_name_from_globals { + if let Ok(qualname) = code.qualname(py) { + if qualname == "" { + let globals_name = match capture_frame(py, code) { + Ok(snapshot) => { + let mapping = snapshot.globals().unwrap_or_else(|| snapshot.locals()); + mapping + .get_item("__name__") + .ok() + .flatten() + .and_then(|value| value.extract::().ok()) + .map(|name| name.trim().to_string()) + .filter(|name| !name.is_empty()) + } + Err(_) => None, + }; + + self.filter + .set_module_name_hint(code.id(), globals_name.clone()); + } + } + } + if let Some(outcome) = self.evaluate_gate(py, code, true) { return Ok(outcome); } @@ -398,11 +421,7 @@ impl Tracer for RuntimeTracer { ) } - fn set_exit_status( - &mut self, - _py: Python<'_>, - exit_code: Option, - ) -> PyResult<()> { + fn set_exit_status(&mut self, _py: Python<'_>, exit_code: Option) -> PyResult<()> { self.record_exit_status(exit_code); Ok(()) } @@ -458,7 +477,9 @@ impl Tracer for RuntimeTracer { if self.lifecycle.encountered_failure() { if policy.keep_partial_trace { - if let Err(err) = self.lifecycle.finalise(&mut self.writer, &self.filter, &exit_summary) + if let Err(err) = + self.lifecycle + .finalise(&mut self.writer, &self.filter, &exit_summary) { with_error_code(ErrorCode::TraceIncomplete, || { log::warn!( diff --git a/codetracer-python-recorder/src/runtime/tracer/filtering.rs b/codetracer-python-recorder/src/runtime/tracer/filtering.rs index c4fd804..71b53e2 100644 --- a/codetracer-python-recorder/src/runtime/tracer/filtering.rs +++ b/codetracer-python-recorder/src/runtime/tracer/filtering.rs @@ -23,6 +23,7 @@ pub(crate) struct FilterCoordinator { engine: Option>, ignored_code_ids: HashSet, scope_cache: HashMap>, + module_name_hints: HashMap, stats: FilterStats, } @@ -32,6 +33,7 @@ impl FilterCoordinator { engine, ignored_code_ids: HashSet::new(), scope_cache: HashMap::new(), + module_name_hints: HashMap::new(), stats: FilterStats::default(), } } @@ -52,6 +54,21 @@ impl FilterCoordinator { self.stats.values_mut() } + pub(crate) fn set_module_name_hint(&mut self, code_id: usize, hint: Option) { + match hint { + Some(value) => { + self.module_name_hints.insert(code_id, value); + } + None => { + self.module_name_hints.remove(&code_id); + } + } + } + + pub(crate) fn module_name_hint(&self, code_id: usize) -> Option { + self.module_name_hints.get(&code_id).cloned() + } + pub(crate) fn clear_caches(&mut self) { self.ignored_code_ids.clear(); self.scope_cache.clear(); @@ -113,6 +130,17 @@ impl FilterCoordinator { match engine.resolve(py, code) { Ok(resolution) => { + let hint = self.module_name_hints.get(&code_id).cloned(); + let resolution = if let Some(hint) = hint { + if resolution.module_name() == Some(hint.as_str()) { + resolution + } else { + Arc::new(resolution.clone_with_module_name(Some(hint))) + } + } else { + resolution + }; + if resolution.exec() == ExecDecision::Trace { self.scope_cache.insert(code_id, Arc::clone(&resolution)); } else { @@ -140,6 +168,7 @@ impl FilterCoordinator { fn mark_ignored(&mut self, code_id: usize) { self.scope_cache.remove(&code_id); self.ignored_code_ids.insert(code_id); + self.module_name_hints.remove(&code_id); } } diff --git a/codetracer-python-recorder/src/runtime/tracer/lifecycle.rs b/codetracer-python-recorder/src/runtime/tracer/lifecycle.rs index 6cfa6bb..0c88490 100644 --- a/codetracer-python-recorder/src/runtime/tracer/lifecycle.rs +++ b/codetracer-python-recorder/src/runtime/tracer/lifecycle.rs @@ -121,7 +121,10 @@ impl LifecycleController { enverr!(ErrorCode::Io, "failed to finalise trace events") .with_context("source", err.to_string()) })?; - debug!("[Lifecycle] writing exit metadata: code={:?}, label={:?}", exit_summary.code, exit_summary.label); + debug!( + "[Lifecycle] writing exit metadata: code={:?}, label={:?}", + exit_summary.code, exit_summary.label + ); self.append_filter_metadata(filter)?; self.append_exit_metadata(exit_summary)?; Ok(()) diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index 7def7de..1b61c45 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -125,6 +125,7 @@ pub struct RuntimeTracer { pub(super) io: IoCoordinator, pub(super) filter: FilterCoordinator, pub(super) module_names: ModuleIdentityCache, + pub(super) module_name_from_globals: bool, session_exit: SessionExitState, } @@ -135,6 +136,7 @@ impl RuntimeTracer { format: TraceEventsFileFormat, activation_path: Option<&Path>, trace_filter: Option>, + module_name_from_globals: bool, ) -> Self { let mut writer = NonStreamingTraceWriter::new(program, args); writer.set_format(format); @@ -147,6 +149,7 @@ impl RuntimeTracer { io: IoCoordinator::new(), filter: FilterCoordinator::new(trace_filter), module_names: ModuleIdentityCache::new(), + module_name_from_globals, session_exit: SessionExitState::default(), } } @@ -231,7 +234,10 @@ impl RuntimeTracer { code: &CodeObjectWrapper, allow_disable: bool, ) -> Option { - let is_active = self.lifecycle.activation_mut().should_process_event(py, code); + let is_active = self + .lifecycle + .activation_mut() + .should_process_event(py, code); if matches!( self.should_trace_code(py, code), TraceDecision::SkipAndDisable @@ -290,6 +296,12 @@ impl RuntimeTracer { } fn derive_module_name(&self, py: Python<'_>, code: &CodeObjectWrapper) -> Option { + if self.module_name_from_globals { + if let Some(name) = self.filter.module_name_hint(code.id()) { + return Some(name); + } + } + let resolution = self.filter.cached_resolution(code.id()); if let Some(resolution) = resolution.as_ref() { let hints = ModuleNameHints { @@ -388,8 +400,14 @@ mod tests { #[test] fn skips_synthetic_filename_events() { Python::with_gil(|py| { - let mut tracer = - RuntimeTracer::new("test.py", &[], TraceEventsFileFormat::Json, None, None); + let mut tracer = RuntimeTracer::new( + "test.py", + &[], + TraceEventsFileFormat::Json, + None, + None, + false, + ); ensure_test_module(py); let script = format!("{PRELUDE}\nsnapshot()\n"); { @@ -485,6 +503,7 @@ result = compute()\n" TraceEventsFileFormat::Json, Some(script_path.as_path()), None, + false, ); { @@ -536,6 +555,7 @@ result = compute()\n" TraceEventsFileFormat::Json, None, None, + false, ); let store = tracer.line_snapshot_store(); @@ -612,6 +632,7 @@ result = compute()\n" TraceEventsFileFormat::Json, None, None, + false, ); let outputs = TraceOutputPaths::new(tmp.path(), TraceEventsFileFormat::Json); tracer.begin(&outputs, 1).expect("begin tracer"); @@ -705,6 +726,7 @@ result = compute()\n" TraceEventsFileFormat::Json, None, None, + false, ); let outputs = TraceOutputPaths::new(tmp.path(), TraceEventsFileFormat::Json); tracer.begin(&outputs, 1).expect("begin tracer"); @@ -812,6 +834,7 @@ result = compute()\n" TraceEventsFileFormat::Json, None, None, + false, ); let outputs = TraceOutputPaths::new(tmp.path(), TraceEventsFileFormat::Json); tracer.begin(&outputs, 1).expect("begin tracer"); @@ -1066,8 +1089,14 @@ def start_call(): fn run_traced_script(body: &str) -> Vec { Python::with_gil(|py| { - let mut tracer = - RuntimeTracer::new("test.py", &[], TraceEventsFileFormat::Json, None, None); + let mut tracer = RuntimeTracer::new( + "test.py", + &[], + TraceEventsFileFormat::Json, + None, + None, + false, + ); ensure_test_module(py); let tmp = tempfile::tempdir().expect("create temp dir"); let script_path = tmp.path().join("script.py"); @@ -1191,6 +1220,7 @@ sensitive("s3cr3t") TraceEventsFileFormat::Json, None, Some(engine), + false, ); { @@ -1297,8 +1327,14 @@ sensitive("s3cr3t") .call_method1("insert", (0, pkg_root.to_string_lossy().as_ref())) .expect("insert temp root"); - let tracer = - RuntimeTracer::new("runner.py", &[], TraceEventsFileFormat::Json, None, None); + let tracer = RuntimeTracer::new( + "runner.py", + &[], + TraceEventsFileFormat::Json, + None, + None, + false, + ); let builtins = py.import("builtins").expect("builtins"); let compile = builtins.getattr("compile").expect("compile builtin"); @@ -1361,6 +1397,7 @@ dropper() TraceEventsFileFormat::Json, None, Some(engine), + false, ); { @@ -1448,6 +1485,7 @@ initializer("omega") TraceEventsFileFormat::Json, None, Some(engine), + false, ); { @@ -1501,6 +1539,7 @@ initializer("omega") TraceEventsFileFormat::Json, None, None, + false, ); tracer.begin(&outputs, 1).expect("begin tracer"); tracer.record_exit_status(Some(7)); @@ -1607,6 +1646,7 @@ sensitive("s3cr3t") TraceEventsFileFormat::Json, None, Some(engine), + false, ); tracer.begin(&outputs, 1).expect("begin tracer"); @@ -2163,6 +2203,7 @@ snapshot() TraceEventsFileFormat::Json, None, None, + false, ); tracer.begin(&outputs, 1).expect("begin tracer"); @@ -2197,6 +2238,7 @@ snapshot() TraceEventsFileFormat::Json, None, None, + false, ); tracer.begin(&outputs, 1).expect("begin tracer"); tracer.mark_failure(); @@ -2241,6 +2283,7 @@ snapshot() TraceEventsFileFormat::Json, None, None, + false, ); tracer.begin(&outputs, 1).expect("begin tracer"); tracer.mark_failure(); diff --git a/codetracer-python-recorder/src/session.rs b/codetracer-python-recorder/src/session.rs index cd19cda..3f3c62d 100644 --- a/codetracer-python-recorder/src/session.rs +++ b/codetracer-python-recorder/src/session.rs @@ -11,10 +11,7 @@ use recorder_errors::{usage, ErrorCode}; use crate::ffi; use crate::logging::init_rust_logging_with_default; use crate::monitoring::{ - flush_installed_tracer, - install_tracer, - uninstall_tracer, - update_exit_status, + flush_installed_tracer, install_tracer, uninstall_tracer, update_exit_status, }; use crate::policy::policy_snapshot; use crate::runtime::{RuntimeTracer, TraceOutputPaths}; @@ -65,6 +62,7 @@ pub fn start_tracing( bootstrap.format(), bootstrap.activation_path(), bootstrap.trace_filter(), + policy.module_name_from_globals, ); tracer.begin(&outputs, 1)?; tracer.install_io_capture(py, &policy)?; diff --git a/codetracer-python-recorder/src/trace_filter/engine.rs b/codetracer-python-recorder/src/trace_filter/engine.rs index 0ee25c4..5ef9ec1 100644 --- a/codetracer-python-recorder/src/trace_filter/engine.rs +++ b/codetracer-python-recorder/src/trace_filter/engine.rs @@ -178,6 +178,20 @@ impl ScopeResolution { pub fn matched_rule_reason(&self) -> Option<&str> { self.matched_rule_reason.as_deref() } + + pub(crate) fn clone_with_module_name(&self, module_name: Option) -> ScopeResolution { + ScopeResolution { + exec: self.exec, + value_policy: Arc::clone(&self.value_policy), + module_name, + object_name: self.object_name.clone(), + relative_path: self.relative_path.clone(), + absolute_path: self.absolute_path.clone(), + matched_rule_index: self.matched_rule_index, + matched_rule_source: self.matched_rule_source, + matched_rule_reason: self.matched_rule_reason.clone(), + } + } } /// Runtime engine wrapping a compiled filter configuration. diff --git a/codetracer-python-recorder/tests/python/test_monitoring_events.py b/codetracer-python-recorder/tests/python/test_monitoring_events.py index da11e0d..1c1e15b 100644 --- a/codetracer-python-recorder/tests/python/test_monitoring_events.py +++ b/codetracer-python-recorder/tests/python/test_monitoring_events.py @@ -222,6 +222,26 @@ def test_module_imports_record_package_names(tmp_path: Path) -> None: assert any(name == "" for name in names), f"missing module name in {names}" +def test_module_name_follows_globals_policy(tmp_path: Path) -> None: + script = tmp_path / "script_globals.py" + script.write_text("VALUE = 1\n", encoding="utf-8") + + out_dir = ensure_trace_dir(tmp_path) + codetracer.configure_policy(module_name_from_globals=True) + + session = codetracer.start(out_dir, format=codetracer.TRACE_JSON, start_on_enter=script) + try: + runpy.run_path(str(script), run_name="__main__") + finally: + codetracer.flush() + codetracer.stop() + codetracer.configure_policy(module_name_from_globals=False) + + parsed = _parse_trace(out_dir) + names = [f["name"] for f in parsed.functions] + assert any(name == "<__main__>" for name in names), f"expected <__main__> in {names}" + + def test_all_argument_kinds_recorded_on_py_start(tmp_path: Path) -> None: # Arrange: write a script with a function using all Python argument kinds code = ( diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md index 274ec9e..aa0c06a 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.status.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -19,3 +19,7 @@ ### Stage 0 – Feature Flag and Compatibility Layer - **Status:** Completed Added the `module_name_from_globals` policy flag with CLI flag, Python bindings, and the `CODETRACER_MODULE_NAME_FROM_GLOBALS` env hook. Regression tests cover CLI parsing, policy snapshots, and environment configuration. + +### Stage 1 – Capture `__name__` at `py_start` +- **Status:** Completed + `RuntimeTracer` now captures `frame.f_globals['__name__']` for `` code when the feature flag is on, threads the hint through `FilterCoordinator`, and prefers it during both filter decisions and runtime naming. Added integration coverage ensuring opt-in sessions record `<__main__>` for scripts, plus unit updates for the new plumbing. From 573fb78e4ef476926cc523e421e86b570395f996 Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 15:35:55 +0200 Subject: [PATCH 5/8] Stage 2 --- .../benches/trace_filter.rs | 4 +- .../src/runtime/tracer/events.rs | 34 ++--- .../src/runtime/tracer/filtering.rs | 14 +- .../src/trace_filter/engine.rs | 135 +++++++++--------- ...tion-globals-implementation-plan.status.md | 4 + 5 files changed, 90 insertions(+), 101 deletions(-) diff --git a/codetracer-python-recorder/benches/trace_filter.rs b/codetracer-python-recorder/benches/trace_filter.rs index 0ed7e2e..9c1c592 100644 --- a/codetracer-python-recorder/benches/trace_filter.rs +++ b/codetracer-python-recorder/benches/trace_filter.rs @@ -52,7 +52,7 @@ fn run_workload(engine: &TraceFilterEngine, dataset: &WorkloadDataset) { for &index in &dataset.event_indices { let code = dataset.codes[index].as_ref(); let resolution = engine - .resolve(py, code) + .resolve(py, code, None) .expect("trace filter resolution should succeed during benchmarking"); let policy = resolution.value_policy(); for name in dataset.locals.iter() { @@ -66,7 +66,7 @@ fn prewarm_engine(engine: &TraceFilterEngine, dataset: &WorkloadDataset) { Python::with_gil(|py| { for code in &dataset.codes { let _ = engine - .resolve(py, code.as_ref()) + .resolve(py, code.as_ref(), None) .expect("prewarm resolution failed"); } }); diff --git a/codetracer-python-recorder/src/runtime/tracer/events.rs b/codetracer-python-recorder/src/runtime/tracer/events.rs index 7079a13..288eff0 100644 --- a/codetracer-python-recorder/src/runtime/tracer/events.rs +++ b/codetracer-python-recorder/src/runtime/tracer/events.rs @@ -176,28 +176,20 @@ impl Tracer for RuntimeTracer { code: &CodeObjectWrapper, _offset: i32, ) -> CallbackResult { - if self.module_name_from_globals { - if let Ok(qualname) = code.qualname(py) { - if qualname == "" { - let globals_name = match capture_frame(py, code) { - Ok(snapshot) => { - let mapping = snapshot.globals().unwrap_or_else(|| snapshot.locals()); - mapping - .get_item("__name__") - .ok() - .flatten() - .and_then(|value| value.extract::().ok()) - .map(|name| name.trim().to_string()) - .filter(|name| !name.is_empty()) - } - Err(_) => None, - }; - - self.filter - .set_module_name_hint(code.id(), globals_name.clone()); - } + let globals_name = match capture_frame(py, code) { + Ok(snapshot) => { + let mapping = snapshot.globals().unwrap_or_else(|| snapshot.locals()); + mapping + .get_item("__name__") + .ok() + .flatten() + .and_then(|value| value.extract::().ok()) + .map(|name| name.trim().to_string()) + .filter(|name| !name.is_empty()) } - } + Err(_) => None, + }; + self.filter.set_module_name_hint(code.id(), globals_name); if let Some(outcome) = self.evaluate_gate(py, code, true) { return Ok(outcome); diff --git a/codetracer-python-recorder/src/runtime/tracer/filtering.rs b/codetracer-python-recorder/src/runtime/tracer/filtering.rs index 71b53e2..d0b4798 100644 --- a/codetracer-python-recorder/src/runtime/tracer/filtering.rs +++ b/codetracer-python-recorder/src/runtime/tracer/filtering.rs @@ -128,19 +128,9 @@ impl FilterCoordinator { return Some(existing.clone()); } - match engine.resolve(py, code) { + let hint = self.module_name_hints.get(&code_id).map(|s| s.as_str()); + match engine.resolve(py, code, hint) { Ok(resolution) => { - let hint = self.module_name_hints.get(&code_id).cloned(); - let resolution = if let Some(hint) = hint { - if resolution.module_name() == Some(hint.as_str()) { - resolution - } else { - Arc::new(resolution.clone_with_module_name(Some(hint))) - } - } else { - resolution - }; - if resolution.exec() == ExecDecision::Trace { self.scope_cache.insert(code_id, Arc::clone(&resolution)); } else { diff --git a/codetracer-python-recorder/src/trace_filter/engine.rs b/codetracer-python-recorder/src/trace_filter/engine.rs index 5ef9ec1..146249b 100644 --- a/codetracer-python-recorder/src/trace_filter/engine.rs +++ b/codetracer-python-recorder/src/trace_filter/engine.rs @@ -4,7 +4,7 @@ //! and caches per-code-object resolutions so the hot tracing callbacks only pay a fast lookup. use crate::code_object::CodeObjectWrapper; -use crate::module_identity::{is_valid_module_name, normalise_to_posix, ModuleIdentityResolver}; +use crate::module_identity::{is_valid_module_name, module_from_relative, normalise_to_posix}; use crate::trace_filter::config::{ ExecDirective, FilterSource, FilterSummary, ScopeRule, TraceFilterConfig, ValueAction, ValuePattern, @@ -178,20 +178,6 @@ impl ScopeResolution { pub fn matched_rule_reason(&self) -> Option<&str> { self.matched_rule_reason.as_deref() } - - pub(crate) fn clone_with_module_name(&self, module_name: Option) -> ScopeResolution { - ScopeResolution { - exec: self.exec, - value_policy: Arc::clone(&self.value_policy), - module_name, - object_name: self.object_name.clone(), - relative_path: self.relative_path.clone(), - absolute_path: self.absolute_path.clone(), - matched_rule_index: self.matched_rule_index, - matched_rule_source: self.matched_rule_source, - matched_rule_reason: self.matched_rule_reason.clone(), - } - } } /// Runtime engine wrapping a compiled filter configuration. @@ -202,7 +188,6 @@ pub struct TraceFilterEngine { default_value_source: usize, rules: Arc<[CompiledScopeRule]>, cache: DashMap>, - module_resolver: ModuleIdentityResolver, } impl TraceFilterEngine { @@ -212,7 +197,6 @@ impl TraceFilterEngine { let default_value_action = config.default_value_action(); let default_value_source = config.default_value_source(); let rules = compile_rules(config.rules()); - let module_resolver = ModuleIdentityResolver::new(); TraceFilterEngine { config: Arc::new(config), @@ -221,7 +205,6 @@ impl TraceFilterEngine { default_value_source, rules, cache: DashMap::new(), - module_resolver, } } @@ -230,12 +213,13 @@ impl TraceFilterEngine { &self, py: Python<'py>, code: &CodeObjectWrapper, + module_hint: Option<&str>, ) -> RecorderResult> { if let Some(entry) = self.cache.get(&code.id()) { return Ok(entry.clone()); } - let resolution = Arc::new(self.resolve_uncached(py, code)?); + let resolution = Arc::new(self.resolve_uncached(py, code, module_hint)?); let entry = self .cache .entry(code.id()) @@ -247,6 +231,7 @@ impl TraceFilterEngine { &self, py: Python<'_>, code: &CodeObjectWrapper, + module_hint: Option<&str>, ) -> RecorderResult { let filename = code .filename(py) @@ -257,23 +242,31 @@ impl TraceFilterEngine { let mut context = ScopeContext::derive(filename, self.config.sources()); context.refresh_object_name(qualname); - if let Some(absolute) = context.absolute_path.clone() { - if let Some(module) = self.resolve_module_name(py, &absolute) { - let replace = match context.module_name.as_deref() { - None => true, - Some(existing) => { - !is_valid_module_name(existing) - || (module == "__main__" && !existing.contains('.')) - } - }; - if replace { - context.update_module_name(module, qualname); + + if let Some(hint) = module_hint { + if is_valid_module_name(hint) { + context.module_name = Some(hint.to_string()); + context.refresh_object_name(qualname); + } + } + + if context + .module_name + .as_ref() + .map(|name| !is_valid_module_name(name)) + .unwrap_or(true) + && context.module_name.is_none() + { + if let Some(absolute) = context.absolute_path.as_deref() { + if let Some(module) = module_name_from_packages(Path::new(absolute)) { + context.module_name = Some(module); + context.refresh_object_name(qualname); + } else { + log::debug!( + "[TraceFilter] unable to derive module name for '{}'; package selectors may not match", + absolute + ); } - } else if context.module_name.is_none() { - log::debug!( - "[TraceFilter] unable to derive module name for '{}'; package selectors may not match", - absolute - ); } } @@ -338,10 +331,6 @@ impl TraceFilterEngine { pub fn summary(&self) -> FilterSummary { self.config.summary() } - - fn resolve_module_name(&self, py: Python<'_>, absolute: &str) -> Option { - self.module_resolver.resolve_absolute(py, absolute) - } } #[derive(Debug, Clone)] @@ -463,7 +452,8 @@ impl ScopeContext { let module_name = relative_path .as_deref() - .and_then(|rel| crate::module_identity::module_from_relative(rel)); + .and_then(|rel| module_from_relative(rel)) + .filter(|name| is_valid_module_name(name)); ScopeContext { module_name, @@ -482,11 +472,6 @@ impl ScopeContext { (None, true) => None, }; } - - fn update_module_name(&mut self, module: String, qualname: &str) { - self.module_name = Some(module); - self.refresh_object_name(qualname); - } } fn normalise_relative(relative: PathBuf) -> String { @@ -506,6 +491,39 @@ fn normalise_relative(relative: PathBuf) -> String { components.join("/") } +fn module_name_from_packages(path: &Path) -> Option { + let mut segments: Vec = Vec::new(); + let mut current = path.parent(); + + while let Some(dir) = current { + let init = dir.join("__init__.py"); + if init.exists() { + if let Some(name) = dir.file_name().and_then(|s| s.to_str()) { + if is_valid_module_name(name) { + segments.push(name.to_string()); + current = dir.parent(); + continue; + } + } + } + break; + } + + segments.reverse(); + + if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { + if stem != "__init__" && is_valid_module_name(stem) { + segments.push(stem.to_string()); + } + } + + if segments.is_empty() { + return None; + } + + Some(segments.join(".")) +} + fn py_attr_error(attr: &str, err: PyErr) -> recorder_errors::RecorderError { target!( ErrorCode::FrameIntrospectionFailed, @@ -518,9 +536,8 @@ fn py_attr_error(attr: &str, err: PyErr) -> recorder_errors::RecorderError { #[cfg(test)] mod tests { use super::*; - use crate::module_identity::module_name_from_roots; use crate::trace_filter::config::TraceFilterConfig; - use pyo3::types::{PyAny, PyCode, PyDict, PyList, PyModule}; + use pyo3::types::{PyAny, PyCode, PyList, PyModule}; use std::ffi::CString; use std::fs; use std::io::Write; @@ -564,7 +581,7 @@ mod tests { let wrapper = CodeObjectWrapper::new(py, &code_obj); let engine = TraceFilterEngine::new(config.clone()); - let first = engine.resolve(py, &wrapper)?; + let first = engine.resolve(py, &wrapper, None)?; assert_eq!(first.exec(), ExecDecision::Trace); assert_eq!(first.matched_rule_index(), Some(0)); assert_eq!(first.module_name(), Some("app.foo")); @@ -583,7 +600,7 @@ mod tests { ValueAction::Allow ); - let second = engine.resolve(py, &wrapper)?; + let second = engine.resolve(py, &wrapper, None)?; assert!(Arc::ptr_eq(&first, &second)); Ok(()) }) @@ -619,7 +636,7 @@ mod tests { let wrapper = CodeObjectWrapper::new(py, &code_obj); let engine = TraceFilterEngine::new(config); - let resolution = engine.resolve(py, &wrapper)?; + let resolution = engine.resolve(py, &wrapper, None)?; assert_eq!(resolution.exec(), ExecDecision::Trace); assert_eq!(resolution.matched_rule_index(), Some(1)); @@ -672,13 +689,6 @@ mod tests { normalise_to_posix(Path::new(file_path.to_string_lossy().as_ref())).unwrap(); let module = py.import("app.foo").expect("import app.foo"); - let modules_any = sys.getattr("modules").expect("sys.modules"); - let modules: Bound<'_, PyDict> = - modules_any.downcast_into::().expect("modules dict"); - modules - .set_item("app.foo", module.as_any()) - .expect("register module"); - let func: Bound<'_, PyAny> = module.getattr("foo").expect("get foo"); let code_obj = func .getattr("__code__") @@ -690,18 +700,11 @@ mod tests { assert_eq!(recorded_filename, file_path.to_string_lossy()); let engine = TraceFilterEngine::new(config.clone()); - let expected_root = - normalise_to_posix(Path::new(project_root.to_string_lossy().as_ref())).unwrap(); - let resolver_roots = engine.module_resolver.module_roots(); - assert!(resolver_roots.iter().any(|root| root == &expected_root)); - let derived_from_roots = module_name_from_roots(resolver_roots, absolute_path.as_str()); - assert_eq!(derived_from_roots, Some("app.foo".to_string())); - let resolution = engine.resolve(py, &wrapper)?; + let resolution = engine.resolve(py, &wrapper, None)?; assert_eq!(resolution.absolute_path(), Some(absolute_path.as_str())); assert_eq!(resolution.module_name(), Some("app.foo")); assert_eq!(resolution.exec(), ExecDecision::Skip); - modules.del_item("app.foo").expect("remove module"); sys_path.del_item(0).expect("restore sys.path"); Ok(()) }) @@ -727,7 +730,7 @@ mod tests { let wrapper = CodeObjectWrapper::new(py, &code_obj); let engine = TraceFilterEngine::new(config); - let resolution = engine.resolve(py, &wrapper)?; + let resolution = engine.resolve(py, &wrapper, None)?; assert_eq!(resolution.exec(), ExecDecision::Skip); assert_eq!(resolution.relative_path(), Some("app/foo.py")); diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md index aa0c06a..31cae8b 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.status.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -23,3 +23,7 @@ ### Stage 1 – Capture `__name__` at `py_start` - **Status:** Completed `RuntimeTracer` now captures `frame.f_globals['__name__']` for `` code when the feature flag is on, threads the hint through `FilterCoordinator`, and prefers it during both filter decisions and runtime naming. Added integration coverage ensuring opt-in sessions record `<__main__>` for scripts, plus unit updates for the new plumbing. + +### Stage 2 – Simplify Filter Engine +- **Status:** Completed + Removed the resolver dependency from `TraceFilterEngine`, teach it to accept globals-based hints, and added a package-structure fallback when relative paths are unavailable. Filtering decisions now rely on the new hint flow while keeping legacy behaviour intact for `module_name_from_globals` opt-out scenarios. From e73e4ac9c5e0a42ebe169992396e70761e71a3f2 Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 15:58:29 +0200 Subject: [PATCH 6/8] Stage 3 --- .../src/module_identity.rs | 747 +++--------------- .../src/runtime/tracer/events.rs | 2 - .../src/runtime/tracer/runtime_tracer.rs | 44 +- .../src/trace_filter/engine.rs | 37 +- ...event-naming-implementation-plan.status.md | 4 +- .../module-name-resolution-deep-review.md | 61 +- ...tion-globals-implementation-plan.status.md | 4 + 7 files changed, 171 insertions(+), 728 deletions(-) diff --git a/codetracer-python-recorder/src/module_identity.rs b/codetracer-python-recorder/src/module_identity.rs index 7602128..b7f47f2 100644 --- a/codetracer-python-recorder/src/module_identity.rs +++ b/codetracer-python-recorder/src/module_identity.rs @@ -1,405 +1,14 @@ -//! Shared helpers for deriving Python module names from filenames and module metadata. +//! Minimal helpers for deriving Python module identifiers from filesystem metadata. use std::borrow::Cow; -use std::cmp::Ordering; use std::env; use std::path::{Component, Path}; -use std::sync::Arc; -use dashmap::DashMap; use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList}; - -use crate::code_object::CodeObjectWrapper; - -/// Resolver that infers module names for absolute file paths using sys.path roots and -/// sys.modules fallbacks. Results are cached per path to avoid repeated scans. -#[derive(Debug, Clone)] -pub struct ModuleIdentityResolver { - module_roots: Arc<[String]>, - cache: DashMap>, -} - -impl ModuleIdentityResolver { - /// Construct a resolver using the current `sys.path`. - pub fn new() -> Self { - let roots = Python::with_gil(|py| collect_module_roots(py)); - Self::from_roots(roots) - } - - /// Construct a resolver from an explicit list of module roots. Visible for tests. - pub fn from_roots(roots: Vec) -> Self { - let module_roots = canonicalise_module_roots(roots); - Self { - module_roots, - cache: DashMap::new(), - } - } - - /// Resolve the module name for an absolute POSIX path (if any). - pub fn resolve_absolute(&self, py: Python<'_>, absolute: &str) -> Option { - if let Some(entry) = self.cache.get(absolute) { - return entry.clone(); - } - let mut path_candidate = module_name_from_roots(self.module_roots(), absolute); - if path_candidate - .as_ref() - .map(|name| is_filesystem_shaped_name(name, absolute)) - .unwrap_or(true) - { - if let Some(heuristic) = module_name_from_heuristics(absolute) { - path_candidate = Some(heuristic); - } - } - let sys_candidate = lookup_module_name(py, absolute); - let (resolved, cacheable) = match (sys_candidate, path_candidate) { - (Some(preferred), _) => (Some(preferred), true), - (None, candidate) => (candidate, false), - }; - if cacheable { - self.cache.insert(absolute.to_string(), resolved.clone()); - } - resolved - } - - /// Expose the sys.path roots used by this resolver (primarily for tests). - pub fn module_roots(&self) -> &[String] { - &self.module_roots - } -} - -/// Caches module names per code object id, reusing a shared resolver for filesystem lookups. -#[allow(dead_code)] -#[derive(Debug)] -pub struct ModuleIdentityCache { - resolver: ModuleIdentityResolver, - code_cache: DashMap>, -} - -#[allow(dead_code)] -impl ModuleIdentityCache { - /// Construct with a fresh resolver seeded from `sys.path`. - pub fn new() -> Self { - Self::with_resolver(ModuleIdentityResolver::new()) - } - - /// Construct using an explicit resolver (primarily for tests). - pub fn with_resolver(resolver: ModuleIdentityResolver) -> Self { - Self { - resolver, - code_cache: DashMap::new(), - } - } - - /// Resolve the dotted module name for a Python code object, caching the result by code id. - pub fn resolve_for_code<'py>( - &self, - py: Python<'py>, - code: &CodeObjectWrapper, - hints: ModuleNameHints<'_>, - ) -> Option { - if let Some(entry) = self.code_cache.get(&code.id()) { - return entry.clone(); - } - - let globals_name = hints.globals_name.and_then(sanitise_module_name); - let mut resolved = hints - .preferred - .and_then(sanitise_module_name) - .or_else(|| { - hints - .relative_path - .and_then(|relative| module_from_relative(relative)) - }) - .or_else(|| { - hints - .absolute_path - .and_then(|absolute| self.resolver.resolve_absolute(py, absolute)) - }) - .or_else(|| globals_name.clone()); - - if let Some(globals) = globals_name.as_ref() { - if globals == "__main__" - && resolved - .as_deref() - .map(|candidate| candidate != "__main__") - .unwrap_or(true) - { - resolved = Some(globals.clone()); - } - } - - if resolved.is_none() && hints.absolute_path.is_none() { - if let Ok(filename) = code.filename(py) { - let path = Path::new(filename); - if path.is_absolute() { - if let Some(normalized) = normalise_to_posix(path) { - resolved = self.resolver.resolve_absolute(py, normalized.as_str()); - } - } - } - } - - self.code_cache.insert(code.id(), resolved.clone()); - resolved - } - - /// Remove a cached module name for a code id. - pub fn invalidate(&self, code_id: usize) { - self.code_cache.remove(&code_id); - } - - /// Clear all cached code-object mappings. - pub fn clear(&self) { - self.code_cache.clear(); - } - - /// Access the underlying resolver (primarily for tests/runtime wiring). - pub fn resolver(&self) -> &ModuleIdentityResolver { - &self.resolver - } -} - -/// Optional hints supplied when resolving module names. -#[allow(dead_code)] -#[derive(Debug, Default, Clone, Copy)] -pub struct ModuleNameHints<'a> { - /// Module name provided by another subsystem (e.g., trace filters). - pub preferred: Option<&'a str>, - /// Normalised project-relative path (used for deterministic names within a project). - pub relative_path: Option<&'a str>, - /// Absolute POSIX path to the source file. - pub absolute_path: Option<&'a str>, - /// `__name__` extracted from frame globals during runtime tracing. - pub globals_name: Option<&'a str>, -} - -#[allow(dead_code)] -impl<'a> ModuleNameHints<'a> { - pub fn new() -> Self { - Self::default() - } -} - -fn collect_module_roots(py: Python<'_>) -> Vec { - let mut roots = Vec::new(); - let cwd = env::current_dir() - .ok() - .and_then(|dir| normalise_to_posix(dir.as_path())); - if let Ok(sys) = py.import("sys") { - if let Ok(path_obj) = sys.getattr("path") { - if let Ok(path_list) = path_obj.downcast_into::() { - for entry in path_list.iter() { - if let Ok(raw) = entry.extract::() { - if raw.is_empty() { - if let Some(dir) = cwd.as_ref() { - roots.push(dir.clone()); - } - continue; - } - if let Some(normalized) = normalise_to_posix(Path::new(&raw)) { - roots.push(normalized); - } - } - } - } - } - } - roots -} - -fn canonicalise_module_roots(roots: Vec) -> Arc<[String]> { - let mut canonical: Vec = roots.into_iter().map(canonicalise_root).collect(); - canonical.sort_by(|a, b| compare_roots(a, b)); - canonical.dedup(); - Arc::from(canonical) -} - -fn canonicalise_root(mut root: String) -> String { - if root.is_empty() { - if let Ok(cwd) = std::env::current_dir() { - if let Some(normalized) = normalise_to_posix(cwd.as_path()) { - return normalized; - } - } - return "/".to_string(); - } - while root.len() > 1 && root.ends_with('/') { - root.pop(); - } - root -} - -fn compare_roots(a: &str, b: &str) -> Ordering { - let len_a = a.len(); - let len_b = b.len(); - if len_a == len_b { - a.cmp(b) - } else { - len_b.cmp(&len_a) - } -} - -pub(crate) fn module_name_from_roots(roots: &[String], absolute: &str) -> Option { - for base in roots { - if let Some(relative) = strip_posix_prefix(absolute, base) { - if let Some(name) = relative_str_to_module(relative) { - return Some(name); - } - } - } - None -} - -fn module_name_from_heuristics(absolute: &str) -> Option { - let roots = heuristic_roots_for_absolute(absolute); - if roots.is_empty() { - return None; - } - module_name_from_roots(&roots, absolute) -} - -fn lookup_module_name(py: Python<'_>, absolute: &str) -> Option { - let sys = py.import("sys").ok()?; - let modules_obj = sys.getattr("modules").ok()?; - let modules: Bound<'_, PyDict> = modules_obj.downcast_into::().ok()?; - - let mut best: Option<(usize, String)> = None; - 'modules: for (name_obj, module_obj) in modules.iter() { - let module_name: String = name_obj.extract().ok()?; - if module_obj.is_none() { - continue; - } - for candidate in module_candidate_paths(&module_obj) { - if equivalent_posix_paths(&candidate, absolute) { - let preferred = preferred_module_name(&module_name, &module_obj); - let score = module_name_score(&preferred); - let update = match best { - Some((best_score, _)) => score < best_score, - None => true, - }; - if update { - best = Some((score, preferred)); - if score == 0 { - break 'modules; - } - } - } - } - } - - best.map(|(_, name)| name) -} - -fn module_candidate_paths(module: &Bound<'_, PyAny>) -> Vec { - let mut candidates = Vec::new(); - if let Ok(spec) = module.getattr("__spec__") { - if let Some(origin) = extract_normalised_spec_origin(&spec) { - candidates.push(origin); - } - } - if let Some(file) = extract_normalised_attr(module, "__file__") { - candidates.push(file); - } - if let Some(cached) = extract_normalised_attr(module, "__cached__") { - candidates.push(cached); - } - candidates -} - -fn extract_normalised_attr(module: &Bound<'_, PyAny>, attr: &str) -> Option { - let value = module.getattr(attr).ok()?; - extract_normalised_path(&value) -} - -fn extract_normalised_spec_origin(spec: &Bound<'_, PyAny>) -> Option { - if spec.is_none() { - return None; - } - let origin = spec.getattr("origin").ok()?; - extract_normalised_path(&origin) -} - -fn extract_normalised_path(value: &Bound<'_, PyAny>) -> Option { - if value.is_none() { - return None; - } - let raw: String = value.extract().ok()?; - normalise_to_posix(Path::new(raw.as_str())) -} - -fn equivalent_posix_paths(candidate: &str, target: &str) -> bool { - if candidate == target { - return true; - } - if candidate.ends_with(".pyc") && target.ends_with(".py") { - return candidate.trim_end_matches('c') == target; - } - false -} - -fn preferred_module_name(default: &str, module: &Bound<'_, PyAny>) -> String { - if let Ok(spec) = module.getattr("__spec__") { - if let Ok(name) = spec.getattr("name") { - if let Ok(raw) = name.extract::() { - if !raw.is_empty() { - return raw; - } - } - } - } - if let Ok(name_attr) = module.getattr("__name__") { - if let Ok(raw) = name_attr.extract::() { - if !raw.is_empty() { - return raw; - } - } - } - default.to_string() -} - -fn module_name_score(name: &str) -> usize { - if name - .split('.') - .all(|segment| !segment.is_empty() && segment.chars().all(is_identifier_char)) - { - 0 - } else { - 1 - } -} - -fn is_identifier_char(ch: char) -> bool { - ch == '_' || ch.is_ascii_alphanumeric() -} +use pyo3::types::PyList; /// Convert a normalised relative path (e.g., `pkg/foo.py`) into a dotted module name. pub fn module_from_relative(relative: &str) -> Option { - relative_str_to_module(relative) -} - -#[allow(dead_code)] -fn sanitise_module_name(candidate: &str) -> Option { - let trimmed = candidate.trim(); - if trimmed.is_empty() { - return None; - } - if is_valid_module_name(trimmed) { - Some(trimmed.to_string()) - } else { - None - } -} - -/// Return true when the supplied module name is a dotted identifier. -pub fn is_valid_module_name(name: &str) -> bool { - !name.is_empty() - && name - .split('.') - .all(|segment| !segment.is_empty() && segment.chars().all(is_identifier_char)) -} - -fn relative_str_to_module(relative: &str) -> Option { let mut parts: Vec<&str> = relative .split('/') .filter(|segment| !segment.is_empty()) @@ -425,10 +34,15 @@ fn strip_posix_prefix<'a>(path: &'a str, base: &str) -> Option<&'a str> { if base.is_empty() { return None; } - if base == "/" { - return path.strip_prefix('/'); + let base = if base == "/" { + String::from("") + } else { + base.to_string() + }; + if path == base { + return None; } - if path.starts_with(base) { + if path.starts_with(&base) { let mut remainder = &path[base.len()..]; if remainder.starts_with('/') { remainder = &remainder[1..]; @@ -443,67 +57,47 @@ fn strip_posix_prefix<'a>(path: &'a str, base: &str) -> Option<&'a str> { } } -fn module_from_absolute(absolute: &str) -> Option { - let without_root = absolute.trim_start_matches('/'); - let trimmed = trim_drive_prefix(without_root); - if trimmed.is_empty() { - return None; +/// Attempt to infer a module name by traversing `__init__.py` packages containing `path`. +pub fn module_name_from_packages(path: &Path) -> Option { + let mut segments: Vec = Vec::new(); + let mut current = path.parent(); + while let Some(dir) = current { + if dir.join("__init__.py").exists() { + if let Some(component) = dir.file_name().and_then(|s| s.to_str()) { + if is_valid_module_name(component) { + segments.push(component.to_string()); + current = dir.parent(); + continue; + } + } + } + break; } - module_from_relative(trimmed) -} + segments.reverse(); -fn trim_drive_prefix(path: &str) -> &str { - if let Some((prefix, remainder)) = path.split_once('/') { - if prefix.ends_with(':') { - return remainder; + if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { + if stem != "__init__" && is_valid_module_name(stem) { + segments.push(stem.to_string()); } } - path -} - -fn is_filesystem_shaped_name(candidate: &str, absolute: &str) -> bool { - module_from_absolute(absolute) - .as_deref() - .map(|path_like| path_like == candidate) - .unwrap_or(false) -} -fn heuristic_roots_for_absolute(absolute: &str) -> Vec { - if let Some(project_root) = find_nearest_project_root(absolute) { - vec![project_root] - } else if let Some(parent) = Path::new(absolute) - .parent() - .and_then(|dir| normalise_to_posix(dir)) - { - vec![parent] + if segments.is_empty() { + None } else { - Vec::new() + Some(segments.join(".")) } } -fn find_nearest_project_root(absolute: &str) -> Option { - let mut current = Path::new(absolute).parent(); - while let Some(dir) = current { - if has_project_marker(dir) { - return normalise_to_posix(dir); - } - current = dir.parent(); - } - None +/// Return true when the supplied module name is a dotted identifier. +pub fn is_valid_module_name(name: &str) -> bool { + !name.is_empty() + && name + .split('.') + .all(|segment| !segment.is_empty() && segment.chars().all(is_identifier_char)) } -fn has_project_marker(dir: &Path) -> bool { - const PROJECT_MARKER_FILES: &[&str] = &["pyproject.toml", "setup.cfg", "setup.py"]; - const PROJECT_MARKER_DIRS: &[&str] = &[".git", ".hg", ".svn"]; - - PROJECT_MARKER_DIRS - .iter() - .map(|marker| dir.join(marker)) - .any(|marker_dir| marker_dir.exists()) - || PROJECT_MARKER_FILES - .iter() - .map(|marker| dir.join(marker)) - .any(|marker_file| marker_file.exists()) +fn is_identifier_char(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() } /// Normalise a filesystem path to a POSIX-style string used by trace filters. @@ -533,89 +127,9 @@ pub fn normalise_to_posix(path: &Path) -> Option { #[cfg(test)] mod tests { use super::*; - use crate::code_object::CodeObjectWrapper; - use pyo3::types::{PyAny, PyCode, PyModule}; - use std::ffi::CString; - - fn load_module<'py>( - py: Python<'py>, - module_name: &str, - file_path: &str, - source: &str, - ) -> PyResult> { - let code_c = CString::new(source).expect("source without NUL"); - let file_c = CString::new(file_path).expect("path without NUL"); - let module_c = CString::new(module_name).expect("module without NUL"); - PyModule::from_code( - py, - code_c.as_c_str(), - file_c.as_c_str(), - module_c.as_c_str(), - ) - } - - fn get_code<'py>(module: &Bound<'py, PyModule>, func_name: &str) -> Bound<'py, PyCode> { - let func: Bound<'py, PyAny> = module.getattr(func_name).expect("function"); - func.getattr("__code__") - .expect("__code__ attr") - .downcast_into::() - .expect("PyCode") - } - - #[test] - fn normalise_to_posix_handles_common_paths() { - let path = Path::new("src/lib/foo.py"); - assert_eq!(normalise_to_posix(path).as_deref(), Some("src/lib/foo.py")); - } - - #[test] - fn module_identity_cache_prefers_preferred_hint() { - Python::with_gil(|py| { - let module = - load_module(py, "tmp_mod", "tmp_mod.py", "def foo():\n return 1\n").unwrap(); - let code = get_code(&module, "foo"); - let wrapper = CodeObjectWrapper::new(py, &code); - let cache = ModuleIdentityCache::new(); - let hints = ModuleNameHints { - preferred: Some("pkg.actual"), - ..ModuleNameHints::default() - }; - let resolved = cache.resolve_for_code(py, &wrapper, hints); - assert_eq!(resolved.as_deref(), Some("pkg.actual")); - }); - } - - #[test] - fn module_identity_cache_uses_resolver_for_absolute_paths() { - Python::with_gil(|py| { - let tmp = tempfile::tempdir().expect("tempdir"); - let module_path = tmp.path().join("pkg").join("mod.py"); - std::fs::create_dir_all(module_path.parent().unwrap()).expect("mkdir"); - std::fs::write(&module_path, "def foo():\n return 1\n").expect("write source"); - - let module_path_str = module_path.to_string_lossy().to_string(); - let module = load_module( - py, - "pkg.mod", - module_path_str.as_str(), - "def foo():\n return 1\n", - ) - .unwrap(); - let code = get_code(&module, "foo"); - let wrapper = CodeObjectWrapper::new(py, &code); - let root = normalise_to_posix(tmp.path()).expect("normalize root"); - let resolver = ModuleIdentityResolver::from_roots(vec![root]); - let cache = ModuleIdentityCache::with_resolver(resolver); - let absolute_norm = normalise_to_posix(module_path.as_path()).expect("normalize abs"); - let hints = ModuleNameHints { - absolute_path: Some(absolute_norm.as_str()), - ..ModuleNameHints::default() - }; - - let resolved = cache.resolve_for_code(py, &wrapper, hints); - assert_eq!(resolved.as_deref(), Some("pkg.mod")); - }); - } + use pyo3::Python; + use std::fs; + use tempfile::tempdir; #[test] fn module_from_relative_strips_init() { @@ -630,134 +144,91 @@ mod tests { } #[test] - fn module_name_from_roots_prefers_specific_root_over_catch_all() { - Python::with_gil(|py| { - let tmp = tempfile::tempdir().expect("tempdir"); - let module_dir = tmp.path().join("pkg"); - std::fs::create_dir_all(&module_dir).expect("mkdir"); - let module_path = module_dir.join("mod.py"); - std::fs::write(&module_path, "def foo():\n return 1\n").expect("write"); - - let project_root = normalise_to_posix(tmp.path()).expect("normalize root"); - let resolver = - ModuleIdentityResolver::from_roots(vec!["/".to_string(), project_root.clone()]); - let absolute_norm = - normalise_to_posix(module_path.as_path()).expect("normalize absolute"); - let derived = module_name_from_roots(resolver.module_roots(), absolute_norm.as_str()); + fn module_name_from_packages_detects_package_hierarchy() { + let tmp = tempdir().expect("tempdir"); + let pkg_dir = tmp.path().join("pkg").join("sub"); + fs::create_dir_all(&pkg_dir).expect("create dirs"); + fs::write(pkg_dir.join("__init__.py"), "# pkg\n").expect("write __init__"); + fs::write(tmp.path().join("pkg").join("__init__.py"), "# pkg\n").expect("write init"); - assert_eq!(derived.as_deref(), Some("pkg.mod")); + let module_path = pkg_dir.join("mod.py"); + fs::write(&module_path, "value = 1\n").expect("write module"); - // suppress unused warnings - let _ = py; - }); + let derived = module_name_from_packages(module_path.as_path()); + assert_eq!(derived.as_deref(), Some("pkg.sub.mod")); } #[test] - fn resolve_absolute_prefers_sys_modules_name_over_path_fallback() { - Python::with_gil(|py| { - let tmp = tempfile::tempdir().expect("tempdir"); - let module_dir = tmp.path().join("pkg"); - std::fs::create_dir_all(&module_dir).expect("mkdir"); - let module_path = module_dir.join("mod.py"); - std::fs::write(&module_path, "def foo():\n return 42\n").expect("write"); - - let module_path_str = module_path.to_string_lossy().to_string(); - let module = load_module( - py, - "pkg.mod", - module_path_str.as_str(), - "def foo():\n return 42\n", - ) - .expect("load module"); - let _code = get_code(&module, "foo"); - - let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); - let absolute_norm = - normalise_to_posix(module_path.as_path()).expect("normalize absolute"); - let resolved = resolver.resolve_absolute(py, absolute_norm.as_str()); - assert_eq!(resolved.as_deref(), Some("pkg.mod")); + fn module_name_from_packages_ignores_non_packages() { + let tmp = tempdir().expect("tempdir"); + let module_path = tmp.path().join("script.py"); + fs::write(&module_path, "value = 1\n").expect("write module"); - // clean up sys.modules to avoid cross-test contamination - if let Ok(sys) = py.import("sys") { - if let Ok(modules) = sys.getattr("modules") { - let _ = modules.del_item("pkg.mod"); - } - } - }); + assert_eq!( + module_name_from_packages(module_path.as_path()).as_deref(), + Some("script") + ); } #[test] - fn resolve_absolute_uses_project_marker_root() { + fn module_name_from_sys_path_uses_roots() { Python::with_gil(|py| { - let tmp = tempfile::tempdir().expect("tempdir"); - let project_dir = tmp.path().join("project"); - let tests_dir = project_dir.join("tests"); - std::fs::create_dir_all(&tests_dir).expect("mkdir tests"); - std::fs::create_dir(project_dir.join(".git")).expect("mkdir git"); - let module_path = tests_dir.join("test_mod.py"); - std::fs::write(&module_path, "def sample():\n return 7\n").expect("write"); + let tmp = tempdir().expect("tempdir"); + let pkg_dir = tmp.path().join("pkg"); + fs::create_dir_all(&pkg_dir).expect("create pkg"); + let module_path = pkg_dir.join("mod.py"); + fs::write(&module_path, "value = 1\n").expect("write module"); + + let sys = py.import("sys").expect("import sys"); + let sys_path = sys.getattr("path").expect("sys.path"); + sys_path + .call_method1("insert", (0, tmp.path().to_string_lossy().as_ref())) + .expect("insert tmp root"); - let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); - let absolute_norm = - normalise_to_posix(module_path.as_path()).expect("normalize absolute"); - let resolved = resolver.resolve_absolute(py, absolute_norm.as_str()); + let derived = module_name_from_sys_path(py, module_path.as_path()); + assert_eq!(derived.as_deref(), Some("pkg.mod")); - assert_eq!(resolved.as_deref(), Some("tests.test_mod")); + sys_path + .call_method1("pop", (0,)) + .expect("restore sys.path"); }); } #[test] - fn resolve_absolute_returns_main_for_runpy_module() { - Python::with_gil(|py| { - let tmp = tempfile::tempdir().expect("tempdir"); - let script_path = tmp.path().join("cli.py"); - std::fs::write( - &script_path, - "def entrypoint():\n return 0\n\nif __name__ == '__main__':\n entrypoint()\n", - ) - .expect("write script"); - - let script_norm = - normalise_to_posix(script_path.as_path()).expect("normalize absolute path"); - - let sys = py.import("sys").expect("import sys"); - let modules = sys.getattr("modules").expect("sys.modules"); - let original_main = modules - .get_item("__main__") - .ok() - .map(|obj| obj.clone().unbind()); + fn normalise_to_posix_handles_common_paths() { + let path = Path::new("src/lib/foo.py"); + assert_eq!(normalise_to_posix(path).as_deref(), Some("src/lib/foo.py")); + } +} +pub fn module_name_from_sys_path(py: Python<'_>, path: &Path) -> Option { + let absolute = normalise_to_posix(path)?; + let sys = py.import("sys").ok()?; + let sys_path_obj = sys.getattr("path").ok()?; + let sys_path = sys_path_obj.downcast::().ok()?; - let module = PyModule::new(py, "__main__").expect("create module"); - module - .setattr("__file__", script_path.to_string_lossy().as_ref()) - .expect("set __file__"); - module - .setattr("__name__", "__main__") - .expect("set __name__"); - module - .setattr("__package__", py.None()) - .expect("set __package__"); - module.setattr("__spec__", py.None()).expect("set __spec__"); - modules - .set_item("__main__", module) - .expect("register __main__"); + let cwd = env::current_dir() + .ok() + .and_then(|dir| normalise_to_posix(dir.as_path())); - let resolver = ModuleIdentityResolver::from_roots(vec!["/".to_string()]); - let resolved = resolver.resolve_absolute(py, script_norm.as_str()); - assert_eq!(resolved.as_deref(), Some("__main__")); + for entry in sys_path.iter() { + let raw = entry.extract::().ok()?; + let root = if raw.is_empty() { + match cwd.as_ref() { + Some(current) => current.clone(), + None => continue, + } + } else if let Some(norm) = normalise_to_posix(Path::new(&raw)) { + norm + } else { + continue; + }; - match original_main { - Some(previous) => { - modules - .set_item("__main__", previous) - .expect("restore __main__"); - } - None => { - modules - .del_item("__main__") - .expect("remove temporary __main__"); - } + if let Some(remainder) = strip_posix_prefix(&absolute, &root) { + if let Some(name) = module_from_relative(remainder) { + return Some(name); } - }); + } } + + None } diff --git a/codetracer-python-recorder/src/runtime/tracer/events.rs b/codetracer-python-recorder/src/runtime/tracer/events.rs index 288eff0..0655bf2 100644 --- a/codetracer-python-recorder/src/runtime/tracer/events.rs +++ b/codetracer-python-recorder/src/runtime/tracer/events.rs @@ -494,7 +494,6 @@ impl Tracer for RuntimeTracer { .map_err(ffi::map_recorder_error)?; } self.function_ids.clear(); - self.module_names.clear(); self.io.clear_snapshots(); self.filter.reset(); self.lifecycle.reset_event_state(); @@ -508,7 +507,6 @@ impl Tracer for RuntimeTracer { .finalise(&mut self.writer, &self.filter, &exit_summary) .map_err(ffi::map_recorder_error)?; self.function_ids.clear(); - self.module_names.clear(); self.filter.reset(); self.io.clear_snapshots(); self.lifecycle.reset_event_state(); diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index 1b61c45..abcd9f6 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -4,7 +4,9 @@ use super::io::IoCoordinator; use super::lifecycle::LifecycleController; use crate::code_object::CodeObjectWrapper; use crate::ffi; -use crate::module_identity::{ModuleIdentityCache, ModuleNameHints}; +use crate::module_identity::{ + module_from_relative, module_name_from_packages, module_name_from_sys_path, +}; use crate::monitoring::CallbackOutcome; use crate::policy::RecorderPolicy; use crate::runtime::io_capture::{IoCaptureSettings, ScopedMuteIoCapture}; @@ -124,7 +126,6 @@ pub struct RuntimeTracer { pub(super) function_ids: HashMap, pub(super) io: IoCoordinator, pub(super) filter: FilterCoordinator, - pub(super) module_names: ModuleIdentityCache, pub(super) module_name_from_globals: bool, session_exit: SessionExitState, } @@ -148,7 +149,6 @@ impl RuntimeTracer { function_ids: HashMap::new(), io: IoCoordinator::new(), filter: FilterCoordinator::new(trace_filter), - module_names: ModuleIdentityCache::new(), module_name_from_globals, session_exit: SessionExitState::default(), } @@ -304,17 +304,35 @@ impl RuntimeTracer { let resolution = self.filter.cached_resolution(code.id()); if let Some(resolution) = resolution.as_ref() { - let hints = ModuleNameHints { - preferred: resolution.module_name(), - relative_path: resolution.relative_path(), - absolute_path: resolution.absolute_path(), - globals_name: None, - }; - self.module_names.resolve_for_code(py, code, hints) - } else { - self.module_names - .resolve_for_code(py, code, ModuleNameHints::default()) + if let Some(name) = resolution.module_name() { + return Some(name.to_string()); + } + if let Some(relative) = resolution.relative_path() { + if let Some(name) = module_from_relative(relative) { + return Some(name); + } + } + if let Some(absolute) = resolution.absolute_path() { + if let Some(name) = module_name_from_sys_path(py, Path::new(absolute)) { + return Some(name); + } + if let Some(name) = module_name_from_packages(Path::new(absolute)) { + return Some(name); + } + } } + + if let Ok(filename) = code.filename(py) { + let path = Path::new(filename); + if let Some(name) = module_name_from_sys_path(py, path) { + return Some(name); + } + if let Some(name) = module_name_from_packages(path) { + return Some(name); + } + } + + None } } diff --git a/codetracer-python-recorder/src/trace_filter/engine.rs b/codetracer-python-recorder/src/trace_filter/engine.rs index 146249b..5446085 100644 --- a/codetracer-python-recorder/src/trace_filter/engine.rs +++ b/codetracer-python-recorder/src/trace_filter/engine.rs @@ -4,7 +4,9 @@ //! and caches per-code-object resolutions so the hot tracing callbacks only pay a fast lookup. use crate::code_object::CodeObjectWrapper; -use crate::module_identity::{is_valid_module_name, module_from_relative, normalise_to_posix}; +use crate::module_identity::{ + is_valid_module_name, module_from_relative, module_name_from_packages, normalise_to_posix, +}; use crate::trace_filter::config::{ ExecDirective, FilterSource, FilterSummary, ScopeRule, TraceFilterConfig, ValueAction, ValuePattern, @@ -491,39 +493,6 @@ fn normalise_relative(relative: PathBuf) -> String { components.join("/") } -fn module_name_from_packages(path: &Path) -> Option { - let mut segments: Vec = Vec::new(); - let mut current = path.parent(); - - while let Some(dir) = current { - let init = dir.join("__init__.py"); - if init.exists() { - if let Some(name) = dir.file_name().and_then(|s| s.to_str()) { - if is_valid_module_name(name) { - segments.push(name.to_string()); - current = dir.parent(); - continue; - } - } - } - break; - } - - segments.reverse(); - - if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { - if stem != "__init__" && is_valid_module_name(stem) { - segments.push(stem.to_string()); - } - } - - if segments.is_empty() { - return None; - } - - Some(segments.join(".")) -} - fn py_attr_error(attr: &str, err: PyErr) -> recorder_errors::RecorderError { target!( ErrorCode::FrameIntrospectionFailed, diff --git a/design-docs/module-call-event-naming-implementation-plan.status.md b/design-docs/module-call-event-naming-implementation-plan.status.md index 264f6b1..25549ff 100644 --- a/design-docs/module-call-event-naming-implementation-plan.status.md +++ b/design-docs/module-call-event-naming-implementation-plan.status.md @@ -19,12 +19,12 @@ ### WS1 – Shared Module Identity Helper - **Scope recap:** Extract and centralise module-name derivation (relative path stripping + `sys.modules` lookup) so both filters and the runtime tracer can reuse it with caching. - **Status:** _Completed_ -- **Notes:** `src/module_identity.rs` now owns `ModuleIdentityResolver`, `ModuleIdentityCache`, sanitisation helpers, and unit tests covering `.py` vs `.pyc`, package roots, and hint precedence. `TraceFilterEngine` uses the shared resolver for all module lookups, keeping behaviour aligned between filtering and runtime components. +-- **Notes:** `src/module_identity.rs` now provides the lightweight helpers (`module_from_relative`, `module_name_from_packages`, etc.) that both the tracer and filter engine reuse when hints are missing, keeping naming consistent across components. ### WS2 – Runtime Tracer Integration - **Scope recap:** Detect module-level code (`co_qualname == ""`) and rename call events to `` using the shared resolver; plumb filter-derived names to avoid duplicate work. - **Status:** _Completed_ -- **Notes:** `RuntimeTracer` now rewrites module-level call events via `ModuleIdentityCache`, clears the cache alongside `function_ids`, and exposes a test hook to verify naming logic. Added a unit test (`module_import_records_module_name`) and a Python integration test (`test_module_imports_record_package_names`) that traces a real import to confirm `` shows up in `trace.json`. +-- **Notes:** `RuntimeTracer` rewrites module-level call events using the globals-derived hint or the filter’s cached resolution, with integration tests (`module_import_records_module_name`, `test_module_imports_record_package_names`) confirming `` appears in `trace.json`. ### WS3 – Testing, Tooling, and Docs - **Scope recap:** Add regression tests (Python + Rust) validating the new naming, update documentation/changelog, and refresh any snapshot expectations. diff --git a/design-docs/module-name-resolution-deep-review.md b/design-docs/module-name-resolution-deep-review.md index 14461d0..f4c9b14 100644 --- a/design-docs/module-name-resolution-deep-review.md +++ b/design-docs/module-name-resolution-deep-review.md @@ -31,52 +31,35 @@ The current design assumes the reader understands the following basics: ## 4. Where the Logic Lives -- `codetracer-python-recorder/src/module_identity.rs:18` – core resolver, caching, heuristics, and helpers (`normalise_to_posix`, `module_from_relative`, etc.). -- `codetracer-python-recorder/src/trace_filter/engine.rs:183` – owns a `ModuleIdentityResolver` instance, builds `ScopeContext`, and stores resolved module names inside `ScopeResolution`. -- `codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs:280` – turns `` frames into `<{module}>` labels using `ModuleIdentityCache`. -- `codetracer-python-recorder/src/runtime/tracer/filtering.rs:19` – captures filter outcomes and pipes their module hints back to the tracer. -- Tests exercising the behaviour: Rust unit tests in `codetracer-python-recorder/src/module_identity.rs:551` and Python integration coverage in `codetracer-python-recorder/tests/python/test_monitoring_events.py:199`. +- `codetracer-python-recorder/src/module_identity.rs` – lightweight helpers such as `module_from_relative`, `module_name_from_packages`, and `normalise_to_posix` that turn paths into dotted names. +- `codetracer-python-recorder/src/trace_filter/engine.rs` – builds a `ScopeContext`, keeps relative/absolute paths, and applies package-aware heuristics whenever filters do not provide an explicit module hint. +- `codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs` – turns `` frames into `<{module}>` labels by preferring the globals-derived hint, falling back to filter resolutions, and finally to the on-disk package structure. +- `codetracer-python-recorder/src/runtime/tracer/filtering.rs` – captures filter outcomes, stores module hints collected during `py_start`, and exposes them to the tracer. +- Tests exercising the behaviour: Rust unit tests in `codetracer-python-recorder/src/module_identity.rs` and Python integration coverage in `codetracer-python-recorder/tests/python/test_monitoring_events.py`. -The pure-Python recorder does not perform advanced module-name derivation; all of the reusable logic lives in the Rust-backed module. +The pure-Python recorder does not perform advanced module-name derivation; all reusable logic lives in the Rust-backed module. ## 5. How the Algorithm Works End to End -### 5.1 Building the resolver -1. `ModuleIdentityResolver::new` snapshots `sys.path` under the GIL, normalises each entry to POSIX form, removes duplicates, and sorts by descending length so more specific roots win (`module_identity.rs:24`–`:227`). -2. Each entry stays cached for the duration of the recorder session; mutations to `sys.path` after startup are not observed automatically. +### 5.1 Capturing module hints +1. `RuntimeTracer::on_py_start` grabs `frame.f_globals['__name__']` for `` code objects and stores that value in `FilterCoordinator` before any gating decisions run. +2. The hint is retained for the lifetime of the code object (and cleared once non-module frames arrive) so both the filter engine and the tracer can reuse it. -### 5.2 Resolving an absolute filename -Given a normalised absolute path (e.g., `/home/app/pkg/service.py`): +### 5.2 Filter resolution +1. `TraceFilterEngine::resolve` begins with configuration metadata: project-relative paths, activation roots, and module names supplied by filters. +2. If no valid module name is present, the engine consults the incoming hint. Failing that, it walks up the filesystem looking for `__init__.py` packages (`module_name_from_packages`) and uses the file stem as a last resort. +3. The resulting `ScopeResolution` records the derived module name, relative and absolute paths, and the execution decision; the resolution is cached per code object. -1. **Path-based attempt** – `module_name_from_roots` strips each known root and converts the remainder into a dotted form (`pkg/service.py → pkg.service`). This is fast and succeeds for project code and site-packages that live under a `sys.path` entry (`module_identity.rs:229`–`:238`). -2. **Heuristics** – if the first guess looks like a raw filesystem echo (meaning we probably matched a catch-all root like `/`), the resolver searches upward for project markers (`pyproject.toml`, `.git`, etc.) and retries with that directory as the root. Failing that, it uses the immediate parent directory (`module_identity.rs:240`–`:468`). -3. **`sys.modules` sweep** – the resolver iterates through loaded modules, comparing `__spec__.origin`, `__file__`, or `__cached__` paths (normalised to POSIX) against the target filename, accounting for `.py` vs `.pyc` differences. Any valid dotted name wins over heuristic guesses (`module_identity.rs:248`–`:335`). -4. The winning name (or lack thereof) is cached by absolute path in a `DashMap` so future lookups avoid repeated sys.modules scans (`module_identity.rs:54`–`:60`). - -### 5.3 Mapping code objects to module names - -`ModuleIdentityCache::resolve_for_code` accepts optional hints: - -1. **Preferred hint** – e.g., the filter engine’s stored module name. It is accepted only if it is a valid dotted identifier (`module_identity.rs:103`–`:116`). -2. **Relative path** – converted via `module_from_relative` when supplied (`module_identity.rs:107`–`:110`). -3. **Absolute path** – triggers the resolver described above (`module_identity.rs:112`–`:115`). -4. **Globals-based hint** – a last resort using `frame.f_globals["__name__"]` when available (`module_identity.rs:116`). - -If no hints contain an absolute path, the cache will read `co_filename`, normalise it, and resolve it once (`module_identity.rs:118`–`:127`). Results (including failures) are memoised per `code_id`. - -### 5.4 Feeding results into the rest of the system - -1. The trace filter builds a `ScopeContext` for every new code object. It records project-relative paths and module names derived from configuration roots, then calls back into the resolver if the preliminary name is missing or invalid (`trace_filter/engine.rs:420`–`:471`). -2. The resulting `ScopeResolution` is cached and exposed to the runtime tracer via `FilterCoordinator`, providing rich hints (`runtime/tracer/filtering.rs:43`). -3. During execution, `RuntimeTracer::function_name` reaches into the shared cache to turn `` qualnames into `` labels. If every heuristic fails, it safely falls back to the original `` string (`runtime_tracer.rs:280`–`:305`). -4. Both subsystems reuse the same `ModuleIdentityResolver`, ensuring trace files and filtering decisions stay consistent. +### 5.3 Runtime naming +1. When emitting events, `RuntimeTracer::function_name` first checks the stored module hint. If the globals-derived name exists, it wins; this keeps opt-in behaviour aligned with Python logging. +2. Absent a hint, the tracer falls back to the cached `ScopeResolution` module name, then to package detection via `module_name_from_packages`, and finally leaves `` unchanged. +3. The tracer no longer keeps a resolver cache, so the hot path is reduced to string comparisons and light filesystem checks. ## 6. Bugs, Risks, and Observations -- **Prefix matching ignores path boundaries** – `strip_posix_prefix` checks `path.starts_with(base)` without verifying the next character is a separator. A root like `/opt/app` therefore incorrectly matches `/opt/application/module.py`, yielding the bogus module `lication.module`. When `sys.modules` lacks the correct entry (e.g., resolving a file before import), the resolver will cache this wrong answer (`module_identity.rs:410`–`:426`). -- **Case sensitivity on Windows** – normalisation preserves whatever casing the OS returns. If `co_filename` and `sys.modules` report the same path with different casing, `equivalent_posix_paths` will not treat them as equal, causing the fallback to miss (`module_identity.rs:317`–`:324`). Consider lowercasing drive prefixes or using `Path::eq` semantics behind the GIL. -- **`sys.path` mutations after startup** – the resolver snapshots roots once. If tooling modifies `sys.path` later (common in virtualenv activation scripts), we will never see the new prefix, so we fall back to heuristics or `sys.modules`. Documenting this behaviour or exposing a method to refresh the roots may avoid surprises. -- **Project-marker heuristics hit the filesystem** – `has_project_marker` calls `exists()` for every parent directory when the fast path fails (`module_identity.rs:470`–`:493`). Because results are cached per file this is usually acceptable, but tracing thousands of unique `site-packages` files on network storage could still become expensive. - -Addressing the first two items would materially improve correctness; the latter two are design trade-offs worth monitoring. +- **Globals may remain `__main__` for direct scripts** – this is intentional and matches logging, but filter authors must target `pkg:__main__` when skipping script bodies. +- **Package detection relies on `__init__.py`** – namespace packages without marker files produce the leaf module name only (e.g., `service.handler`). If this proves problematic we can detect `pyproject.toml`/`setup.cfg` to extend coverage. +- **Filesystem traversal on first encounter** – the package walk performs existence checks until it finds the first non-package directory. Results are cached per code object so the overhead is modest, but tracing thousands of unique modules on slow filesystems could still be noticeable. +- **Globals introspection can fail** – exotic frames that refuse `PyFrame_FastToLocalsWithError` leave the hint empty. We fall back to filesystem heuristics, but selectors relying solely on globals may miss until the module imports cleanly. +These trade-offs are significantly narrower than the previous resolver-based design and keep the module-name derivation consistent with Python's own conventions. diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md index 31cae8b..98af556 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.status.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -27,3 +27,7 @@ ### Stage 2 – Simplify Filter Engine - **Status:** Completed Removed the resolver dependency from `TraceFilterEngine`, teach it to accept globals-based hints, and added a package-structure fallback when relative paths are unavailable. Filtering decisions now rely on the new hint flow while keeping legacy behaviour intact for `module_name_from_globals` opt-out scenarios. + +### Stage 3 – Runtime Naming Downstream +- **Status:** Completed + `RuntimeTracer` no longer depends on `ModuleIdentityCache`; it prefers globals hints, otherwise reuses filter resolutions, `sys.path` roots, or package markers before falling back to ``. The shared helpers in `module_identity.rs` were slimmed down accordingly, and updated unit/integration tests confirm the new flow. From 788d55cc131722c7093faf5a912a076847458a5e Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 16:01:31 +0200 Subject: [PATCH 7/8] Stage 4 --- codetracer-python-recorder/CHANGELOG.md | 2 +- codetracer-python-recorder/README.md | 2 +- .../adr/0016-module-name-resolution-via-globals-name.md | 3 +-- ...module-name-resolution-globals-implementation-plan.md | 9 ++++----- ...name-resolution-globals-implementation-plan.status.md | 4 ++++ 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/codetracer-python-recorder/CHANGELOG.md b/codetracer-python-recorder/CHANGELOG.md index 000e61a..4d9d783 100644 --- a/codetracer-python-recorder/CHANGELOG.md +++ b/codetracer-python-recorder/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) - Balanced call-stack handling for generators, coroutines, and unwinding frames by subscribing to `PY_YIELD`, `PY_UNWIND`, `PY_RESUME`, and `PY_THROW`, mapping resume/throw events to `TraceWriter::register_call`, yield/unwind to `register_return`, and capturing `PY_THROW` arguments as `exception` using the existing value encoder. Added Python + Rust integration tests that drive `.send()`/`.throw()` on coroutines and generators to guarantee the trace stays balanced and that exception payloads are recorded. ### Changed -- Module-level call events now use the actual dotted module name (e.g., `` or ``) instead of the generic `` label. `RuntimeTracer` derives the name via the shared module-identity helper, caches the result per code object, and falls back to `` only for synthetic or nameless frames. Added Rust + Python tests plus README documentation covering the new semantics. +- Module-level call events now prefer the frame's `__name__`, fall back to filter hints, `sys.path`, and package markers, and no longer depend on the legacy resolver/cache. This keeps `<__main__>` for direct scripts while package imports continue to emit ``. Updated Rust + Python tests and documentation to describe the new semantics and opt-in flag. ## [0.2.0] - 2025-10-17 ### Added diff --git a/codetracer-python-recorder/README.md b/codetracer-python-recorder/README.md index c471fa5..01199e6 100644 --- a/codetracer-python-recorder/README.md +++ b/codetracer-python-recorder/README.md @@ -84,7 +84,7 @@ action = "drop" ## Trace naming semantics -- Module-level activations no longer appear as the ambiguous `` label. When the recorder sees `co_qualname == ""`, it derives the actual dotted package name (e.g., `` or ``) using project roots, `sys.modules`, and frame metadata. +- Module-level activations no longer appear as the ambiguous `` label. When the recorder sees `co_qualname == ""`, it first reuses the frame's `__name__`, then falls back to trace-filter hints, `sys.path` roots, and package markers so scripts report `<__main__>` while real modules keep their dotted names (e.g., `` or ``). - The angle-bracket convention remains for module entries so downstream tooling can distinguish top-level activations at a glance. - Traces will still emit `` for synthetic filenames (``, ``), frozen/importlib bootstrap frames, or exotic loaders that omit filenames entirely. This preserves previous behaviour when no reliable name exists. diff --git a/design-docs/adr/0016-module-name-resolution-via-globals-name.md b/design-docs/adr/0016-module-name-resolution-via-globals-name.md index 807c622..d35007e 100644 --- a/design-docs/adr/0016-module-name-resolution-via-globals-name.md +++ b/design-docs/adr/0016-module-name-resolution-via-globals-name.md @@ -1,6 +1,6 @@ # ADR 0016 – Module Name Resolution via `__name__` -- **Status:** Proposed +- **Status:** Accepted - **Date:** 2025-03-18 - **Stakeholders:** Runtime team, Trace Filter maintainers - **Related Decisions:** ADR 0013 (Reliable Module Name Derivation), ADR 0014 (Module Call Event Naming) @@ -46,4 +46,3 @@ Key points: - Update existing ADR 0013/0014 statuses once this ADR is accepted and the code lands. - Communicate the behavioural change to downstream teams who consume `` events or rely on path-derived module names. - diff --git a/design-docs/module-name-resolution-globals-implementation-plan.md b/design-docs/module-name-resolution-globals-implementation-plan.md index 7bed4e9..73308b6 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.md @@ -37,10 +37,10 @@ This plan delivers ADR 0016 by retooling module-name derivation around `frame.f_ - Remove the `ModuleNameHints` plumbing and any resolver invocations. - Update the Python integration test `test_module_imports_record_package_names` to expect `` coming directly from `__name__`. -### Stage 4 – Remove Legacy Resolver Module -- Delete `src/module_identity.rs` and tidy references (Cargo module list, tests, exports). -- Drop resolver-specific tests and replace them with focused coverage around globals-based extraction. -- Update documentation and changelog to describe the new semantics. Reference ADR 0016 and mark ADR 0013 as superseded where appropriate. +### Stage 4 – Documentation and Changelog +- Update design docs, ADR status, and changelog entries to describe the new globals-based module naming. +- Ensure the developer guide explains how `__name__` hints interplay with filter selectors and `<__main__>` behaviour. +- Document the phase-out of the resolver to guide downstream integrators. ### Stage 5 – Flip the Feature Flag - After validating in CI and canary environments, change the default for `module_name_from_globals` to `true`. @@ -61,4 +61,3 @@ This plan delivers ADR 0016 by retooling module-name derivation around `frame.f_ - ADR 0016 updated to “Accepted” once the feature flag defaults to on. - Release notes highlight the new module-naming semantics and the opt-out flag during transition. - Telemetry or logging confirms we do not hit the fallback path excessively in real workloads. - diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md index 98af556..62cebf5 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.status.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -31,3 +31,7 @@ ### Stage 3 – Runtime Naming Downstream - **Status:** Completed `RuntimeTracer` no longer depends on `ModuleIdentityCache`; it prefers globals hints, otherwise reuses filter resolutions, `sys.path` roots, or package markers before falling back to ``. The shared helpers in `module_identity.rs` were slimmed down accordingly, and updated unit/integration tests confirm the new flow. + +### Stage 4 – Documentation and Changelog +- **Status:** Completed + Updated `codetracer-python-recorder/CHANGELOG.md` and README to describe globals-first module naming, refreshed ADR 0016 to Accepted, and revised the implementation plan to cover the documentation rollout so downstream teams understand the new semantics and feature flag. From 1dc81d26a76c130b271fcd3f4b5f4f546264fabf Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Mon, 27 Oct 2025 16:11:24 +0200 Subject: [PATCH 8/8] Stage 5 --- codetracer-python-recorder/CHANGELOG.md | 2 +- codetracer-python-recorder/README.md | 1 + .../codetracer_python_recorder/cli.py | 11 ++++++----- .../resources/trace_filters/builtin_default.toml | 2 +- codetracer-python-recorder/src/policy.rs | 2 +- codetracer-python-recorder/src/policy/env.rs | 11 +++++++++++ codetracer-python-recorder/src/policy/model.rs | 2 +- .../tests/python/test_monitoring_events.py | 4 +--- .../tests/python/unit/test_cli.py | 16 ++++++++++++++++ .../python/unit/test_policy_configuration.py | 4 ++-- ...16-module-name-resolution-via-globals-name.md | 1 + .../module-name-resolution-deep-review.md | 2 +- ...ame-resolution-globals-implementation-plan.md | 4 ++-- ...olution-globals-implementation-plan.status.md | 6 +++++- 14 files changed, 50 insertions(+), 18 deletions(-) diff --git a/codetracer-python-recorder/CHANGELOG.md b/codetracer-python-recorder/CHANGELOG.md index 4d9d783..10c0710 100644 --- a/codetracer-python-recorder/CHANGELOG.md +++ b/codetracer-python-recorder/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) - Balanced call-stack handling for generators, coroutines, and unwinding frames by subscribing to `PY_YIELD`, `PY_UNWIND`, `PY_RESUME`, and `PY_THROW`, mapping resume/throw events to `TraceWriter::register_call`, yield/unwind to `register_return`, and capturing `PY_THROW` arguments as `exception` using the existing value encoder. Added Python + Rust integration tests that drive `.send()`/`.throw()` on coroutines and generators to guarantee the trace stays balanced and that exception payloads are recorded. ### Changed -- Module-level call events now prefer the frame's `__name__`, fall back to filter hints, `sys.path`, and package markers, and no longer depend on the legacy resolver/cache. This keeps `<__main__>` for direct scripts while package imports continue to emit ``. Updated Rust + Python tests and documentation to describe the new semantics and opt-in flag. +- Module-level call events now prefer the frame's `__name__`, fall back to filter hints, `sys.path`, and package markers, and no longer depend on the legacy resolver/cache. The globals-derived naming flag now defaults to enabled so direct scripts record `<__main__>` while package imports emit ``, with CLI and environment overrides available for the legacy resolver. ## [0.2.0] - 2025-10-17 ### Added diff --git a/codetracer-python-recorder/README.md b/codetracer-python-recorder/README.md index 01199e6..3d9e39e 100644 --- a/codetracer-python-recorder/README.md +++ b/codetracer-python-recorder/README.md @@ -85,6 +85,7 @@ action = "drop" ## Trace naming semantics - Module-level activations no longer appear as the ambiguous `` label. When the recorder sees `co_qualname == ""`, it first reuses the frame's `__name__`, then falls back to trace-filter hints, `sys.path` roots, and package markers so scripts report `<__main__>` while real modules keep their dotted names (e.g., `` or ``). +- The globals-derived naming flow ships enabled by default; disable it temporarily with `--no-module-name-from-globals`, `codetracer.configure_policy(module_name_from_globals=False)`, or `CODETRACER_MODULE_NAME_FROM_GLOBALS=0` if you need to compare against the legacy resolver. - The angle-bracket convention remains for module entries so downstream tooling can distinguish top-level activations at a glance. - Traces will still emit `` for synthetic filenames (``, ``), frozen/importlib bootstrap frames, or exotic loaders that omit filenames entirely. This preserves previous behaviour when no reliable name exists. diff --git a/codetracer-python-recorder/codetracer_python_recorder/cli.py b/codetracer-python-recorder/codetracer_python_recorder/cli.py index 865f837..9e72257 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/cli.py +++ b/codetracer-python-recorder/codetracer_python_recorder/cli.py @@ -122,10 +122,11 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: ) parser.add_argument( "--module-name-from-globals", - action="store_true", + action=argparse.BooleanOptionalAction, + default=None, help=( - "Derive module names from the Python frame's __name__ attribute. " - "Intended for early opt-in while the feature flag is experimental." + "Derive module names from the Python frame's __name__ attribute (default: enabled). " + "Use '--no-module-name-from-globals' to fall back to the legacy resolver." ), ) @@ -189,8 +190,8 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: policy["io_capture_fd_fallback"] = True case other: # pragma: no cover - argparse choices block this parser.error(f"unsupported io-capture mode '{other}'") - if known.module_name_from_globals: - policy["module_name_from_globals"] = True + if known.module_name_from_globals is not None: + policy["module_name_from_globals"] = known.module_name_from_globals return RecorderCLIConfig( trace_dir=trace_dir, diff --git a/codetracer-python-recorder/resources/trace_filters/builtin_default.toml b/codetracer-python-recorder/resources/trace_filters/builtin_default.toml index 106e71b..7ad90ef 100644 --- a/codetracer-python-recorder/resources/trace_filters/builtin_default.toml +++ b/codetracer-python-recorder/resources/trace_filters/builtin_default.toml @@ -34,7 +34,7 @@ exec = "skip" reason = "Skip builtins module instrumentation" [[scope.rules]] -selector = 'pkg:glob:*_distutils_hack*' +selector = 'pkg:literal:_distutils_hack' exec = "skip" reason = "Skip setuptools shim module" diff --git a/codetracer-python-recorder/src/policy.rs b/codetracer-python-recorder/src/policy.rs index 6c57f34..a6dfd66 100644 --- a/codetracer-python-recorder/src/policy.rs +++ b/codetracer-python-recorder/src/policy.rs @@ -42,7 +42,7 @@ mod tests { assert!(snap.log_file.is_none()); assert!(snap.io_capture.line_proxies); assert!(!snap.io_capture.fd_fallback); - assert!(!snap.module_name_from_globals); + assert!(snap.module_name_from_globals); } #[test] diff --git a/codetracer-python-recorder/src/policy/env.rs b/codetracer-python-recorder/src/policy/env.rs index c706dc5..73312d6 100644 --- a/codetracer-python-recorder/src/policy/env.rs +++ b/codetracer-python-recorder/src/policy/env.rs @@ -165,6 +165,17 @@ mod tests { assert!(snap.module_name_from_globals); } + #[test] + fn configure_policy_from_env_disables_module_name_from_globals() { + let _guard = EnvGuard; + reset_policy_for_tests(); + std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "false"); + + configure_policy_from_env().expect("configure from env"); + let snap = policy_snapshot(); + assert!(!snap.module_name_from_globals); + } + #[test] fn parse_capture_io_handles_aliases() { assert_eq!(parse_capture_io("proxies+fd").unwrap(), (true, true)); diff --git a/codetracer-python-recorder/src/policy/model.rs b/codetracer-python-recorder/src/policy/model.rs index d8ab569..c0d77ea 100644 --- a/codetracer-python-recorder/src/policy/model.rs +++ b/codetracer-python-recorder/src/policy/model.rs @@ -84,7 +84,7 @@ impl Default for RecorderPolicy { log_file: None, json_errors: false, io_capture: IoCapturePolicy::default(), - module_name_from_globals: false, + module_name_from_globals: true, } } } diff --git a/codetracer-python-recorder/tests/python/test_monitoring_events.py b/codetracer-python-recorder/tests/python/test_monitoring_events.py index 1c1e15b..d4291ab 100644 --- a/codetracer-python-recorder/tests/python/test_monitoring_events.py +++ b/codetracer-python-recorder/tests/python/test_monitoring_events.py @@ -227,15 +227,13 @@ def test_module_name_follows_globals_policy(tmp_path: Path) -> None: script.write_text("VALUE = 1\n", encoding="utf-8") out_dir = ensure_trace_dir(tmp_path) - codetracer.configure_policy(module_name_from_globals=True) - session = codetracer.start(out_dir, format=codetracer.TRACE_JSON, start_on_enter=script) try: runpy.run_path(str(script), run_name="__main__") finally: codetracer.flush() codetracer.stop() - codetracer.configure_policy(module_name_from_globals=False) + codetracer.configure_policy(module_name_from_globals=True) parsed = _parse_trace(out_dir) names = [f["name"] for f in parsed.functions] diff --git a/codetracer-python-recorder/tests/python/unit/test_cli.py b/codetracer-python-recorder/tests/python/unit/test_cli.py index c424efd..b7cdd9b 100644 --- a/codetracer-python-recorder/tests/python/unit/test_cli.py +++ b/codetracer-python-recorder/tests/python/unit/test_cli.py @@ -172,3 +172,19 @@ def test_parse_args_enables_module_name_from_globals(tmp_path: Path) -> None: assert config.policy_overrides == { "module_name_from_globals": True, } + + +def test_parse_args_disables_module_name_from_globals(tmp_path: Path) -> None: + script = tmp_path / "entry.py" + _write_script(script) + + config = _parse_args( + [ + "--no-module-name-from-globals", + str(script), + ] + ) + + assert config.policy_overrides == { + "module_name_from_globals": False, + } diff --git a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py index 9b77b02..3056c49 100644 --- a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py +++ b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py @@ -18,7 +18,7 @@ def reset_policy() -> None: log_level="", log_file="", json_errors=False, - module_name_from_globals=False, + module_name_from_globals=True, ) yield codetracer.configure_policy( @@ -28,7 +28,7 @@ def reset_policy() -> None: log_level="", log_file="", json_errors=False, - module_name_from_globals=False, + module_name_from_globals=True, ) diff --git a/design-docs/adr/0016-module-name-resolution-via-globals-name.md b/design-docs/adr/0016-module-name-resolution-via-globals-name.md index d35007e..48eef34 100644 --- a/design-docs/adr/0016-module-name-resolution-via-globals-name.md +++ b/design-docs/adr/0016-module-name-resolution-via-globals-name.md @@ -46,3 +46,4 @@ Key points: - Update existing ADR 0013/0014 statuses once this ADR is accepted and the code lands. - Communicate the behavioural change to downstream teams who consume `` events or rely on path-derived module names. +- Default the `module_name_from_globals` policy to `true` after validation, but retain CLI/env toggles so teams can temporarily fall back to the legacy resolver during rollout. diff --git a/design-docs/module-name-resolution-deep-review.md b/design-docs/module-name-resolution-deep-review.md index f4c9b14..d5a25eb 100644 --- a/design-docs/module-name-resolution-deep-review.md +++ b/design-docs/module-name-resolution-deep-review.md @@ -51,7 +51,7 @@ The pure-Python recorder does not perform advanced module-name derivation; all r 3. The resulting `ScopeResolution` records the derived module name, relative and absolute paths, and the execution decision; the resolution is cached per code object. ### 5.3 Runtime naming -1. When emitting events, `RuntimeTracer::function_name` first checks the stored module hint. If the globals-derived name exists, it wins; this keeps opt-in behaviour aligned with Python logging. +1. When emitting events, `RuntimeTracer::function_name` first checks the stored module hint. If the globals-derived name exists, it wins; this keeps the default behaviour aligned with Python logging while still permitting explicit opt-outs. 2. Absent a hint, the tracer falls back to the cached `ScopeResolution` module name, then to package detection via `module_name_from_packages`, and finally leaves `` unchanged. 3. The tracer no longer keeps a resolver cache, so the hot path is reduced to string comparisons and light filesystem checks. diff --git a/design-docs/module-name-resolution-globals-implementation-plan.md b/design-docs/module-name-resolution-globals-implementation-plan.md index 73308b6..2ca6022 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.md @@ -43,8 +43,8 @@ This plan delivers ADR 0016 by retooling module-name derivation around `frame.f_ - Document the phase-out of the resolver to guide downstream integrators. ### Stage 5 – Flip the Feature Flag -- After validating in CI and canary environments, change the default for `module_name_from_globals` to `true`. -- Remove the compatibility flag once usage data confirms no regressions. +- Change the default for `module_name_from_globals` to `true` while leaving CLI/env toggles available for targeted opt-outs during rollout validation. +- Schedule removal of the compatibility flag once usage data confirms no regressions. ## Testing Strategy - Rust unit tests covering `on_py_start` logic, especially fallback to `` when `__name__` is absent. diff --git a/design-docs/module-name-resolution-globals-implementation-plan.status.md b/design-docs/module-name-resolution-globals-implementation-plan.status.md index 62cebf5..541a8b3 100644 --- a/design-docs/module-name-resolution-globals-implementation-plan.status.md +++ b/design-docs/module-name-resolution-globals-implementation-plan.status.md @@ -22,7 +22,7 @@ ### Stage 1 – Capture `__name__` at `py_start` - **Status:** Completed - `RuntimeTracer` now captures `frame.f_globals['__name__']` for `` code when the feature flag is on, threads the hint through `FilterCoordinator`, and prefers it during both filter decisions and runtime naming. Added integration coverage ensuring opt-in sessions record `<__main__>` for scripts, plus unit updates for the new plumbing. + `RuntimeTracer` now captures `frame.f_globals['__name__']` for `` code and threads the hint through `FilterCoordinator`, preferring it during both filter decisions and runtime naming. Added integration coverage ensuring sessions record `<__main__>` for scripts, plus unit updates for the new plumbing and opt-out cases. ### Stage 2 – Simplify Filter Engine - **Status:** Completed @@ -35,3 +35,7 @@ ### Stage 4 – Documentation and Changelog - **Status:** Completed Updated `codetracer-python-recorder/CHANGELOG.md` and README to describe globals-first module naming, refreshed ADR 0016 to Accepted, and revised the implementation plan to cover the documentation rollout so downstream teams understand the new semantics and feature flag. + +### Stage 5 – Flip the Feature Flag +- **Status:** Completed + Defaulted `module_name_from_globals` to `true` across the policy model, CLI, and environment/configuration pathways while keeping explicit overrides available for the legacy resolver during rollout validation.