From 0353190a2bd1931baf99babeae8f260920b6a2bf Mon Sep 17 00:00:00 2001 From: Nicklas Warming Jacobsen Date: Thu, 21 May 2026 12:40:48 +0200 Subject: [PATCH 1/4] Replace regexs with handrolled parser functions --- Cargo.lock | 26 --------- rootcause-backtrace/Cargo.toml | 1 - rootcause-backtrace/src/lib.rs | 97 ++++++++++++++++++++++------------ 3 files changed, 64 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b8f8bb..64f13c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -927,31 +927,6 @@ dependencies = [ "getrandom 0.3.4", ] -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" - [[package]] name = "reqwest" version = "0.13.3" @@ -1033,7 +1008,6 @@ name = "rootcause-backtrace" version = "0.12.1" dependencies = [ "backtrace", - "regex", "rootcause", "unicode-ident", ] diff --git a/rootcause-backtrace/Cargo.toml b/rootcause-backtrace/Cargo.toml index 28e559a..6e582ba 100644 --- a/rootcause-backtrace/Cargo.toml +++ b/rootcause-backtrace/Cargo.toml @@ -15,7 +15,6 @@ backtrace = { version = "0.3.76", default-features = false, features = [ "std", "cpp_demangle", ] } -regex = { version = "1.12.3", default-features = false } unicode-ident = "1.0.24" # Internal dependencies diff --git a/rootcause-backtrace/src/lib.rs b/rootcause-backtrace/src/lib.rs index 851d594..2aae555 100644 --- a/rootcause-backtrace/src/lib.rs +++ b/rootcause-backtrace/src/lib.rs @@ -904,36 +904,71 @@ impl Backtrace { } } -impl FramePath { - fn new(path: BytesOrWideString<'_>) -> Self { - static REGEXES: OnceLock<[regex::Regex; 2]> = OnceLock::new(); - let [std_regex, registry_regex] = REGEXES.get_or_init(|| { - [ - // Matches Rust standard library paths: - // - /lib/rustlib/src/rust/library/{std|core|alloc}/src/... - // - /rustc/{40-char-hash}/library/{std|core|alloc}/src/... - regex::Regex::new( - r"(?:/lib/rustlib/src/rust|^/rustc/[0-9a-f]{40})/library/(std|core|alloc)/src/.*$", - ) - .expect("built-in regex pattern for std library paths should be valid"), - // Matches Cargo registry paths: - // - /.cargo/registry/src/{index}-{16-char-hash}/{crate}-{version}/src/... - regex::Regex::new( - r"/\.cargo/registry/src/[^/]+-[0-9a-f]{16}/([^./]+)-[0-9]+\.[^/]*/src/.*$", - ) - .expect("built-in regex pattern for cargo registry paths should be valid"), - ] +/// Matches Rust standard library source paths and returns the crate name and byte offset. +/// +/// Recognised paths: +/// - `/lib/rustlib/src/rust/library/{std|core|alloc}/src/…` +/// - `^/rustc/{40-hex-char hash}/library/{std|core|alloc}/src/…` +fn match_std_library_path(path: &str) -> Option<(&'static str, usize)> { + const STD_CRATES: [&str; 3] = ["std", "core", "alloc"]; + + let (prefix, after_library) = path.split_once("/library/")?; + + // The prefix must end with "/lib/rustlib/src/rust" or *be* "/rustc/{40 hex chars}" + let valid_prefix = prefix.ends_with("/lib/rustlib/src/rust") + || prefix.strip_prefix("/rustc/").map_or(false, |hash| { + hash.len() == 40 && hash.bytes().all(|b| b.is_ascii_hexdigit()) }); + if !valid_prefix { + return None; + } + + // after_library is "{std|core|alloc}/src/…"; split on "/src/" to isolate the crate name. + let (crate_name, _) = after_library.split_once("/src/")?; + let crate_name = STD_CRATES + .iter() + .copied() + .find(|&name| name == crate_name)?; + + let crate_start = prefix.len() + "/library/".len(); + Some((crate_name, crate_start)) +} + +/// Matches Cargo registry source paths and returns the crate name and its byte offset. +/// +/// Recognised paths: +/// - `/.cargo/registry/src/{index}-{16-hex-char hash}/{crate}-{version}/src/…` +fn match_cargo_registry_path(path: &str) -> Option<(&str, usize)> { + let (before_cargo, after_cargo) = path.split_once("/.cargo/registry/src/")?; + + // Consume the "{index}-{16-hex-char}" directory component. + let (index_hash, after_index) = after_cargo.split_once('/')?; + let (_, hash) = index_hash.rsplit_once('-')?; + if hash.len() != 16 || !hash.bytes().all(|b| b.is_ascii_hexdigit()) { + return None; + } + + // after_index is "{crate}-{version}/src/…"; split on "/src/" to isolate "{crate}-{version}". + let (crate_version, _) = after_index.split_once("/src/")?; + + // create names + version numbers can be really silly. However crate names can't contain dots + // So find the first dot and split (crate-name-0.1.0-alpha into crate-name-0) + // Then right split on dash (crate-name-0 to crate-name) + let (crate_name, _) = crate_version.split_once('.')?; + let (crate_name, _) = crate_name.rsplit_once('-')?; + + let crate_start_abs = before_cargo.len() + "/.cargo/registry/src/".len() + index_hash.len() + 1; + Some((crate_name, crate_start_abs)) +} + +impl FramePath { + fn new(path: BytesOrWideString<'_>) -> Self { let path_str = path.to_string(); - if let Some(captures) = std_regex.captures(&path_str) { + if let Some((crate_name, crate_start)) = match_std_library_path(&path_str) { let raw_path = path.to_str_lossy().into_owned(); - let crate_capture = captures - .get(1) - .expect("regex capture group 1 should exist for std library paths"); - let split = crate_capture.start(); - let (prefix, suffix) = (&path_str[..split - 1], &path_str[split..]); + let (prefix, suffix) = (&path_str[..crate_start - 1], &path_str[crate_start..]); Self { raw_path, @@ -942,19 +977,15 @@ impl FramePath { prefix: prefix.to_string(), suffix: suffix.to_string(), }), - crate_name: Some(crate_capture.as_str().to_string().into()), + crate_name: Some(crate_name.to_string().into()), } - } else if let Some(captures) = registry_regex.captures(&path_str) { + } else if let Some((crate_name, crate_start)) = match_cargo_registry_path(&path_str) { let raw_path = path.to_str_lossy().into_owned(); - let crate_capture = captures - .get(1) - .expect("regex capture group 1 should exist for cargo registry paths"); - let split = crate_capture.start(); - let (prefix, suffix) = (&path_str[..split - 1], &path_str[split..]); + let (prefix, suffix) = (&path_str[..crate_start - 1], &path_str[crate_start..]); Self { raw_path, - crate_name: Some(crate_capture.as_str().to_string().into()), + crate_name: Some(crate_name.to_string().into()), split_path: Some(FramePrefix { prefix_kind: "CARGO", prefix: prefix.to_string(), From 4b834b3020635d194f592b2b4e177d837fd999d4 Mon Sep 17 00:00:00 2001 From: Nicklas Warming Jacobsen Date: Thu, 21 May 2026 13:39:40 +0200 Subject: [PATCH 2/4] Add tests for match_cargo_registry_path and match_std_library_path --- rootcause-backtrace/src/lib.rs | 137 +++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/rootcause-backtrace/src/lib.rs b/rootcause-backtrace/src/lib.rs index 2aae555..6c53593 100644 --- a/rootcause-backtrace/src/lib.rs +++ b/rootcause-backtrace/src/lib.rs @@ -1154,3 +1154,140 @@ where } } } + +#[cfg(test)] +mod tests { + use super::*; + + // ── match_std_library_path ──────────────────────────────────────────────── + + #[test] + fn std_path_rustlib_std() { + let path = "/home/user/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/mod.rs"; + let (name, pos) = match_std_library_path(path).expect("should match"); + assert_eq!(name, "std"); + assert_eq!(&path[pos..], "std/src/io/mod.rs"); + } + + #[test] + fn std_path_rustlib_core() { + let path = "/home/user/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs"; + let (name, pos) = match_std_library_path(path).expect("should match"); + assert_eq!(name, "core"); + assert_eq!(&path[pos..], "core/src/option.rs"); + } + + #[test] + fn std_path_rustlib_alloc() { + let path = "/home/user/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/vec/mod.rs"; + let (name, pos) = match_std_library_path(path).expect("should match"); + assert_eq!(name, "alloc"); + assert_eq!(&path[pos..], "alloc/src/vec/mod.rs"); + } + + #[test] + fn std_path_rustc_hash() { + let hash = "a".repeat(40); + let path = format!("/rustc/{hash}/library/std/src/panicking.rs"); + let (name, pos) = match_std_library_path(&path).expect("should match"); + assert_eq!(name, "std"); + assert_eq!(&path[pos..], "std/src/panicking.rs"); + } + + #[test] + fn std_path_no_match_unknown_crate() { + let path = "/lib/rustlib/src/rust/library/unknown/src/lib.rs"; + assert!(match_std_library_path(path).is_none()); + } + + #[test] + fn std_path_no_match_rustc_hash_too_short() { + let short_hash = "a".repeat(39); + let path = format!("/rustc/{short_hash}/library/std/src/lib.rs"); + assert!(match_std_library_path(&path).is_none()); + } + + #[test] + fn std_path_no_match_rustc_hash_not_at_root() { + let hash = "a".repeat(40); + let path = format!("/prefix/rustc/{hash}/library/std/src/lib.rs"); + assert!(match_std_library_path(&path).is_none()); + } + + #[test] + fn std_path_no_match_rustc_hash_non_hex() { + let hash = "z".repeat(40); + let path = format!("/rustc/{hash}/library/std/src/lib.rs"); + assert!(match_std_library_path(&path).is_none()); + } + + #[test] + fn std_path_no_match_missing_src_segment() { + let path = "/lib/rustlib/src/rust/library/std/nosrc/io.rs"; + assert!(match_std_library_path(path).is_none()); + } + + #[test] + fn std_path_no_match_cargo_registry_path() { + // A cargo registry path should not match the std matcher. + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.210/src/lib.rs"; + assert!(match_std_library_path(path).is_none()); + } + + // ── match_cargo_registry_path ───────────────────────────────────────────── + + #[test] + fn cargo_path_simple_crate() { + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.210/src/lib.rs"; + let (name, pos) = match_cargo_registry_path(path).expect("should match"); + assert_eq!(name, "serde"); + assert_eq!(&path[pos..], "serde-1.0.210/src/lib.rs"); + } + + #[test] + fn cargo_path_hyphenated_crate_name() { + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/my-cool-crate-0.3.1/src/lib.rs"; + let (name, pos) = match_cargo_registry_path(path).expect("should match"); + assert_eq!(name, "my-cool-crate"); + assert_eq!(&path[pos..], "my-cool-crate-0.3.1/src/lib.rs"); + } + + #[test] + fn cargo_path_prerelease_version() { + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/cratename-1.0.0-2beta/src/lib.rs"; + let (name, pos) = match_cargo_registry_path(path).expect("should match"); + assert_eq!(name, "cratename"); + assert_eq!(&path[pos..], "cratename-1.0.0-2beta/src/lib.rs"); + } + + #[test] + fn cargo_path_no_match_hash_too_short() { + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba1500/serde-1.0.210/src/lib.rs"; + assert!(match_cargo_registry_path(path).is_none()); + } + + #[test] + fn cargo_path_no_match_hash_non_hex() { + let path = "/home/user/.cargo/registry/src/index.crates.io-zzzzzzzzzzzzzzzz/serde-1.0.210/src/lib.rs"; + assert!(match_cargo_registry_path(path).is_none()); + } + + #[test] + fn cargo_path_no_match_missing_src_segment() { + let path = "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.210/nosrc/lib.rs"; + assert!(match_cargo_registry_path(path).is_none()); + } + + #[test] + fn cargo_path_no_match_no_version() { + let path = + "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde/src/lib.rs"; + assert!(match_cargo_registry_path(path).is_none()); + } + + #[test] + fn cargo_path_no_match_std_library_path() { + let path = "/lib/rustlib/src/rust/library/std/src/io/mod.rs"; + assert!(match_cargo_registry_path(path).is_none()); + } +} From 3c49ea2f93fd7e5b818d5cfc1c5392035b396180 Mon Sep 17 00:00:00 2001 From: Nicklas Warming Jacobsen Date: Thu, 21 May 2026 13:57:21 +0200 Subject: [PATCH 3/4] Run cargo clippy --- rootcause-backtrace/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rootcause-backtrace/src/lib.rs b/rootcause-backtrace/src/lib.rs index 6c53593..c646745 100644 --- a/rootcause-backtrace/src/lib.rs +++ b/rootcause-backtrace/src/lib.rs @@ -916,7 +916,7 @@ fn match_std_library_path(path: &str) -> Option<(&'static str, usize)> { // The prefix must end with "/lib/rustlib/src/rust" or *be* "/rustc/{40 hex chars}" let valid_prefix = prefix.ends_with("/lib/rustlib/src/rust") - || prefix.strip_prefix("/rustc/").map_or(false, |hash| { + || prefix.strip_prefix("/rustc/").is_some_and(|hash| { hash.len() == 40 && hash.bytes().all(|b| b.is_ascii_hexdigit()) }); From bc19ca217581a7436353f548abf0b62a09f96e62 Mon Sep 17 00:00:00 2001 From: Nicklas Warming Jacobsen Date: Thu, 21 May 2026 14:05:22 +0200 Subject: [PATCH 4/4] run cargo fmt --- rootcause-backtrace/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rootcause-backtrace/src/lib.rs b/rootcause-backtrace/src/lib.rs index c646745..e11d01c 100644 --- a/rootcause-backtrace/src/lib.rs +++ b/rootcause-backtrace/src/lib.rs @@ -916,9 +916,9 @@ fn match_std_library_path(path: &str) -> Option<(&'static str, usize)> { // The prefix must end with "/lib/rustlib/src/rust" or *be* "/rustc/{40 hex chars}" let valid_prefix = prefix.ends_with("/lib/rustlib/src/rust") - || prefix.strip_prefix("/rustc/").is_some_and(|hash| { - hash.len() == 40 && hash.bytes().all(|b| b.is_ascii_hexdigit()) - }); + || prefix + .strip_prefix("/rustc/") + .is_some_and(|hash| hash.len() == 40 && hash.bytes().all(|b| b.is_ascii_hexdigit())); if !valid_prefix { return None;