diff --git a/CHANGELOG.md b/CHANGELOG.md index ca979423..652b7cd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [11.8.3](https://github.com/oxc-project/oxc-resolver/compare/v11.8.2...v11.8.3) - 2025-09-20 + +### 🚜 Refactor + +- split src/cache.rs into logical modules (by @Boshen) + +### Contributors + +* @Boshen +* @renovate[bot] + ## [11.8.2](https://github.com/oxc-project/oxc-resolver/compare/v11.8.1...v11.8.2) - 2025-09-18 ### ⚡ Performance diff --git a/Cargo.lock b/Cargo.lock index 0308b88d..847bb049 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -783,7 +783,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "oxc_resolver" -version = "11.8.2" +version = "11.8.3" dependencies = [ "cfg-if", "criterion2", @@ -813,7 +813,7 @@ dependencies = [ [[package]] name = "oxc_resolver_napi" -version = "11.8.2" +version = "11.8.3" dependencies = [ "fancy-regex", "mimalloc-safe", diff --git a/Cargo.toml b/Cargo.toml index bed1ba30..e59162fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,11 +16,11 @@ rust-version = "1.85.0" description = "ESM / CJS module resolution" [workspace.dependencies] -oxc_resolver = { version = "11.8.2", path = "." } +oxc_resolver = { version = "11.8.3", path = "." } [package] name = "oxc_resolver" -version = "11.8.2" +version = "11.8.3" authors.workspace = true categories.workspace = true edition.workspace = true diff --git a/napi/Cargo.toml b/napi/Cargo.toml index 58e85ba7..8f69b810 100644 --- a/napi/Cargo.toml +++ b/napi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxc_resolver_napi" -version = "11.8.2" +version = "11.8.3" authors.workspace = true categories.workspace = true edition.workspace = true diff --git a/src/cache/borrowed_path.rs b/src/cache/borrowed_path.rs new file mode 100644 index 00000000..d385c8a5 --- /dev/null +++ b/src/cache/borrowed_path.rs @@ -0,0 +1,31 @@ +use std::{ + hash::{Hash, Hasher}, + path::Path, +}; + +use papaya::Equivalent; + +use super::cached_path::CachedPath; + +pub struct BorrowedCachedPath<'a> { + pub hash: u64, + pub path: &'a Path, +} + +impl Equivalent for BorrowedCachedPath<'_> { + fn equivalent(&self, other: &CachedPath) -> bool { + self.path.as_os_str() == other.path.as_os_str() + } +} + +impl Hash for BorrowedCachedPath<'_> { + fn hash(&self, state: &mut H) { + self.hash.hash(state); + } +} + +impl PartialEq for BorrowedCachedPath<'_> { + fn eq(&self, other: &Self) -> bool { + self.path.as_os_str() == other.path.as_os_str() + } +} diff --git a/src/cache.rs b/src/cache/cache_impl.rs similarity index 54% rename from src/cache.rs rename to src/cache/cache_impl.rs index c3a9b9ae..54b829c4 100644 --- a/src/cache.rs +++ b/src/cache/cache_impl.rs @@ -1,21 +1,14 @@ use std::{ borrow::Cow, - cell::RefCell, - convert::AsRef, - fmt, hash::{BuildHasherDefault, Hash, Hasher}, io, - ops::Deref, - path::{Component, Path, PathBuf}, - sync::{ - Arc, - atomic::{AtomicU64, Ordering}, - }, + path::{Path, PathBuf}, + sync::Arc, + sync::atomic::Ordering, }; use cfg_if::cfg_if; -use once_cell::sync::OnceCell as OnceLock; -use papaya::{Equivalent, HashMap, HashSet}; +use papaya::{HashMap, HashSet}; use rustc_hash::FxHasher; use crate::{ @@ -23,14 +16,15 @@ use crate::{ context::ResolveContext as Ctx, path::PathUtil, }; -static THREAD_COUNT: AtomicU64 = AtomicU64::new(1); +use super::{ + borrowed_path::BorrowedCachedPath, + cached_path::{CachedPath, CachedPathImpl}, + hasher::IdentityHasher, + thread_local::THREAD_ID, +}; -thread_local! { - /// Per-thread pre-allocated path that is used to perform operations on paths more quickly. - /// Learned from parcel - pub static SCRATCH_PATH: RefCell = RefCell::new(PathBuf::with_capacity(256)); - pub static THREAD_ID: u64 = THREAD_COUNT.fetch_add(1, Ordering::SeqCst); -} +#[cfg(feature = "yarn_pnp")] +use crate::pnp; /// Cache implementation used for caching filesystem access. #[derive(Default)] @@ -38,6 +32,8 @@ pub struct Cache { pub(crate) fs: Fs, paths: HashSet>, tsconfigs: HashMap, BuildHasherDefault>, + /// Normalized cache that stores paths relative to their package roots + normalized_paths: HashMap<(PathBuf, PathBuf), CachedPath, BuildHasherDefault>, #[cfg(feature = "yarn_pnp")] yarn_pnp_manifest: OnceLock, } @@ -46,6 +42,7 @@ impl Cache { pub fn clear(&self) { self.paths.pin().clear(); self.tsconfigs.pin().clear(); + self.normalized_paths.pin().clear(); } #[allow(clippy::cast_possible_truncation)] @@ -76,6 +73,34 @@ impl Cache { cached_path } + /// Optimized cache lookup that attempts to use package-relative paths for better cache hit rate. + /// This method tries to normalize paths relative to the nearest package.json for improved cache efficiency. + pub(crate) fn value_normalized(&self, path: &Path) -> CachedPath { + // First try to create the normal cached path + let cached_path = self.value(path); + + // Try to find package root for normalization + if let Some(package_root) = cached_path.find_package_root(&self.fs) { + let package_root_path = package_root.to_path_buf(); + + // Create relative path from package root + if let Ok(relative_path) = path.strip_prefix(&package_root_path) { + let normalized_key = (package_root_path, relative_path.to_path_buf()); + let normalized_cache = self.normalized_paths.pin(); + + // Check if we already have this normalized entry + if let Some(entry) = normalized_cache.get(&normalized_key) { + return entry.clone(); + } + + // Store the normalized entry for future lookups + normalized_cache.insert(normalized_key, cached_path.clone()); + } + } + + cached_path + } + pub(crate) fn canonicalize(&self, path: &CachedPath) -> Result { let cached_path = self.canonicalize_impl(path)?; let path = cached_path.to_path_buf(); @@ -228,6 +253,10 @@ impl Cache { .hasher(BuildHasherDefault::default()) .resize_mode(papaya::ResizeMode::Blocking) .build(), + normalized_paths: HashMap::builder() + .hasher(BuildHasherDefault::default()) + .resize_mode(papaya::ResizeMode::Blocking) + .build(), #[cfg(feature = "yarn_pnp")] yarn_pnp_manifest: OnceLock::new(), } @@ -285,274 +314,3 @@ impl Cache { .clone() } } - -#[derive(Clone)] -pub struct CachedPath(Arc); - -pub struct CachedPathImpl { - hash: u64, - path: Box, - parent: Option, - is_node_modules: bool, - inside_node_modules: bool, - meta: OnceLock>, - canonicalized: OnceLock>, - canonicalizing: AtomicU64, - node_modules: OnceLock>, - package_json: OnceLock)>>, -} - -impl CachedPathImpl { - fn new( - hash: u64, - path: Box, - is_node_modules: bool, - inside_node_modules: bool, - parent: Option, - ) -> Self { - Self { - hash, - path, - parent, - is_node_modules, - inside_node_modules, - meta: OnceLock::new(), - canonicalized: OnceLock::new(), - canonicalizing: AtomicU64::new(0), - node_modules: OnceLock::new(), - package_json: OnceLock::new(), - } - } -} - -impl Deref for CachedPath { - type Target = CachedPathImpl; - - fn deref(&self) -> &Self::Target { - self.0.as_ref() - } -} - -impl CachedPath { - pub(crate) fn path(&self) -> &Path { - &self.0.path - } - - pub(crate) fn to_path_buf(&self) -> PathBuf { - self.path.to_path_buf() - } - - pub(crate) fn parent(&self) -> Option<&Self> { - self.0.parent.as_ref() - } - - pub(crate) fn is_node_modules(&self) -> bool { - self.is_node_modules - } - - pub(crate) fn inside_node_modules(&self) -> bool { - self.inside_node_modules - } - - pub(crate) fn module_directory( - &self, - module_name: &str, - cache: &Cache, - ctx: &mut Ctx, - ) -> Option { - let cached_path = cache.value(&self.path.join(module_name)); - cache.is_dir(&cached_path, ctx).then_some(cached_path) - } - - pub(crate) fn cached_node_modules( - &self, - cache: &Cache, - ctx: &mut Ctx, - ) -> Option { - self.node_modules.get_or_init(|| self.module_directory("node_modules", cache, ctx)).clone() - } - - /// Find package.json of a path by traversing parent directories. - /// - /// # Errors - /// - /// * [ResolveError::Json] - pub(crate) fn find_package_json( - &self, - options: &ResolveOptions, - cache: &Cache, - ctx: &mut Ctx, - ) -> Result)>, ResolveError> { - let mut cache_value = self; - // Go up directories when the querying path is not a directory - while !cache.is_dir(cache_value, ctx) { - if let Some(cv) = &cache_value.parent { - cache_value = cv; - } else { - break; - } - } - let mut cache_value = Some(cache_value); - while let Some(cv) = cache_value { - if let Some(package_json) = cache.get_package_json(cv, options, ctx)? { - return Ok(Some(package_json)); - } - cache_value = cv.parent.as_ref(); - } - Ok(None) - } - - pub(crate) fn add_extension(&self, ext: &str, cache: &Cache) -> Self { - SCRATCH_PATH.with_borrow_mut(|path| { - path.clear(); - let s = path.as_mut_os_string(); - s.push(self.path.as_os_str()); - s.push(ext); - cache.value(path) - }) - } - - pub(crate) fn replace_extension(&self, ext: &str, cache: &Cache) -> Self { - SCRATCH_PATH.with_borrow_mut(|path| { - path.clear(); - let s = path.as_mut_os_string(); - let self_len = self.path.as_os_str().len(); - let self_bytes = self.path.as_os_str().as_encoded_bytes(); - let slice_to_copy = self.path.extension().map_or(self_bytes, |previous_extension| { - &self_bytes[..self_len - previous_extension.len() - 1] - }); - // SAFETY: ??? - s.push(unsafe { std::ffi::OsStr::from_encoded_bytes_unchecked(slice_to_copy) }); - s.push(ext); - cache.value(path) - }) - } - - /// Returns a new path by resolving the given subpath (including "." and ".." components) with this path. - pub(crate) fn normalize_with>( - &self, - subpath: P, - cache: &Cache, - ) -> Self { - let subpath = subpath.as_ref(); - let mut components = subpath.components(); - let Some(head) = components.next() else { return cache.value(subpath) }; - if matches!(head, Component::Prefix(..) | Component::RootDir) { - return cache.value(subpath); - } - SCRATCH_PATH.with_borrow_mut(|path| { - path.clear(); - path.push(&self.path); - for component in std::iter::once(head).chain(components) { - match component { - Component::CurDir => {} - Component::ParentDir => { - path.pop(); - } - Component::Normal(c) => { - cfg_if! { - if #[cfg(target_family = "wasm")] { - // Need to trim the extra \0 introduces by https://github.com/nodejs/uvwasi/issues/262 - path.push(c.to_string_lossy().trim_end_matches('\0')); - } else { - path.push(c); - } - } - } - Component::Prefix(..) | Component::RootDir => { - unreachable!("Path {:?} Subpath {:?}", self.path, subpath) - } - } - } - - cache.value(path) - }) - } - - #[inline] - #[cfg(windows)] - pub(crate) fn normalize_root(&self, cache: &Cache) -> Self { - if self.path().as_os_str().as_encoded_bytes().last() == Some(&b'/') { - let mut path_string = self.path.to_string_lossy().into_owned(); - path_string.pop(); - path_string.push('\\'); - cache.value(&PathBuf::from(path_string)) - } else { - self.clone() - } - } - - #[inline] - #[cfg(not(windows))] - pub(crate) fn normalize_root(&self, _cache: &Cache) -> Self { - self.clone() - } -} - -impl CachedPath { - fn meta(&self, fs: &Fs) -> Option { - *self.meta.get_or_init(|| fs.metadata(&self.path).ok()) - } -} - -impl Hash for CachedPath { - fn hash(&self, state: &mut H) { - self.hash.hash(state); - } -} - -impl PartialEq for CachedPath { - fn eq(&self, other: &Self) -> bool { - self.path.as_os_str() == other.path.as_os_str() - } -} - -impl Eq for CachedPath {} - -impl fmt::Debug for CachedPath { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("FsCachedPath").field("path", &self.path).finish() - } -} - -struct BorrowedCachedPath<'a> { - hash: u64, - path: &'a Path, -} - -impl Equivalent for BorrowedCachedPath<'_> { - fn equivalent(&self, other: &CachedPath) -> bool { - self.path.as_os_str() == other.path.as_os_str() - } -} - -impl Hash for BorrowedCachedPath<'_> { - fn hash(&self, state: &mut H) { - self.hash.hash(state); - } -} - -impl PartialEq for BorrowedCachedPath<'_> { - fn eq(&self, other: &Self) -> bool { - self.path.as_os_str() == other.path.as_os_str() - } -} - -/// Since the cache key is memoized, use an identity hasher -/// to avoid double cache. -#[derive(Default)] -struct IdentityHasher(u64); - -impl Hasher for IdentityHasher { - fn write(&mut self, _: &[u8]) { - unreachable!("Invalid use of IdentityHasher") - } - - fn write_u64(&mut self, n: u64) { - self.0 = n; - } - - fn finish(&self) -> u64 { - self.0 - } -} diff --git a/src/cache/cached_path.rs b/src/cache/cached_path.rs new file mode 100644 index 00000000..24b4cd7f --- /dev/null +++ b/src/cache/cached_path.rs @@ -0,0 +1,268 @@ +use std::{ + convert::AsRef, + fmt, + hash::{Hash, Hasher}, + ops::Deref, + path::{Component, Path, PathBuf}, + sync::{Arc, atomic::AtomicU64}, +}; + +use cfg_if::cfg_if; +use once_cell::sync::OnceCell as OnceLock; + +use crate::{ + FileMetadata, FileSystem, PackageJson, ResolveError, ResolveOptions, + context::ResolveContext as Ctx, +}; + +use super::{cache_impl::Cache, thread_local::SCRATCH_PATH}; + +#[derive(Clone)] +pub struct CachedPath(pub(crate) Arc); + +pub struct CachedPathImpl { + pub hash: u64, + pub path: Box, + pub parent: Option, + pub is_node_modules: bool, + pub inside_node_modules: bool, + pub meta: OnceLock>, + pub canonicalized: OnceLock>, + pub canonicalizing: AtomicU64, + pub node_modules: OnceLock>, + pub package_json: OnceLock)>>, + pub package_root: OnceLock>, +} + +impl CachedPathImpl { + pub fn new( + hash: u64, + path: Box, + is_node_modules: bool, + inside_node_modules: bool, + parent: Option, + ) -> Self { + Self { + hash, + path, + parent, + is_node_modules, + inside_node_modules, + meta: OnceLock::new(), + canonicalized: OnceLock::new(), + canonicalizing: AtomicU64::new(0), + node_modules: OnceLock::new(), + package_json: OnceLock::new(), + package_root: OnceLock::new(), + } + } +} + +impl Deref for CachedPath { + type Target = CachedPathImpl; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} + +impl CachedPath { + pub(crate) fn path(&self) -> &Path { + &self.0.path + } + + pub(crate) fn to_path_buf(&self) -> PathBuf { + self.path.to_path_buf() + } + + pub(crate) fn parent(&self) -> Option<&Self> { + self.0.parent.as_ref() + } + + pub(crate) fn is_node_modules(&self) -> bool { + self.is_node_modules + } + + pub(crate) fn inside_node_modules(&self) -> bool { + self.inside_node_modules + } + + pub(crate) fn module_directory( + &self, + module_name: &str, + cache: &Cache, + ctx: &mut Ctx, + ) -> Option { + let cached_path = cache.value(&self.path.join(module_name)); + cache.is_dir(&cached_path, ctx).then_some(cached_path) + } + + pub(crate) fn cached_node_modules( + &self, + cache: &Cache, + ctx: &mut Ctx, + ) -> Option { + self.node_modules.get_or_init(|| self.module_directory("node_modules", cache, ctx)).clone() + } + + /// Find package.json of a path by traversing parent directories. + /// + /// # Errors + /// + /// * [ResolveError::Json] + pub(crate) fn find_package_json( + &self, + options: &ResolveOptions, + cache: &Cache, + ctx: &mut Ctx, + ) -> Result)>, ResolveError> { + let mut cache_value = self; + // Go up directories when the querying path is not a directory + while !cache.is_dir(cache_value, ctx) { + if let Some(cv) = &cache_value.parent { + cache_value = cv; + } else { + break; + } + } + let mut cache_value = Some(cache_value); + while let Some(cv) = cache_value { + if let Some(package_json) = cache.get_package_json(cv, options, ctx)? { + return Ok(Some(package_json)); + } + cache_value = cv.parent.as_ref(); + } + Ok(None) + } + + /// Find the package root for cache normalization by checking for package.json files. + /// Returns the directory containing the nearest package.json. + pub(crate) fn find_package_root(&self, fs: &Fs) -> Option { + self.package_root + .get_or_init(|| { + let mut current = Some(self); + while let Some(cv) = current { + // Check if this directory contains a package.json file + let package_json_path = cv.path().join("package.json"); + if fs.metadata(&package_json_path).is_ok() { + return Some(cv.clone()); + } + current = cv.parent.as_ref(); + } + None + }) + .clone() + } + + pub(crate) fn add_extension(&self, ext: &str, cache: &Cache) -> Self { + SCRATCH_PATH.with_borrow_mut(|path| { + path.clear(); + let s = path.as_mut_os_string(); + s.push(self.path.as_os_str()); + s.push(ext); + cache.value(path) + }) + } + + pub(crate) fn replace_extension(&self, ext: &str, cache: &Cache) -> Self { + SCRATCH_PATH.with_borrow_mut(|path| { + path.clear(); + let s = path.as_mut_os_string(); + let self_len = self.path.as_os_str().len(); + let self_bytes = self.path.as_os_str().as_encoded_bytes(); + let slice_to_copy = self.path.extension().map_or(self_bytes, |previous_extension| { + &self_bytes[..self_len - previous_extension.len() - 1] + }); + // SAFETY: ??? + s.push(unsafe { std::ffi::OsStr::from_encoded_bytes_unchecked(slice_to_copy) }); + s.push(ext); + cache.value(path) + }) + } + + /// Returns a new path by resolving the given subpath (including "." and ".." components) with this path. + pub(crate) fn normalize_with>( + &self, + subpath: P, + cache: &Cache, + ) -> Self { + let subpath = subpath.as_ref(); + let mut components = subpath.components(); + let Some(head) = components.next() else { return cache.value(subpath) }; + if matches!(head, Component::Prefix(..) | Component::RootDir) { + return cache.value(subpath); + } + SCRATCH_PATH.with_borrow_mut(|path| { + path.clear(); + path.push(&self.path); + for component in std::iter::once(head).chain(components) { + match component { + Component::CurDir => {} + Component::ParentDir => { + path.pop(); + } + Component::Normal(c) => { + cfg_if! { + if #[cfg(target_family = "wasm")] { + // Need to trim the extra \0 introduces by https://github.com/nodejs/uvwasi/issues/262 + path.push(c.to_string_lossy().trim_end_matches('\0')); + } else { + path.push(c); + } + } + } + Component::Prefix(..) | Component::RootDir => { + unreachable!("Path {:?} Subpath {:?}", self.path, subpath) + } + } + } + + cache.value(path) + }) + } + + #[inline] + #[cfg(windows)] + pub(crate) fn normalize_root(&self, cache: &Cache) -> Self { + if self.path().as_os_str().as_encoded_bytes().last() == Some(&b'/') { + let mut path_string = self.path.to_string_lossy().into_owned(); + path_string.pop(); + path_string.push('\\'); + cache.value(&PathBuf::from(path_string)) + } else { + self.clone() + } + } + + #[inline] + #[cfg(not(windows))] + pub(crate) fn normalize_root(&self, _cache: &Cache) -> Self { + self.clone() + } +} + +impl CachedPath { + pub fn meta(&self, fs: &Fs) -> Option { + *self.meta.get_or_init(|| fs.metadata(&self.path).ok()) + } +} + +impl Hash for CachedPath { + fn hash(&self, state: &mut H) { + self.hash.hash(state); + } +} + +impl PartialEq for CachedPath { + fn eq(&self, other: &Self) -> bool { + self.path.as_os_str() == other.path.as_os_str() + } +} + +impl Eq for CachedPath {} + +impl fmt::Debug for CachedPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FsCachedPath").field("path", &self.path).finish() + } +} diff --git a/src/cache/hasher.rs b/src/cache/hasher.rs new file mode 100644 index 00000000..aaa4621f --- /dev/null +++ b/src/cache/hasher.rs @@ -0,0 +1,20 @@ +use std::hash::Hasher; + +/// Since the cache key is memoized, use an identity hasher +/// to avoid double cache. +#[derive(Default)] +pub struct IdentityHasher(u64); + +impl Hasher for IdentityHasher { + fn write(&mut self, _: &[u8]) { + unreachable!("Invalid use of IdentityHasher") + } + + fn write_u64(&mut self, n: u64) { + self.0 = n; + } + + fn finish(&self) -> u64 { + self.0 + } +} diff --git a/src/cache/mod.rs b/src/cache/mod.rs new file mode 100644 index 00000000..d1a5c314 --- /dev/null +++ b/src/cache/mod.rs @@ -0,0 +1,14 @@ +mod borrowed_path; +mod cache_impl; +mod cached_path; +mod hasher; +mod thread_local; + +pub use cache_impl::Cache; +pub use cached_path::CachedPath; + +// Internal types used within the cache module +pub(crate) use borrowed_path::BorrowedCachedPath; +pub(crate) use cached_path::CachedPathImpl; +pub(crate) use hasher::IdentityHasher; +pub(crate) use thread_local::{SCRATCH_PATH, THREAD_ID}; diff --git a/src/cache/thread_local.rs b/src/cache/thread_local.rs new file mode 100644 index 00000000..7c8c7c9d --- /dev/null +++ b/src/cache/thread_local.rs @@ -0,0 +1,14 @@ +use std::{ + cell::RefCell, + path::PathBuf, + sync::atomic::{AtomicU64, Ordering}, +}; + +static THREAD_COUNT: AtomicU64 = AtomicU64::new(1); + +thread_local! { + /// Per-thread pre-allocated path that is used to perform operations on paths more quickly. + /// Learned from parcel + pub static SCRATCH_PATH: RefCell = RefCell::new(PathBuf::with_capacity(256)); + pub static THREAD_ID: u64 = THREAD_COUNT.fetch_add(1, Ordering::SeqCst); +}