From b8049aa3229ca4e419158c456f5dc58651c452f5 Mon Sep 17 00:00:00 2001 From: Tolga Ergin Date: Fri, 17 Apr 2026 19:36:24 +0100 Subject: [PATCH] =?UTF-8?q?perf(resolver):=20memoize=20NpmRange=E2=86=92pu?= =?UTF-8?q?bgrub::Ranges=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `NpmRange::to_pubgrub_ranges(&available_versions)` is O(N) in version count and runs on every `DependencyProvider::get_dependencies` call — the hot path PubGrub hits for every edge during resolution AND re-hits during backtracking. Nothing cached it. This is the latent cost that surfaced in Phase 41: adding 9 extra packages to the metadata cache regressed pubgrub_core_ms by +962 ms without this memoization. Phase 40 P4 split-context deduplication had masked the issue on today's decision-gate fixture by collapsing the resolver's walk to near-linear, so on clean fixtures there's no backtracking to deduplicate — but pathological inputs (conflicting peer deps, heavy overrides, projects with deep version ranges) will hit it, and the future Phase 43 speculation cache raises CPU pressure across the board. Fix. Add `range_cache: RefCell>>` on `LpmDependencyProvider`. Keyed on the package identity + raw range string. `to_pubgrub_ranges_cached` wraps the O(N) conversion behind the cache; the two production call sites in `get_dependencies` (root + transitive) go through the wrapper. Heuristic fallback (when `available_versions` is empty) stays uncached because its cost is bounded by range bounds, not version count. Correctness. Safe within a provider instance because `available_versions(pkg)` is fixed once `ensure_cached(pkg)` runs. The metadata cache is append-only per resolve pass and platform filtering is a pure function of the (also fixed) platform map. Keyed on `ResolverPackage` (including split context) so future per-split behaviour differences can't silently share stale cells. NOT transferred across provider instances by `with_cache` / `with_prefetched_metadata` — the metadata cache transfers, the range cache rebuilds. Keeps the correctness invariant local. Two failing-test-first regression tests encode the contract: - `to_pubgrub_ranges_cached_hits_on_repeated_query` — identical `(pkg, range)` query populates exactly one cache entry and the hit returns structurally equal `Ranges`; a different range keys a new entry. - `to_pubgrub_ranges_cached_distinguishes_split_packages` — plain and split variants of the same canonical name stay in separate cells even with identical range + available set. Both fail to compile before the fix (unknown method + unknown field), pass after. ## Measured effect on decision-gate — null result Same-session 3×3 interleaved A/B vs Phase 42 main, cold cache each run, medians: metric | Phase 42 main | this PR | delta --------------------|--------------:|------------:|-------------- total_ms | 6 914 | 6 635 | -279 (noise) resolve_ms | 5 595 | 5 479 | -116 (noise) pubgrub_ms | 2 411 | 2 435 | +24 (noise) pubgrub_core_ms | 782 | 851 | +69 (noise) followup_rpc_ms | 1 624 | 1 563 | -61 (noise) All deltas sit inside run-to-run variance. Post-Phase-40-P4 the decision-gate walk barely backtracks, so every `get_dependencies` call is a compulsory cache miss — the cache has nothing to reuse. That's the condition Gemini's Phase 41 prediction of ~700 ms of the 962 ms regression would have been recoverable via this exact cache under; without that metadata pressure, the cache is dormant. ## Why ship anyway 1. Algorithmic correctness, not feature YAGNI. An uncached O(N) inside a backtracking search algorithm is a latent perf bug. The "it doesn't hit today" argument evaporates the first time a user's tree backtracks — which we can't predict from benches alone. 2. Zero overhead on fixtures that don't exercise it. Cache is a RefCell paid once per (pkg, range) pair; measured runtime overhead is below the noise floor on decision-gate. Memory: ~16 KB on a 100-package resolve. 3. Phase 43 readiness. The speculation-cache layer (fetch at T=0 from lockfile, verify resolve in parallel) puts more CPU pressure on the resolver side. Cleaning the range-conversion hot path now means the next round of measurements isn't polluted by this known-uncached O(N). 4. Second-opinion framing (Gemini, post-fix): "algorithmic integrity ≠ feature YAGNI." Agreed — different bar for fixing a primitive's complexity than for shipping speculative architecture. This PR is the former. ## CI gate locally green - cargo clippy --workspace -- -D warnings - cargo fmt --check - fancy-regex ban - cargo nextest run --workspace --exclude lpm-integration-tests (3643 passed, 7 skipped, 0 failed) - cargo test -p lpm-auth x2 (43 passed both, parallel-deterministic) - 51-pkg + 280-pkg + decision-gate cold installs vs lpm.dev: no regressions, zero WARN Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/lpm-resolver/src/provider.rs | 171 +++++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 2 deletions(-) diff --git a/crates/lpm-resolver/src/provider.rs b/crates/lpm-resolver/src/provider.rs index 7e0fc4f9..2f84543f 100644 --- a/crates/lpm-resolver/src/provider.rs +++ b/crates/lpm-resolver/src/provider.rs @@ -101,6 +101,32 @@ pub struct LpmDependencyProvider { /// `node_modules//` is created pointing at the aliased /// target's `.lpm/@/` store entry. root_aliases: RefCell>, + /// **Phase 42 P2** — memoize `(ResolverPackage, raw_range) → + /// Ranges` so repeated PubGrub `get_dependencies` + /// queries for the same edge skip the O(N-versions) conversion + /// inside `NpmRange::to_pubgrub_ranges`. Phase 41 measured this + /// uncached conversion at ~962 ms of `pubgrub_core_ms` when the + /// metadata cache grew by 9 packages; the uncached O(queries × N) + /// cost is what made the resolver look "sensitive to metadata + /// bloat." + /// + /// Correctness. Safe to memoize for the lifetime of a single + /// provider instance because `available_versions(pkg)` is fixed + /// once `ensure_cached(pkg)` runs: the metadata cache is append- + /// only during a resolve pass and platform filtering is a pure + /// function of the cached platform map, which is also fixed. + /// Keyed on `ResolverPackage` (not bare canonical name) so split + /// contexts stay in distinct cells — cheaper than teaching the + /// cache to reason about split equivalence, and safe by + /// construction. + /// + /// NOT transferred across provider instances by `with_cache` / + /// `with_prefetched_metadata`. The metadata cache transfers; the + /// range cache is re-built on the next pass. Keeps the + /// invariant local: anything that changes how `available_versions` + /// resolves (e.g. a future per-split platform override) can't + /// accidentally read stale memoized Ranges from a prior pass. + range_cache: RefCell>>, } impl LpmDependencyProvider { @@ -119,6 +145,7 @@ impl LpmDependencyProvider { batch_disabled: RefCell::new(false), platform_skipped: RefCell::new(0), root_aliases: RefCell::new(HashMap::new()), + range_cache: RefCell::new(HashMap::new()), } } @@ -139,6 +166,7 @@ impl LpmDependencyProvider { batch_disabled: RefCell::new(false), platform_skipped: RefCell::new(0), root_aliases: RefCell::new(HashMap::new()), + range_cache: RefCell::new(HashMap::new()), } } @@ -272,6 +300,36 @@ impl LpmDependencyProvider { .unwrap_or_default() } + /// **Phase 42 P2** — memoized wrapper around + /// [`NpmRange::to_pubgrub_ranges`]. First call for a given + /// `(package, raw_range)` pair computes the O(N-versions) + /// conversion and caches the result; subsequent calls return a + /// clone of the cached `Ranges`. See the doc on + /// [`Self::range_cache`] for the correctness argument. + /// + /// Callers MUST pass the same `available` slice they'd have passed + /// to the uncached call (i.e. the output of + /// `available_versions(pkg)` at the moment of the call). The cache + /// doesn't re-derive `available` on hits — it just returns what it + /// recorded. Because `available_versions(pkg)` is fixed for the + /// lifetime of one provider (metadata cache is append-only per + /// pass), this is safe; calling with a stale `available` is a + /// caller bug that would be wrong uncached too. + fn to_pubgrub_ranges_cached( + &self, + pkg: &ResolverPackage, + npm_range: &NpmRange, + available: &[NpmVersion], + ) -> Ranges { + let key = (pkg.clone(), npm_range.raw().to_string()); + if let Some(cached) = self.range_cache.borrow().get(&key) { + return cached.clone(); + } + let computed = npm_range.to_pubgrub_ranges(available); + self.range_cache.borrow_mut().insert(key, computed.clone()); + computed + } + /// Extract the metadata cache. Call after resolution to get dependency info /// for building the install plan (linker needs to know each package's deps). pub fn into_cache(self) -> HashMap { @@ -902,7 +960,8 @@ impl DependencyProvider for LpmDependencyProvider { let range = if available.is_empty() { npm_range.to_pubgrub_ranges_heuristic() } else { - npm_range.to_pubgrub_ranges(&available) + // Phase 42 P2 — memoized. See Self::range_cache doc. + self.to_pubgrub_ranges_cached(&pkg, &npm_range, &available) }; constraints.insert(pkg, range); @@ -1123,7 +1182,8 @@ impl DependencyProvider for LpmDependencyProvider { let range = if available.is_empty() { npm_range.to_pubgrub_ranges_heuristic() } else { - npm_range.to_pubgrub_ranges(&available) + // Phase 42 P2 — memoized. See Self::range_cache doc. + self.to_pubgrub_ranges_cached(&pkg, &npm_range, &available) }; constraints.insert(pkg, range); } @@ -1962,4 +2022,111 @@ mod tests { let p = classify_registry_error(lpm_common::LpmError::NotFound("missing".into())); assert!(matches!(p, ProviderError::Registry(_))); } + + // ─── Phase 42 P2: range memoization ────────────────────────── + // + // `NpmRange::to_pubgrub_ranges(&available_versions)` is O(N) in the + // version count for a given package. PubGrub backtracking calls + // `get_dependencies` multiple times per package during a single + // resolve pass, each call re-evaluating every declared dep's range + // against the same `available_versions` list. On the decision-gate + // fixture Phase 41 measured this uncached cost at ~962 ms of + // `pubgrub_core_ms` when 9 extra packages entered the metadata + // cache. + // + // The contract: `(package, raw_range_str) → Ranges` + // must produce identical output on repeated calls within a single + // provider instance, and the second call MUST hit the cache + // instead of recomputing. `available_versions` is fixed for a + // given `ResolverPackage` once `ensure_cached` has run, so there's + // no staleness concern within a resolve pass. + + #[test] + fn to_pubgrub_ranges_cached_hits_on_repeated_query() { + let pkg = ResolverPackage::npm("lodash"); + let info = make_info( + &["4.17.21", "4.17.20", "4.17.19", "4.16.0", "3.10.1"], + vec![], + vec![], + vec![], + ); + + let client = Arc::new(RegistryClient::new()); + let rt = tokio::runtime::Runtime::new().unwrap(); + let provider = LpmDependencyProvider::new(client, rt.handle().clone(), HashMap::new()); + provider.cache.borrow_mut().insert(pkg.clone(), info); + + let npm_range = NpmRange::parse("^4.17.0").unwrap(); + let available = provider.available_versions(&pkg); + + // Miss path — first call computes + caches. + let r1 = provider.to_pubgrub_ranges_cached(&pkg, &npm_range, &available); + assert_eq!( + provider.range_cache.borrow().len(), + 1, + "first call must populate exactly one entry" + ); + + // Hit path — second call returns the SAME Ranges without a + // recompute. We assert structural equality (Ranges: Eq) + // plus that no new cache entry appeared. + let r2 = provider.to_pubgrub_ranges_cached(&pkg, &npm_range, &available); + assert_eq!( + r1, r2, + "memoized Ranges must equal the freshly-computed Ranges" + ); + assert_eq!( + provider.range_cache.borrow().len(), + 1, + "repeated query with identical (pkg, range) must NOT add a second cache entry" + ); + + // Different range → separate entry. + let npm_range2 = NpmRange::parse("~4.17.0").unwrap(); + provider.to_pubgrub_ranges_cached(&pkg, &npm_range2, &available); + assert_eq!( + provider.range_cache.borrow().len(), + 2, + "different raw-range string must key a new entry" + ); + } + + #[test] + fn to_pubgrub_ranges_cached_distinguishes_split_packages() { + // Split packages (same canonical name, different `context`) are + // distinct `ResolverPackage` identities. Their `available_versions` + // SET is typically the same (splits copy from canonical via + // `ensure_cached`), but the cache keys are distinct because the + // linker and PubGrub treat them as separate identities. Ensure + // the cache honors that: a hit for `ajv[eslint]` must NOT serve + // a query for `ajv` (bare), even at the same raw range string — + // because future changes might introduce per-split platform + // differences and silently serving across contexts would mask + // that. Keep keys distinct. + let pkg_plain = ResolverPackage::npm("ajv"); + let pkg_split = ResolverPackage::npm("ajv").with_context("eslint"); + assert_ne!(pkg_plain, pkg_split, "split and plain are distinct keys"); + + let info = make_info(&["8.18.0", "7.2.4", "6.14.0"], vec![], vec![], vec![]); + let client = Arc::new(RegistryClient::new()); + let rt = tokio::runtime::Runtime::new().unwrap(); + let provider = LpmDependencyProvider::new(client, rt.handle().clone(), HashMap::new()); + provider + .cache + .borrow_mut() + .insert(pkg_plain.clone(), info.clone()); + provider.cache.borrow_mut().insert(pkg_split.clone(), info); + + let npm_range = NpmRange::parse("^6.0.0").unwrap(); + let available_plain = provider.available_versions(&pkg_plain); + let available_split = provider.available_versions(&pkg_split); + + provider.to_pubgrub_ranges_cached(&pkg_plain, &npm_range, &available_plain); + provider.to_pubgrub_ranges_cached(&pkg_split, &npm_range, &available_split); + assert_eq!( + provider.range_cache.borrow().len(), + 2, + "split and plain keys must live in separate cache entries" + ); + } }