diff --git a/crates/lpm-cli/src/commands/rebuild.rs b/crates/lpm-cli/src/commands/rebuild.rs index 1b7612e9..9609aec6 100644 --- a/crates/lpm-cli/src/commands/rebuild.rs +++ b/crates/lpm-cli/src/commands/rebuild.rs @@ -992,33 +992,53 @@ fn read_lifecycle_scripts(pkg_json_path: &Path) -> Option bool { + let scopes = parse_trusted_scopes(project_dir); + name_matches_trusted_scope(package_name, &scopes) +} + +/// Read `project_dir/package.json` ONCE and return the +/// `lpm.scripts.trustedScopes` list. Returns an empty vec if the file +/// is missing, malformed, or the field is absent — matching the +/// fail-closed posture of [`is_scope_trusted`]. +/// +/// Exposed for hot per-N call sites that previously paid an O(N) tax +/// for re-parsing the same manifest in a loop. +fn parse_trusted_scopes(project_dir: &Path) -> Vec { let pkg_json_path = project_dir.join("package.json"); - let content = match std::fs::read_to_string(&pkg_json_path) { - Ok(c) => c, - Err(_) => return false, + let Ok(content) = std::fs::read_to_string(&pkg_json_path) else { + return Vec::new(); }; - let parsed: serde_json::Value = match serde_json::from_str(&content) { - Ok(v) => v, - Err(_) => return false, + let Ok(parsed) = serde_json::from_str::(&content) else { + return Vec::new(); }; - - // Check lpm.scripts.trustedScopes - let scopes = parsed + parsed .get("lpm") .and_then(|l| l.get("scripts")) .and_then(|s| s.get("trustedScopes")) - .and_then(|t| t.as_array()); - - let Some(scopes) = scopes else { - return false; - }; - - for scope in scopes { - let Some(pattern) = scope.as_str() else { - continue; - }; + .and_then(|t| t.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default() +} +/// Pure helper: match a package name against a precomputed list of +/// `trustedScopes` glob patterns. Same semantics as the original +/// `is_scope_trusted` body — kept identical so behavior under all +/// existing tests is preserved. +fn name_matches_trusted_scope(package_name: &str, scopes: &[String]) -> bool { + for pattern in scopes { // Simple glob matching: "@myorg/*" matches "@myorg/anything" if let Some(prefix) = pattern.strip_suffix("/*") { if package_name.starts_with(prefix) && package_name.len() > prefix.len() + 1 { @@ -1028,7 +1048,6 @@ fn is_scope_trusted(package_name: &str, project_dir: &Path) -> bool { return true; } } - false } @@ -1429,15 +1448,38 @@ pub(crate) fn scriptable_package_rows( requested_capabilities: &crate::capability::CapabilitySet, user_bound: &crate::capability::UserBound, ) -> Vec { - let mut rows = Vec::new(); - - for (name, version, integrity) in packages { + use rayon::prelude::*; + + // **Phase 51 W2: hoist trustedScopes parse out of the per-package + // loop.** The previous implementation called `is_scope_trusted` + // inside the loop, which re-read AND re-parsed + // `project_dir/package.json` for every package. On the 266-pkg + // bench/fixture-large fixture that's 266 redundant disk reads of + // the same file. Reading once before the loop keeps the contract + // (same scopes against same names → same answer) while turning + // the per-package step into a pure in-memory glob match. + let trusted_scopes = parse_trusted_scopes(project_dir); + + let walk_start = std::time::Instant::now(); + + // **Phase 51 W2: parallelize the per-package walk via rayon.** + // Same pattern as `build_state::compute_blocked_packages_with_metadata`. + // Each iteration is independent — pure CPU + read-only disk: one + // package.json read, one `BUILD_MARKER` stat, one + // `compute_script_hash`, and pure policy/capability lookups. + // No shared mutable state, so `par_iter().filter_map().collect()` + // is drop-in. Output ordering matches input ordering; downstream + // (hint print + `unbuilt` filter) does not depend on a specific + // ordering of equally-typed rows but the Vec preserves + // input order anyway under rayon's stable collect. + let per_pkg = |(name, version, integrity): &(String, String, Option)| + -> Option { let pkg_dir = store.package_dir(name, version); let pkg_json_path = pkg_dir.join("package.json"); let scripts = match read_lifecycle_scripts(&pkg_json_path) { Some(s) if !s.is_empty() => s, - _ => continue, + _ => return None, }; let is_built = pkg_dir.join(BUILD_MARKER).exists(); @@ -1457,7 +1499,7 @@ pub(crate) fn scriptable_package_rows( script_hash.as_deref(), ); let strict_trust = matches!(trust, TrustMatch::Strict | TrustMatch::LegacyNameOnly); - let scope_trust = is_scope_trusted(name, project_dir); + let scope_trust = name_matches_trusted_scope(name, &trusted_scopes); let base_trusted = strict_trust || scope_trust; // **Phase 48 P0 sub-slice 6d follow-up.** If the script- @@ -1478,14 +1520,22 @@ pub(crate) fn scriptable_package_rows( }; let is_trusted = base_trusted && !capability_blocks_trust; - rows.push(ScriptableHintRow { + Some(ScriptableHintRow { name: name.clone(), version: version.clone(), scripts, is_built, is_trusted, - }); - } + }) + }; + + let rows: Vec = packages.par_iter().filter_map(per_pkg).collect(); + + tracing::debug!( + "perf.scriptable_package_rows pkgs={} ms={}", + packages.len(), + walk_start.elapsed().as_millis() + ); rows } diff --git a/crates/lpm-cli/src/provenance_fetch.rs b/crates/lpm-cli/src/provenance_fetch.rs index 2aa388a5..e45aa5e3 100644 --- a/crates/lpm-cli/src/provenance_fetch.rs +++ b/crates/lpm-cli/src/provenance_fetch.rs @@ -345,9 +345,32 @@ async fn fetch_and_parse(http: &reqwest::Client, url: &str) -> Result Result MAX_BUNDLE_BYTES { + tracing::debug!( + target: "lpm_cli::provenance_fetch", + url = %url, + declared_bytes = declared, + cap_bytes = MAX_BUNDLE_BYTES, + stage = "content_length_cap", + "attestation bundle exceeds declared size cap", + ); return Err(()); } @@ -368,33 +399,96 @@ async fn fetch_and_parse(http: &reqwest::Client, url: &str) -> Result = Vec::with_capacity(64 * 1024); let mut stream = response.bytes_stream(); while let Some(chunk) = stream.next().await { - let chunk = chunk.map_err(|_| ())?; + let chunk = chunk.map_err(|e| { + tracing::debug!( + target: "lpm_cli::provenance_fetch", + url = %url, + error = %e, + buffered_bytes = buf.len(), + stage = "chunk", + "attestation body chunk read error", + ); + })?; // Reject BEFORE copying the chunk into `buf`: the check is // `buf.len() + chunk.len()` so even a single oversized chunk // can't land in our Vec. if buf.len().saturating_add(chunk.len()) > MAX_BUNDLE_BYTES { + tracing::debug!( + target: "lpm_cli::provenance_fetch", + url = %url, + buffered_bytes = buf.len(), + chunk_bytes = chunk.len(), + cap_bytes = MAX_BUNDLE_BYTES, + stage = "stream_cap", + "attestation bundle exceeded streaming size cap", + ); return Err(()); } buf.extend_from_slice(&chunk); } - parse_sigstore_bundle(&buf) + parse_sigstore_bundle(&buf).map_err(|()| { + // parse_sigstore_bundle has already emitted the + // failure-point-specific debug line; this outer log adds the + // URL so the trace stream's "which package failed how" pair + // stays adjacent. + tracing::debug!( + target: "lpm_cli::provenance_fetch", + url = %url, + body_bytes = buf.len(), + stage = "parse", + "attestation bundle parse returned Err", + ); + }) } /// Parse a Sigstore bundle JSON and extract the leaf cert + its SAN /// identity. Exposed for unit tests; the production path goes /// through [`fetch_and_parse`]. fn parse_sigstore_bundle(body: &[u8]) -> Result { - let bundle: serde_json::Value = serde_json::from_slice(body).map_err(|_| ())?; + let bundle: serde_json::Value = serde_json::from_slice(body).map_err(|e| { + tracing::debug!( + target: "lpm_cli::provenance_fetch", + error = %e, + body_bytes = body.len(), + stage = "json_parse", + "attestation bundle JSON parse failed", + ); + })?; // The Sigstore bundle shape puts the cert chain at // `verificationMaterial.x509CertificateChain.certificates[0].rawBytes` // (base64-encoded DER). Some bundles (multi-subject responses, // e.g., npm's `{ attestations: [...] }` list) wrap the bundle one // level deeper; try both shapes. - let cert_b64 = find_leaf_cert_rawbytes(&bundle).ok_or(())?; - - let der = BASE64.decode(&cert_b64).map_err(|_| ())?; + let cert_b64 = find_leaf_cert_rawbytes(&bundle).ok_or(()).map_err(|()| { + // Capture the bundle's top-level keys so a shape drift (e.g., + // npm switches to `dsseEnvelope`-only or wraps under a new + // root) is diagnosable from the log without reproducing the + // request. Top-level keys aren't sensitive — the cert and + // signatures sit one or two levels deeper. + let top_keys: Vec<&str> = bundle + .as_object() + .map(|m| m.keys().map(String::as_str).collect()) + .unwrap_or_default(); + tracing::debug!( + target: "lpm_cli::provenance_fetch", + stage = "cert_lookup", + top_level_keys = ?top_keys, + body_bytes = body.len(), + "attestation bundle missing leaf cert rawBytes — shape drift?", + ); + })?; + + let der = BASE64.decode(&cert_b64).map_err(|e| { + tracing::debug!( + target: "lpm_cli::provenance_fetch", + error = %e, + cert_b64_len = cert_b64.len(), + stage = "base64_decode", + "attestation leaf cert base64 decode failed", + ); + })?; let cert_sha = { let mut hasher = Sha256::new(); hasher.update(&der); @@ -413,11 +507,33 @@ fn parse_sigstore_bundle(body: &[u8]) -> Result { } /// Walk a Sigstore bundle JSON looking for the leaf cert's -/// `rawBytes`. Handles both the standard bundle shape and npm's -/// attestations-list wrapper. +/// `rawBytes`. Handles three shapes: +/// +/// 1. **Sigstore Bundle v0.1 / v0.2** — chain shape: +/// `verificationMaterial.x509CertificateChain.certificates[0].rawBytes`. +/// The original protobuf-specs layout. Still produced by some +/// older signers; the original parser only knew this one. +/// 2. **Sigstore Bundle v0.3** — single-cert shape: +/// `verificationMaterial.certificate.rawBytes`. v0.3 collapsed +/// the cert chain into one leaf field because in practice the +/// chain only ever held one cert. **This is what npm's +/// attestations endpoint serves today** for every Fulcio-issued +/// GitHub Actions provenance attestation, and the absence of +/// this branch in the original parser is what caused the Phase +/// 50 close-out's "warm install never caches ~18 packages" bug +/// — every attested URL parsed past the cert-lookup stage and +/// degraded to `Ok(None)`, which is never written to disk. +/// 3. **npm attestations-list wrapper** — +/// `{ attestations: [{ bundle: { } }] }`. +/// npm currently serves TWO attestations per package: the +/// publish-time attestation signed with npm's own keypair (which +/// carries `verificationMaterial.publicKey` and NO leaf cert — +/// that's normal for non-Fulcio attestations), followed by the +/// Fulcio-issued GitHub Actions provenance. The recursive call +/// walks the list in order; the publicKey-only entry returns +/// None and the loop falls through to the cert-bearing entry. fn find_leaf_cert_rawbytes(v: &serde_json::Value) -> Option { - // Standard bundle: - // { verificationMaterial: { x509CertificateChain: { certificates: [{ rawBytes: ... }] } } } + // Shape 1: legacy chain. if let Some(raw) = v .get("verificationMaterial") .and_then(|m| m.get("x509CertificateChain")) @@ -430,8 +546,23 @@ fn find_leaf_cert_rawbytes(v: &serde_json::Value) -> Option { return Some(raw.to_string()); } - // npm attestations-list wrapper: - // { attestations: [{ bundle: { } }] } + // Shape 2: v0.3 single-cert. Checked BEFORE the wrapper recursion + // so a directly-passed v0.3 bundle (test fixtures, future callers + // that pass an inner bundle without the npm list wrapper) hits the + // cheap path without falling through to a list-walk. + if let Some(raw) = v + .get("verificationMaterial") + .and_then(|m| m.get("certificate")) + .and_then(|c| c.get("rawBytes")) + .and_then(|r| r.as_str()) + { + return Some(raw.to_string()); + } + + // Shape 3: npm wrapper. Recurse so the inner bundle hits Shape 1 + // or Shape 2 above. Skips publicKey-only entries automatically: + // those have neither `x509CertificateChain` nor `certificate`, so + // the recursive call returns None and the loop continues. if let Some(list) = v.get("attestations").and_then(|a| a.as_array()) { for att in list { if let Some(bundle) = att.get("bundle") @@ -788,6 +919,49 @@ mod tests { }) } + /// **Sigstore Bundle v0.3 fixture** — the single-cert shape that + /// npm's attestations endpoint serves today for every Fulcio-issued + /// GitHub Actions provenance attestation. See `find_leaf_cert_rawbytes` + /// for the full shape rationale. + fn sigstore_bundle_v3_with_cert(der: &[u8]) -> serde_json::Value { + serde_json::json!({ + "mediaType": "application/vnd.dev.sigstore.bundle.v0.3+json", + "verificationMaterial": { + "certificate": { "rawBytes": BASE64.encode(der) } + } + }) + } + + /// **npm publish-attestation fixture** — mediaType v0.2 with a + /// `publicKey` reference and NO leaf cert. npm signs this entry + /// with its own keypair, so there is nothing for the drift-check + /// to walk into. The fix's recursive `find_leaf_cert_rawbytes` + /// must skip this entry without erroring and continue to the next + /// list item. + fn sigstore_bundle_publickey_only() -> serde_json::Value { + serde_json::json!({ + "mediaType": "application/vnd.dev.sigstore.bundle+json;version=0.2", + "verificationMaterial": { + "publicKey": { "hint": "npm-publisher-keypair" } + } + }) + } + + /// **Real-world npm wrapper** — the actual two-element shape served + /// by `https://registry.npmjs.org/-/npm/v1/attestations/` for + /// any GitHub-Actions-published, attested package. First element + /// is the npm publish attestation (publicKey-only); second is the + /// Fulcio-issued GitHub Actions provenance (v0.3 single-cert). Our + /// parser must walk past the first and pick up the second. + fn npm_attestations_real_world_shape(der: &[u8]) -> serde_json::Value { + serde_json::json!({ + "attestations": [ + { "bundle": sigstore_bundle_publickey_only() }, + { "bundle": sigstore_bundle_v3_with_cert(der) } + ] + }) + } + #[test] fn parse_bundle_standard_shape_extracts_identity_and_cert_sha() { let der = cert_der_with_san_uri( @@ -829,6 +1003,133 @@ mod tests { ); } + /// **Phase 51 regression — Sigstore Bundle v0.3 single-cert shape.** + /// + /// Before this test was added, `parse_sigstore_bundle` would fail + /// on the v0.3 shape (`verificationMaterial.certificate.rawBytes`) + /// and degrade to `Err(())`, which the install pipeline maps to + /// `Ok(None)` — never written to cache, so the same bundle re- + /// fetches on every install. Empirically this affected ~30 of + /// 254 packages on the bench/fixture-large fixture, costing + /// ~4.6 s of `prov_sum_ms` on every warm install. + /// + /// This test pins the v0.3 shape so any future refactor of + /// `find_leaf_cert_rawbytes` that drops the v0.3 branch fails + /// before it ships. + #[test] + fn parse_bundle_v3_single_cert_shape_extracts_identity_phase_51_regression() { + let der = cert_der_with_san_uri( + "https://github.com/iamkun/dayjs/.github/workflows/release.yml@refs/tags/1.11.20", + ); + let bundle = sigstore_bundle_v3_with_cert(&der); + let snap = parse_sigstore_bundle(bundle.to_string().as_bytes()).unwrap(); + + assert!(snap.present); + assert_eq!(snap.publisher.as_deref(), Some("github:iamkun/dayjs")); + assert_eq!( + snap.workflow_path.as_deref(), + Some(".github/workflows/release.yml"), + ); + assert_eq!(snap.workflow_ref.as_deref(), Some("refs/tags/1.11.20")); + } + + /// **Phase 51 regression — real-world npm attestations wrapper.** + /// + /// npm currently serves a 2-element list: index 0 is npm's own + /// publish attestation (publicKey-only, no Fulcio cert), index 1 + /// is the Fulcio-issued GitHub Actions provenance (v0.3 single- + /// cert). The parser must walk past the publicKey-only entry and + /// pick up the cert-bearing one. This test encodes the actual + /// production shape verified by curling + /// `registry.npmjs.org/-/npm/v1/attestations/` on 2026-04-25. + #[test] + fn parse_bundle_npm_real_world_skips_publickey_falls_through_to_v3_cert() { + let der = cert_der_with_san_uri( + "https://github.com/axios/axios/.github/workflows/publish.yml@refs/tags/v1.15.2", + ); + let wrapper = npm_attestations_real_world_shape(&der); + let snap = parse_sigstore_bundle(wrapper.to_string().as_bytes()).unwrap(); + + assert!( + snap.present, + "real-world npm shape (v0.2 publicKey + v0.3 cert) must \ + produce a present snapshot", + ); + assert_eq!(snap.publisher.as_deref(), Some("github:axios/axios")); + assert_eq!( + snap.workflow_path.as_deref(), + Some(".github/workflows/publish.yml"), + ); + assert_eq!(snap.workflow_ref.as_deref(), Some("refs/tags/v1.15.2")); + + // Cert SHA must hash the v0.3 leaf, not the npm publicKey + // entry. If the parser accidentally hashed the publicKey-only + // entry it would fail base64-decoding earlier, but pinning + // the SHA defends against a future refactor that swaps + // attestation order. + let expected_sha = format!("sha256-{}", hex::encode(Sha256::digest(&der))); + assert_eq!( + snap.attestation_cert_sha256.as_deref(), + Some(expected_sha.as_str()) + ); + } + + /// **Phase 51 regression — npm wrapper with no cert anywhere.** + /// + /// If npm ever ships only a publish attestation (no GitHub Actions + /// provenance), the wrapper is a single publicKey-only entry. The + /// parser must reject this — `present: false` is wrong (a publish + /// attestation IS present, just not a Fulcio one), and `Err(())` + /// degrades to `Ok(None)` (transient/unknown) which is the + /// correct semantic per the module-level fetch-failure docs. + #[test] + fn parse_bundle_npm_publickey_only_with_no_cert_yields_err() { + let wrapper = serde_json::json!({ + "attestations": [ + { "bundle": sigstore_bundle_publickey_only() } + ] + }); + assert!( + parse_sigstore_bundle(wrapper.to_string().as_bytes()).is_err(), + "npm wrapper containing only a publicKey-only attestation \ + (no Fulcio cert) must return Err so the caller treats it \ + as unknown rather than caching a falsely-absent snapshot", + ); + } + + /// **Phase 51 regression — v0.3 cert wins over v0.2 chain when both + /// are present.** + /// + /// Defensive: ensure the parser doesn't hash a stale v0.2 chain + /// entry when a v0.3 single-cert entry sits beside it under the + /// same `verificationMaterial`. Real bundles never put both, but + /// the lookup order must be stable: v0.2 chain first (legacy + /// path), then v0.3 single-cert. A future refactor that flips + /// the order would change the cert-sha output for any package + /// that grew a v0.3 entry, breaking the drift-check's content- + /// addressable identity. Pinning the order here makes that + /// breakage loud. + #[test] + fn find_leaf_cert_rawbytes_prefers_v2_chain_when_both_shapes_coexist() { + let v2_der = b"v2-chain-leaf-fake-der"; + let v3_der = b"v3-single-cert-fake-der"; + let bundle = serde_json::json!({ + "verificationMaterial": { + "x509CertificateChain": { + "certificates": [{ "rawBytes": BASE64.encode(v2_der) }] + }, + "certificate": { "rawBytes": BASE64.encode(v3_der) } + } + }); + let got = find_leaf_cert_rawbytes(&bundle).unwrap(); + assert_eq!( + got, + BASE64.encode(v2_der), + "v0.2 chain must take precedence so existing cache keys \ + stay stable; v0.3 single-cert is the fallback", + ); + } + #[test] fn parse_bundle_with_cert_but_no_extractable_identity_still_present() { // A cert with a non-GitHub SAN still produces a `present: