From 9da2e1e7231054437fbde2c7f8eaace9c1b67897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Sun, 9 Apr 2023 19:14:56 -0700 Subject: [PATCH] feat(isolated): add support for isolated dependency installation (#226) Fixes: https://github.com/orogene/orogene/issues/225 --- Cargo.lock | 11 + Cargo.toml | 1 + crates/node-maintainer/Cargo.toml | 3 + crates/node-maintainer/src/linkers/hoisted.rs | 106 +--- .../node-maintainer/src/linkers/isolated.rs | 580 ++++++++++++++++++ crates/node-maintainer/src/linkers/mod.rs | 109 +++- crates/node-maintainer/src/maintainer.rs | 65 +- crates/node-maintainer/src/resolver.rs | 8 +- src/commands/restore.rs | 70 ++- tests/snapshots/help__restore.snap | 14 +- 10 files changed, 829 insertions(+), 138 deletions(-) create mode 100644 crates/node-maintainer/src/linkers/isolated.rs diff --git a/Cargo.lock b/Cargo.lock index 53a5d6b3..e5c2cfe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1649,6 +1649,16 @@ dependencies = [ "serde", ] +[[package]] +name = "junction" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca39ef0d69b18e6a2fd14c2f0a1d593200f4a4ed949b240b5917ab51fac754cb" +dependencies = [ + "scopeguard", + "winapi", +] + [[package]] name = "kdl" version = "5.0.0-alpha.1" @@ -1936,6 +1946,7 @@ dependencies = [ "indicatif", "insta", "js-sys", + "junction", "kdl", "maplit", "miette", diff --git a/Cargo.toml b/Cargo.toml index e47f054d..3f1ba9c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,7 @@ http-cache-reqwest = "0.6.0" humansize = "1.1.0" insta = "1.28.0" js-sys = "0.3.61" +junction = "1.0.0" kdl = "5.0.0-alpha.1" maplit = "1.0.2" miette = "5.7.0" diff --git a/crates/node-maintainer/Cargo.toml b/crates/node-maintainer/Cargo.toml index aa15409a..3e37badb 100644 --- a/crates/node-maintainer/Cargo.toml +++ b/crates/node-maintainer/Cargo.toml @@ -30,6 +30,9 @@ tracing = { workspace = true } unicase = "2.6.0" url = { workspace = true } +[target.'cfg(windows)'.dependencies] +junction = { workspace = true } + [target.'cfg(not(target_arch = "wasm32"))'.dependencies] oro-script = { version = "=0.3.18", path = "../oro-script" } oro-shim-bin = { version = "=0.3.18", path = "../oro-shim-bin" } diff --git a/crates/node-maintainer/src/linkers/hoisted.rs b/crates/node-maintainer/src/linkers/hoisted.rs index bb3eb9ad..01d95389 100644 --- a/crates/node-maintainer/src/linkers/hoisted.rs +++ b/crates/node-maintainer/src/linkers/hoisted.rs @@ -1,6 +1,5 @@ use std::ffi::OsStr; use std::io::{BufRead, BufReader}; -use std::path::Path; use std::sync::atomic::AtomicUsize; use std::sync::{atomic, Arc}; @@ -12,7 +11,7 @@ use walkdir::WalkDir; use crate::error::NodeMaintainerError; use crate::graph::Graph; -use crate::META_FILE_NAME; +use crate::{META_FILE_NAME, STORE_DIR_NAME}; use super::LinkerOptions; @@ -20,25 +19,37 @@ pub(crate) struct HoistedLinker(pub(crate) LinkerOptions); impl HoistedLinker { pub async fn prune(&self, graph: &Graph) -> Result { + let start = std::time::Instant::now(); + let prefix = self.0.root.join("node_modules"); if !prefix.exists() { + tracing::debug!( + "Nothing to prune. Completed check in {}ms.", + start.elapsed().as_micros() / 1000 + ); return Ok(0); } - let start = std::time::Instant::now(); - - if self.0.actual_tree.is_none() { + if self.0.actual_tree.is_none() + || async_std::path::Path::new(&prefix.join(STORE_DIR_NAME)) + .exists() + .await + { // If there's no actual tree previously calculated, we can't trust // *anything* inside node_modules, so everything is immediately // extraneous and we wipe it all. Sorry. let mut entries = async_std::fs::read_dir(&prefix).await?; while let Some(entry) = entries.next().await { let entry = entry?; - if entry.file_type().await?.is_dir() { + let ty = entry.file_type().await?; + if ty.is_dir() { async_std::fs::remove_dir_all(entry.path()).await?; - } else { + } else if ty.is_file() { async_std::fs::remove_file(entry.path()).await?; + } else if ty.is_symlink() && async_std::fs::remove_file(entry.path()).await.is_err() + { + async_std::fs::remove_dir_all(entry.path()).await?; } } @@ -183,7 +194,7 @@ impl HoistedLinker { std::fs::create_dir_all(&node_modules)?; let prefer_copy = self.0.prefer_copy || match self.0.cache.as_deref() { - Some(cache) => supports_reflink(cache, &node_modules), + Some(cache) => super::supports_reflink(cache, &node_modules), None => false, }; let validate = self.0.validate; @@ -245,7 +256,7 @@ impl HoistedLinker { Ok(actually_extracted) } - pub async fn link_bins(&self, graph: &Graph) -> Result { + async fn link_bins(&self, graph: &Graph) -> Result { tracing::debug!("Linking bins..."); let start = std::time::Instant::now(); let root = &self.0.root; @@ -311,7 +322,7 @@ impl HoistedLinker { std::fs::remove_file(&to)?; } } - link_bin(&from, &to)?; + super::link_bin(&from, &to)?; tracing::trace!( "Linked bin for {} from {} to {}", name, @@ -357,24 +368,24 @@ impl HoistedLinker { Ok(()) } - pub async fn run_scripts(&self, graph: &Graph, event: &str) -> Result<(), NodeMaintainerError> { + async fn run_scripts(&self, graph: &Graph, event: &str) -> Result<(), NodeMaintainerError> { tracing::debug!("Running {event} lifecycle scripts"); let start = std::time::Instant::now(); let root = &self.0.root; futures::stream::iter(graph.inner.node_indices()) .map(Ok) .try_for_each_concurrent(self.0.script_concurrency, move |idx| async move { - if idx == graph.root { - return Ok::<_, NodeMaintainerError>(()); - } - - let subdir = graph - .node_path(idx) - .iter() - .map(|x| x.to_string()) - .collect::>() - .join("/node_modules/"); - let package_dir = root.join("node_modules").join(subdir); + let package_dir = if idx == graph.root { + root.clone() + } else { + let subdir = graph + .node_path(idx) + .iter() + .map(|x| x.to_string()) + .collect::>() + .join("/node_modules/"); + root.join("node_modules").join(subdir) + }; let build_mani = BuildManifest::from_path(package_dir.join("package.json")) .map_err(|e| { @@ -453,54 +464,3 @@ impl HoistedLinker { Ok(()) } } - -fn supports_reflink(src_dir: &Path, dest_dir: &Path) -> bool { - let temp = match tempfile::NamedTempFile::new_in(src_dir) { - Ok(t) => t, - Err(e) => { - tracing::debug!("error creating tempfile while checking for reflink support: {e}."); - return false; - } - }; - match std::fs::write(&temp, "a") { - Ok(_) => {} - Err(e) => { - tracing::debug!("error writing to tempfile while checking for reflink support: {e}."); - return false; - } - }; - let tempdir = match tempfile::TempDir::new_in(dest_dir) { - Ok(t) => t, - Err(e) => { - tracing::debug!( - "error creating destination tempdir while checking for reflink support: {e}." - ); - return false; - } - }; - let supports_reflink = reflink::reflink(temp.path(), tempdir.path().join("b")) - .map(|_| true) - .map_err(|e| { - tracing::debug!( - "reflink support check failed. Files will be hard linked or copied. ({e})" - ); - e - }) - .unwrap_or(false); - - if supports_reflink { - tracing::debug!("Verified reflink support. Extracted data will use copy-on-write reflinks instead of hard links or full copies.") - } - - supports_reflink -} - -fn link_bin(from: &Path, to: &Path) -> Result<(), NodeMaintainerError> { - #[cfg(windows)] - oro_shim_bin::shim_bin(from, to)?; - #[cfg(not(windows))] - { - std::os::unix::fs::symlink(from, to)?; - } - Ok(()) -} diff --git a/crates/node-maintainer/src/linkers/isolated.rs b/crates/node-maintainer/src/linkers/isolated.rs new file mode 100644 index 00000000..1ad7a9ee --- /dev/null +++ b/crates/node-maintainer/src/linkers/isolated.rs @@ -0,0 +1,580 @@ +use std::{ + collections::{HashMap, HashSet}, + io::{BufRead, BufReader}, + path::Path, + sync::{ + atomic::{self, AtomicUsize}, + Arc, + }, +}; + +use futures::{StreamExt, TryStreamExt}; +use oro_common::BuildManifest; +use oro_script::OroScript; +use petgraph::{stable_graph::NodeIndex, visit::EdgeRef, Direction}; +use ssri::Integrity; + +use crate::{graph::Graph, NodeMaintainerError, META_FILE_NAME, STORE_DIR_NAME}; + +use super::LinkerOptions; + +pub(crate) struct IsolatedLinker(pub(crate) LinkerOptions); + +impl IsolatedLinker { + pub async fn prune(&self, graph: &Graph) -> Result { + let start = std::time::Instant::now(); + + let prefix = self.0.root.join("node_modules"); + + if !prefix.exists() { + tracing::debug!( + "Nothing to prune. Completed check in {}ms.", + start.elapsed().as_micros() / 1000 + ); + return Ok(0); + } + + let store = prefix.join(STORE_DIR_NAME); + + if self.0.actual_tree.is_none() || !async_std::path::Path::new(&store).exists().await { + // If there's no actual tree previously calculated, we can't trust + // *anything* inside node_modules, so everything is immediately + // extraneous and we wipe it all. Sorry. + let mut entries = async_std::fs::read_dir(&prefix).await?; + while let Some(entry) = entries.next().await { + let entry = entry?; + let path = entry.path(); + let ty = entry.file_type().await?; + if ty.is_dir() { + async_std::fs::remove_dir_all(&path).await?; + } else if ty.is_file() { + async_std::fs::remove_file(&path).await?; + } else if ty.is_symlink() && async_std::fs::remove_file(entry.path()).await.is_err() + { + async_std::fs::remove_dir_all(&path).await?; + } + } + + tracing::debug!("No metadata file found in node_modules/. Pruned entire node_modules/ directory in {}ms.", start.elapsed().as_micros() / 1000); + + // TODO: get an accurate count here? + return Ok(0); + } + + let mut expected = HashSet::new(); + + let expected_mut = &mut expected; + let store_ref = &store; + // Clean out individual node_modules within + let indices = graph.inner.node_indices().map(move |idx| { + if idx != graph.root { + let pkg_store_dir = store_ref.join(package_dir_name(graph, idx)); + + expected_mut.insert(pkg_store_dir); + } + idx + }); + + let prefix_ref = &prefix; + futures::stream::iter(indices) + .map(Ok) + .try_for_each_concurrent(self.0.concurrency, move |idx| async move { + let pkg = &graph[idx].package; + + let pkg_nm = if idx == graph.root { + prefix_ref.to_owned() + } else { + store_ref + .join(package_dir_name(graph, idx)) + .join("node_modules") + .join(pkg.name()) + .join("node_modules") + }; + + let mut expected_deps = HashMap::new(); + + for edge in graph.inner.edges_directed(idx, Direction::Outgoing) { + let dep_pkg = &graph[edge.target()].package; + let dep_store_dir = async_std::path::PathBuf::from( + store_ref + .join(package_dir_name(graph, edge.target())) + .join("node_modules") + .join(dep_pkg.name()), + ); + let dep_nm_entry = async_std::path::PathBuf::from(pkg_nm.join(dep_pkg.name())); + expected_deps.insert(dep_nm_entry, dep_store_dir); + } + + if async_std::path::Path::new(&pkg_nm).exists().await { + let expected_ref = Arc::new(expected_deps); + + async_std::fs::read_dir(&pkg_nm) + .await? + .map(|e| Ok((e, expected_ref.clone()))) + .try_for_each(move |(entry, expected)| async move { + let entry = entry?; + let path = entry.path(); + if let Some(target) = expected.get(&path) { + let target = target.clone(); + let ty = entry.file_type().await?; + if ty.is_file() { + async_std::fs::remove_file(&path).await?; + } else if ty.is_dir() { + async_std::fs::remove_dir_all(&path).await?; + } else if ty.is_symlink() && target != path.read_link().await? { + if async_std::fs::remove_file(&path).await.is_err() { + async_std::fs::remove_dir_all(&path).await?; + } + } else if ty.is_dir() { + async_std::fs::remove_dir_all(&path).await?; + } else { + #[cfg(windows)] + let path_clone = path.clone(); + #[cfg(windows)] + if async_std::task::spawn_blocking(move || { + Ok::<_, std::io::Error>( + !junction::exists(&path_clone)? + || async_std::path::PathBuf::from( + &junction::get_target(&path_clone)?, + ) != target, + ) + }) + .await? + && async_std::fs::remove_file(&path).await.is_err() + { + async_std::fs::remove_dir_all(&path).await?; + } + } + } + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + } + + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + + let expected_ref = &expected; + + let pruned = Arc::new(AtomicUsize::new(0)); + + // Clean out any extraneous things in the store dir itself. We've + // already verified the store dir at least exists. + async_std::fs::read_dir(&store) + .await? + .map(|entry| Ok((entry, pruned.clone()))) + .try_for_each_concurrent(self.0.concurrency, move |(entry, pruned)| async move { + let entry = entry?; + let _path = entry.path(); + let path: &Path = _path.as_ref(); + if !expected_ref.contains(path) { + let ty = entry.file_type().await?; + if ty.is_dir() { + if path + .file_name() + .expect("must have filename") + .to_string_lossy() + .starts_with('@') + { + let mut iter = async_std::fs::read_dir(path).await?; + while let Some(next) = iter.next().await { + let next = next?; + if !expected_ref.contains::(&next.path().into()) + { + let ty = next.file_type().await?; + if ty.is_file() { + async_std::fs::remove_file(next.path()).await?; + } else if ty.is_dir() { + async_std::fs::remove_dir_all(next.path()).await?; + } else if ty.is_symlink() + && async_std::fs::remove_file(next.path()).await.is_err() + { + async_std::fs::remove_dir_all(next.path()).await?; + } + pruned.fetch_add(1, atomic::Ordering::SeqCst); + } + } + } else { + async_std::fs::remove_dir_all(entry.path()).await?; + pruned.fetch_add(1, atomic::Ordering::SeqCst); + } + } else if ty.is_file() { + async_std::fs::remove_file(entry.path()).await?; + pruned.fetch_add(1, atomic::Ordering::SeqCst); + } else if ty.is_symlink() + && async_std::fs::remove_file(entry.path()).await.is_err() + { + async_std::fs::remove_dir_all(entry.path()).await?; + pruned.fetch_add(1, atomic::Ordering::SeqCst); + } + } + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + + let pruned = pruned.load(atomic::Ordering::SeqCst); + if pruned == 0 { + tracing::debug!( + "Nothing to prune. Completed check in {}ms.", + start.elapsed().as_micros() / 1000 + ); + } else { + tracing::debug!( + "Pruned {pruned} extraneous package{} in {}ms.", + start.elapsed().as_micros() / 1000, + if pruned == 1 { "" } else { "s" }, + ); + } + Ok(pruned) + } + + pub async fn extract(&self, graph: &Graph) -> Result { + tracing::debug!("Applying node_modules/..."); + let start = std::time::Instant::now(); + + let root = &self.0.root; + let store = root.join("node_modules").join(STORE_DIR_NAME); + let store_ref = &store; + let stream = futures::stream::iter(graph.inner.node_indices()); + let concurrent_count = Arc::new(AtomicUsize::new(0)); + let actually_extracted = Arc::new(AtomicUsize::new(0)); + let total = graph.inner.node_count(); + let total_completed = Arc::new(AtomicUsize::new(0)); + let node_modules = root.join("node_modules"); + std::fs::create_dir_all(&node_modules)?; + let prefer_copy = self.0.prefer_copy + || match self.0.cache.as_deref() { + Some(cache) => super::supports_reflink(cache, &node_modules), + None => false, + }; + let validate = self.0.validate; + stream + .map(|idx| Ok((idx, concurrent_count.clone(), total_completed.clone(), actually_extracted.clone()))) + .try_for_each_concurrent( + self.0.concurrency, + move |(child_idx, concurrent_count, total_completed, actually_extracted)| async move { + if child_idx == graph.root { + link_deps(graph, child_idx, store_ref, &root.join("node_modules")).await?; + return Ok(()); + } + + concurrent_count.fetch_add(1, atomic::Ordering::SeqCst); + + let pkg = &graph[child_idx].package; + + // Actual package contents are extracted to + // `node_modules/.oro-store/-/node_modules/` + let target_dir = store_ref.join(package_dir_name(graph, child_idx)).join("node_modules").join(pkg.name()); + + let start = std::time::Instant::now(); + + if !target_dir.exists() { + graph[child_idx] + .package + .extract_to_dir(&target_dir, prefer_copy, validate) + .await?; + actually_extracted.fetch_add(1, atomic::Ordering::SeqCst); + } + + link_deps(graph, child_idx, store_ref, &target_dir.join("node_modules")).await?; + + if let Some(on_extract) = &self.0.on_extract_progress { + on_extract(&graph[child_idx].package); + } + + tracing::trace!( + in_flight = concurrent_count.fetch_sub(1, atomic::Ordering::SeqCst) - 1, + "Extracted {} to {} in {:?}ms. {}/{total} done.", + graph[child_idx].package.name(), + target_dir.display(), + start.elapsed().as_millis(), + total_completed.fetch_add(1, atomic::Ordering::SeqCst) + 1, + ); + + Ok::<_, NodeMaintainerError>(()) + }, + ) + .await?; + std::fs::write( + node_modules.join(META_FILE_NAME), + graph.to_kdl()?.to_string(), + )?; + let actually_extracted = actually_extracted.load(atomic::Ordering::SeqCst); + + tracing::debug!( + "Extracted {actually_extracted} package{} in {}ms.", + if actually_extracted == 1 { "" } else { "s" }, + start.elapsed().as_millis(), + ); + Ok(actually_extracted) + } + + async fn link_bins(&self, graph: &Graph) -> Result { + tracing::debug!("Linking bins..."); + let start = std::time::Instant::now(); + let root = &self.0.root; + let store = root.join("node_modules").join(STORE_DIR_NAME); + let store_ref = &store; + let linked = Arc::new(AtomicUsize::new(0)); + + futures::stream::iter(graph.inner.node_indices()) + .map(|idx| Ok((idx, linked.clone()))) + .try_for_each_concurrent(self.0.concurrency, move |(idx, linked)| async move { + if idx == graph.root { + let added = link_dep_bins( + graph, + idx, + store_ref, + &root.join("node_modules").join(".bin"), + ) + .await?; + linked.fetch_add(added, atomic::Ordering::SeqCst); + return Ok(()); + } + + let pkg = &graph[idx].package; + let pkg_bin_dir = store_ref + .join(package_dir_name(graph, idx)) + .join("node_modules") + .join(pkg.name()) + .join("node_modules") + .join(".bin"); + + let added = link_dep_bins(graph, idx, store_ref, &pkg_bin_dir).await?; + linked.fetch_add(added, atomic::Ordering::SeqCst); + + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + + let linked = linked.load(atomic::Ordering::SeqCst); + tracing::debug!( + "Linked {linked} package bins in {}ms.", + start.elapsed().as_millis() + ); + Ok(linked) + } + + pub async fn rebuild( + &self, + graph: &Graph, + ignore_scripts: bool, + ) -> Result<(), NodeMaintainerError> { + tracing::debug!("Running lifecycle scripts..."); + let start = std::time::Instant::now(); + if !ignore_scripts { + self.run_scripts(graph, "preinstall").await?; + } + self.link_bins(graph).await?; + if !ignore_scripts { + self.run_scripts(graph, "install").await?; + self.run_scripts(graph, "postinstall").await?; + } + tracing::debug!( + "Ran lifecycle scripts in {}ms.", + start.elapsed().as_millis() + ); + Ok(()) + } + + async fn run_scripts(&self, graph: &Graph, event: &str) -> Result<(), NodeMaintainerError> { + tracing::debug!("Running {event} lifecycle scripts"); + let start = std::time::Instant::now(); + let root = &self.0.root; + let store = root.join("node_modules").join(STORE_DIR_NAME); + let store_ref = &store; + futures::stream::iter(graph.inner.node_indices()) + .map(Ok) + .try_for_each_concurrent(self.0.script_concurrency, move |idx| async move { + let pkg_dir = if idx == graph.root { + root.clone() + } else { + let pkg = &graph[idx].package; + store_ref + .join(package_dir_name(graph, idx)) + .join("node_modules") + .join(pkg.name()) + }; + + let build_mani = + BuildManifest::from_path(pkg_dir.join("package.json")).map_err(|e| { + NodeMaintainerError::BuildManifestReadError(pkg_dir.join("package.json"), e) + })?; + + let name = graph[idx].package.name().to_string(); + if build_mani.scripts.contains_key(event) { + let package_dir = pkg_dir.clone(); + let package_dir_clone = package_dir.clone(); + let event = event.to_owned(); + let span = tracing::info_span!("script::{name}::{event}"); + let _span_enter = span.enter(); + if let Some(on_script_start) = &self.0.on_script_start { + on_script_start(&graph[idx].package, &event); + } + std::mem::drop(_span_enter); + let mut script = async_std::task::spawn_blocking(move || { + OroScript::new(package_dir, event)? + // For isolated installs, we don't add ancestor node_modules/.bin + .workspace_path(package_dir_clone) + .spawn() + }) + .await?; + let stdout = script.stdout.take(); + let stderr = script.stderr.take(); + let stdout_name = name.clone(); + let stderr_name = name.clone(); + let stdout_on_line = self.0.on_script_line.clone(); + let stderr_on_line = self.0.on_script_line.clone(); + let stdout_span = span; + let stderr_span = stdout_span.clone(); + futures::try_join!( + async_std::task::spawn_blocking(move || { + let _enter = stdout_span.enter(); + if let Some(stdout) = stdout { + for line in BufReader::new(stdout).lines() { + let line = line?; + tracing::debug!("stdout::{stdout_name}: {}", line); + if let Some(on_script_line) = &stdout_on_line { + on_script_line(&line); + } + } + } + Ok::<_, NodeMaintainerError>(()) + }), + async_std::task::spawn_blocking(move || { + let _enter = stderr_span.enter(); + if let Some(stderr) = stderr { + for line in BufReader::new(stderr).lines() { + let line = line?; + tracing::debug!("stderr::{stderr_name}: {}", line); + if let Some(on_script_line) = &stderr_on_line { + on_script_line(&line); + } + } + } + Ok::<_, NodeMaintainerError>(()) + }), + async_std::task::spawn_blocking(move || { + script.wait()?; + Ok::<_, NodeMaintainerError>(()) + }), + )?; + } + + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + tracing::debug!( + "Ran lifecycle scripts for {event} in {}ms.", + start.elapsed().as_millis() + ); + Ok(()) + } +} + +fn package_dir_name(graph: &Graph, idx: NodeIndex) -> String { + let pkg = &graph[idx].package; + let subdir = graph + .node_path(idx) + .iter() + .map(|x| x.to_string()) + .collect::>() + .join("/node_modules/"); + + let mut name = pkg.name().to_string(); + name.push('@'); + let (_, mut hex) = Integrity::from(subdir).to_hex(); + hex.truncate(8); + name.push_str(&hex); + name +} + +async fn link_deps( + graph: &Graph, + node: NodeIndex, + store_ref: &Path, + target_nm: &Path, +) -> Result<(), NodeMaintainerError> { + // Then we symlink/junction all of the package's dependencies into its `node_modules` dir. + for edge in graph.inner.edges_directed(node, Direction::Outgoing) { + let dep_pkg = &graph[edge.target()].package; + let dep_store_dir = store_ref + .join(package_dir_name(graph, edge.target())) + .join("node_modules") + .join(dep_pkg.name()); + let dep_nm_entry = target_nm.join(dep_pkg.name()); + if dep_nm_entry.exists() { + continue; + } + let relative = pathdiff::diff_paths( + &dep_store_dir, + dep_nm_entry.parent().expect("must have a parent"), + ) + .expect("this should never fail"); + async_std::task::spawn_blocking(move || { + std::fs::create_dir_all(dep_nm_entry.parent().expect("definitely has a parent"))?; + if dep_nm_entry.symlink_metadata().is_err() { + // We don't check the link target here because we assume prune() has already been run and removed any incorrect links. + #[cfg(windows)] + std::os::windows::fs::symlink_dir(&relative, &dep_nm_entry) + .or_else(|_| junction::create(&dep_store_dir, &dep_nm_entry))?; + #[cfg(unix)] + std::os::unix::fs::symlink(&relative, &dep_nm_entry)?; + } + Ok::<(), NodeMaintainerError>(()) + }) + .await?; + } + Ok(()) +} + +async fn link_dep_bins( + graph: &Graph, + node: NodeIndex, + store_ref: &Path, + target_bin: &Path, +) -> Result { + let mut linked = 0; + for edge in graph.inner.edges_directed(node, Direction::Outgoing) { + let dep_pkg = &graph[edge.target()].package; + let dep_store_dir = store_ref + .join(package_dir_name(graph, edge.target())) + .join("node_modules") + .join(dep_pkg.name()); + let build_mani = + BuildManifest::from_path(dep_store_dir.join("package.json")).map_err(|e| { + NodeMaintainerError::BuildManifestReadError(dep_store_dir.join("package.json"), e) + })?; + for (name, path) in &build_mani.bin { + let target_bin = target_bin.to_owned(); + let to = target_bin.join(name); + let from = dep_store_dir.join(path); + let name = name.clone(); + async_std::task::spawn_blocking(move || { + // We only create a symlink if the target bin exists. + if from.symlink_metadata().is_ok() { + std::fs::create_dir_all(target_bin)?; + if let Ok(meta) = to.symlink_metadata() { + if meta.is_dir() { + std::fs::remove_dir_all(&to)?; + } else { + std::fs::remove_file(&to)?; + } + } + super::link_bin(&from, &to)?; + tracing::trace!( + "Linked bin for {} from {} to {}", + name, + from.display(), + to.display() + ); + } + Ok::<_, NodeMaintainerError>(()) + }) + .await?; + linked += 1; + } + } + Ok(linked) +} diff --git a/crates/node-maintainer/src/linkers/mod.rs b/crates/node-maintainer/src/linkers/mod.rs index fde2a97e..188e3f44 100644 --- a/crates/node-maintainer/src/linkers/mod.rs +++ b/crates/node-maintainer/src/linkers/mod.rs @@ -1,16 +1,23 @@ #[cfg(not(target_arch = "wasm32"))] mod hoisted; +#[cfg(not(target_arch = "wasm32"))] +mod isolated; -use std::path::PathBuf; +#[cfg(not(target_arch = "wasm32"))] +use std::path::{Path, PathBuf}; #[cfg(not(target_arch = "wasm32"))] use hoisted::HoistedLinker; +#[cfg(not(target_arch = "wasm32"))] +use isolated::IsolatedLinker; +#[cfg(not(target_arch = "wasm32"))] use crate::{ graph::Graph, Lockfile, NodeMaintainerError, ProgressHandler, PruneProgress, ScriptLineHandler, ScriptStartHandler, }; +#[cfg(not(target_arch = "wasm32"))] pub(crate) struct LinkerOptions { pub(crate) concurrency: usize, pub(crate) actual_tree: Option, @@ -24,7 +31,10 @@ pub(crate) struct LinkerOptions { pub(crate) on_script_start: Option, pub(crate) on_script_line: Option, } + pub(crate) enum Linker { + #[cfg(not(target_arch = "wasm32"))] + Isolated(IsolatedLinker), #[cfg(not(target_arch = "wasm32"))] Hoisted(HoistedLinker), #[allow(dead_code)] @@ -32,6 +42,11 @@ pub(crate) enum Linker { } impl Linker { + #[cfg(not(target_arch = "wasm32"))] + pub fn isolated(opts: LinkerOptions) -> Self { + Self::Isolated(IsolatedLinker(opts)) + } + #[cfg(not(target_arch = "wasm32"))] pub fn hoisted(opts: LinkerOptions) -> Self { Self::Hoisted(HoistedLinker(opts)) @@ -42,47 +57,105 @@ impl Linker { Self::Null } - pub async fn prune(&self, graph: &Graph) -> Result { + #[cfg(not(target_arch = "wasm32"))] + pub async fn prune( + &self, + #[allow(dead_code)] graph: &Graph, + ) -> Result { match self { + #[cfg(not(target_arch = "wasm32"))] + Self::Isolated(isolated) => isolated.prune(graph).await, #[cfg(not(target_arch = "wasm32"))] Self::Hoisted(hoisted) => hoisted.prune(graph).await, Self::Null => Ok(0), } } - pub async fn extract(&self, graph: &Graph) -> Result { + #[cfg(not(target_arch = "wasm32"))] + pub async fn extract( + &self, + #[allow(dead_code)] graph: &Graph, + ) -> Result { match self { #[cfg(not(target_arch = "wasm32"))] - Self::Hoisted(hoisted) => hoisted.extract(graph).await, - Self::Null => Ok(0), - } - } - - pub async fn link_bins(&self, graph: &Graph) -> Result { - match self { + Self::Isolated(isolated) => isolated.extract(graph).await, #[cfg(not(target_arch = "wasm32"))] - Self::Hoisted(hoisted) => hoisted.link_bins(graph).await, + Self::Hoisted(hoisted) => hoisted.extract(graph).await, Self::Null => Ok(0), } } + #[cfg(not(target_arch = "wasm32"))] pub async fn rebuild( &self, - graph: &Graph, - ignore_scripts: bool, + #[allow(dead_code)] graph: &Graph, + #[allow(dead_code)] ignore_scripts: bool, ) -> Result<(), NodeMaintainerError> { match self { + #[cfg(not(target_arch = "wasm32"))] + Self::Isolated(isolated) => isolated.rebuild(graph, ignore_scripts).await, #[cfg(not(target_arch = "wasm32"))] Self::Hoisted(hoisted) => hoisted.rebuild(graph, ignore_scripts).await, Self::Null => Ok(()), } } +} - pub async fn run_scripts(&self, graph: &Graph, event: &str) -> Result<(), NodeMaintainerError> { - match self { - #[cfg(not(target_arch = "wasm32"))] - Self::Hoisted(hoisted) => hoisted.run_scripts(graph, event).await, - Self::Null => Ok(()), +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn supports_reflink(src_dir: &Path, dest_dir: &Path) -> bool { + let temp = match tempfile::NamedTempFile::new_in(src_dir) { + Ok(t) => t, + Err(e) => { + tracing::debug!("error creating tempfile while checking for reflink support: {e}."); + return false; + } + }; + match std::fs::write(&temp, "a") { + Ok(_) => {} + Err(e) => { + tracing::debug!("error writing to tempfile while checking for reflink support: {e}."); + return false; + } + }; + let tempdir = match tempfile::TempDir::new_in(dest_dir) { + Ok(t) => t, + Err(e) => { + tracing::debug!( + "error creating destination tempdir while checking for reflink support: {e}." + ); + return false; } + }; + let supports_reflink = reflink::reflink(temp.path(), tempdir.path().join("b")) + .map(|_| true) + .map_err(|e| { + tracing::debug!( + "reflink support check failed. Files will be hard linked or copied. ({e})" + ); + e + }) + .unwrap_or(false); + + if supports_reflink { + tracing::debug!("Verified reflink support. Extracted data will use copy-on-write reflinks instead of hard links or full copies.") + } + + supports_reflink +} + +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn link_bin(from: &Path, to: &Path) -> Result<(), NodeMaintainerError> { + #[cfg(windows)] + oro_shim_bin::shim_bin(from, to)?; + #[cfg(not(windows))] + { + use std::os::unix::fs::PermissionsExt; + let meta = from.metadata()?; + let mut perms = meta.permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(from, perms)?; + let relative = pathdiff::diff_paths(from, to.parent().unwrap()).unwrap(); + std::os::unix::fs::symlink(relative, to)?; } + Ok(()) } diff --git a/crates/node-maintainer/src/maintainer.rs b/crates/node-maintainer/src/maintainer.rs index b78cae8e..3904c03c 100644 --- a/crates/node-maintainer/src/maintainer.rs +++ b/crates/node-maintainer/src/maintainer.rs @@ -10,13 +10,16 @@ use url::Url; use crate::error::NodeMaintainerError; use crate::graph::{Graph, Node}; -use crate::linkers::{Linker, LinkerOptions}; +use crate::linkers::Linker; +#[cfg(not(target_arch = "wasm32"))] +use crate::linkers::LinkerOptions; use crate::resolver::Resolver; use crate::{IntoKdl, Lockfile}; pub const DEFAULT_CONCURRENCY: usize = 50; pub const DEFAULT_SCRIPT_CONCURRENCY: usize = 6; pub const META_FILE_NAME: &str = ".orogene-meta.kdl"; +pub const STORE_DIR_NAME: &str = ".oro-store"; pub type ProgressAdded = Arc; pub type ProgressHandler = Arc; @@ -31,6 +34,8 @@ pub struct NodeMaintainerOptions { kdl_lock: Option, npm_lock: Option, + #[allow(dead_code)] + hoisted: bool, #[allow(dead_code)] script_concurrency: usize, #[allow(dead_code)] @@ -45,9 +50,13 @@ pub struct NodeMaintainerOptions { // Intended for progress bars on_resolution_added: Option, on_resolve_progress: Option, + #[allow(dead_code)] on_prune_progress: Option, + #[allow(dead_code)] on_extract_progress: Option, + #[allow(dead_code)] on_script_start: Option, + #[allow(dead_code)] on_script_line: Option, } @@ -153,6 +162,16 @@ impl NodeMaintainerOptions { self } + /// Use the hoisted installation mode, where all dependencies and their + /// transitive dependencies are installed as high up in the `node_modules` + /// tree as possible. This can potentially mean that packages have access + /// to dependencies they did not specify in their package.json, but it + /// might be useful for compatibility. + pub fn hoisted(mut self, hoisted: bool) -> Self { + self.hoisted = hoisted; + self + } + pub fn on_resolution_added(mut self, f: F) -> Self where F: Fn() + Send + Sync + 'static, @@ -273,10 +292,10 @@ impl NodeMaintainerOptions { }; let node = resolver.graph.inner.add_node(Node::new(root_pkg, root)); resolver.graph[node].root = node; - let (graph, actual_tree) = resolver.run_resolver(lockfile).await?; + let (graph, _actual_tree) = resolver.run_resolver(lockfile).await?; #[cfg(not(target_arch = "wasm32"))] let linker = Linker::hoisted(LinkerOptions { - actual_tree, + actual_tree: _actual_tree, concurrency: self.concurrency, script_concurrency: self.script_concurrency, cache: self.cache, @@ -318,10 +337,10 @@ impl NodeMaintainerOptions { let corgi = root_pkg.corgi_metadata().await?.manifest; let node = resolver.graph.inner.add_node(Node::new(root_pkg, corgi)); resolver.graph[node].root = node; - let (graph, actual_tree) = resolver.run_resolver(lockfile).await?; + let (graph, _actual_tree) = resolver.run_resolver(lockfile).await?; #[cfg(not(target_arch = "wasm32"))] - let linker = Linker::hoisted(LinkerOptions { - actual_tree, + let linker_opts = LinkerOptions { + actual_tree: _actual_tree, concurrency: self.concurrency, script_concurrency: self.script_concurrency, cache: self.cache, @@ -332,10 +351,18 @@ impl NodeMaintainerOptions { on_extract_progress: self.on_extract_progress, on_script_start: self.on_script_start, on_script_line: self.on_script_line, - }); - #[cfg(target_arch = "wasm32")] - let linker = Linker::null(); - let nm = NodeMaintainer { graph, linker }; + }; + let nm = NodeMaintainer { + graph, + #[cfg(target_arch = "wasm32")] + linker: Linker::null(), + #[cfg(not(target_arch = "wasm32"))] + linker: if self.hoisted { + Linker::hoisted(linker_opts) + } else { + Linker::isolated(linker_opts) + }, + }; #[cfg(debug_assertions)] nm.graph.validate()?; Ok(nm) @@ -351,6 +378,7 @@ impl Default for NodeMaintainerOptions { npm_lock: None, script_concurrency: DEFAULT_SCRIPT_CONCURRENCY, cache: None, + hoisted: false, prefer_copy: false, validate: false, root: None, @@ -366,7 +394,7 @@ impl Default for NodeMaintainerOptions { /// Resolves and manages `node_modules` for a given project. pub struct NodeMaintainer { - graph: Graph, + pub(crate) graph: Graph, #[allow(dead_code)] linker: Linker, } @@ -441,25 +469,10 @@ impl NodeMaintainer { self.linker.extract(&self.graph).await } - /// Links package binaries to their corresponding `node_modules/.bin` - /// directories. On Windows, this will create `.cmd`, `.ps1`, and `sh` - /// shims instead of link directly to the bins. - #[cfg(not(target_arch = "wasm32"))] - pub async fn link_bins(&self) -> Result { - self.linker.link_bins(&self.graph).await - } - /// Runs the `preinstall`, `install`, and `postinstall` lifecycle scripts, /// as well as linking the package bins as needed. #[cfg(not(target_arch = "wasm32"))] pub async fn rebuild(&self, ignore_scripts: bool) -> Result<(), NodeMaintainerError> { self.linker.rebuild(&self.graph, ignore_scripts).await } - - /// Concurrently executes the lifecycle scripts for the given event across - /// all packages in the graph. - #[cfg(not(target_arch = "wasm32"))] - pub async fn run_scripts(&self, event: impl AsRef) -> Result<(), NodeMaintainerError> { - self.linker.run_scripts(&self.graph, event.as_ref()).await - } } diff --git a/crates/node-maintainer/src/resolver.rs b/crates/node-maintainer/src/resolver.rs index d4b0beef..137fa6c1 100644 --- a/crates/node-maintainer/src/resolver.rs +++ b/crates/node-maintainer/src/resolver.rs @@ -7,7 +7,9 @@ use async_std::sync::Mutex; #[cfg(not(target_arch = "wasm32"))] use colored::Colorize; use futures::{StreamExt, TryFutureExt}; -use nassun::{Nassun, Package, PackageSpec}; +use nassun::client::Nassun; +use nassun::package::Package; +use nassun::PackageSpec; use oro_common::{CorgiManifest, CorgiVersionMetadata}; use petgraph::stable_graph::NodeIndex; use petgraph::visit::EdgeRef; @@ -16,7 +18,9 @@ use unicase::UniCase; use crate::error::NodeMaintainerError; use crate::graph::{DepType, Edge, Graph, Node}; -use crate::{Lockfile, LockfileNode, ProgressAdded, ProgressHandler, META_FILE_NAME}; +#[cfg(not(target_arch = "wasm32"))] +use crate::META_FILE_NAME; +use crate::{Lockfile, LockfileNode, ProgressAdded, ProgressHandler}; #[derive(Debug, Clone)] struct NodeDependency { diff --git a/src/commands/restore.rs b/src/commands/restore.rs index 7c3af4a0..9d6fe45d 100644 --- a/src/commands/restore.rs +++ b/src/commands/restore.rs @@ -63,9 +63,26 @@ pub struct RestoreCmd { #[arg(long, default_value_t = node_maintainer::DEFAULT_SCRIPT_CONCURRENCY)] script_concurrency: usize, - /// Skip writing the lockfile. + /// Whether to write the lockfile after operations complete. Disable by + /// using `--no-lockfile`. + /// + /// Note that lockfiles are only written after all operations complete + /// successfully. + #[arg(long)] + lockfile: bool, + + /// Use the hoisted installation mode, where all dependencies and their + /// transitive dependencies are installed as high up in the `node_modules` + /// tree as possible. + /// + /// This can potentially mean that packages have access to dependencies + /// they did not specify in their package.json, but it might be useful for + /// compatibility. + /// + /// By default, dependencies are installed in "isolated" mode, using a + /// symlink/junction structure to simulate a dependency tree. #[arg(long)] - no_lockfile: bool, + hoisted: bool, #[arg(from_global)] registry: Url, @@ -104,7 +121,7 @@ impl OroCommand for RestoreCmd { ); } - if !self.no_lockfile { + if self.lockfile { maintainer .write_lockfile(root.join("package-lock.kdl")) .await?; @@ -137,6 +154,7 @@ impl RestoreCmd { .root(root) .prefer_copy(self.prefer_copy) .validate(self.validate) + .hoisted(self.hoisted) .on_resolution_added(move || { Span::current().pb_inc_length(1); }) @@ -282,24 +300,38 @@ impl RestoreCmd { async fn rebuild(&self, maintainer: &NodeMaintainer) -> Result<()> { let script_time = std::time::Instant::now(); - let script_span = tracing::info_span!("Building"); - script_span.pb_set_style( - &ProgressStyle::default_bar() - .template(&format!( - "{{spinner}} {}Running scripts {{wide_msg:.dim}}", - self.emoji_run(), - )) - .unwrap(), - ); + let script_span = if self.ignore_scripts { + tracing::debug_span!("Building") + } else { + tracing::info_span!("Building") + }; + if !self.ignore_scripts { + script_span.pb_set_style( + &ProgressStyle::default_bar() + .template(&format!( + "{{spinner}} {}Running scripts {{wide_msg:.dim}}", + self.emoji_run(), + )) + .unwrap(), + ); + } maintainer .rebuild(self.ignore_scripts) .instrument(script_span) .await?; - tracing::info!( - "{}Ran lifecycle scripts in {}s.", - self.emoji_run(), - script_time.elapsed().as_millis() as f32 / 1000.0 - ); + if self.ignore_scripts { + tracing::info!( + "{}Linked script bins in {}s.", + self.emoji_link(), + script_time.elapsed().as_millis() as f32 / 1000.0 + ); + } else { + tracing::info!( + "{}Ran lifecycle scripts in {}s.", + self.emoji_run(), + script_time.elapsed().as_millis() as f32 / 1000.0 + ); + } Ok(()) } @@ -327,6 +359,10 @@ impl RestoreCmd { self.maybe_emoji("🎉 ") } + fn emoji_link(&self) -> &'static str { + self.maybe_emoji("🔗 ") + } + fn maybe_emoji(&self, emoji: &'static str) -> &'static str { if self.emoji { emoji diff --git a/tests/snapshots/help__restore.snap b/tests/snapshots/help__restore.snap index b01246cb..77fa2628 100644 --- a/tests/snapshots/help__restore.snap +++ b/tests/snapshots/help__restore.snap @@ -61,9 +61,19 @@ This option is separate from `concurrency` because executing concurrent scripts \[default: 6] -#### `--no-lockfile` +#### `--lockfile` -Skip writing the lockfile +Whether to write the lockfile after operations complete. Disable by using `--no-lockfile`. + +Note that lockfiles are only written after all operations complete successfully. + +#### `--hoisted` + +Use the hoisted installation mode, where all dependencies and their transitive dependencies are installed as high up in the `node_modules` tree as possible. + +This can potentially mean that packages have access to dependencies they did not specify in their package.json, but it might be useful for compatibility. + +By default, dependencies are installed in "isolated" mode, using a symlink/junction structure to simulate a dependency tree. #### `-h, --help`