From f01ed09d4b24db42e3901feebfc5bbc2dc0f4254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Mon, 24 Apr 2023 15:16:10 -0700 Subject: [PATCH] fix(bin): semi-preserve file modes and make binaries executable (#259) Fixes: https://github.com/orogene/orogene/issues/250 This makes sure that: 1. All files extracted are writable. 2. The mode that was set on the tarball files is preserved (while ensuring rw perms for owner) 3. Any files being used as package bins are executable --- Cargo.lock | 89 ++++++++++++ Cargo.toml | 1 + crates/nassun/Cargo.toml | 1 + crates/nassun/src/error.rs | 10 ++ crates/nassun/src/package.rs | 39 ++++-- crates/nassun/src/tarball.rs | 128 ++++++++++++++++-- crates/node-maintainer/src/linkers/hoisted.rs | 13 +- .../node-maintainer/src/linkers/isolated.rs | 13 +- 8 files changed, 264 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06de72b1..0481c116 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,6 +400,28 @@ version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +[[package]] +name = "bytecheck" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13fe11640a23eb24562225322cd3e452b93a3d4091d62fab69c70542fcd17d1f" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31225543cb46f81a7e224762764f4a6a0f097b1db0b175f69e8065efaa42de5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "bytecount" version = "0.6.3" @@ -1901,6 +1923,7 @@ dependencies = [ "oro-client", "oro-common", "oro-package-spec", + "rkyv", "serde", "serde-wasm-bindgen", "serde_json", @@ -2512,6 +2535,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quote" version = "1.0.26" @@ -2663,6 +2706,15 @@ version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +[[package]] +name = "rend" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.11.16" @@ -2753,6 +2805,31 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "rkyv" +version = "0.7.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21499ed91807f07ae081880aabb2ccc0235e9d88011867d984525e9a4c3cfa3e" +dependencies = [ + "bytecheck", + "hashbrown", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac1c672430eb41556291981f45ca900a0239ad007242d1cb4b4167af842db666" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "ron" version = "0.7.1" @@ -2877,6 +2954,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" version = "2.8.2" @@ -3044,6 +3127,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.2.1" diff --git a/Cargo.toml b/Cargo.toml index aa243018..5dd11f5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,6 +111,7 @@ regex = "1.7.2" reqwest = "0.11.14" reqwest-middleware = "0.2.0" resvg = "0.29.0" +rkyv = "0.7.41" serde = "1.0.152" serde_json = "1.0.93" serde-wasm-bindgen = "0.4.5" diff --git a/crates/nassun/Cargo.toml b/crates/nassun/Cargo.toml index e5ba0800..27d877aa 100644 --- a/crates/nassun/Cargo.toml +++ b/crates/nassun/Cargo.toml @@ -49,6 +49,7 @@ backon = { workspace = true } cacache = { workspace = true } flate2 = { workspace = true } io_tee = { workspace = true } +rkyv = { workspace = true, features = ["validation"] } tar = { workspace = true } tempfile = { workspace = true } which = { workspace = true } diff --git a/crates/nassun/src/error.rs b/crates/nassun/src/error.rs index 53970f8c..8d390d89 100644 --- a/crates/nassun/src/error.rs +++ b/crates/nassun/src/error.rs @@ -69,6 +69,11 @@ pub enum NassunError { #[diagnostic(code(nassun::cache::extract), url(docsrs))] ExtractCacheError(#[source] cacache::Error, Option), + #[cfg(not(target_arch = "wasm32"))] + #[error("Missing file index for cache entry for {0}.")] + #[diagnostic(code(nassun::cache::missing_index), url(docsrs))] + CacheMissingIndexError(String), + /// A generic IO error occurred. Refer tot he error message for more /// details. #[error(transparent)] @@ -155,6 +160,11 @@ pub enum NassunError { #[diagnostic(code(nassun::dummy_no_name), url(docsrs))] DummyNoName, + /// An error occurred while serializing tarball metadata to cache. + #[error("Failed to serialize tarball metadata to cache: {0}")] + #[diagnostic(code(nassun::cache::serialize), url(docsrs))] + SerializeCacheError(String), + /// A miscellaneous, usually internal error. This is used mainly to wrap /// either manual InternalErrors, or those using external errors that /// don't implement std::error::Error. diff --git a/crates/nassun/src/package.rs b/crates/nassun/src/package.rs index de49ea2c..552e69ef 100644 --- a/crates/nassun/src/package.rs +++ b/crates/nassun/src/package.rs @@ -15,6 +15,8 @@ use crate::error::Result; use crate::fetch::PackageFetcher; use crate::resolver::PackageResolution; use crate::tarball::Tarball; +#[cfg(not(target_arch = "wasm32"))] +use crate::tarball::TarballIndex; /// A resolved package. A concrete version has been determined from its /// PackageSpec by the version resolver. @@ -254,20 +256,26 @@ impl Package { dir: &Path, cache: &Path, entry: cacache::Metadata, - prefer_copy: bool, + mut prefer_copy: bool, validate: bool, ) -> Result<()> { let dir = PathBuf::from(dir); let cache = PathBuf::from(cache); + let name = self.name().to_owned(); async_std::task::spawn_blocking(move || { let mut created = std::collections::HashSet::new(); - let map = entry - .metadata - .as_object() - .expect("how is this not an object?"); - for (path, sri) in map { - let sri: Integrity = sri.as_str().expect("how is this not a string?").parse()?; - let path = dir.join(path); + let index = unsafe { + rkyv::util::archived_root::( + entry + .raw_metadata + .as_ref() + .ok_or_else(|| NassunError::CacheMissingIndexError(name))?, + ) + }; + prefer_copy = index.should_copy || prefer_copy; + for (path, (sri, mode)) in index.files.iter() { + let sri: Integrity = sri.parse()?; + let path = dir.join(&path[..]); let parent = PathBuf::from(path.parent().expect("this will always have a parent")); if !created.contains(&parent) { std::fs::create_dir_all(path.parent().expect("this will always have a parent")) @@ -281,7 +289,20 @@ impl Package { created.insert(parent); } - crate::tarball::extract_from_cache(&cache, &sri, &path, prefer_copy, validate)?; + crate::tarball::extract_from_cache( + &cache, + &sri, + &path, + prefer_copy, + validate, + *mode, + )?; + } + #[cfg(unix)] + for binpath in index.bin_paths.iter() { + { + crate::tarball::set_bin_mode(&dir.join(&binpath[..]))?; + } } Ok::<_, NassunError>(()) }) diff --git a/crates/nassun/src/tarball.rs b/crates/nassun/src/tarball.rs index f3e707d2..1993cea9 100644 --- a/crates/nassun/src/tarball.rs +++ b/crates/nassun/src/tarball.rs @@ -1,4 +1,6 @@ #[cfg(not(target_arch = "wasm32"))] +use std::collections::HashMap; +#[cfg(not(target_arch = "wasm32"))] use std::io::Write; #[cfg(not(target_arch = "wasm32"))] use std::io::{Read, Seek}; @@ -20,6 +22,8 @@ use cacache::WriteOpts; use futures::AsyncReadExt; use futures::{AsyncRead, StreamExt}; #[cfg(not(target_arch = "wasm32"))] +use oro_common::BuildManifest; +#[cfg(not(target_arch = "wasm32"))] use ssri::IntegrityOpts; use ssri::{Integrity, IntegrityChecker}; #[cfg(not(target_arch = "wasm32"))] @@ -200,9 +204,10 @@ impl TempTarball { dir: &Path, tarball_integrity: Option, cache: Option<&Path>, - prefer_copy: bool, + mut prefer_copy: bool, ) -> Result { - let mut file_index = serde_json::Map::new(); + let mut build_mani: Option = None; + let mut tarball_index = TarballIndex::default(); let mut drain_buf = [0u8; 1024 * 8]; self.rewind()?; @@ -233,6 +238,7 @@ impl TempTarball { ) })?; let header = file.header(); + let mode = header.mode().unwrap_or(0o644) | 0o600; let entry_path = header.path().map_err(|e| { NassunError::ExtractIoError(e, None, "reading path from entry header.".into()) })?; @@ -266,12 +272,43 @@ impl TempTarball { .commit() .map_err(|e| NassunError::ExtractCacheError(e, Some(path.clone())))?; - extract_from_cache(cache, &sri, &path, prefer_copy, false)?; - - file_index.insert( - entry_subpath.to_string_lossy().into(), - sri.to_string().into(), - ); + extract_from_cache(cache, &sri, &path, prefer_copy, false, mode)?; + + let entry_subpath = entry_subpath.to_string_lossy().to_string(); + + // We check whether the package has any install scripts. + // If so, we need to re-extract all previous files as full + // copies and mark the package as having scripts in it. + if entry_subpath == "package.json" { + let manifest = BuildManifest::from_path(&path)?; + if ["preinstall", "install", "postinstall"] + .iter() + .any(|s| manifest.scripts.contains_key(*s)) + || !manifest.bin.is_empty() + { + tarball_index.should_copy = true; + if !prefer_copy { + prefer_copy = true; + for (entry, (sri, mode)) in &tarball_index.files { + let path = dir.join(entry); + std::fs::remove_file(&path)?; + let sri = sri.parse()?; + extract_from_cache( + cache, + &sri, + &path, + prefer_copy, + false, + *mode, + )?; + } + } + } + build_mani = Some(manifest); + } + tarball_index + .files + .insert(entry_subpath, (sri.to_string(), mode)); } else { let mut writer = std::fs::OpenOptions::new() .write(true) @@ -289,10 +326,22 @@ impl TempTarball { std::io::copy(&mut file, &mut writer).map_err(|e| { NassunError::ExtractIoError( e, - Some(path), + Some(path.clone()), "Copying file to node_modules destination.".into(), ) })?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(mode)) + .map_err(|e| { + NassunError::ExtractIoError( + e, + Some(path.clone()), + "setting permissions on extracted file.".into(), + ) + })?; + } } } else { loop { @@ -306,6 +355,16 @@ impl TempTarball { } } + if let Some(BuildManifest { bin, .. }) = &build_mani { + for binpath in bin.values() { + tarball_index + .bin_paths + .push(binpath.to_string_lossy().to_string()); + #[cfg(unix)] + set_bin_mode(&dir.join(binpath))?; + } + } + // Drain the rest of the tarball to make sure we have its full // contents (there can be trailing data); loop { @@ -326,7 +385,11 @@ impl TempTarball { WriteOpts::new() // This is just so the index entry is loadable. .integrity("sha256-deadbeef".parse().unwrap()) - .metadata(file_index.into()), + .raw_metadata( + rkyv::util::to_bytes::<_, 1024>(&tarball_index) + .map_err(|e| NassunError::SerializeCacheError(format!("{e}")))? + .into_vec(), + ), ) .map_err(|e| NassunError::ExtractCacheError(e, None))?; } @@ -355,6 +418,15 @@ impl std::io::Seek for TempTarball { } } +#[cfg(not(target_arch = "wasm32"))] +#[derive(rkyv::Archive, rkyv::Serialize, Default)] +#[archive(check_bytes)] +pub(crate) struct TarballIndex { + pub(crate) should_copy: bool, + pub(crate) bin_paths: Vec, + pub(crate) files: HashMap, +} + #[cfg(not(target_arch = "wasm32"))] fn strip_one(path: &Path) -> Option<&Path> { let mut comps = path.components(); @@ -373,10 +445,8 @@ pub(crate) fn extract_from_cache( to: &Path, prefer_copy: bool, validate: bool, + #[allow(unused_variables)] mode: u32, ) -> Result<()> { - // TODO: enable `prefer_copy` if the package has install scripts. Maybe - // this can be cached as part of tarball caching, right in the object, and - // we make the determination at that point. if prefer_copy { copy_from_cache(cache, sri, to, validate)?; } else { @@ -396,6 +466,38 @@ pub(crate) fn extract_from_cache( .call() .or_else(|_| copy_from_cache(cache, sri, to, validate))?; } + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(to, std::fs::Permissions::from_mode(mode)).map_err(|e| { + NassunError::ExtractIoError( + e, + Some(to.to_path_buf()), + "setting permissions on extracted file.".into(), + ) + })?; + } + Ok(()) +} + +#[cfg(unix)] +pub(crate) fn set_bin_mode(path: &Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + let metadata = std::fs::metadata(path).map_err(|e| { + NassunError::ExtractIoError( + e, + Some(path.to_path_buf()), + "Getting extracted file metadata.".into(), + ) + })?; + let mode = metadata.permissions().mode(); + std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode | 0o111)).map_err(|e| { + NassunError::ExtractIoError( + e, + Some(path.to_path_buf()), + "setting permissions on extracted file.".into(), + ) + })?; Ok(()) } diff --git a/crates/node-maintainer/src/linkers/hoisted.rs b/crates/node-maintainer/src/linkers/hoisted.rs index 2e9beee3..09c46863 100644 --- a/crates/node-maintainer/src/linkers/hoisted.rs +++ b/crates/node-maintainer/src/linkers/hoisted.rs @@ -402,14 +402,15 @@ impl HoistedLinker { let package_dir = package_dir.clone(); let root = root.clone(); let event = event.to_owned(); - let span = tracing::info_span!("script::{name}::{event}"); + let event_clone = event.clone(); + let span = tracing::info_span!("script"); let _span_enter = span.enter(); if let Some(on_script_start) = &self.0.on_script_start { on_script_start(&graph[idx].package, &event); } std::mem::drop(_span_enter); let mut script = match async_std::task::spawn_blocking(move || { - OroScript::new(package_dir, event)? + OroScript::new(package_dir, event_clone)? .workspace_path(root) .spawn() }) @@ -431,13 +432,14 @@ impl HoistedLinker { let stderr_on_line = self.0.on_script_line.clone(); let stdout_span = span; let stderr_span = stdout_span.clone(); + let event_clone = event.clone(); let join = futures::try_join!( async_std::task::spawn_blocking(move || { let _enter = stdout_span.enter(); if let Some(stdout) = stdout { for line in BufReader::new(stdout).lines() { let line = line?; - tracing::debug!("stdout::{stdout_name}: {}", line); + tracing::debug!("stdout::{stdout_name}::{event}: {}", line); if let Some(on_script_line) = &stdout_on_line { on_script_line(&line); } @@ -450,7 +452,10 @@ impl HoistedLinker { if let Some(stderr) = stderr { for line in BufReader::new(stderr).lines() { let line = line?; - tracing::debug!("stderr::{stderr_name}: {}", line); + tracing::debug!( + "stderr::{stderr_name}::{event_clone}: {}", + line + ); if let Some(on_script_line) = &stderr_on_line { on_script_line(&line); } diff --git a/crates/node-maintainer/src/linkers/isolated.rs b/crates/node-maintainer/src/linkers/isolated.rs index 0c4477fa..0a437d6e 100644 --- a/crates/node-maintainer/src/linkers/isolated.rs +++ b/crates/node-maintainer/src/linkers/isolated.rs @@ -409,14 +409,15 @@ impl IsolatedLinker { let package_dir = pkg_dir.clone(); let package_dir_clone = package_dir.clone(); let event = event.to_owned(); - let span = tracing::info_span!("script::{name}::{event}"); + let event_clone = event.clone(); + let span = tracing::info_span!("script"); let _span_enter = span.enter(); if let Some(on_script_start) = &self.0.on_script_start { on_script_start(&graph[idx].package, &event); } std::mem::drop(_span_enter); let mut script = match async_std::task::spawn_blocking(move || { - OroScript::new(package_dir, event)? + OroScript::new(package_dir, event_clone)? .workspace_path(package_dir_clone) .spawn() }) @@ -438,13 +439,14 @@ impl IsolatedLinker { let stderr_on_line = self.0.on_script_line.clone(); let stdout_span = span; let stderr_span = stdout_span.clone(); + let event_clone = event.clone(); let join = futures::try_join!( async_std::task::spawn_blocking(move || { let _enter = stdout_span.enter(); if let Some(stdout) = stdout { for line in BufReader::new(stdout).lines() { let line = line?; - tracing::debug!("stdout::{stdout_name}: {}", line); + tracing::debug!("stdout::{stdout_name}::{event}: {}", line); if let Some(on_script_line) = &stdout_on_line { on_script_line(&line); } @@ -457,7 +459,10 @@ impl IsolatedLinker { if let Some(stderr) = stderr { for line in BufReader::new(stderr).lines() { let line = line?; - tracing::debug!("stderr::{stderr_name}: {}", line); + tracing::debug!( + "stderr::{stderr_name}::{event_clone}: {}", + line + ); if let Some(on_script_line) = &stderr_on_line { on_script_line(&line); }