Skip to content

Commit

Permalink
feat(validate): optionally validate cache contents during extraction (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
zkat committed Mar 11, 2023
1 parent 16ad5ba commit 0e22a5f
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 21 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -55,7 +55,7 @@ async-trait = "0.1.64"
backon = "0.4.0"
bincode = "1.3.1"
bytecount = "0.6.0"
cacache = "11.3.0"
cacache = "11.4.0"
chrono = "0.4.23"
chrono-humanize = "0.0.11"
clap = "4.1.4"
Expand Down
70 changes: 59 additions & 11 deletions crates/nassun/src/package.rs
Expand Up @@ -130,12 +130,18 @@ impl Package {
&self,
dir: impl AsRef<Path>,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
async fn inner(me: &Package, dir: &Path, prefer_copy: bool) -> Result<Integrity> {
me.extract_to_dir_inner(dir, me.resolved.integrity(), prefer_copy)
async fn inner(
me: &Package,
dir: &Path,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
me.extract_to_dir_inner(dir, me.resolved.integrity(), prefer_copy, validate)
.await
}
inner(self, dir.as_ref(), prefer_copy).await
inner(self, dir.as_ref(), prefer_copy, validate).await
}

/// Extract tarball to a directory, optionally caching its contents. The
Expand All @@ -146,11 +152,18 @@ impl Package {
&self,
dir: impl AsRef<Path>,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
async fn inner(me: &Package, dir: &Path, prefer_copy: bool) -> Result<Integrity> {
me.extract_to_dir_inner(dir, None, prefer_copy).await
async fn inner(
me: &Package,
dir: &Path,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
me.extract_to_dir_inner(dir, None, prefer_copy, validate)
.await
}
inner(self, dir.as_ref(), prefer_copy).await
inner(self, dir.as_ref(), prefer_copy, validate).await
}

/// Extract tarball to a directory, optionally caching its contents. The
Expand All @@ -162,16 +175,19 @@ impl Package {
dir: impl AsRef<Path>,
sri: Integrity,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
async fn inner(
me: &Package,
dir: &Path,
sri: Integrity,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
me.extract_to_dir_inner(dir, Some(&sri), prefer_copy).await
me.extract_to_dir_inner(dir, Some(&sri), prefer_copy, validate)
.await
}
inner(self, dir.as_ref(), sri, prefer_copy).await
inner(self, dir.as_ref(), sri, prefer_copy, validate).await
}

#[cfg(not(target_arch = "wasm32"))]
Expand All @@ -180,6 +196,7 @@ impl Package {
dir: &Path,
integrity: Option<&Integrity>,
prefer_copy: bool,
validate: bool,
) -> Result<Integrity> {
if let Some(sri) = integrity {
if let Some(cache) = self.cache.as_deref() {
Expand All @@ -191,12 +208,19 @@ impl Package {
// (bad data, etc), then go ahead and do a network
// extract.
match self
.extract_from_cache(dir, cache, entry, prefer_copy)
.extract_from_cache(dir, cache, entry, prefer_copy, validate)
.await
{
Ok(_) => return Ok(sri),
Err(e) => {
tracing::debug!("extract_from_cache failed: {}", e);
tracing::warn!("extracting package {:?} from cache failed, possibly due to cache corruption: {e}", self.resolved());
if let Some(entry) =
cacache::index::find(cache, &crate::tarball::tarball_key(&sri))
.map_err(|e| NassunError::ExtractCacheError(e, None))?
{
tracing::debug!("removing corrupted cache entry.");
clean_from_cache(cache, &sri, entry)?;
}
return self
.tarball_checked(sri)
.await?
Expand Down Expand Up @@ -231,6 +255,7 @@ impl Package {
cache: &Path,
entry: cacache::Metadata,
prefer_copy: bool,
validate: bool,
) -> Result<()> {
let dir = PathBuf::from(dir);
let cache = PathBuf::from(cache);
Expand All @@ -256,7 +281,7 @@ impl Package {
created.insert(parent);
}

crate::tarball::extract_from_cache(&cache, &sri, &path, prefer_copy)?;
crate::tarball::extract_from_cache(&cache, &sri, &path, prefer_copy, validate)?;
}
Ok::<_, NassunError>(())
})
Expand All @@ -271,6 +296,29 @@ impl fmt::Debug for Package {
.field("from", &self.from)
.field("name", &self.name)
.field("resolved", &self.resolved)
.field("base_dir", &self.base_dir)
.finish()
}
}

#[cfg(not(target_arch = "wasm32"))]
fn clean_from_cache(cache: &Path, sri: &Integrity, entry: cacache::Metadata) -> Result<()> {
let map = entry
.metadata
.as_object()
.expect("how is this not an object?");
for sri in map.values() {
let sri: Integrity = sri.as_str().expect("how is this not a string?").parse()?;
match cacache::remove_hash_sync(cache, &sri) {
Ok(_) => {}
// We don't care if the file doesn't exist.
Err(cacache::Error::IoError(e, _)) if e.kind() == std::io::ErrorKind::NotFound => {}
Err(e) => {
return Err(NassunError::ExtractCacheError(e, None));
}
}
}
cacache::remove_sync(cache, crate::tarball::tarball_key(sri))
.map_err(|e| NassunError::ExtractCacheError(e, None))?;
Ok(())
}
33 changes: 28 additions & 5 deletions crates/nassun/src/tarball.rs
Expand Up @@ -266,7 +266,7 @@ impl TempTarball {
.commit()
.map_err(|e| NassunError::ExtractCacheError(e, Some(path.clone())))?;

extract_from_cache(cache, &sri, &path, prefer_copy)?;
extract_from_cache(cache, &sri, &path, prefer_copy, false)?;

file_index.insert(
entry_subpath.to_string_lossy().into(),
Expand Down Expand Up @@ -371,16 +371,16 @@ pub(crate) fn extract_from_cache(
sri: &Integrity,
to: &Path,
prefer_copy: bool,
validate: bool,
) -> Result<()> {
if prefer_copy {
cacache::copy_hash_unchecked_sync(cache, sri, to)
.map_err(|e| NassunError::ExtractCacheError(e, Some(PathBuf::from(to))))?;
copy_from_cache(cache, sri, to, validate)?;
} else {
// HACK: This is horrible, but on wsl2 (at least), this
// was sometimes crashing with an ENOENT (?!), which
// really REALLY shouldn't happen. So we just retry a few
// times and hope the problem goes away.
let op = || cacache::hard_link_hash_unchecked_sync(cache, sri, to);
let op = || hard_link_from_cache(cache, sri, to, validate);
op.retry(&ConstantBuilder::default().with_delay(Duration::from_millis(50)))
.notify(|err, wait| {
tracing::debug!(
Expand All @@ -390,7 +390,30 @@ pub(crate) fn extract_from_cache(
)
})
.call()
.or_else(|_| cacache::copy_hash_unchecked_sync(cache, sri, to))
.or_else(|_| copy_from_cache(cache, sri, to, validate))?;
}
Ok(())
}

/// Copies the cache content identified by `sri` to `to`.
///
/// With `validate` set, the checked cacache variant is used (which verifies
/// content against the hash during the copy); otherwise the unchecked,
/// faster variant is used. Failures are wrapped with the destination path
/// for diagnostics.
#[cfg(not(target_arch = "wasm32"))]
fn copy_from_cache(cache: &Path, sri: &Integrity, to: &Path, validate: bool) -> Result<()> {
    if validate {
        cacache::copy_hash_sync(cache, sri, to)
            .map_err(|e| NassunError::ExtractCacheError(e, Some(PathBuf::from(to))))?;
        return Ok(());
    }
    cacache::copy_hash_unchecked_sync(cache, sri, to)
        .map_err(|e| NassunError::ExtractCacheError(e, Some(PathBuf::from(to))))?;
    Ok(())
}

#[cfg(not(target_arch = "wasm32"))]
// Materializes the cache content identified by `sri` at `to` via hard link.
// `validate` selects cacache's checked variant (which presumably re-verifies
// the content hash during the operation — see cacache docs); the unchecked
// variant skips verification and trusts the cache.
fn hard_link_from_cache(cache: &Path, sri: &Integrity, to: &Path, validate: bool) -> Result<()> {
    if validate {
        // Checked: integrity-verifying hard link.
        cacache::hard_link_hash_sync(cache, sri, to)
            .map_err(|e| NassunError::ExtractCacheError(e, Some(PathBuf::from(to))))?;
    } else {
        // Unchecked: fast path, no verification.
        cacache::hard_link_hash_unchecked_sync(cache, sri, to)
            .map_err(|e| NassunError::ExtractCacheError(e, Some(PathBuf::from(to))))?;
    }
    Ok(())
Expand Down
19 changes: 18 additions & 1 deletion crates/node-maintainer/src/maintainer.rs
Expand Up @@ -45,6 +45,8 @@ pub struct NodeMaintainerOptions {
cache: Option<PathBuf>,
#[allow(dead_code)]
prefer_copy: bool,
#[allow(dead_code)]
validate: bool,
}

impl NodeMaintainerOptions {
Expand Down Expand Up @@ -114,6 +116,16 @@ impl NodeMaintainerOptions {
self
}

/// Enables integrity validation during extraction.
///
/// When enabled, integrity hashes are validated for all files extracted
/// from the cache, and files already present in `node_modules` are checked
/// for modification; packages that fail verification are reinstalled.
#[cfg(not(target_arch = "wasm32"))]
pub fn validate(mut self, enable: bool) -> Self {
    self.validate = enable;
    self
}

pub async fn resolve_manifest(
self,
root: CorgiManifest,
Expand All @@ -128,6 +140,7 @@ impl NodeMaintainerOptions {
progress_bar: self.progress_bar,
cache: self.cache,
prefer_copy: self.prefer_copy,
validate: self.validate,
};
let node = nm.graph.inner.add_node(Node::new(root_pkg, root));
nm.graph[node].root = node;
Expand All @@ -151,6 +164,7 @@ impl NodeMaintainerOptions {
progress_bar: self.progress_bar,
cache: self.cache,
prefer_copy: self.prefer_copy,
validate: self.validate,
};
let corgi = root_pkg.corgi_metadata().await?.manifest;
let node = nm.graph.inner.add_node(Node::new(root_pkg, corgi));
Expand All @@ -173,6 +187,7 @@ impl Default for NodeMaintainerOptions {
progress_bar: false,
cache: None,
prefer_copy: false,
validate: false,
}
}
}
Expand All @@ -193,6 +208,7 @@ pub struct NodeMaintainer {
progress_bar: bool,
cache: Option<PathBuf>,
prefer_copy: bool,
validate: bool,
}

impl NodeMaintainer {
Expand Down Expand Up @@ -274,6 +290,7 @@ impl NodeMaintainer {
Some(cache) => supports_reflink(cache, &node_modules),
None => false,
};
let validate = me.validate;
stream
.map(|idx| Ok((idx, concurrent_count.clone(), total_completed.clone())))
.try_for_each_concurrent(
Expand All @@ -297,7 +314,7 @@ impl NodeMaintainer {

me.graph[child_idx]
.package
.extract_to_dir(&target_dir, prefer_copy)
.extract_to_dir(&target_dir, prefer_copy, validate)
.await?;

#[cfg(not(target_arch = "wasm32"))]
Expand Down
27 changes: 26 additions & 1 deletion src/commands/restore.rs
Expand Up @@ -25,6 +25,28 @@ pub struct RestoreCmd {

#[clap(from_global)]
cache: Option<PathBuf>,

/// Prefer copying files over hard linking them.
///
/// On filesystems that don't support copy-on-write/reflinks (usually NTFS
/// or ext4), orogene defaults to hard linking package files from a
/// centralized cache. As such, this can cause global effects if a file
/// inside a node_modules is modified, where other projects that have
/// installed that same file will see those modifications.
///
/// In order to prevent this, you can use this flag to force orogene to
/// always copy files, at a performance cost.
#[arg(short, long)]
prefer_copy: bool,

/// Validate the integrity of installed files.
///
/// When this is true, orogene will verify all files extracted from the
/// cache, as well as verify that any files in the existing `node_modules`
/// are unmodified. If verification fails, the packages will be
/// reinstalled.
#[arg(short, long)]
validate: bool,
}

#[async_trait]
Expand All @@ -34,7 +56,10 @@ impl OroCommand for RestoreCmd {
.root
.expect("root should've been set by global defaults");
let mut nm = NodeMaintainerOptions::new();
nm = nm.progress_bar(true);
nm = nm
.progress_bar(true)
.prefer_copy(self.prefer_copy)
.validate(self.validate);
if let Some(registry) = self.registry {
nm = nm.registry(registry);
}
Expand Down

0 comments on commit 0e22a5f

Please sign in to comment.