Skip to content

Commit

Permalink
feat(perf): be smarter about reflinks and mkdirp
Browse files Browse the repository at this point in the history
  • Loading branch information
zkat committed Oct 8, 2023
1 parent 3f005c1 commit 581dda6
Show file tree
Hide file tree
Showing 12 changed files with 456 additions and 289 deletions.
269 changes: 149 additions & 120 deletions Cargo.lock

Large diffs are not rendered by default.

19 changes: 15 additions & 4 deletions Cargo.toml
Expand Up @@ -83,7 +83,7 @@ backon = "0.4.0"
base64 = "0.21.2"
bincode = "1.3.1"
bytecount = "0.6.0"
cacache = "11.6.0"
cacache = "12.0.0"
chrono = "0.4.23"
chrono-humanize = "0.0.11"
clap = "4.2.1"
Expand Down Expand Up @@ -159,6 +159,10 @@ which = "4.0.2"
wiremock = "0.5.17"
reqwest-retry = "0.2.2"

# [patch.crates-io]
# cacache = { path = "../cacache-rs" }
# reflink-copy = { path = "../reflink-copy" }

# Config for 'cargo dist'
[workspace.metadata.dist]
# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
Expand All @@ -168,7 +172,12 @@ rust-toolchain-version = "1.72.1"
# CI backends to support
ci = ["github"]
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["x86_64-unknown-linux-gnu", "aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-pc-windows-msvc"]
targets = [
"x86_64-unknown-linux-gnu",
"aarch64-apple-darwin",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
]
# The installers to generate for each app
installers = ["shell", "powershell", "npm", "msi"]
# The archive format to use for windows builds (defaults .zip)
Expand All @@ -190,8 +199,10 @@ path = "src/main.rs"
[profile.release]
lto = "thin"
opt-level = 3
strip = true
debug = false
# strip = true
# debug = false
strip = false
debug = true

# The profile that 'cargo dist' will build with
[profile.dist]
Expand Down
2 changes: 1 addition & 1 deletion crates/nassun/src/fetch/git.rs
Expand Up @@ -93,7 +93,7 @@ impl GitFetcher {
async fn fetch_tarball(&self, dir: &Path, tarball: &Url) -> Result<()> {
let tarball = self.client.stream_external(tarball).await?;
Tarball::new_unchecked(tarball)
.extract_from_tarball_data(dir, None, false)
.extract_from_tarball_data(dir, None, crate::ExtractMode::AutoHardlink)
.await?;
Ok(())
}
Expand Down
89 changes: 56 additions & 33 deletions crates/nassun/src/package.rs
Expand Up @@ -18,6 +18,32 @@ use crate::tarball::Tarball;
#[cfg(not(target_arch = "wasm32"))]
use crate::tarball::TarballIndex;

/// Strategy used to place cached package contents into a destination
/// directory during extraction.
#[cfg(not(target_arch = "wasm32"))]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExtractMode {
    /// Pick between copying and reflinking automatically, relying on
    /// fallbacks. This mode never hard links.
    #[default]
    Auto,
    /// Make a full copy of the contents stored in the cache.
    Copy,
    /// Reflink the contents stored in the cache rather than fully copying
    /// them.
    Reflink,
    /// Attempt a hard link to the cached contents first; if that fails, fall
    /// back to a reflink, and then to a copy.
    AutoHardlink,
    /// Hard link the contents stored in the cache rather than fully copying
    /// them.
    Hardlink,
}

#[cfg(not(target_arch = "wasm32"))]
impl ExtractMode {
    /// Returns `true` for the copy-style modes ([`ExtractMode::Copy`],
    /// [`ExtractMode::Auto`] and [`ExtractMode::Reflink`]) and `false` for
    /// the two hard-linking modes.
    pub fn is_copy(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            Self::Auto | Self::Copy | Self::Reflink => true,
            Self::AutoHardlink | Self::Hardlink => false,
        }
    }
}

/// A resolved package. A concrete version has been determined from its
/// PackageSpec by the version resolver.
#[derive(Clone)]
Expand Down Expand Up @@ -131,13 +157,13 @@ impl Package {
pub async fn extract_to_dir(
&self,
dir: impl AsRef<Path>,
prefer_copy: bool,
extract_mode: ExtractMode,
) -> Result<Integrity> {
async fn inner(me: &Package, dir: &Path, prefer_copy: bool) -> Result<Integrity> {
me.extract_to_dir_inner(dir, me.resolved.integrity(), prefer_copy)
async fn inner(me: &Package, dir: &Path, extract_mode: ExtractMode) -> Result<Integrity> {
me.extract_to_dir_inner(dir, me.resolved.integrity(), extract_mode)
.await
}
inner(self, dir.as_ref(), prefer_copy).await
inner(self, dir.as_ref(), extract_mode).await
}

/// Extract tarball to a directory, optionally caching its contents. The
Expand All @@ -147,12 +173,12 @@ impl Package {
pub async fn extract_to_dir_unchecked(
&self,
dir: impl AsRef<Path>,
prefer_copy: bool,
extract_mode: ExtractMode,
) -> Result<Integrity> {
async fn inner(me: &Package, dir: &Path, prefer_copy: bool) -> Result<Integrity> {
me.extract_to_dir_inner(dir, None, prefer_copy).await
async fn inner(me: &Package, dir: &Path, extract_mode: ExtractMode) -> Result<Integrity> {
me.extract_to_dir_inner(dir, None, extract_mode).await
}
inner(self, dir.as_ref(), prefer_copy).await
inner(self, dir.as_ref(), extract_mode).await
}

/// Extract tarball to a directory, optionally caching its contents. The
Expand All @@ -163,25 +189,25 @@ impl Package {
&self,
dir: impl AsRef<Path>,
sri: Integrity,
prefer_copy: bool,
extract_mode: ExtractMode,
) -> Result<Integrity> {
async fn inner(
me: &Package,
dir: &Path,
sri: Integrity,
prefer_copy: bool,
extract_mode: ExtractMode,
) -> Result<Integrity> {
me.extract_to_dir_inner(dir, Some(&sri), prefer_copy).await
me.extract_to_dir_inner(dir, Some(&sri), extract_mode).await
}
inner(self, dir.as_ref(), sri, prefer_copy).await
inner(self, dir.as_ref(), sri, extract_mode).await
}

#[cfg(not(target_arch = "wasm32"))]
async fn extract_to_dir_inner(
&self,
dir: &Path,
integrity: Option<&Integrity>,
prefer_copy: bool,
extract_mode: ExtractMode,
) -> Result<Integrity> {
if let Some(sri) = integrity {
if let Some(cache) = self.cache.as_deref() {
Expand All @@ -190,7 +216,7 @@ impl Package {
{
let sri = sri.clone();
match self
.extract_from_cache(dir, cache, entry, prefer_copy)
.extract_from_cache(dir, cache, entry, extract_mode)
.await
{
Ok(_) => return Ok(sri),
Expand All @@ -209,26 +235,26 @@ impl Package {
return self
.tarball_checked(sri)
.await?
.extract_from_tarball_data(dir, self.cache.as_deref(), prefer_copy)
.extract_from_tarball_data(dir, self.cache.as_deref(), extract_mode)
.await;
}
}
} else {
return self
.tarball_checked(sri.clone())
.await?
.extract_from_tarball_data(dir, self.cache.as_deref(), prefer_copy)
.extract_from_tarball_data(dir, self.cache.as_deref(), extract_mode)
.await;
}
}
self.tarball_checked(sri.clone())
.await?
.extract_from_tarball_data(dir, self.cache.as_deref(), prefer_copy)
.extract_from_tarball_data(dir, self.cache.as_deref(), extract_mode)
.await
} else {
self.tarball_unchecked()
.await?
.extract_from_tarball_data(dir, self.cache.as_deref(), prefer_copy)
.extract_from_tarball_data(dir, self.cache.as_deref(), extract_mode)
.await
}
}
Expand All @@ -239,44 +265,41 @@ impl Package {
dir: &Path,
cache: &Path,
entry: cacache::Metadata,
mut prefer_copy: bool,
mut extract_mode: ExtractMode,
) -> Result<()> {
let dir = PathBuf::from(dir);
let cache = PathBuf::from(cache);
let name = self.name().to_owned();
async_std::task::spawn_blocking(move || {
let mut created = std::collections::HashSet::new();
let created = dashmap::DashSet::new();
let index = rkyv::check_archived_root::<TarballIndex>(
entry
.raw_metadata
.as_ref()
.ok_or_else(|| NassunError::CacheMissingIndexError(name))?,
)
.map_err(|e| NassunError::DeserializeCacheError(e.to_string()))?;
prefer_copy = index.should_copy || prefer_copy;
extract_mode = if index.should_copy && !extract_mode.is_copy() {
// In general, if reflinks are supported, we would have
// received them as extract_mode already. So there's no need
// to try and do a fallback here.
ExtractMode::Copy
} else {
extract_mode
};
for (archived_path, (sri, mode)) in index.files.iter() {
let sri: Integrity = sri.parse()?;
let path = dir.join(&archived_path[..]);
let parent = PathBuf::from(path.parent().expect("this will always have a parent"));
if !created.contains(&parent) {
std::fs::create_dir_all(path.parent().expect("this will always have a parent"))
.map_err(|e| {
NassunError::ExtractIoError(
e,
Some(PathBuf::from(path.parent().unwrap())),
"creating destination directory for tarball.".into(),
)
})?;
created.insert(parent);
}
crate::tarball::mkdirp(&parent, &created)?;

let mode = if index.bin_paths.contains(archived_path) {
*mode | 0o111
} else {
*mode
};

crate::tarball::extract_from_cache(&cache, &sri, &path, prefer_copy, mode)?;
crate::tarball::extract_from_cache(&cache, &sri, &path, extract_mode, mode)?;
}
Ok::<_, NassunError>(())
})
Expand Down
7 changes: 1 addition & 6 deletions crates/nassun/src/resolver.rs
Expand Up @@ -54,12 +54,7 @@ impl std::fmt::Debug for PackageResolution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use PackageResolution::*;
match self {
Npm {
tarball,
name,
version,
..
} => write!(f, "{name}@{version} ({tarball})"),
Npm { name, version, .. } => write!(f, "{name}@{version}"),
Dir { path, name } => write!(f, "{name}@{}", path.to_string_lossy()),
Git { name, info } => write!(f, "{name}@{info}"),
}
Expand Down

0 comments on commit 581dda6

Please sign in to comment.