Skip to content

Commit

Permalink
perf: create directories up front (#533)
Browse files Browse the repository at this point in the history
When linking packages a lot of time is spent creating directories as
this flamegraph highlights:


![image](https://github.com/mamba-org/rattler/assets/4995967/90c34e8c-4d4f-4f37-a3a0-3aec531952e5)

Most of the time is spent checking if a directory (or its parents)
already exists. However, we already know upfront all the directories
that need to be created so why not do that? Well, say no more! That is
exactly what this PR does!

New situation:


![image](https://github.com/mamba-org/rattler/assets/4995967/9e47f348-835d-4da8-a8d8-7c8a9476614b)

Its completely gone. Great success! 👍

In my preliminary dead simple benchmarks, this can easily save 20% of
installation times.
  • Loading branch information
baszalmstra committed May 14, 2024
1 parent b395b81 commit 55871a7
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
15 changes: 3 additions & 12 deletions crates/rattler/src/install/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ pub enum LinkFileError {
#[error("unexpected io operation while {0}")]
IoError(String, #[source] std::io::Error),

/// The parent directory of the destination file could not be created.
#[error("failed to create parent directory")]
FailedToCreateParentDirectory(#[source] std::io::Error),

/// The source file could not be opened.
#[error("could not open source file for reading")]
FailedToOpenSourceFile(#[source] std::io::Error),
Expand Down Expand Up @@ -154,11 +150,6 @@ pub fn link_file(

let destination_path = target_dir.join(&destination_relative_path);

// Ensure that all directories up to the path exist.
if let Some(parent) = destination_path.parent() {
std::fs::create_dir_all(parent).map_err(LinkFileError::FailedToCreateParentDirectory)?;
}

// Temporary variables to store intermediate computations in. If we already computed the file
// size or the sha hash we dont have to recompute them at the end of the function.
let mut sha256 = None;
Expand Down Expand Up @@ -398,17 +389,17 @@ fn reflink_to_destination(
})?;
}
Err(e) if e.kind() == ErrorKind::Unsupported && allow_hard_links => {
return hardlink_to_destination(source_path, destination_path)
return hardlink_to_destination(source_path, destination_path);
}
Err(e) if e.kind() == ErrorKind::Unsupported && !allow_hard_links => {
return copy_to_destination(source_path, destination_path)
return copy_to_destination(source_path, destination_path);
}
Err(_) => {
return if allow_hard_links {
hardlink_to_destination(source_path, destination_path)
} else {
copy_to_destination(source_path, destination_path)
}
};
}
}
}
Expand Down
39 changes: 37 additions & 2 deletions crates/rattler/src/install/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,16 @@ pub use python::PythonInfo;
use rattler_conda_types::package::{IndexJson, LinkJson, NoArchLinks, PackageFile};

use futures::stream::FuturesUnordered;
use itertools::Itertools;

use rattler_conda_types::{package::PathsJson, Platform};
use std::cmp::Ordering;
use std::collections::binary_heap::PeekMut;
use std::collections::BinaryHeap;
use std::collections::{BinaryHeap, HashSet};
use std::io::ErrorKind;
use std::sync::Arc;
use std::{
fs,
future::ready,
path::{Path, PathBuf},
};
Expand Down Expand Up @@ -78,6 +81,10 @@ pub enum InstallError {
#[error("failed to link '{0}'")]
FailedToLink(PathBuf, #[source] LinkFileError),

/// A directory could not be created.
#[error("failed to create directory '{0}")]
FailedToCreateDirectory(PathBuf, #[source] std::io::Error),

/// The target prefix is not UTF-8.
#[error("target prefix is not UTF-8")]
TargetPrefixIsNotUtf8,
Expand Down Expand Up @@ -210,7 +217,7 @@ pub struct InstallOptions {
///
/// Returns a [`PathsEntry`] for every file that was linked into the target directory. The entries
/// are ordered in the same order as they appear in the `paths.json` file of the package.
#[instrument(skip_all, fields(package_dir = %package_dir.display()))]
#[instrument(skip_all, fields(package_dir = % package_dir.display()))]
pub async fn link_package(
package_dir: &Path,
target_dir: &Path,
Expand Down Expand Up @@ -282,6 +289,34 @@ pub async fn link_package(
}
}

// Figure out all the directories that we are going to need
let mut directories_to_construct = HashSet::new();
for (_, computed_path) in final_paths.iter() {
let mut current_path = computed_path.parent();
while let Some(path) = current_path {
if !path.as_os_str().is_empty() && directories_to_construct.insert(path.to_path_buf()) {
current_path = path.parent();
} else {
break;
}
}
}

let directories_target_dir = target_dir.to_path_buf();
driver
.run_blocking_io_task(move || {
for directory in directories_to_construct.into_iter().sorted() {
let full_path = directories_target_dir.join(directory);
match fs::create_dir(&full_path) {
Ok(_) => (),
Err(e) if e.kind() == ErrorKind::AlreadyExists => (),
Err(e) => return Err(InstallError::FailedToCreateDirectory(full_path, e)),
}
}
Ok(())
})
.await?;

// Wrap the python info in an `Arc` so we can more easily share it with async tasks.
let python_info = options.python_info.map(Arc::new);

Expand Down

0 comments on commit 55871a7

Please sign in to comment.