Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid capturing Snapshots for previously digested codegen outputs #7241

Merged

Add a Digest hint to PathGlobsAndRoot, and use it to bypass Snapshot …

…capture if stored.
  • Loading branch information...
stuhood committed Feb 12, 2019
commit ef8843c783cc84024ae157b96bd0e75aa34cb234
Copy path View file
@@ -10,7 +10,7 @@
from pants.engine.rules import RootRule
from pants.option.custom_types import GlobExpansionConjunction
from pants.option.global_options import GlobMatchErrorBehavior
from pants.util.objects import datatype
from pants.util.objects import Exactly, datatype


class FileContent(datatype([('path', text_type), ('content', binary_type)])):
@@ -61,10 +61,6 @@ def __new__(cls, include, exclude=(), glob_match_error_behavior=None, conjunctio
conjunction=GlobExpansionConjunction.create(conjunction, none_is_default=True))


class PathGlobsAndRoot(datatype([('path_globs', PathGlobs), ('root', text_type)])):
pass


class Digest(datatype([('fingerprint', text_type), ('serialized_bytes_length', int)])):
"""A Digest is a content-digest fingerprint, and a length of underlying content.
@@ -93,6 +89,23 @@ def __str__(self):
return repr(self)


class PathGlobsAndRoot(datatype([
('path_globs', PathGlobs),
('root', text_type),
('digest_hint', Exactly(Digest, type(None))),
])):
"""A set of PathGlobs to capture relative to some root (which may exist outside of the buildroot).
If the `digest_hint` is set, it must be the Digest that we would expect to get if we were to
expand and Digest the globs. The hint is an optimization that allows for bypassing filesystem
operations in cases where the expected Digest is known, and the content for the Digest is already
stored.
"""

def __new__(cls, path_globs, root, digest_hint=None):
return super(PathGlobsAndRoot, cls).__new__(cls, path_globs, root, digest_hint)


class Snapshot(datatype([('directory_digest', Digest), ('files', tuple), ('dirs', tuple)])):
"""A Snapshot is a collection of file paths and dir paths fingerprinted by their names/content.
@@ -3,7 +3,7 @@

use crate::glob_matching::GlobMatching;
use crate::pool::ResettablePool;
use crate::{File, PathGlobs, PathStat, PosixFS, Store};
use crate::{Dir, File, PathGlobs, PathStat, PosixFS, Store};
use bazel_protos;
use boxfuture::{try_future, BoxFuture, Boxable};
use futures::future::{self, join_all};
@@ -65,6 +65,34 @@ impl Snapshot {
.to_boxed()
}

pub fn from_digest(store: Store, digest: Digest) -> BoxFuture<Snapshot, String> {
store
.walk(digest, |_, path_so_far, _, directory| {
let mut path_stats = Vec::new();
path_stats.extend(directory.get_directories().iter().map(move |dir_node| {
let path = path_so_far.join(dir_node.get_name());
PathStat::dir(path.clone(), Dir(path))
}));
path_stats.extend(directory.get_files().iter().map(move |file_node| {
let path = path_so_far.join(file_node.get_name());
PathStat::file(
path.clone(),
File {
path,
is_executable: file_node.is_executable,
},
)
}));
future::ok(path_stats).to_boxed()
})
.map(move |path_stats_per_directory| {
let path_stats =
Iterator::flatten(path_stats_per_directory.into_iter().map(|v| v.into_iter())).collect();
Snapshot { digest, path_stats }
})
.to_boxed()
}

pub fn digest_from_path_stats<
S: StoreFileByDigest<Error> + Sized + Clone,
Error: fmt::Debug + 'static + Send,
@@ -312,29 +340,44 @@ impl Snapshot {
.to_boxed()
}

pub fn capture_snapshot_from_arbitrary_root<P: AsRef<Path>>(
///
/// Capture a Snapshot of a presumed-immutable piece of the filesystem.
///
/// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place
/// the resultant Snapshot into the store and return it. This is important, because we're reading
/// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch
/// them for changes.
///
/// If the `digest_hint` is given, first attempt to load the Snapshot using that Digest, and only
/// fall back to actually walking the filesystem if we don't have it (either due to garbage
/// collection or Digest-oblivious legacy caching).
///
pub fn capture_snapshot_from_arbitrary_root<P: AsRef<Path> + Send + 'static>(
store: Store,
fs_pool: Arc<ResettablePool>,
root_path: P,
path_globs: PathGlobs,
digest_hint: Option<Digest>,
) -> BoxFuture<Snapshot, String> {
// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place
// the resultant Snapshot into the store and return it. This is important, because we're reading
// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch
// them for changes.
// We assume that this Snapshot is of an immutable piece of the filesystem.

let posix_fs = Arc::new(try_future!(PosixFS::new(root_path, fs_pool, &[])));

posix_fs
.expand(path_globs)
.map_err(|err| format!("Error expanding globs: {:?}", err))
.and_then(|path_stats| {
Snapshot::from_path_stats(
store.clone(),
&OneOffStoreFileByDigest::new(store, posix_fs),
path_stats,
)
// Attempt to use the digest hint to load a Snapshot without expanding the globs; otherwise,
// expand the globs to capture a Snapshot.
let store2 = store.clone();
future::result(digest_hint.ok_or_else(|| "No digest hint provided.".to_string()))
.and_then(move |digest| Snapshot::from_digest(store, digest))
.or_else(|_| {
let posix_fs = Arc::new(try_future!(PosixFS::new(root_path, fs_pool, &[])));

posix_fs
.expand(path_globs)
.map_err(|err| format!("Error expanding globs: {:?}", err))
.and_then(|path_stats| {
Snapshot::from_path_stats(
store2.clone(),
&OneOffStoreFileByDigest::new(store2, posix_fs),
path_stats,
)
})
.to_boxed()
})
.to_boxed()
}
Copy path View file
@@ -658,15 +658,24 @@ pub extern "C" fn capture_snapshots(
path_globs_and_root_tuple_wrapper: Handle,
) -> PyResult {
let values = externs::project_multi(&path_globs_and_root_tuple_wrapper.into(), "dependencies");
let path_globs_and_roots_result: Result<Vec<(fs::PathGlobs, PathBuf)>, String> = values
let path_globs_and_roots_result = values
.iter()
.map(|value| {
let root = PathBuf::from(externs::project_str(&value, "root"));
let path_globs =
nodes::Snapshot::lift_path_globs(&externs::project_ignoring_type(&value, "path_globs"));
path_globs.map(|path_globs| (path_globs, root))
let digest_hint = {
let maybe_digest = externs::project_ignoring_type(&value, "digest_hint");
// TODO: Extract a singleton Key for None.
if maybe_digest == externs::eval("None").unwrap() {
None
} else {
Some(nodes::lift_digest(&maybe_digest)?)
}
};
path_globs.map(|path_globs| (path_globs, root, digest_hint))
})
.collect();
.collect::<Result<Vec<_>, _>>();

let path_globs_and_roots = match path_globs_and_roots_result {
Ok(v) => v,
@@ -681,13 +690,14 @@ pub extern "C" fn capture_snapshots(
futures::future::join_all(
path_globs_and_roots
.into_iter()
.map(|(path_globs, root)| {
.map(|(path_globs, root, digest_hint)| {
let core = core.clone();
fs::Snapshot::capture_snapshot_from_arbitrary_root(
core.store(),
core.fs_pool.clone(),
root,
path_globs,
digest_hint,
)
.map(move |snapshot| nodes::Snapshot::store_snapshot(&core, &snapshot))
})
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.