From ed2100dd05b83835a6a89aa1e1115d6dc86c81f8 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 1 Aug 2019 15:14:36 -0700 Subject: [PATCH] Disable symlink checking for uncached Snapshot captures (#8074) ### Problem As described in #7963: when we capture Snapshots synchronously via `Scheduler.capture_snapshots`, we do it without the benefit of caching or invalidation. But we still end up using a symlink-aware codepath, meaning that the restrictions we place on absolute and escaping symlinks are unnecessary. More generally, _all_ awareness of symlinks is unnecessary in that case, as the only reason we track them is to enable correct invalidation of caches. ### Solution Add a `symlink_aware: bool` flag to `PosixFS`, and use it to prevent a consumer from observing the existence of a symlink via `scandir`. Use the flag while capturing `Snapshots` synchronously in `Scheduler.capture_snapshots`. Additionally, make `Metadata` capture lazy in `scandir`, to avoid actually computing it for directories and links. ### Result Fixes #7963 and unblocks #8071. --- src/rust/engine/fs/src/lib.rs | 219 ++++++++++++++---- src/rust/engine/fs/store/src/snapshot.rs | 14 +- src/rust/engine/src/nodes.rs | 13 +- .../python/pants_test/backend/jvm/tasks/BUILD | 2 +- .../test_dep_exports_integration.py | 10 - 5 files changed, 185 insertions(+), 73 deletions(-) diff --git a/src/rust/engine/fs/src/lib.rs b/src/rust/engine/fs/src/lib.rs index 0a0ce5d90ea..a5db59d9e57 100644 --- a/src/rust/engine/fs/src/lib.rs +++ b/src/rust/engine/fs/src/lib.rs @@ -540,14 +540,26 @@ pub struct GlobWithSource { source: GlobSource, } +#[derive(Clone)] +pub enum SymlinkBehavior { + Aware, + Oblivious, +} + /// -/// All Stats consumed or return by this type are relative to the root. +/// All Stats consumed or returned by this type are relative to the root. +/// +/// If `symlink_behavior` is Aware (as it is by default), `scandir` will produce `Link` entries so +/// that a consumer can explicitly track their expansion. Otherwise, if Oblivious, operations will +/// allow the operating system to expand links to their underlying types without regard to the +/// links traversed, and `scandir` will produce only `Dir` and `File` entries. /// #[derive(Clone)] pub struct PosixFS { root: Dir, ignore: Arc, executor: task_executor::Executor, + symlink_behavior: SymlinkBehavior, } impl PosixFS { @@ -555,6 +567,15 @@ impl PosixFS { root: P, ignore_patterns: &[String], executor: task_executor::Executor, + ) -> Result { + Self::new_with_symlink_behavior(root, ignore_patterns, executor, SymlinkBehavior::Aware) + } + + pub fn new_with_symlink_behavior>( + root: P, + ignore_patterns: &[String], + executor: task_executor::Executor, + symlink_behavior: SymlinkBehavior, ) -> Result { let root: &Path = root.as_ref(); let canonical_root = root @@ -583,6 +604,7 @@ impl PosixFS { root: canonical_root, ignore: ignore, executor: executor, + symlink_behavior: symlink_behavior, }) } @@ -605,10 +627,23 @@ impl PosixFS { .read_dir()? .map(|readdir| { let dir_entry = readdir?; + let (file_type, compute_metadata): (_, Box Result<_, _>>) = + match self.symlink_behavior { + SymlinkBehavior::Aware => { + // Use the dir_entry metadata, which is symlink aware. + (dir_entry.file_type()?, Box::new(|| dir_entry.metadata())) + } + SymlinkBehavior::Oblivious => { + // Use an independent stat call to get metadata, which is symlink oblivious. + let metadata = std::fs::metadata(dir_abs.join(dir_entry.file_name()))?; + (metadata.file_type(), Box::new(|| Ok(metadata))) + } + }; PosixFS::stat_internal( - dir_relative_to_root.0.join(dir_entry.file_name()), &root, - &std::fs::symlink_metadata(dir_abs.join(dir_entry.file_name()))?, + dir_relative_to_root.0.join(dir_entry.file_name()), + file_type, + compute_metadata, ) }) .filter(|s| match s { @@ -620,7 +655,13 @@ impl PosixFS { } Err(_) => true, }) - .collect::, io::Error>>()?; + .collect::, io::Error>>() + .map_err(|e| { + io::Error::new( + e.kind(), + format!("Failed to scan directory {:?}: {}", dir_abs, e), + ) + })?; stats.sort_by(|s1, s2| s1.path().cmp(s2.path())); Ok(DirectoryListing(stats)) } @@ -635,51 +676,75 @@ impl PosixFS { self .executor .spawn_on_io_pool(futures::future::lazy(move || { - std::fs::File::open(&path_abs).and_then(|mut f| { - let mut content = Vec::new(); - f.read_to_end(&mut content)?; - Ok(FileContent { - path: path, - content: Bytes::from(content), + std::fs::File::open(&path_abs) + .and_then(|mut f| { + let mut content = Vec::new(); + f.read_to_end(&mut content)?; + Ok(FileContent { + path: path, + content: Bytes::from(content), + }) + }) + .map_err(|e| { + io::Error::new( + e.kind(), + format!("Failed to read file {:?}: {}", path_abs, e), + ) }) - }) })) } pub fn read_link(&self, link: &Link) -> impl Future { let link_parent = link.0.parent().map(Path::to_owned); - let link_abs = self.root.0.join(link.0.as_path()).to_owned(); + let link_abs = self.root.0.join(link.0.as_path()); self .executor .spawn_on_io_pool(futures::future::lazy(move || { - link_abs.read_link().and_then(|path_buf| { - if path_buf.is_absolute() { - Err(io::Error::new( - io::ErrorKind::InvalidData, - format!("Absolute symlink: {:?}", link_abs), - )) - } else { - link_parent - .map(|parent| parent.join(path_buf)) - .ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Symlink without a parent?: {:?}", link_abs), - ) - }) - } - }) + link_abs + .read_link() + .and_then(|path_buf| { + if path_buf.is_absolute() { + Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Absolute symlink: {:?}", link_abs), + )) + } else { + link_parent + .map(|parent| parent.join(path_buf)) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Symlink without a parent?: {:?}", link_abs), + ) + }) + } + }) + .map_err(|e| { + io::Error::new( + e.kind(), + format!("Failed to read link {:?}: {}", link_abs, e), + ) + }) })) } /// /// Makes a Stat for path_for_stat relative to absolute_path_to_root. /// - fn stat_internal( - path_for_stat: PathBuf, + /// This method takes both a `FileType` and a getter for `Metadata` because on Unixes, + /// directory walks cheaply return the `FileType` without extra syscalls, but other + /// metadata requires additional syscall(s) to compute. We can avoid those calls for + /// Dirs and Links. + /// + fn stat_internal( absolute_path_to_root: &Path, - metadata: &std::fs::Metadata, - ) -> Result { + path_for_stat: PathBuf, + file_type: std::fs::FileType, + compute_metadata: F, + ) -> Result + where + F: FnOnce() -> Result, + { if !path_for_stat.is_relative() { return Err(io::Error::new( io::ErrorKind::InvalidInput, @@ -699,11 +764,10 @@ impl PosixFS { ), )); } - let file_type = metadata.file_type(); if file_type.is_symlink() { Ok(Stat::Link(Link(path_for_stat))) } else if file_type.is_file() { - let is_executable = metadata.permissions().mode() & 0o100 == 0o100; + let is_executable = compute_metadata()?.permissions().mode() & 0o100 == 0o100; Ok(Stat::File(File { path: path_for_stat, is_executable: is_executable, @@ -722,12 +786,14 @@ impl PosixFS { } pub fn stat_sync(&self, relative_path: PathBuf) -> Result { - PosixFS::stat_path_sync(relative_path, &self.root.0) - } - - fn stat_path_sync(relative_path: PathBuf, root: &Path) -> Result { - let metadata = fs::symlink_metadata(root.join(&relative_path))?; - PosixFS::stat_internal(relative_path, root, &metadata) + let abs_path = self.root.0.join(&relative_path); + let metadata = match self.symlink_behavior { + SymlinkBehavior::Aware => fs::symlink_metadata(abs_path)?, + SymlinkBehavior::Oblivious => fs::metadata(abs_path)?, + }; + PosixFS::stat_internal(&self.root.0, relative_path, metadata.file_type(), || { + Ok(metadata) + }) } } @@ -759,13 +825,11 @@ impl PathStatGetter for Arc { paths .into_iter() .map(|path| { - let root = self.root.0.clone(); let fs = self.clone(); + let fs2 = self.clone(); self .executor - .spawn_on_io_pool(futures::future::lazy(move || { - PosixFS::stat_path_sync(path, &root) - })) + .spawn_on_io_pool(futures::future::lazy(move || fs2.stat_sync(path))) .then(|stat_result| match stat_result { Ok(v) => Ok(Some(v)), Err(err) => match err.kind() { @@ -867,7 +931,7 @@ mod posixfs_test { use super::{ Dir, DirectoryListing, File, GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PathStat, - PathStatGetter, PosixFS, Stat, StrictGlobMatching, VFS, + PathStatGetter, PosixFS, Stat, StrictGlobMatching, SymlinkBehavior, VFS, }; use boxfuture::{BoxFuture, Boxable}; use futures::future::{self, Future}; @@ -982,6 +1046,25 @@ mod posixfs_test { ) } + #[test] + fn stat_symlink_oblivious() { + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs_symlink_oblivious(&dir.path()); + let path = PathBuf::from("marmosets"); + make_file(&dir.path().join(&path), &[], 0o600); + + let link_path = PathBuf::from("remarkably_similar_marmoset"); + std::os::unix::fs::symlink(&dir.path().join(path), dir.path().join(&link_path)).unwrap(); + // Symlink oblivious stat will give us the destination type. + assert_eq!( + posix_fs.stat_sync(link_path.clone()).unwrap(), + super::Stat::File(File { + path: link_path, + is_executable: false, + }) + ) + } + #[test] fn stat_other() { new_posixfs("/dev") @@ -1014,7 +1097,6 @@ mod posixfs_test { #[test] fn scandir() { let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(&dir.path()); let path = PathBuf::from("enclosure"); std::fs::create_dir(dir.path().join(&path)).unwrap(); @@ -1043,8 +1125,34 @@ mod posixfs_test { let mut runtime = tokio::runtime::Runtime::new().unwrap(); + // Symlink aware. assert_eq!( - runtime.block_on(posix_fs.scandir(Dir(path))).unwrap(), + runtime + .block_on(new_posixfs(&dir.path()).scandir(Dir(path.clone()))) + .unwrap(), + DirectoryListing(vec![ + Stat::File(File { + path: a_marmoset.clone(), + is_executable: false, + }), + Stat::File(File { + path: feed.clone(), + is_executable: true, + }), + Stat::Dir(Dir(hammock.clone())), + Stat::Link(Link(remarkably_similar_marmoset.clone())), + Stat::File(File { + path: sneaky_marmoset.clone(), + is_executable: false, + }), + ]) + ); + + // Symlink oblivious. + assert_eq!( + runtime + .block_on(new_posixfs_symlink_oblivious(&dir.path()).scandir(Dir(path))) + .unwrap(), DirectoryListing(vec![ Stat::File(File { path: a_marmoset, @@ -1055,7 +1163,10 @@ mod posixfs_test { is_executable: true, }), Stat::Dir(Dir(hammock)), - Stat::Link(Link(remarkably_similar_marmoset)), + Stat::File(File { + path: remarkably_similar_marmoset, + is_executable: false, + }), Stat::File(File { path: sneaky_marmoset, is_executable: false, @@ -1205,6 +1316,16 @@ mod posixfs_test { PosixFS::new(dir.as_ref(), &[], task_executor::Executor::new()).unwrap() } + fn new_posixfs_symlink_oblivious>(dir: P) -> PosixFS { + PosixFS::new_with_symlink_behavior( + dir.as_ref(), + &[], + task_executor::Executor::new(), + SymlinkBehavior::Oblivious, + ) + .unwrap() + } + /// /// An in-memory implementation of VFS, useful for precisely reproducing glob matching behavior for /// a set of file paths. diff --git a/src/rust/engine/fs/store/src/snapshot.rs b/src/rust/engine/fs/store/src/snapshot.rs index d77a07e33db..d95d494aa7c 100644 --- a/src/rust/engine/fs/store/src/snapshot.rs +++ b/src/rust/engine/fs/store/src/snapshot.rs @@ -4,7 +4,7 @@ use crate::Store; use bazel_protos; use boxfuture::{try_future, BoxFuture, Boxable}; -use fs::{Dir, File, GlobMatching, PathGlobs, PathStat, PosixFS}; +use fs::{Dir, File, GlobMatching, PathGlobs, PathStat, PosixFS, SymlinkBehavior}; use futures::future::{self, join_all}; use futures::Future; use hashing::{Digest, EMPTY_DIGEST}; @@ -459,7 +459,8 @@ impl Snapshot { /// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place /// the resultant Snapshot into the store and return it. This is important, because we're reading /// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch - /// them for changes. + /// them for changes. Because we're not caching things, we can safely configure the virtual + /// filesystem to be symlink-oblivious. /// /// If the `digest_hint` is given, first attempt to load the Snapshot using that Digest, and only /// fall back to actually walking the filesystem if we don't have it (either due to garbage @@ -478,11 +479,16 @@ impl Snapshot { future::result(digest_hint.ok_or_else(|| "No digest hint provided.".to_string())) .and_then(move |digest| Snapshot::from_digest(store, digest)) .or_else(|_| { - let posix_fs = Arc::new(try_future!(PosixFS::new(root_path, &[], executor))); + let posix_fs = Arc::new(try_future!(PosixFS::new_with_symlink_behavior( + root_path, + &[], + executor, + SymlinkBehavior::Oblivious + ))); posix_fs .expand(path_globs) - .map_err(|err| format!("Error expanding globs: {:?}", err)) + .map_err(|err| format!("Error expanding globs: {}", err)) .and_then(|path_stats| { Snapshot::from_path_stats( store2.clone(), diff --git a/src/rust/engine/src/nodes.rs b/src/rust/engine/src/nodes.rs index fba4ad28348..c7aee43eaa1 100644 --- a/src/rust/engine/src/nodes.rs +++ b/src/rust/engine/src/nodes.rs @@ -443,13 +443,12 @@ impl WrappedNode for ReadLink { type Item = LinkDest; fn run(self, context: Context) -> NodeFuture { - let link = self.0.clone(); context .core .vfs .read_link(&self.0) .map(LinkDest) - .map_err(move |e| throw(&format!("Failed to read_link for {:?}: {:?}", link, e))) + .map_err(|e| throw(&format!("{}", e))) .to_boxed() } } @@ -470,12 +469,11 @@ impl WrappedNode for DigestFile { type Item = hashing::Digest; fn run(self, context: Context) -> NodeFuture { - let file = self.0.clone(); context .core .vfs .read_file(&self.0) - .map_err(move |e| throw(&format!("Error reading file {:?}: {:?}", file, e,))) + .map_err(|e| throw(&format!("{}", e))) .and_then(move |c| { context .core @@ -504,15 +502,12 @@ impl WrappedNode for Scandir { type Item = Arc; fn run(self, context: Context) -> NodeFuture> { - let dir = self.0.clone(); context .core .vfs .scandir(self.0) - .then(move |listing_res| match listing_res { - Ok(listing) => Ok(Arc::new(listing)), - Err(e) => Err(throw(&format!("Failed to scandir for {:?}: {:?}", dir, e))), - }) + .map(Arc::new) + .map_err(|e| throw(&format!("{}", e))) .to_boxed() } } diff --git a/tests/python/pants_test/backend/jvm/tasks/BUILD b/tests/python/pants_test/backend/jvm/tasks/BUILD index b50c5244435..4da39a32b0d 100644 --- a/tests/python/pants_test/backend/jvm/tasks/BUILD +++ b/tests/python/pants_test/backend/jvm/tasks/BUILD @@ -475,7 +475,7 @@ python_tests( 'tests/python/pants_test:int-test', ], tags = {'integration'}, - timeout = 180, + timeout = 240, ) python_tests( diff --git a/tests/python/pants_test/backend/jvm/tasks/jvm_compile/test_dep_exports_integration.py b/tests/python/pants_test/backend/jvm/tasks/jvm_compile/test_dep_exports_integration.py index 629efcdb8e6..88cccbbe4cc 100644 --- a/tests/python/pants_test/backend/jvm/tasks/jvm_compile/test_dep_exports_integration.py +++ b/tests/python/pants_test/backend/jvm/tasks/jvm_compile/test_dep_exports_integration.py @@ -19,16 +19,6 @@ class DepExportsIntegrationTest(PantsRunIntegrationTest): def hermetic(cls): return True - def test_compilation(self): - for lang in self.SRC_TYPES: - path = os.path.join(self.SRC_PREFIX, lang, self.SRC_PACKAGE) - pants_run = self.run_pants(['list', '{}::'.format(path)]) - self.assert_success(pants_run) - target_list = pants_run.stdout_data.strip().split('\n') - for target in target_list: - pants_run = self.run_pants(['compile', '--lint-scalafmt-skip', target]) - self.assert_success(pants_run) - def modify_exports_and_compile(self, target, modify_file): with self.temporary_sourcedir() as tmp_src: src_dir = os.path.relpath(os.path.join(tmp_src, os.path.basename(self.SRC_PACKAGE)), get_buildroot())