Skip to content

Commit

Permalink
Disable symlink checking for uncached Snapshot captures (#8074)
Browse files Browse the repository at this point in the history
### Problem

As described in #7963: when we capture Snapshots synchronously via `Scheduler.capture_snapshots`, we do it without the benefit of caching or invalidation. But we still end up using a symlink-aware codepath, meaning that the restrictions we place on absolute and escaping symlinks are unnecessary. More generally, _all_ awareness of symlinks is unnecessary in that case, as the only reason we track them is to enable correct invalidation of caches.

### Solution

Add a `symlink_aware: bool` flag to `PosixFS`, and use it to prevent a consumer from observing the existence of a symlink via `scandir`. Use the flag while capturing `Snapshots` synchronously in `Scheduler.capture_snapshots`.

Additionally, make `Metadata` capture lazy in `scandir`, to avoid actually computing it for directories and links.

### Result

Fixes #7963 and unblocks #8071.
  • Loading branch information
stuhood committed Aug 1, 2019
1 parent d2c904e commit ed2100d
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 73 deletions.
219 changes: 170 additions & 49 deletions src/rust/engine/fs/src/lib.rs
Expand Up @@ -540,21 +540,42 @@ pub struct GlobWithSource {
source: GlobSource,
}

#[derive(Clone)]
pub enum SymlinkBehavior {
Aware,
Oblivious,
}

///
/// All Stats consumed or return by this type are relative to the root.
/// All Stats consumed or returned by this type are relative to the root.
///
/// If `symlink_behavior` is Aware (as it is by default), `scandir` will produce `Link` entries so
/// that a consumer can explicitly track their expansion. Otherwise, if Oblivious, operations will
/// allow the operating system to expand links to their underlying types without regard to the
/// links traversed, and `scandir` will produce only `Dir` and `File` entries.
///
#[derive(Clone)]
pub struct PosixFS {
root: Dir,
ignore: Arc<GitignoreStyleExcludes>,
executor: task_executor::Executor,
symlink_behavior: SymlinkBehavior,
}

impl PosixFS {
pub fn new<P: AsRef<Path>>(
root: P,
ignore_patterns: &[String],
executor: task_executor::Executor,
) -> Result<PosixFS, String> {
Self::new_with_symlink_behavior(root, ignore_patterns, executor, SymlinkBehavior::Aware)
}

pub fn new_with_symlink_behavior<P: AsRef<Path>>(
root: P,
ignore_patterns: &[String],
executor: task_executor::Executor,
symlink_behavior: SymlinkBehavior,
) -> Result<PosixFS, String> {
let root: &Path = root.as_ref();
let canonical_root = root
Expand Down Expand Up @@ -583,6 +604,7 @@ impl PosixFS {
root: canonical_root,
ignore: ignore,
executor: executor,
symlink_behavior: symlink_behavior,
})
}

Expand All @@ -605,10 +627,23 @@ impl PosixFS {
.read_dir()?
.map(|readdir| {
let dir_entry = readdir?;
let (file_type, compute_metadata): (_, Box<FnOnce() -> Result<_, _>>) =
match self.symlink_behavior {
SymlinkBehavior::Aware => {
// Use the dir_entry metadata, which is symlink aware.
(dir_entry.file_type()?, Box::new(|| dir_entry.metadata()))
}
SymlinkBehavior::Oblivious => {
// Use an independent stat call to get metadata, which is symlink oblivious.
let metadata = std::fs::metadata(dir_abs.join(dir_entry.file_name()))?;
(metadata.file_type(), Box::new(|| Ok(metadata)))
}
};
PosixFS::stat_internal(
dir_relative_to_root.0.join(dir_entry.file_name()),
&root,
&std::fs::symlink_metadata(dir_abs.join(dir_entry.file_name()))?,
dir_relative_to_root.0.join(dir_entry.file_name()),
file_type,
compute_metadata,
)
})
.filter(|s| match s {
Expand All @@ -620,7 +655,13 @@ impl PosixFS {
}
Err(_) => true,
})
.collect::<Result<Vec<_>, io::Error>>()?;
.collect::<Result<Vec<_>, io::Error>>()
.map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to scan directory {:?}: {}", dir_abs, e),
)
})?;
stats.sort_by(|s1, s2| s1.path().cmp(s2.path()));
Ok(DirectoryListing(stats))
}
Expand All @@ -635,51 +676,75 @@ impl PosixFS {
self
.executor
.spawn_on_io_pool(futures::future::lazy(move || {
std::fs::File::open(&path_abs).and_then(|mut f| {
let mut content = Vec::new();
f.read_to_end(&mut content)?;
Ok(FileContent {
path: path,
content: Bytes::from(content),
std::fs::File::open(&path_abs)
.and_then(|mut f| {
let mut content = Vec::new();
f.read_to_end(&mut content)?;
Ok(FileContent {
path: path,
content: Bytes::from(content),
})
})
.map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to read file {:?}: {}", path_abs, e),
)
})
})
}))
}

pub fn read_link(&self, link: &Link) -> impl Future<Item = PathBuf, Error = io::Error> {
let link_parent = link.0.parent().map(Path::to_owned);
let link_abs = self.root.0.join(link.0.as_path()).to_owned();
let link_abs = self.root.0.join(link.0.as_path());
self
.executor
.spawn_on_io_pool(futures::future::lazy(move || {
link_abs.read_link().and_then(|path_buf| {
if path_buf.is_absolute() {
Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("Absolute symlink: {:?}", link_abs),
))
} else {
link_parent
.map(|parent| parent.join(path_buf))
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Symlink without a parent?: {:?}", link_abs),
)
})
}
})
link_abs
.read_link()
.and_then(|path_buf| {
if path_buf.is_absolute() {
Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("Absolute symlink: {:?}", link_abs),
))
} else {
link_parent
.map(|parent| parent.join(path_buf))
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Symlink without a parent?: {:?}", link_abs),
)
})
}
})
.map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to read link {:?}: {}", link_abs, e),
)
})
}))
}

///
/// Makes a Stat for path_for_stat relative to absolute_path_to_root.
///
fn stat_internal(
path_for_stat: PathBuf,
/// This method takes both a `FileType` and a getter for `Metadata` because on Unixes,
/// directory walks cheaply return the `FileType` without extra syscalls, but other
/// metadata requires additional syscall(s) to compute. We can avoid those calls for
/// Dirs and Links.
///
fn stat_internal<F>(
absolute_path_to_root: &Path,
metadata: &std::fs::Metadata,
) -> Result<Stat, io::Error> {
path_for_stat: PathBuf,
file_type: std::fs::FileType,
compute_metadata: F,
) -> Result<Stat, io::Error>
where
F: FnOnce() -> Result<std::fs::Metadata, io::Error>,
{
if !path_for_stat.is_relative() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
Expand All @@ -699,11 +764,10 @@ impl PosixFS {
),
));
}
let file_type = metadata.file_type();
if file_type.is_symlink() {
Ok(Stat::Link(Link(path_for_stat)))
} else if file_type.is_file() {
let is_executable = metadata.permissions().mode() & 0o100 == 0o100;
let is_executable = compute_metadata()?.permissions().mode() & 0o100 == 0o100;
Ok(Stat::File(File {
path: path_for_stat,
is_executable: is_executable,
Expand All @@ -722,12 +786,14 @@ impl PosixFS {
}

pub fn stat_sync(&self, relative_path: PathBuf) -> Result<Stat, io::Error> {
PosixFS::stat_path_sync(relative_path, &self.root.0)
}

fn stat_path_sync(relative_path: PathBuf, root: &Path) -> Result<Stat, io::Error> {
let metadata = fs::symlink_metadata(root.join(&relative_path))?;
PosixFS::stat_internal(relative_path, root, &metadata)
let abs_path = self.root.0.join(&relative_path);
let metadata = match self.symlink_behavior {
SymlinkBehavior::Aware => fs::symlink_metadata(abs_path)?,
SymlinkBehavior::Oblivious => fs::metadata(abs_path)?,
};
PosixFS::stat_internal(&self.root.0, relative_path, metadata.file_type(), || {
Ok(metadata)
})
}
}

Expand Down Expand Up @@ -759,13 +825,11 @@ impl PathStatGetter<io::Error> for Arc<PosixFS> {
paths
.into_iter()
.map(|path| {
let root = self.root.0.clone();
let fs = self.clone();
let fs2 = self.clone();
self
.executor
.spawn_on_io_pool(futures::future::lazy(move || {
PosixFS::stat_path_sync(path, &root)
}))
.spawn_on_io_pool(futures::future::lazy(move || fs2.stat_sync(path)))
.then(|stat_result| match stat_result {
Ok(v) => Ok(Some(v)),
Err(err) => match err.kind() {
Expand Down Expand Up @@ -867,7 +931,7 @@ mod posixfs_test {

use super::{
Dir, DirectoryListing, File, GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PathStat,
PathStatGetter, PosixFS, Stat, StrictGlobMatching, VFS,
PathStatGetter, PosixFS, Stat, StrictGlobMatching, SymlinkBehavior, VFS,
};
use boxfuture::{BoxFuture, Boxable};
use futures::future::{self, Future};
Expand Down Expand Up @@ -982,6 +1046,25 @@ mod posixfs_test {
)
}

#[test]
fn stat_symlink_oblivious() {
let dir = tempfile::TempDir::new().unwrap();
let posix_fs = new_posixfs_symlink_oblivious(&dir.path());
let path = PathBuf::from("marmosets");
make_file(&dir.path().join(&path), &[], 0o600);

let link_path = PathBuf::from("remarkably_similar_marmoset");
std::os::unix::fs::symlink(&dir.path().join(path), dir.path().join(&link_path)).unwrap();
// Symlink oblivious stat will give us the destination type.
assert_eq!(
posix_fs.stat_sync(link_path.clone()).unwrap(),
super::Stat::File(File {
path: link_path,
is_executable: false,
})
)
}

#[test]
fn stat_other() {
new_posixfs("/dev")
Expand Down Expand Up @@ -1014,7 +1097,6 @@ mod posixfs_test {
#[test]
fn scandir() {
let dir = tempfile::TempDir::new().unwrap();
let posix_fs = new_posixfs(&dir.path());
let path = PathBuf::from("enclosure");
std::fs::create_dir(dir.path().join(&path)).unwrap();

Expand Down Expand Up @@ -1043,8 +1125,34 @@ mod posixfs_test {

let mut runtime = tokio::runtime::Runtime::new().unwrap();

// Symlink aware.
assert_eq!(
runtime.block_on(posix_fs.scandir(Dir(path))).unwrap(),
runtime
.block_on(new_posixfs(&dir.path()).scandir(Dir(path.clone())))
.unwrap(),
DirectoryListing(vec![
Stat::File(File {
path: a_marmoset.clone(),
is_executable: false,
}),
Stat::File(File {
path: feed.clone(),
is_executable: true,
}),
Stat::Dir(Dir(hammock.clone())),
Stat::Link(Link(remarkably_similar_marmoset.clone())),
Stat::File(File {
path: sneaky_marmoset.clone(),
is_executable: false,
}),
])
);

// Symlink oblivious.
assert_eq!(
runtime
.block_on(new_posixfs_symlink_oblivious(&dir.path()).scandir(Dir(path)))
.unwrap(),
DirectoryListing(vec![
Stat::File(File {
path: a_marmoset,
Expand All @@ -1055,7 +1163,10 @@ mod posixfs_test {
is_executable: true,
}),
Stat::Dir(Dir(hammock)),
Stat::Link(Link(remarkably_similar_marmoset)),
Stat::File(File {
path: remarkably_similar_marmoset,
is_executable: false,
}),
Stat::File(File {
path: sneaky_marmoset,
is_executable: false,
Expand Down Expand Up @@ -1205,6 +1316,16 @@ mod posixfs_test {
PosixFS::new(dir.as_ref(), &[], task_executor::Executor::new()).unwrap()
}

fn new_posixfs_symlink_oblivious<P: AsRef<Path>>(dir: P) -> PosixFS {
PosixFS::new_with_symlink_behavior(
dir.as_ref(),
&[],
task_executor::Executor::new(),
SymlinkBehavior::Oblivious,
)
.unwrap()
}

///
/// An in-memory implementation of VFS, useful for precisely reproducing glob matching behavior for
/// a set of file paths.
Expand Down
14 changes: 10 additions & 4 deletions src/rust/engine/fs/store/src/snapshot.rs
Expand Up @@ -4,7 +4,7 @@
use crate::Store;
use bazel_protos;
use boxfuture::{try_future, BoxFuture, Boxable};
use fs::{Dir, File, GlobMatching, PathGlobs, PathStat, PosixFS};
use fs::{Dir, File, GlobMatching, PathGlobs, PathStat, PosixFS, SymlinkBehavior};
use futures::future::{self, join_all};
use futures::Future;
use hashing::{Digest, EMPTY_DIGEST};
Expand Down Expand Up @@ -459,7 +459,8 @@ impl Snapshot {
/// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place
/// the resultant Snapshot into the store and return it. This is important, because we're reading
/// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch
/// them for changes.
/// them for changes. Because we're not caching things, we can safely configure the virtual
/// filesystem to be symlink-oblivious.
///
/// If the `digest_hint` is given, first attempt to load the Snapshot using that Digest, and only
/// fall back to actually walking the filesystem if we don't have it (either due to garbage
Expand All @@ -478,11 +479,16 @@ impl Snapshot {
future::result(digest_hint.ok_or_else(|| "No digest hint provided.".to_string()))
.and_then(move |digest| Snapshot::from_digest(store, digest))
.or_else(|_| {
let posix_fs = Arc::new(try_future!(PosixFS::new(root_path, &[], executor)));
let posix_fs = Arc::new(try_future!(PosixFS::new_with_symlink_behavior(
root_path,
&[],
executor,
SymlinkBehavior::Oblivious
)));

posix_fs
.expand(path_globs)
.map_err(|err| format!("Error expanding globs: {:?}", err))
.map_err(|err| format!("Error expanding globs: {}", err))
.and_then(|path_stats| {
Snapshot::from_path_stats(
store2.clone(),
Expand Down

0 comments on commit ed2100d

Please sign in to comment.