Validate the names of directory children, and normalize output directory/file names #10850

Merged: 3 commits, Sep 24, 2020
Changes from 2 commits
2 changes: 1 addition & 1 deletion src/rust/engine/Cargo.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion src/rust/engine/fs/Cargo.toml
@@ -8,7 +8,6 @@ publish = false
[dependencies]
async-trait = "0.1"
bytes = "0.4.5"
derivative = "2.1.1"
dirs = "1"
futures = "0.3"
glob = "0.2.11"
21 changes: 13 additions & 8 deletions src/rust/engine/fs/src/lib.rs
@@ -33,16 +33,14 @@ mod glob_matching_tests;
#[cfg(test)]
mod posixfs_tests;

#[macro_use]
extern crate derivative;

pub use crate::glob_matching::{
ExpandablePathGlobs, GlobMatching, PathGlob, PreparedPathGlobs, DOUBLE_STAR_GLOB,
SINGLE_STAR_GLOB,
};

use std::cmp::min;
use std::io::{self, Read};
use std::ops::Deref;
use std::os::unix::fs::PermissionsExt;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
@@ -89,8 +87,7 @@ pub fn default_config_path() -> PathBuf {
config_path.join("pants")
}

#[derive(Derivative, Clone, Debug, Eq)]
#[derivative(PartialEq, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct RelativePath(PathBuf);

impl RelativePath {
@@ -133,15 +130,23 @@ impl RelativePath {
}
}

impl Deref for RelativePath {
type Target = PathBuf;

fn deref(&self) -> &PathBuf {
&self.0
}
}

impl AsRef<Path> for RelativePath {
fn as_ref(&self) -> &Path {
self.0.as_path()
}
}

impl Into<PathBuf> for RelativePath {
fn into(self) -> PathBuf {
self.0
impl From<RelativePath> for PathBuf {
fn from(p: RelativePath) -> Self {
p.0
}
}

5 changes: 5 additions & 0 deletions src/rust/engine/fs/src/tests.rs
@@ -16,3 +16,8 @@ fn relative_path_err() {
assert!(RelativePath::new("../a").is_err());
assert!(RelativePath::new("/a").is_err());
}

#[test]
fn relative_path_normalize() {
assert_eq!(Some("a"), RelativePath::new("a/").unwrap().to_str());
}
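
Taken together, the standard derives, the Deref impl, and the From impl make RelativePath a thin, validated newtype around PathBuf: construction rejects absolute and `..`-escaping paths (per relative_path_err) and normalizes trailing slashes (per relative_path_normalize above). A minimal usage sketch, assuming only that fs::RelativePath is in scope; everything else follows from the impls shown above:

use std::collections::BTreeSet;
use std::path::PathBuf;

fn sketch() {
  // Construction validates and normalizes: "/a" and "../a" are rejected, "a/" becomes "a".
  let rp = fs::RelativePath::new("a/").unwrap();
  assert_eq!(Some("a"), rp.to_str()); // to_str() resolves through Deref<Target = PathBuf>

  // From<RelativePath> for PathBuf replaces the old Into impl; `.into()` still works.
  let pb: PathBuf = rp.into();
  assert_eq!(PathBuf::from("a"), pb);

  // The new Ord/PartialOrd derives are what permit BTreeSet<RelativePath>,
  // used below for Process::output_files and output_directories.
  let mut outputs: BTreeSet<fs::RelativePath> = BTreeSet::new();
  outputs.insert(fs::RelativePath::new("roland").unwrap());
}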
9 changes: 6 additions & 3 deletions src/rust/engine/fs/store/src/lib.rs
@@ -401,8 +401,8 @@ impl Store {
.load_bytes_with(
EntryType::Directory,
digest,
// Trust that locally stored values were canonical when they were written into the CAS,
// don't bother to check this, as it's slightly expensive.
// Trust that locally stored values were canonical when they were written into the CAS
// and only verify in debug mode, as it's slightly expensive.
move |bytes: &[u8]| {
let mut directory = remexec::Directory::new();
directory.merge_from_bytes(&bytes).map_err(|e| {
@@ -411,6 +411,9 @@
digest, e
)
})?;
if cfg!(debug_assertions) {
bazel_protos::verify_directory_canonical(digest, &directory).unwrap();
}
Ok(directory)
},
// Eagerly verify that CAS-returned Directories are canonical, so that we don't write them
@@ -423,7 +426,7 @@
digest, e
)
})?;
bazel_protos::verify_directory_canonical(&directory)?;
bazel_protos::verify_directory_canonical(digest, &directory)?;
Ok(directory)
},
)
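
The cfg!(debug_assertions) guard means locally stored Directories are re-verified only in debug builds, while remotely fetched ones (the second closure) are always verified eagerly. A sketch of the same debug-only-check pattern, with illustrative names that are not from this PR:

fn parse_trusted<T>(
  bytes: &[u8],
  parse: impl Fn(&[u8]) -> Result<T, String>,
  verify: impl Fn(&T) -> Result<(), String>,
) -> Result<T, String> {
  let value = parse(bytes)?;
  // cfg!(debug_assertions) is a compile-time constant, so release builds skip
  // (and can optimize away) re-verification of already-trusted local data.
  if cfg!(debug_assertions) {
    verify(&value).expect("locally stored value was not canonical");
  }
  Ok(value)
}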
34 changes: 23 additions & 11 deletions src/rust/engine/process_execution/bazel_protos/src/verification.rs
@@ -1,16 +1,22 @@
use crate::remote_execution;

use hashing::Digest;
use std::collections::HashSet;

pub fn verify_directory_canonical(directory: &remote_execution::Directory) -> Result<(), String> {
pub fn verify_directory_canonical(
digest: Digest,
directory: &remote_execution::Directory,
) -> Result<(), String> {
verify_no_unknown_fields(directory)?;
verify_nodes(directory.get_files(), |n| n.get_name(), |n| n.get_digest())?;
verify_nodes(directory.get_files(), |n| n.get_name(), |n| n.get_digest())
.map_err(|e| format!("Invalid file in {:?}: {}", digest, e))?;
verify_nodes(
directory.get_directories(),
|n| n.get_name(),
|n| n.get_digest(),
)?;
let file_names: HashSet<&str> = directory
)
.map_err(|e| format!("Invalid directory in {:?}: {}", digest, e))?;
let child_names: HashSet<&str> = directory
.get_files()
.iter()
.map(remote_execution::FileNode::get_name)
@@ -21,10 +27,10 @@ pub fn verify_directory_canonical(directory: &remote_execution::Directory) -> Re
.map(remote_execution::DirectoryNode::get_name),
)
.collect();
if file_names.len() != directory.get_files().len() + directory.get_directories().len() {
if child_names.len() != directory.get_files().len() + directory.get_directories().len() {
return Err(format!(
"Children must be unique, but a path was both a file and a directory: {:?}",
directory
"Child paths must be unique, but a child path of {:?} was both a file and a directory: {:?}",
digest, directory
));
}
Ok(())
@@ -44,18 +50,24 @@ where
for node in nodes {
verify_no_unknown_fields(node)?;
verify_no_unknown_fields(get_digest(node))?;
if get_name(node).contains('/') {
let name = get_name(node);
if name.is_empty() {
return Err(format!(
"A child name must not be empty, but {:?} had an empty name.",
get_digest(node),
));
} else if name.contains('/') {
return Err(format!(
"All children must have one path segment, but found {}",
get_name(node)
name
));
}
if let Some(p) = prev {
if get_name(node) <= get_name(p) {
if name <= get_name(p) {
return Err(format!(
"Children must be sorted and unique, but {} was before {}",
get_name(p),
get_name(node)
name,
));
}
}
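
The uniqueness check above is a counting argument: verify_nodes has already enforced that each node list is strictly sorted (hence internally duplicate-free), so the combined HashSet can only be smaller than files + directories if some name appears in both lists. A self-contained sketch of that argument (illustrative, not the PR's code):

use std::collections::HashSet;

/// True iff no name appears in both slices, assuming each slice is already
/// internally unique (as the strict-sort check in verify_nodes guarantees).
fn children_unique(files: &[&str], dirs: &[&str]) -> bool {
  let combined: HashSet<&str> = files.iter().chain(dirs.iter()).copied().collect();
  combined.len() == files.len() + dirs.len()
}

fn main() {
  assert!(children_unique(&["a", "b"], &["c"]));
  // "b" is both a file and a directory, so the combined set loses an entry.
  assert!(!children_unique(&["a", "b"], &["b"]));
}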
@@ -100,6 +100,28 @@ fn unknown_field_in_file_node() {
);
}

#[test]
fn empty_child_name() {
let mut directory = Directory::new();
directory.mut_directories().push({
let mut dir = DirectoryNode::new();
dir.set_name("".to_owned());
dir.set_digest({
let mut digest = Digest::new();
digest.set_size_bytes(DIRECTORY_SIZE);
digest.set_hash(DIRECTORY_HASH.to_owned());
digest
});
dir
});

// The digest argument is only used in error messages, so any digest suffices here.
let error =
verify_directory_canonical(hashing::EMPTY_DIGEST, &directory).expect_err("Want error");
assert!(
error.contains("A child name must not be empty"),
format!("Bad error message: {}", error)
);
}

#[test]
fn multiple_path_segments_in_directory() {
let mut directory = Directory::new();
3 changes: 2 additions & 1 deletion src/rust/engine/process_execution/src/cache_tests.rs
@@ -12,6 +12,7 @@ use sharded_lmdb::{ShardedLmdb, DEFAULT_LEASE_TIME};
use store::Store;
use tempfile::TempDir;
use testutil::data::TestData;
use testutil::relative_paths;
use tokio::runtime::Handle;

struct RoundtripResults {
@@ -85,7 +86,7 @@ fn create_script(script_exit_code: i8) -> (Process, PathBuf, TempDir) {
testutil::path::find_bash(),
format!("{}", script_path.display()),
])
.output_files(vec![PathBuf::from("roland")].into_iter().collect());
.output_files(relative_paths(&["roland"]).collect());

(process, script_path, script_dir)
}
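
The relative_paths helper imported above is not part of this diff. A plausible shape for it, assuming it simply lifts string literals into validated RelativePaths (hypothetical sketch; the real helper lives in testutil and may differ):

use fs::RelativePath;

// Hypothetical: the actual testutil::relative_paths may differ in signature.
pub fn relative_paths<'a>(paths: &'a [&str]) -> impl Iterator<Item = RelativePath> + 'a {
  paths.iter().map(|p| RelativePath::new(p).expect("valid relative path"))
}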
8 changes: 4 additions & 4 deletions src/rust/engine/process_execution/src/lib.rs
@@ -180,9 +180,9 @@ pub struct Process {

pub input_files: hashing::Digest,

pub output_files: BTreeSet<PathBuf>,
pub output_files: BTreeSet<RelativePath>,

pub output_directories: BTreeSet<PathBuf>,
pub output_directories: BTreeSet<RelativePath>,

pub timeout: Option<std::time::Duration>,

@@ -267,15 +267,15 @@ impl Process {
///
/// Replaces the output files for this process.
///
pub fn output_files(mut self, output_files: BTreeSet<PathBuf>) -> Process {
pub fn output_files(mut self, output_files: BTreeSet<RelativePath>) -> Process {
self.output_files = output_files;
self
}

///
/// Replaces the output directories for this process.
///
pub fn output_directories(mut self, output_directories: BTreeSet<PathBuf>) -> Process {
pub fn output_directories(mut self, output_directories: BTreeSet<RelativePath>) -> Process {
self.output_directories = output_directories;
self
}
15 changes: 10 additions & 5 deletions src/rust/engine/process_execution/src/local.rs
@@ -75,18 +75,22 @@ impl CommandRunner {
fn construct_output_snapshot(
store: Store,
posix_fs: Arc<fs::PosixFS>,
output_file_paths: BTreeSet<PathBuf>,
output_dir_paths: BTreeSet<PathBuf>,
output_file_paths: BTreeSet<RelativePath>,
output_dir_paths: BTreeSet<RelativePath>,
) -> BoxFuture<Snapshot, String> {
let output_paths: Result<Vec<String>, String> = output_dir_paths
.into_iter()
.flat_map(|p| {
let mut dir_glob = p.into_os_string();
let mut dir_glob = PathBuf::from(p).into_os_string();
let dir = dir_glob.clone();
dir_glob.push("/**");
vec![dir, dir_glob]
})
.chain(output_file_paths.into_iter().map(PathBuf::into_os_string))
.chain(
output_file_paths
.into_iter()
.map(|p| PathBuf::from(p).into_os_string()),
)
.map(|s| {
s.into_string()
.map_err(|e| format!("Error stringifying output paths: {:?}", e))
@@ -451,7 +455,7 @@ pub trait CapturedWorkdir {
let parent_paths_to_create: HashSet<_> = output_file_paths
.iter()
.chain(output_dir_paths.iter())
.chain(named_cache_symlinks.iter().map(|s| &s.dst))
.map(|relative_path| relative_path.as_ref())
.chain(named_cache_symlinks.iter().map(|s| s.dst.as_path()))
.filter_map(|rel_path| rel_path.parent())
.map(|parent_relpath| workdir_path2.join(parent_relpath))
.collect();
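
In construct_output_snapshot above, each output directory now round-trips through PathBuf and expands to two globs, the directory itself plus everything beneath it, so both empty and populated output directories are captured. A standalone sketch of that expansion (illustrative names, not the PR's code):

use std::path::PathBuf;

fn dir_globs(dir: &str) -> Vec<String> {
  let mut recursive = PathBuf::from(dir).into_os_string();
  recursive.push("/**"); // OsString::push appends without inserting a separator
  vec![
    dir.to_owned(),
    recursive.into_string().expect("UTF-8 path"),
  ]
}

fn main() {
  assert_eq!(vec!["out".to_owned(), "out/**".to_owned()], dir_globs("out"));
}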