Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port owners-discovery PathGlob matching to rust #7299

Merged
merged 1 commit into from Mar 1, 2019
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

@@ -55,7 +55,6 @@ python_library(
':selectors',
'src/python/pants/base:project_tree',
'src/python/pants/option',
'src/python/pants/source',
'src/python/pants/util:meta',
'src/python/pants/util:objects',
]
@@ -674,6 +674,12 @@ def decompress_tarball(self, tarfile_path, dest_dir):
result = self.lib.decompress_tarball(tarfile_path, dest_dir)
return self.context.raise_or_return(result)

def match_path_globs(self, path_globs, paths):
  """Run the native `match_path_globs` function over the given globs and paths.

  :param path_globs: The glob specification; converted with `self.context.to_value` before being
    handed to the native library.
  :param paths: An iterable of paths; materialized as a tuple and packed with
    `self.context.utf8_buf_buf`.
  :returns: The native result after it has passed through `self.context.raise_or_return`.
  """
  globs_value = self.context.to_value(path_globs)
  encoded_paths = self.context.utf8_buf_buf(tuple(paths))
  native_result = self.lib.match_path_globs(globs_value, encoded_paths)
  return self.context.raise_or_return(native_result)

def new_tasks(self):
return self.gc(self.lib.tasks_create(), self.lib.tasks_destroy)

@@ -10,6 +10,8 @@ python_library(
'src/python/pants/base:build_environment',
'src/python/pants/base:payload_field',
'src/python/pants/base:project_tree',
'src/python/pants/engine:fs',
'src/python/pants/engine:native',
'src/python/pants/option',
'src/python/pants/subsystem',
'src/python/pants/util:dirutil',
@@ -4,69 +4,20 @@

from __future__ import absolute_import, division, print_function, unicode_literals

import re


def glob_to_regex(pattern):
  """Translate a glob pattern into an anchored regular expression string.

  TODO: Replace with implementation in `fs.rs`. See https://github.com/pantsbuild/pants/issues/6795.

  Only `.` and `$` are escaped; every other character of the pattern is copied into the regex
  unchanged.

  :param string pattern: The glob pattern. "**" matches 0 or more dirs recursively.
                         "*" only matches patterns in a single dir.
  :returns: A regex string that matches same paths as the input glob does.
  :raises ValueError: If a path component contains "**" but is not exactly "**".
  """
  escaped = pattern.strip('/').replace('.', '[.]').replace('$', '[$]')
  pieces = ['^']
  after_doublestar = False

  for index, part in enumerate(escaped.split('/')):
    # The first component gets a leading slash only for absolute patterns. Later components are
    # slash-separated unless they follow a "**", whose regex already ends with a slash.
    if index == 0:
      needs_separator = pattern.startswith('/')
    else:
      needs_separator = not after_doublestar
    if needs_separator:
      pieces.append('/')

    if '**' not in part:
      pieces.append(part.replace('*', '[^/]*'))
      after_doublestar = False
      continue

    if part != '**':
      raise ValueError('Invalid usage of "**", use "*" instead.')

    # Consecutive "**" components collapse into a single recursive-directory group.
    if not after_doublestar:
      pieces.append('(([^/]+/)*)')
      after_doublestar = True

  # A trailing "**" also matches any final path component.
  if after_doublestar:
    pieces.append('[^/]*')

  pieces.append('$')
  return ''.join(pieces)
from pants.engine.fs import PathGlobs
from pants.engine.native import Native


# NOTE(review): this span appears to interleave two versions of `globs_matches` from a diff view:
# a regex-based body (everything through `return False`) followed by a two-line body that
# delegates to the native engine. Confirm which one is live before relying on either.
def globs_matches(paths, patterns, exclude_patterns):
# Regex-based version: returns True as soon as some path matches an include pattern and is not
# excluded. Exclude patterns are compiled on the first call to `excluded` and cached on the
# function object itself.
def excluded(path):
if excluded.regexes is None:
excluded.regexes = [re.compile(glob_to_regex(ex)) for ex in exclude_patterns]
return any(ex.match(path) for ex in excluded.regexes)
excluded.regexes = None
# Each include pattern is compiled only when the loop reaches it.
for pattern in patterns:
regex = re.compile(glob_to_regex(pattern))
for path in paths:
if regex.match(path) and not excluded(path):
return True
return False
# Native version: wraps the include/exclude patterns in a PathGlobs and asks a Native instance to
# match them against `paths`.
path_globs = PathGlobs(include=patterns, exclude=exclude_patterns)
return Native().match_path_globs(path_globs, paths)

This comment has been minimized.

Copy link
@cosmicexplorer

cosmicexplorer Mar 1, 2019

Contributor

I didn't realize you could just construct a Native() in any random python code and it's giving me ideas!

This comment has been minimized.

Copy link
@stuhood

stuhood Mar 1, 2019

Author Member

It got much easier recentlyish: #6979



def matches_filespec(path, spec):
  """Return the result of `any_matches_filespec` for the single given path.

  :param path: The path to test.
  :param spec: The filespec, passed through unchanged.
  """
  single_path = [path]
  return any_matches_filespec(single_path, spec)


def any_matches_filespec(paths, spec):
if not paths or not spec:
return False
exclude_patterns = []
for exclude_spec in spec.get('exclude', []):
exclude_patterns.extend(exclude_spec.get('globs', []))
@@ -44,9 +44,10 @@ pub use crate::pool::ResettablePool;
pub use serverset::BackoffConfig;

use std::cmp::min;
use std::collections::HashMap;
use std::io::{self, Read};
use std::os::unix::fs::PermissionsExt;
use std::path::{Component, Path, PathBuf};
use std::path::{Component, Components, Path, PathBuf};
use std::sync::Arc;
use std::{fmt, fs};

@@ -72,6 +73,17 @@ impl Stat {
&Stat::Link(Link(ref p)) => p.as_path(),
}
}

///
/// Wraps `path` in a `Dir` and returns it as the `Stat::Dir` variant.
///
fn dir(path: PathBuf) -> Stat {
  let wrapped = Dir(path);
  Stat::Dir(wrapped)
}

///
/// Builds a `File` from `path` and `is_executable` and returns it as the `Stat::File` variant.
///
fn file(path: PathBuf, is_executable: bool) -> Stat {
  let entry = File {
    path,
    is_executable,
  };
  Stat::File(entry)
}
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
@@ -764,6 +776,81 @@ impl PathStatGetter<io::Error> for Arc<PosixFS> {
}
}

///
/// An in-memory implementation of VFS, useful for precisely reproducing glob matching behavior for
/// a set of file paths.
///
/// Instances are built from a list of paths by `MemFS::new`; only directory listings are stored,
/// never file contents.
///
pub struct MemFS {
// Maps each directory to the listing of the entries directly beneath it.
contents: HashMap<Dir, Arc<DirectoryListing>>,
}

impl MemFS {
  ///
  /// Creates a filesystem containing exactly the given paths.
  ///
  /// The final component of each path is recorded as a non-executable file, and every preceding
  /// component as a directory. Each directory's listing is sorted by path, and repeated entries
  /// (such as a parent directory shared by several of the given paths) are collapsed into one.
  ///
  pub fn new(paths: Vec<PathBuf>) -> MemFS {
    let mut unordered_contents = HashMap::new();
    let empty_path = PathBuf::new();
    for path in paths {
      Self::add_path(&mut unordered_contents, &empty_path, path.components());
    }
    let contents = unordered_contents
      .into_iter()
      .map(|(dir, mut stats)| {
        stats.sort_by(|a, b| a.path().cmp(b.path()));
        // `add_path` records a directory once for every input path that passes through it, so
        // after sorting, drop adjacent entries that have the same variant and the same path.
        // Comparing the variant and path directly avoids requiring `Stat: PartialEq`.
        stats.dedup_by(|a, b| {
          std::mem::discriminant(&*a) == std::mem::discriminant(&*b) && a.path() == b.path()
        });
        (dir, Arc::new(DirectoryListing(stats)))
      })
      .collect();
    MemFS { contents }
  }

  ///
  /// Records the path formed by `path_so_far` plus the components left in `remainder`, consuming
  /// one component per recursive call.
  ///
  /// Returns true if `remainder` held at least one component, which tells the caller that
  /// `path_so_far` has a child and is therefore a directory rather than a file.
  ///
  fn add_path(
    contents: &mut HashMap<Dir, Vec<Stat>>,
    path_so_far: &Path,
    mut remainder: Components,
  ) -> bool {
    if let Some(component) = remainder.next() {
      // The component represents a directory if it has child components: otherwise, a file.
      let path = path_so_far.join(component);
      let stat = if Self::add_path(contents, &path, remainder) {
        Stat::dir(path)
      } else {
        Stat::file(path, false)
      };
      contents
        .entry(Dir(path_so_far.to_owned()))
        .or_insert_with(Vec::new)
        .push(stat);
      true
    } else {
      false
    }
  }
}

///
/// Serves the VFS operations from the listings held in memory by a MemFS, using String as the
/// error type.
///
impl VFS<String> for Arc<MemFS> {
  ///
  /// Always fails: the creation of a static filesystem does not allow for Links.
  ///
  fn read_link(&self, link: &Link) -> BoxFuture<PathBuf, String> {
    let message = format!("{:?} does not exist within this filesystem.", link);
    future::err(message).to_boxed()
  }

  ///
  /// Returns the stored listing for `dir`, or an error if `dir` has no entry.
  ///
  fn scandir(&self, dir: Dir) -> BoxFuture<Arc<DirectoryListing>, String> {
    let listing = match self.contents.get(&dir) {
      Some(listing) => Ok(Arc::clone(listing)),
      None => Err(format!("{:?} does not exist within this filesystem.", dir)),
    };
    future::result(listing).to_boxed()
  }

  ///
  /// Nothing is ever ignored in this filesystem.
  ///
  fn is_ignored(&self, _stat: &Stat) -> bool {
    false
  }

  ///
  /// Errors are plain Strings, so the message is copied as-is.
  ///
  fn mk_error(msg: &str) -> String {
    String::from(msg)
  }
}

///
/// A context for filesystem operations parameterized on an error type 'E'.
///
@@ -829,7 +916,8 @@ mod posixfs_test {
use testutil;

use super::{
Dir, DirectoryListing, File, Link, PathStat, PathStatGetter, PosixFS, ResettablePool, Stat,
Dir, DirectoryListing, File, GlobExpansionConjunction, GlobMatching, Link, MemFS, PathGlobs,
PathStat, PathStatGetter, PosixFS, ResettablePool, Stat, StrictGlobMatching,
};
use futures::Future;
use std;
@@ -1114,6 +1202,35 @@ mod posixfs_test {
assert_eq!(v, path_stats);
}

#[test]
fn memfs_expand_basic() {
  // Two input files: the longer path implies a nested directory, `some/other`.
  let file_path = PathBuf::from("some/file");
  let nested_dir = PathBuf::from("some/other");
  let paths = vec![file_path.clone(), nested_dir.join("file")];
  let memfs = Arc::new(MemFS::new(paths));

  let globs = PathGlobs::create(
    &["some/*".into()],
    &[],
    StrictGlobMatching::Ignore,
    GlobExpansionConjunction::AnyMatch,
  )
  .unwrap();

  // The single-level wildcard should yield the file and the nested directory.
  let expected = vec![
    PathStat::file(
      file_path.clone(),
      File {
        path: file_path,
        is_executable: false,
      },
    ),
    PathStat::dir(nested_dir.clone(), Dir(nested_dir)),
  ];
  assert_eq!(memfs.expand(globs).wait().unwrap(), expected);
}

fn assert_only_file_is_executable(path: &Path, want_is_executable: bool) {
let fs = new_posixfs(path);
let stats = fs.scandir(&Dir(PathBuf::from("."))).wait().unwrap();
@@ -54,6 +54,7 @@ use std::mem;
use std::os::raw;
use std::panic;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;

use crate::context::Core;
@@ -70,6 +71,7 @@ use crate::rule_graph::{GraphMaker, RuleGraph};
use crate::scheduler::{ExecutionRequest, RootResult, Scheduler, Session};
use crate::tasks::Tasks;
use crate::types::Types;
use fs::{GlobMatching, MemFS, PathStat};
use futures::Future;
use hashing::Digest;
use log::error;
@@ -653,6 +655,36 @@ pub extern "C" fn lease_files_in_graph(scheduler_ptr: *mut Scheduler) {
});
}

///
/// Reports whether the given PathGlobs match at least one file among the given paths.
///
/// The paths are loaded into a MemFS and the globs are expanded against it. A matched directory
/// does not count: only a `PathStat::File` in the expansion produces `true`. The boolean is
/// passed through `externs::store_bool`, and a failure to lift the globs or to expand them is
/// converted into the returned PyResult instead.
///
#[no_mangle]
pub extern "C" fn match_path_globs(path_globs: Handle, paths_buf: BufferBuffer) -> PyResult {
  let lifted_globs = match nodes::Snapshot::lift_path_globs(&path_globs.into()) {
    Err(msg) => {
      let failure: Result<(), _> = Err(msg);
      return failure.into();
    }
    Ok(lifted) => lifted,
  };

  let paths: Vec<PathBuf> = paths_buf
    .to_os_strings()
    .into_iter()
    .map(PathBuf::from)
    .collect();
  let static_fs = Arc::new(MemFS::new(paths));

  let any_file_matched = static_fs.expand(lifted_globs).wait().map(|path_stats| {
    let found_file = path_stats.iter().any(|path_stat| match path_stat {
      PathStat::File { .. } => true,
      PathStat::Dir { .. } => false,
    });
    externs::store_bool(found_file)
  });
  any_file_matched.into()
}

#[no_mangle]
pub extern "C" fn capture_snapshots(
scheduler_ptr: *mut Scheduler,
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.