Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rule graph nondeterminism due to undetected ambiguity #7379

Merged
merged 6 commits into from Mar 14, 2019
@@ -3,6 +3,7 @@

use std::ffi::OsStr;
use std::ffi::OsString;
use std::fmt;
use std::mem;
use std::os::raw;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
@@ -486,6 +487,17 @@ pub struct Get {
pub subject: Key,
}

impl fmt::Display for Get {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
write!(
f,
"Get({}, {})",
type_to_str(self.product),
key_to_str(&self.subject)
)
}
}

pub enum GeneratorResponse {
Break(Value),
Get(Get),
@@ -119,6 +119,16 @@ pub enum Entry {
Singleton(Key, TypeId),
}

impl Entry {
fn params(&self) -> Vec<TypeId> {
match self {
&Entry::WithDeps(ref e) => e.params().iter().cloned().collect(),
&Entry::Param(ref type_id) => vec![*type_id],
&Entry::Singleton { .. } => vec![],
}
}
}

#[derive(Eq, Hash, PartialEq, Clone, Debug)]
pub struct RootEntry {
params: ParamTypes,
@@ -384,16 +394,16 @@ impl<'t> GraphMaker<'t> {
let dependency_keys = entry.dependency_keys();

for select_key in dependency_keys {
let (params, product) = match &select_key {
&SelectKey::JustSelect(ref s) => (entry.params().clone(), s.product),
let (params, product, provided_param) = match &select_key {
&SelectKey::JustSelect(ref s) => (entry.params().clone(), s.product, None),
&SelectKey::JustGet(ref g) => {
// Unlike Selects, Gets introduce new parameter values into a subgraph.
let get_params = {
let mut p = entry.params().clone();
p.insert(g.subject);
p
};
(get_params, g.product)
(get_params, g.product, Some(g.subject))
}
};

@@ -415,6 +425,15 @@ impl<'t> GraphMaker<'t> {
fulfillable_candidates.push(
simplified_entries
.into_iter()
.filter(|e| {
// Only entries that actually consume a provided (Get) parameter are eligible
// for consideration.
if let Some(pp) = provided_param {
e.params().contains(&pp)
} else {
true
}
})
.map(Entry::WithDeps)
.collect::<Vec<_>>(),
);
@@ -484,9 +503,6 @@ impl<'t> GraphMaker<'t> {
}

// No dependencies were completely unfulfillable (although some may have been cyclic).
//
// If this is an Aggregration, flatten the candidates by duplicating the SelectKey to treat
// each concrete rule as a group of candidates. Otherwise, flatten each group of candidates.
let flattened_fulfillable_candidates_by_key = fulfillable_candidates_by_key
.into_iter()
.map(|(k, candidate_group)| (k, Itertools::flatten(candidate_group.into_iter()).collect()))
@@ -558,8 +574,17 @@ impl<'t> GraphMaker<'t> {
let params_powerset: Vec<Vec<TypeId>> = {
let mut all_used_params = BTreeSet::new();
for (key, inputs) in deps {
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};
for input in inputs {
all_used_params.extend(Self::used_params(key, input));
all_used_params.extend(
input
.params()
.into_iter()
.filter(|p| Some(p) != provided_param),
);
}
}
// Compute the powerset ordered by ascending set size.
@@ -620,12 +645,17 @@ impl<'t> GraphMaker<'t> {
) -> Result<Option<Vec<(&'a SelectKey, &'a Entry)>>, Diagnostic> {
let mut combination = Vec::new();
for (key, input_entries) in deps {
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};
let satisfiable_entries = input_entries
.iter()
.filter(|input_entry| {
Self::used_params(key, input_entry)
input_entry
.params()
.iter()
.all(|p| available_params.contains(p))
.all(|p| available_params.contains(p) || Some(p) == provided_param)
})
.collect::<Vec<_>>();

@@ -668,51 +698,25 @@ impl<'t> GraphMaker<'t> {
}) {
vec![*param]
} else {
// Group by the simplified version of each rule: if exactly one, we're finished. We prefer
// the non-ambiguous rule with the smallest set of Params, as that minimizes Node identities
// in the graph and biases toward receiving values from dependencies (which do not affect our
// identity) rather than dependents.
let mut rules_by_kind: HashMap<EntryWithDeps, (usize, &Entry)> = HashMap::new();
for satisfiable_entry in &satisfiable_entries {
// We prefer the non-ambiguous rule with the smallest set of Params, as that minimizes Node
// identities in the graph and biases toward receiving values from dependencies (which do not

This comment has been minimized.

Copy link
@ity

ity Mar 14, 2019

Contributor

s/identifies/selections?

This comment has been minimized.

Copy link
@stuhood

stuhood Mar 14, 2019

Author Member

This is "identities", which is accurate here... we refer to the Params that a @rule consumes as part of its "identity", because the same @rule can be used with multiple sets of Params, which might each (as seen in this ticket!) affect their behaviour.

// affect our identity) rather than dependents.
let mut minimum_param_set_size = ::std::usize::MAX;
let mut rules = Vec::new();
for satisfiable_entry in satisfiable_entries {
if let &Entry::WithDeps(ref wd) = satisfiable_entry {
rules_by_kind
.entry(wd.simplified(BTreeSet::new()))
.and_modify(|e| {
if e.0 > wd.params().len() {
*e = (wd.params().len(), satisfiable_entry);
}
})
.or_insert((wd.params().len(), satisfiable_entry));
let param_set_size = wd.params().len();
if param_set_size < minimum_param_set_size {
rules.clear();
rules.push(satisfiable_entry);
minimum_param_set_size = param_set_size;
} else if param_set_size == minimum_param_set_size {
rules.push(satisfiable_entry);
}
}
}

rules_by_kind
.into_iter()
.map(|(_, (_, rule))| rule)
.collect::<Vec<_>>()
}
}

///
/// Computes the parameters used by the given SelectKey and Entry.
///
fn used_params(key: &SelectKey, entry: &Entry) -> Vec<TypeId> {
// `Get`s introduce new Params to a subgraph, so using a Param provided by a Get does not
// count toward an Entry's used params.
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};

match &entry {
&Entry::WithDeps(ref e) => e
.params()
.iter()
.filter(|&type_id| Some(type_id) != provided_param)
.cloned()
.collect(),
&Entry::Param(ref type_id) if Some(type_id) != provided_param => vec![*type_id],
&Entry::Singleton { .. } | &Entry::Param(_) => vec![],
rules
}
}

@@ -10,7 +10,10 @@ python_tests(

python_library(
name = 'example_lib',
sources = 'example_source.py',
sources = [
'example_source.py',
'__init__.py',
],
)

python_tests(
No changes.
@@ -180,10 +180,10 @@ python_tests(
sources=['test_rules.py'],
coverage=['pants.engine.rules'],
dependencies=[
'3rdparty/python:future',
'3rdparty/python/twitter/commons:twitter.common.collections',
':util',
'3rdparty/python:future',
':scheduler_test_base',
':util',
'src/python/pants/engine:build_files',
'src/python/pants/engine:console',
'src/python/pants/engine:rules',
@@ -192,6 +192,7 @@ python_tests(
'tests/python/pants_test/engine/examples:parsers',
'tests/python/pants_test/subsystem:subsystem_utils',
'tests/python/pants_test/testutils:py2_compat',
'tests/python/pants_test:test_base',
]
)

@@ -81,7 +81,7 @@ def test_creation_fails_with_bad_declaration_type(self):
RuleIndex.create([A()])


class RulesetValidatorTest(unittest.TestCase):
class RuleGraphTest(unittest.TestCase):
def test_ruleset_with_missing_product_type(self):
@rule(A, [Select(B)])
def a_from_b_noop(b):
@@ -240,12 +240,6 @@ def a_from_c(c):
""").strip(),
str(cm.exception))

assert_equal_with_printing = assert_equal_with_printing


class RuleGraphMakerTest(unittest.TestCase):
# TODO HasProducts?

def test_smallest_full_test(self):
@rule(A, [Select(SubA)])
def a_from_suba(suba):
@@ -385,6 +379,50 @@ def b():
}""").strip(),
subgraph)

def test_potentially_ambiguous_get(self):
# In this case, we validate that a Get is satisfied by a rule that actually consumes its
# parameter, rather than by having the same dependency rule consume a parameter that was
# already in the context.
#
# This accounts for the fact that when someone uses Get (rather than Select), it's because
# they intend for the Get's parameter to be consumed in the subgraph. Anything else would
# be surprising.
@rule(A, [Select(SubA)])
def a(sub_a):
_ = yield Get(B, C()) # noqa: F841

@rule(B, [Select(SubA)])
def b_from_suba(suba):
pass

@rule(SubA, [Select(C)])
def suba_from_c(c):
pass

rules = [
a,
b_from_suba,
suba_from_c,
]

subgraph = self.create_subgraph(A, rules, SubA())
self.assert_equal_with_printing(
dedent("""
digraph {
// root subject types: SubA
// root entries
"Select(A) for SubA" [color=blue]
"Select(A) for SubA" -> {"(A, [Select(SubA)], [Get(B, C)], a()) for SubA"}
// internal entries
"(A, [Select(SubA)], [Get(B, C)], a()) for SubA" -> {"(B, [Select(SubA)], b_from_suba()) for C" "Param(SubA)"}
"(B, [Select(SubA)], b_from_suba()) for C" -> {"(SubA, [Select(C)], suba_from_c()) for C"}
"(B, [Select(SubA)], b_from_suba()) for SubA" -> {"Param(SubA)"}
"(SubA, [Select(C)], suba_from_c()) for C" -> {"Param(C)"}
}
""").strip(),
subgraph,
)

def test_one_level_of_recursion(self):
@rule(A, [Select(B)])
def a_from_b(b):
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.