diff --git a/rust/src/graph/cycles.rs b/rust/src/graph/cycles.rs new file mode 100644 index 00000000..e66fea58 --- /dev/null +++ b/rust/src/graph/cycles.rs @@ -0,0 +1,28 @@ +use crate::errors::GrimpResult; +use crate::graph::pathfinding; +use crate::graph::{ExtendWithDescendants, Graph, ModuleToken}; +use rustc_hash::{FxHashMap, FxHashSet}; +use tap::Conv; + +impl Graph { + pub fn find_shortest_cycle( + &self, + module: ModuleToken, + as_package: bool, + ) -> GrimpResult>> { + let modules: Vec = if as_package { + let mut vec = module.conv::>().with_descendants(self); + vec.sort_by_key(|token| self.get_module(*token).unwrap().name()); + vec + } else { + let vec: Vec<_> = module.conv::>(); + vec + }; + pathfinding::find_shortest_cycle( + self, + &modules, + &FxHashSet::default(), + &FxHashMap::default(), + ) + } +} diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index 7b508101..eab91bf8 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -7,6 +7,7 @@ use std::sync::{LazyLock, RwLock}; use string_interner::backend::StringBackend; use string_interner::{DefaultSymbol, StringInterner}; +pub mod cycles; pub mod direct_import_queries; pub mod graph_manipulation; pub mod hierarchy_queries; diff --git a/rust/src/graph/pathfinding.rs b/rust/src/graph/pathfinding.rs index be0f115d..d80fe5ae 100644 --- a/rust/src/graph/pathfinding.rs +++ b/rust/src/graph/pathfinding.rs @@ -1,6 +1,7 @@ use crate::errors::{GrimpError, GrimpResult}; use crate::graph::{EMPTY_MODULE_TOKENS, Graph, ModuleToken}; use indexmap::{IndexMap, IndexSet}; +use itertools::Itertools; use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; use slotmap::SecondaryMap; use std::hash::BuildHasherDefault; @@ -40,29 +41,83 @@ pub fn find_shortest_path( return Err(GrimpError::SharedDescendants); } - let mut predecessors: FxIndexMap> = from_modules + let predecessors: FxIndexMap> = from_modules .clone() .into_iter() .map(|m| (m, None)) .collect(); - let mut successors: FxIndexMap> = + let successors: FxIndexMap> = to_modules.clone().into_iter().map(|m| (m, None)).collect(); + _find_shortest_path( + graph, + predecessors, + successors, + excluded_modules, + excluded_imports, + false, + ) +} + +/// Finds the shortest cycle from `modules` to `modules`, via a bidirectional BFS. +pub fn find_shortest_cycle( + graph: &Graph, + modules: &[ModuleToken], + excluded_modules: &FxHashSet, + excluded_imports: &FxHashMap>, +) -> GrimpResult>> { + // Exclude imports internal to `modules` + let mut excluded_imports = excluded_imports.clone(); + for (m1, m2) in modules.iter().tuple_combinations() { + excluded_imports.entry(*m1).or_default().insert(*m2); + excluded_imports.entry(*m2).or_default().insert(*m1); + } + + let predecessors: FxIndexMap> = + modules.iter().cloned().map(|m| (m, None)).collect(); + + let successors: FxIndexMap> = predecessors.clone(); + + _find_shortest_path( + graph, + predecessors, + successors, + excluded_modules, + &excluded_imports, + true, + ) +} + +fn _find_shortest_path( + graph: &Graph, + mut predecessors: FxIndexMap>, + mut successors: FxIndexMap>, + excluded_modules: &FxHashSet, + excluded_imports: &FxHashMap>, + sorted: bool, +) -> GrimpResult>> { let mut i_forwards = 0; let mut i_backwards = 0; let middle = 'l: loop { for _ in 0..(predecessors.len() - i_forwards) { let module = *predecessors.get_index(i_forwards).unwrap().0; - let next_modules = graph.imports.get(module).unwrap(); + let mut next_modules: Vec = + graph.imports.get(module).unwrap().iter().cloned().collect(); + + if sorted { + next_modules + .sort_by_key(|next_module| graph.get_module(*next_module).unwrap().name()); + } + for next_module in next_modules { - if import_is_excluded(&module, next_module, excluded_modules, excluded_imports) { + if import_is_excluded(&module, &next_module, excluded_modules, excluded_imports) { continue; } - if !predecessors.contains_key(next_module) { - predecessors.insert(*next_module, Some(module)); + if !predecessors.contains_key(&next_module) { + predecessors.insert(next_module, Some(module)); } - if successors.contains_key(next_module) { - break 'l Some(*next_module); + if successors.contains_key(&next_module) { + break 'l Some(next_module); } } i_forwards += 1; @@ -70,16 +125,28 @@ pub fn find_shortest_path( for _ in 0..(successors.len() - i_backwards) { let module = *successors.get_index(i_backwards).unwrap().0; - let next_modules = graph.reverse_imports.get(module).unwrap(); + let mut next_modules: Vec = graph + .reverse_imports + .get(module) + .unwrap() + .iter() + .cloned() + .collect(); + + if sorted { + next_modules + .sort_by_key(|next_module| graph.get_module(*next_module).unwrap().name()); + } + for next_module in next_modules { - if import_is_excluded(next_module, &module, excluded_modules, excluded_imports) { + if import_is_excluded(&next_module, &module, excluded_modules, excluded_imports) { continue; } - if !successors.contains_key(next_module) { - successors.insert(*next_module, Some(module)); + if !successors.contains_key(&next_module) { + successors.insert(next_module, Some(module)); } - if predecessors.contains_key(next_module) { - break 'l Some(*next_module); + if predecessors.contains_key(&next_module) { + break 'l Some(next_module); } } i_backwards += 1; @@ -124,3 +191,93 @@ fn import_is_excluded( .contains(to_module) } } + +#[cfg(test)] +mod test_find_shortest_cycle { + use super::*; + + #[test] + fn test_finds_cycle_single_module() -> GrimpResult<()> { + let mut graph = Graph::default(); + let foo = graph.get_or_add_module("foo").token; + let bar = graph.get_or_add_module("bar").token; + let baz = graph.get_or_add_module("baz").token; + let x = graph.get_or_add_module("x").token; + let y = graph.get_or_add_module("y").token; + let z = graph.get_or_add_module("z").token; + // Shortest cycle + graph.add_import(foo, bar); + graph.add_import(bar, baz); + graph.add_import(baz, foo); + // Longer cycle + graph.add_import(foo, x); + graph.add_import(x, y); + graph.add_import(y, z); + graph.add_import(z, foo); + + let path = + find_shortest_cycle(&graph, &[foo], &FxHashSet::default(), &FxHashMap::default())?; + assert_eq!(path, Some(vec![foo, bar, baz, foo])); + + graph.remove_import(baz, foo); + + let path = + find_shortest_cycle(&graph, &[foo], &FxHashSet::default(), &FxHashMap::default())?; + assert_eq!(path, Some(vec![foo, x, y, z, foo])); + + Ok(()) + } + + #[test] + fn test_returns_none_if_no_cycle() -> GrimpResult<()> { + let mut graph = Graph::default(); + let foo = graph.get_or_add_module("foo").token; + let bar = graph.get_or_add_module("bar").token; + let baz = graph.get_or_add_module("baz").token; + graph.add_import(foo, bar); + graph.add_import(bar, baz); + + let path = + find_shortest_cycle(&graph, &[foo], &FxHashSet::default(), &FxHashMap::default())?; + + assert_eq!(path, None); + + Ok(()) + } + + #[test] + fn test_finds_cycle_multiple_module() -> GrimpResult<()> { + let mut graph = Graph::default(); + + graph.get_or_add_module("colors"); + let red = graph.get_or_add_module("colors.red").token; + let blue = graph.get_or_add_module("colors.blue").token; + let a = graph.get_or_add_module("a").token; + let b = graph.get_or_add_module("b").token; + let c = graph.get_or_add_module("c").token; + let d = graph.get_or_add_module("d").token; + + // The computation should not be confused by these two imports internal to `modules`. + graph.add_import(red, blue); + graph.add_import(blue, red); + // This is the part we expect to find. + graph.add_import(red, a); + graph.add_import(a, b); + graph.add_import(b, blue); + // A longer path. + graph.add_import(a, c); + graph.add_import(c, d); + graph.add_import(d, b); + + let path = find_shortest_cycle( + &graph, + &Vec::from_iter([red, blue]), + &FxHashSet::default(), + &FxHashMap::default(), + )?; + + assert_eq!(path, Some(vec![red, a, b, blue])); + + Ok(()) + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 5a42f9cb..2bb2e839 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -494,6 +494,25 @@ impl GraphWrapper { PySet::new(py, chains) } + #[pyo3(signature = (module, as_package=false))] + pub fn find_shortest_cycle( + &self, + module: &str, + as_package: bool, + ) -> PyResult>> { + let module = self.get_visible_module_by_name(module)?.token(); + Ok(self + ._graph + .find_shortest_cycle(module, as_package)? + .map(|chain| { + chain + .iter() + .into_module_iterator(&self._graph) + .names() + .collect() + })) + } + #[pyo3(signature = (layers, containers))] pub fn find_illegal_dependencies_for_layers<'py>( &self, diff --git a/src/grimp/adaptors/graph.py b/src/grimp/adaptors/graph.py index 5ede492d..23ed3bd0 100644 --- a/src/grimp/adaptors/graph.py +++ b/src/grimp/adaptors/graph.py @@ -152,6 +152,15 @@ def find_shortest_chains( def chain_exists(self, importer: str, imported: str, as_packages: bool = False) -> bool: return self._rustgraph.chain_exists(importer, imported, as_packages) + def find_shortest_cycle( + self, module: str, as_package: bool = False + ) -> Optional[Tuple[str, ...]]: + if not self._rustgraph.contains_module(module): + raise ValueError(f"Module {module} is not present in the graph.") + + cycle = self._rustgraph.find_shortest_cycle(module, as_package) + return tuple(cycle) if cycle else None + def find_illegal_dependencies_for_layers( self, layers: Sequence[Layer | str | set[str]], @@ -212,7 +221,7 @@ def _parse_layers(layers: Sequence[Layer | str | set[str]]) -> tuple[Layer, ...] def _dependencies_from_tuple( - rust_package_dependency_tuple: tuple[_RustPackageDependency, ...] + rust_package_dependency_tuple: tuple[_RustPackageDependency, ...], ) -> set[PackageDependency]: return { PackageDependency( diff --git a/src/grimp/application/ports/graph.py b/src/grimp/application/ports/graph.py index e5530fde..4a877333 100644 --- a/src/grimp/application/ports/graph.py +++ b/src/grimp/application/ports/graph.py @@ -349,6 +349,22 @@ def chain_exists(self, importer: str, imported: str, as_packages: bool = False) """ raise NotImplementedError + # Cycles + # ------ + + @abc.abstractmethod + def find_shortest_cycle( + self, module: str, as_package: bool = False + ) -> Optional[Tuple[str, ...]]: + """ + Returns the shortest import cycle from `module` to itself, or `None` if no cycle exist. + + Optional args: + as_package: Whether or not to treat the supplied module as an individual module, + or as an entire subpackage (including any descendants). + """ + raise NotImplementedError + # High level analysis # ------------------- diff --git a/tests/functional/test_build_graph_on_real_packages.py b/tests/functional/test_build_graph_on_real_packages.py index 3746d005..6be78748 100644 --- a/tests/functional/test_build_graph_on_real_packages.py +++ b/tests/functional/test_build_graph_on_real_packages.py @@ -2,12 +2,14 @@ import grimp +from syrupy.assertion import SnapshotAssertion # type: ignore + @pytest.mark.parametrize( "package_name", ("django", "flask", "requests", "sqlalchemy", "google.cloud.audit"), ) -def test_build_graph_on_real_package(package_name, snapshot): +def test_build_graph_on_real_package(package_name: str, snapshot: SnapshotAssertion) -> None: graph = grimp.build_graph(package_name, cache_dir=None) imports = graph.find_matching_direct_imports(import_expression="** -> **") assert imports == snapshot diff --git a/tests/unit/adaptors/graph/test_cycles.py b/tests/unit/adaptors/graph/test_cycles.py new file mode 100644 index 00000000..eaff1b16 --- /dev/null +++ b/tests/unit/adaptors/graph/test_cycles.py @@ -0,0 +1,48 @@ +from grimp.adaptors.graph import ImportGraph + + +class TestFindShortestCycle: + def test_finds_shortest_cycle_when_exists(self): + graph = ImportGraph() + # Shortest cycle + graph.add_import(importer="foo", imported="bar") + graph.add_import(importer="bar", imported="baz") + graph.add_import(importer="baz", imported="foo") + # Longer cycle + graph.add_import(importer="foo", imported="x") + graph.add_import(importer="x", imported="y") + graph.add_import(importer="y", imported="z") + graph.add_import(importer="z", imported="foo") + + assert graph.find_shortest_cycle("foo") == ("foo", "bar", "baz", "foo") + + graph.remove_import(importer="baz", imported="foo") + + assert graph.find_shortest_cycle("foo") == ("foo", "x", "y", "z", "foo") + + def test_returns_none_if_no_cycle_exists(self): + graph = ImportGraph() + # Shortest cycle + graph.add_import(importer="foo", imported="bar") + graph.add_import(importer="bar", imported="baz") + # graph.add_import(importer="baz", imported="foo") # This import is missing -> No cycle. + + assert graph.find_shortest_cycle("foo") is None + + def test_ignores_internal_imports_when_as_package_is_true(self): + graph = ImportGraph() + graph.add_module("colors") + graph.add_import(importer="colors.red", imported="colors.blue") + graph.add_import(importer="colors.blue", imported="colors.red") + graph.add_import(importer="colors.red", imported="x") + graph.add_import(importer="x", imported="y") + graph.add_import(importer="y", imported="z") + graph.add_import(importer="z", imported="colors.blue") + + assert graph.find_shortest_cycle("colors", as_package=True) == ( + "colors.red", + "x", + "y", + "z", + "colors.blue", + )