diff --git a/.gitignore b/.gitignore index 8898f647..95d38542 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,6 @@ rust/debug rust/target rust/flamegraph.svg target + +# UV +uv.lock diff --git a/docs/usage.rst b/docs/usage.rst index c342f06d..f97c412e 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -84,6 +84,7 @@ Building the graph :return: An import graph that you can use to analyse the package. :rtype: ``ImportGraph`` + # TODO correct this. This method uses multiple operating system processes to build the graph, if the number of modules to scan (not including modules in the cache) is 50 or more. This threshold can be adjusted by setting the ``GRIMP_MIN_MULTIPROCESSING_MODULES`` environment variable to a different number. To disable multiprocessing altogether, set it to a large number (more than diff --git a/pyproject.toml b/pyproject.toml index 0496671e..61c8eb06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ authors = [ ] requires-python = ">=3.9" dependencies = [ - "joblib>=1.3.0", "typing-extensions>=3.10.0.0", ] classifiers = [ diff --git a/rust/Cargo.lock b/rust/Cargo.lock index f4ae21c1..9a2b2796 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -21,11 +21,14 @@ dependencies = [ "ruff_python_parser", "ruff_source_file", "rustc-hash 2.1.1", + "serde", "serde_json", + "serde_yaml", "slotmap", "string-interner", "tap", "thiserror", + "unindent", ] [[package]] @@ -666,6 +669,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.9.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "siphasher" version = "1.0.1" @@ -810,6 +826,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "version_check" version = "0.9.5" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8e5acab2..845daeb7 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -25,6 +25,9 @@ const_format = "0.2.34" ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +unindent = "0.2.4" [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs new file mode 100644 index 00000000..d93eb4e7 --- /dev/null +++ b/rust/src/filesystem.rs @@ -0,0 +1,308 @@ +use pyo3::exceptions::{PyFileNotFoundError, PyRuntimeError}; +use pyo3::prelude::*; +use std::collections::HashMap; +type FileSystemContents = HashMap; +use std::fs; +use std::path::{Path, PathBuf}; +use unindent::unindent; + +pub trait FileSystem: Send + Sync { + fn sep(&self) -> String; + + fn join(&self, components: Vec) -> String; + + fn split(&self, file_name: &str) -> (String, String); + + fn exists(&self, file_name: &str) -> bool; + + fn read(&self, file_name: &str) -> PyResult; +} + +#[derive(Clone)] +pub struct RealBasicFileSystem {} + +#[pyclass(name = "RealBasicFileSystem")] +pub struct PyRealBasicFileSystem { + pub inner: RealBasicFileSystem, +} + +impl FileSystem for RealBasicFileSystem { + fn sep(&self) -> String { + std::path::MAIN_SEPARATOR.to_string() + } + + fn join(&self, components: Vec) -> String { + let mut path = PathBuf::new(); + for component in components { + path.push(component); + } + path.to_str().unwrap().to_string() + } + + fn split(&self, file_name: &str) -> (String, String) { + let path = Path::new(file_name); + + // Get the "tail" part (the file name or last directory) + let tail = match path.file_name() { + Some(name) => PathBuf::from(name), + None => PathBuf::new(), // If there's no file name (e.g., path is a root), return empty + }; + + // Get the "head" part (the parent directory) + let head = match path.parent() { + Some(parent_path) => parent_path.to_path_buf(), + None => PathBuf::new(), // If there's no parent (e.g., just a filename), return empty + }; + + ( + head.to_str().unwrap().to_string(), + tail.to_str().unwrap().to_string(), + ) + } + + fn exists(&self, file_name: &str) -> bool { + Path::new(file_name).is_file() + } + + fn read(&self, file_name: &str) -> PyResult { + // TODO: is this good enough for handling non-UTF8 encodings? + fs::read_to_string(file_name) + .map_err(|_| PyRuntimeError::new_err(format!("Could not read {}", file_name))) + } +} + +#[pymethods] +impl PyRealBasicFileSystem { + #[new] + fn new() -> Self { + PyRealBasicFileSystem { + inner: RealBasicFileSystem {}, + } + } + + #[getter] + fn sep(&self) -> String { + self.inner.sep() + } + + #[pyo3(signature = (*components))] + fn join(&self, components: Vec) -> String { + self.inner.join(components) + } + + fn split(&self, file_name: &str) -> (String, String) { + self.inner.split(file_name) + } + + fn exists(&self, file_name: &str) -> bool { + self.inner.exists(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + self.inner.read(file_name) + } +} +#[derive(Clone)] +pub struct FakeBasicFileSystem { + contents: Box, +} + +#[pyclass(name = "FakeBasicFileSystem")] +pub struct PyFakeBasicFileSystem { + pub inner: FakeBasicFileSystem, +} + +impl FakeBasicFileSystem { + fn new(contents: Option<&str>, content_map: Option>) -> PyResult { + let mut parsed_contents = match contents { + Some(contents) => parse_indented_fs_string(contents), + None => HashMap::new(), + }; + if let Some(content_map) = content_map { + let unindented_map: HashMap = content_map + .into_iter() + .map(|(key, val)| (key, unindent(&val).trim().to_string())) + .collect(); + parsed_contents.extend(unindented_map); + }; + Ok(FakeBasicFileSystem { + contents: Box::new(parsed_contents), + }) + } +} + +impl FileSystem for FakeBasicFileSystem { + fn sep(&self) -> String { + "/".to_string() + } + + fn join(&self, components: Vec) -> String { + let sep = self.sep(); // Get the separator from the getter method + components + .into_iter() + .map(|c| c.trim_end_matches(&sep).to_string()) + .collect::>() + .join(&sep) + } + + fn split(&self, file_name: &str) -> (String, String) { + let components: Vec<&str> = file_name.split('/').collect(); + + if components.is_empty() { + return ("".to_string(), "".to_string()); + } + + let tail = components.last().unwrap_or(&""); // Last component, or empty if components is empty (shouldn't happen from split) + + let head_components = &components[..components.len() - 1]; // All components except the last + + let head = if head_components.is_empty() { + // Case for single component paths like "filename.txt" or empty string "" + "".to_string() + } else if file_name.starts_with('/') + && head_components.len() == 1 + && head_components[0].is_empty() + { + // Special handling for paths starting with '/', e.g., "/" or "/filename.txt" + // If components were ["", ""], head_components is [""] -> should be "/" + // If components were ["", "file.txt"], head_components is [""] -> should be "/" + "/".to_string() + } else { + // Default joining for multiple components + head_components.join("/") + }; + + (head, tail.to_string()) + } + + fn exists(&self, file_name: &str) -> bool { + self.contents.contains_key(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + match self.contents.get(file_name) { + Some(file_name) => Ok(file_name.clone()), + None => Err(PyFileNotFoundError::new_err("")), + } + } +} + +#[pymethods] +impl PyFakeBasicFileSystem { + #[pyo3(signature = (contents=None, content_map=None))] + #[new] + fn new(contents: Option<&str>, content_map: Option>) -> PyResult { + Ok(PyFakeBasicFileSystem { + inner: FakeBasicFileSystem::new(contents, content_map)?, + }) + } + + #[getter] + fn sep(&self) -> String { + self.inner.sep() + } + + #[pyo3(signature = (*components))] + fn join(&self, components: Vec) -> String { + self.inner.join(components) + } + + fn split(&self, file_name: &str) -> (String, String) { + self.inner.split(file_name) + } + + /// Checks if a file or directory exists within the file system. + fn exists(&self, file_name: &str) -> bool { + self.inner.exists(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + self.inner.read(file_name) + } +} + +/// Parses an indented string representing a file system structure +/// into a HashMap where keys are full file paths. +/// +pub fn parse_indented_fs_string(input: &str) -> HashMap { + let mut file_paths_map: HashMap = HashMap::new(); + let mut path_stack: Vec = Vec::new(); // Stores current directory path components + let mut first_line = true; // Flag to handle the very first path component + + // Normalize newlines and split into lines + let buffer = input.replace("\r\n", "\n"); + let lines: Vec<&str> = buffer.split('\n').collect(); + + for line_raw in lines.clone() { + let line = line_raw.trim_end(); // Remove trailing whitespace + if line.is_empty() { + continue; // Skip empty lines + } + + let current_indent = line.chars().take_while(|&c| c.is_whitespace()).count(); + let trimmed_line = line.trim_start(); + + // Assuming 4 spaces per indentation level for calculating depth + // Adjust this if your indentation standard is different (e.g., 2 spaces, tabs) + let current_depth = current_indent / 4; + + if first_line { + // The first non-empty line sets the base path. + // It might be absolute (/a/b/) or relative (a/b/). + let root_component = trimmed_line.trim_end_matches('/').to_string(); + path_stack.push(root_component); + first_line = false; + } else { + // Adjust the path_stack based on indentation level + // Pop elements from the stack until we reach the correct parent directory depth + while path_stack.len() > current_depth { + path_stack.pop(); + } + + // If the current line is a file, append it to the path for inserting into map, + // then pop it off so that subsequent siblings are correctly handled. + // If it's a directory, append it and it stays on the stack for its children. + let component_to_add = trimmed_line.trim_end_matches('/').to_string(); + if !component_to_add.is_empty() { + // Avoid pushing empty strings due to lines like just "/" + path_stack.push(component_to_add); + } + } + + // Construct the full path + // Join components on the stack. If the first component started with '/', + // ensure the final path also starts with '/'. + let full_path = if !path_stack.is_empty() { + let mut joined = path_stack.join("/"); + // If the original root started with a slash, ensure the final path does too. + // But be careful not to double-slash if a component is e.g. "/root" + if lines[0].trim().starts_with('/') && !joined.starts_with('/') { + joined = format!("/{}", joined); + } + joined + } else { + "".to_string() + }; + + // If it's a file (doesn't end with '/'), add it to the map + // A file is not a directory, so its name should be removed from the stack after processing + // so that sibling items are at the correct level. + if !trimmed_line.ends_with('/') { + file_paths_map.insert(full_path, String::new()); // Value can be empty or actual content + if !path_stack.is_empty() { + path_stack.pop(); // Pop the file name off the stack + } + } + } + + // Edge case: If the very first line was a file and it ended up on the stack, it needs to be processed. + // This handles single-file inputs like "myfile.txt" + if !path_stack.is_empty() + && !path_stack.last().unwrap().ends_with('/') + && !file_paths_map.contains_key(&path_stack.join("/")) + { + file_paths_map.insert(path_stack.join("/"), String::new()); + } + + file_paths_map +} diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs new file mode 100644 index 00000000..21d8e6b6 --- /dev/null +++ b/rust/src/import_scanning.rs @@ -0,0 +1,365 @@ +use crate::filesystem::{FileSystem, PyFakeBasicFileSystem, PyRealBasicFileSystem}; +use crate::import_parsing; +use crate::module_finding::{FoundPackage, Module}; +use itertools::Itertools; +use pyo3::exceptions::{PyFileNotFoundError, PyTypeError}; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PySet}; +use std::collections::HashSet; +use crate::errors::GrimpError; + +#[pyclass] +pub struct ImportScanner { + file_system: Box, + found_packages: HashSet, + include_external_packages: bool, + modules: HashSet, +} + +#[derive(Debug, Hash, Eq, PartialEq)] +struct DirectImport { + importer: String, + imported: String, + line_number: usize, + line_contents: String, +} + +#[pymethods] +impl ImportScanner { + #[allow(unused_variables)] + #[new] + #[pyo3(signature = (file_system, found_packages, include_external_packages=false))] + fn new( + py: Python, + file_system: Bound<'_, PyAny>, + found_packages: Bound<'_, PyAny>, + include_external_packages: bool, + ) -> PyResult { + let file_system_boxed: Box; + + if let Ok(py_real) = file_system.extract::>() { + file_system_boxed = Box::new(py_real.inner.clone()); + } else if let Ok(py_fake) = file_system.extract::>() { + file_system_boxed = Box::new(py_fake.inner.clone()); + } else { + return Err(PyTypeError::new_err( + "file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem", + )); + } + let found_packages_rust = _py_found_packages_to_rust(found_packages); + let modules = _get_modules_from_found_packages(&found_packages_rust); + + Ok(ImportScanner { + file_system: file_system_boxed, + found_packages: found_packages_rust, + modules, + include_external_packages, + }) + } + + #[pyo3(signature = (module, exclude_type_checking_imports=false))] + fn scan_for_imports<'a>( + &self, + py: Python<'a>, + module: Bound<'_, PyAny>, + exclude_type_checking_imports: bool, + ) -> PyResult> { + let module_rust = module.extract().unwrap(); + let found_package_for_module = self._lookup_found_package_for_module(&module_rust); + let module_filename = + self._determine_module_filename(&module_rust, found_package_for_module)?; + let module_contents = self.file_system.read(&module_filename).unwrap(); + + let parse_result = import_parsing::parse_imports_from_code(&module_contents); + match parse_result { + Err(GrimpError::ParseError { line_number, text, ..}) => { + // TODO: define SourceSyntaxError using pyo3. + let exceptions_pymodule = + PyModule::import(py, "grimp.exceptions").unwrap(); + let py_exception_class = exceptions_pymodule.getattr("SourceSyntaxError").unwrap(); + let exception = py_exception_class.call1((module_filename, line_number, text,)).unwrap(); + return Err(PyErr::from_value(exception)); + }, + Err(e) => { + return Err(e.into()); + }, + _ => () + } + let imported_objects = parse_result.unwrap(); + let is_package = self._module_is_package(&module_filename); + + let mut imports: HashSet = HashSet::new(); + for imported_object in imported_objects { + // Don't include type checking imports, if specified. + if exclude_type_checking_imports && imported_object.typechecking_only { + continue; + } + + // Resolve relative imports. + let imported_object_name = self._get_absolute_imported_object_name( + &module_rust, + is_package, + &imported_object.name, + ); + + // Resolve imported module. + match self._get_internal_module(&imported_object_name) { + Some(imported_module) => { + // It's an internal import. + imports.insert(DirectImport { + importer: module_rust.name.to_string(), + imported: imported_module.name.to_string(), + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + None => { + // It's an external import. + if self.include_external_packages { + if let Some(imported_module) = + self._distill_external_module(&imported_object_name) + { + imports.insert(DirectImport { + importer: module_rust.name.to_string(), + imported: imported_module, + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + } + } + } + } + + Ok(self._to_py_direct_imports(py, &imports)) + } +} + +impl ImportScanner { + fn _lookup_found_package_for_module(&self, module: &Module) -> &FoundPackage { + // TODO: it's probably inefficient to do this every time we look up a module. + for found_package in &self.found_packages { + for module_file in &found_package.module_files { + if module_file.module == *module { + return found_package; + } + } + } + panic!("Could not lookup found package for module {}", module); + } + + fn _determine_module_filename( + &self, + module: &Module, + found_package: &FoundPackage, + ) -> PyResult { + // TODO: could we do all this with &str instead of String? + let top_level_components: Vec = + found_package.name.split(".").map(str::to_string).collect(); + let module_components: Vec = module.name.split(".").map(str::to_string).collect(); + let leaf_components = module_components[top_level_components.len()..].to_vec(); + + let mut filename_root_components: Vec = vec![found_package.directory.clone()]; + filename_root_components.extend(leaf_components); + let filename_root = self.file_system.join(filename_root_components); + let normal_module_path = format!("{}.py", filename_root); + let init_module_path = self + .file_system + .join(vec![filename_root, "__init__.py".to_string()]); + let candidate_filenames = [normal_module_path, init_module_path]; + for candidate_filename in candidate_filenames { + if self.file_system.exists(&candidate_filename) { + return Ok(candidate_filename); + } + } + Err(PyFileNotFoundError::new_err(format!( + "Could not find module {}.", + module + ))) + } + + fn _module_is_package(&self, module_filename: &str) -> bool { + self.file_system.split(module_filename).1 == "__init__.py" + } + + #[allow(unused_variables)] + fn _get_absolute_imported_object_name( + &self, + module: &Module, + is_package: bool, + imported_object_name: &str, + ) -> String { + let leading_dots_count = count_leading_dots(imported_object_name); + if leading_dots_count == 0 { + return imported_object_name.to_string(); + } + let imported_object_name_base: String; + if is_package { + if leading_dots_count == 1 { + imported_object_name_base = module.name.clone(); + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = + parts[0..parts.len() - leading_dots_count + 1].join("."); + } + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = parts[0..parts.len() - leading_dots_count].join("."); + } + + format!( + "{}.{}", + imported_object_name_base, + &imported_object_name[leading_dots_count..] + ) + } + + #[allow(unused_variables)] + fn _get_internal_module(&self, imported_object_name: &str) -> Option { + let candidate_module = Module { + name: imported_object_name.to_string(), + }; + if self.modules.contains(&candidate_module) { + return Some(candidate_module); + } + if let Some((parent_name, _)) = imported_object_name.rsplit_once('.') { + let parent = Module { + name: parent_name.to_string(), + }; + if self.modules.contains(&parent) { + return Some(parent); + } + } + None + } + + /// Given a module that we already know is external, turn it into a module to add to the graph. + // + // The 'distillation' process involves removing any unwanted subpackages. For example, + // django.models.db should be turned into simply django. + // The process is more complex for potential namespace packages, as it's not possible to + // determine the portion package simply from name. Rather than adding the overhead of a + // filesystem read, we just get the shallowest component that does not clash with an internal + // module namespace. Take, for example, foo.blue.alpha.one. If one of the found + // packages is foo.blue.beta, the module will be distilled to foo.blue.alpha. + // Alternatively, if the found package is foo.green, the distilled module will + // be foo.blue. + #[allow(unused_variables)] + fn _distill_external_module(&self, module_name: &str) -> Option { + let module_root = module_name.split(".").next().unwrap(); + // If it's a module that is a parent of one of the internal packages, return None + // as it doesn't make sense and is probably an import of a namespace package. + for package in &self.found_packages { + if module_is_descendant(&package.name, module_name) { + return None; + } + } + + // If it shares a namespace with an internal module, get the shallowest component that does + // not clash with an internal module namespace. + let mut candidate_portions: HashSet = HashSet::new(); + let mut sorted_found_packages: Vec<&FoundPackage> = self.found_packages.iter().collect(); + sorted_found_packages.sort_by_key(|package| &package.name); + sorted_found_packages.reverse(); + + for found_package in sorted_found_packages { + let root_module = &found_package.name; + if module_is_descendant(root_module, module_root) { + let mut internal_path_components: Vec<&str> = root_module.split(".").collect(); + let mut external_path_components: Vec<&str> = module_name.split(".").collect(); + let mut external_namespace_components: Vec<&str> = vec![]; + while external_path_components[0] == internal_path_components[0] { + external_namespace_components.push(external_path_components.remove(0)); + internal_path_components.remove(0); + } + external_namespace_components.push(external_path_components[0]); + + candidate_portions.insert(Module { + name: external_namespace_components.join("."), + }); + } + } + + if !candidate_portions.is_empty() { + // If multiple found packages share a namespace with this module, use the deepest one + // as we know that that will be a namespace too. + let deepest_candidate_portion = candidate_portions + .iter() + .sorted_by_key(|portion| portion.name.split(".").collect::>().len()) + .next_back() + .unwrap(); + Some(deepest_candidate_portion.name.clone()) + } else { + Some(module_name.split('.').next().unwrap().to_string()) + } + } + + fn _to_py_direct_imports<'a>( + &self, + py: Python<'a>, + rust_imports: &HashSet, + ) -> Bound<'a, PySet> { + // TODO: do this in the constructor. + let valueobjects_pymodule = + PyModule::import(py, "grimp.domain.valueobjects").unwrap(); + let py_module_class = valueobjects_pymodule.getattr("Module").unwrap(); + let py_direct_import_class = valueobjects_pymodule.getattr("DirectImport").unwrap(); + + let pyset = PySet::empty(py).unwrap(); + + for rust_import in rust_imports { + let importer = py_module_class.call1((&rust_import.importer,)).unwrap(); + let imported = py_module_class.call1((&rust_import.imported,)).unwrap(); + let kwargs = PyDict::new(py); + kwargs.set_item("importer", &importer).unwrap(); + kwargs.set_item("imported", &imported).unwrap(); + kwargs + .set_item("line_number", rust_import.line_number) + .unwrap(); + kwargs + .set_item("line_contents", &rust_import.line_contents) + .unwrap(); + let py_direct_import = py_direct_import_class.call((), Some(&kwargs)).unwrap(); + pyset.add(&py_direct_import).unwrap(); + } + + pyset + } +} +fn _py_found_packages_to_rust(py_found_packages: Bound<'_, PyAny>) -> HashSet { + let py_set = py_found_packages + .downcast::() + .expect("Expected py_found_packages to be a Python set."); + + let mut rust_found_packages = HashSet::new(); + // Iterate over the elements in the Python set. + for py_found_package_any in py_set.iter() { + // Extract each Python 'FoundPackage' object into a Rust 'FoundPackage' struct. + // Panics if extraction fails, as specified. + let rust_found_package: FoundPackage = py_found_package_any + .extract() + .expect("Failed to extract Python FoundPackage to Rust FoundPackage."); + rust_found_packages.insert(rust_found_package); + } + rust_found_packages +} + +fn _get_modules_from_found_packages(found_packages: &HashSet) -> HashSet { + let mut modules = HashSet::new(); + for package in found_packages { + for module_file in &package.module_files { + modules.insert(module_file.module.clone()); + } + } + modules +} + +fn count_leading_dots(s: &str) -> usize { + s.chars() + .take_while(|&c| c == '.') // Take characters as long as they are dots + .count() // Count how many were taken +} + +fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool { + module_name.starts_with(&format!("{}.", potential_ancestor)) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 18327cd2..1a772fab 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,13 +1,17 @@ pub mod errors; pub mod exceptions; +mod filesystem; pub mod graph; pub mod import_parsing; +mod import_scanning; pub mod module_expressions; use crate::errors::{GrimpError, GrimpResult}; use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError}; +use crate::filesystem::{PyFakeBasicFileSystem, PyRealBasicFileSystem}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; +use crate::import_scanning::ImportScanner; use crate::module_expressions::ModuleExpression; use derive_new::new; use itertools::Itertools; @@ -18,11 +22,15 @@ use pyo3::types::{IntoPyDict, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTu use rayon::prelude::*; use rustc_hash::FxHashSet; use std::collections::HashSet; +mod module_finding; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; m.add( diff --git a/rust/src/module_finding.rs b/rust/src/module_finding.rs new file mode 100644 index 00000000..3eb2016d --- /dev/null +++ b/rust/src/module_finding.rs @@ -0,0 +1,60 @@ +use pyo3::{prelude::*, types::PyFrozenSet}; +use std::collections::BTreeSet; +use std::fmt; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, FromPyObject)] +pub struct ModuleFile { + pub module: Module, +} + +/// A Python module. +/// The string is the importable name, e.g. "mypackage.foo". +#[derive(Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash, FromPyObject)] +pub struct Module { + pub name: String, +} + +impl fmt::Display for Module { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.name) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// Set of modules found under a single package, together with metadata. +pub struct FoundPackage { + pub name: String, + pub directory: String, + // BTreeSet rather than HashSet seems to be necessary to make FoundPackage hashable. + pub module_files: BTreeSet, +} + +/// Implements conversion from a Python 'FoundPackage' object to the Rust 'FoundPackage' struct. +/// It extracts 'name', 'directory', and converts the 'module_files' frozenset into a Rust HashSet. +impl<'py> FromPyObject<'py> for FoundPackage { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + // Extract the 'name' attribute. + let name: String = ob.getattr("name")?.extract()?; + // Extract the 'directory' attribute. + let directory: String = ob.getattr("directory")?.extract()?; + + // Access the 'module_files' attribute. + let module_files_py = ob.getattr("module_files")?; + // Downcast the PyAny object to a PyFrozenSet, as Python 'FrozenSet' maps to 'PyFrozenSet'. + let py_frozen_set = module_files_py.downcast::()?; + + let mut module_files = BTreeSet::new(); + // Iterate over the Python frozenset. + for py_module_file_any in py_frozen_set.iter() { + // Extract each element (PyAny) into a Rust 'ModuleFile'. + let module_file: ModuleFile = py_module_file_any.extract()?; + module_files.insert(module_file); + } + + Ok(FoundPackage { + name, + directory, + module_files, + }) + } +} diff --git a/src/grimp/adaptors/filesystem.py b/src/grimp/adaptors/filesystem.py index 9f637982..83b54436 100644 --- a/src/grimp/adaptors/filesystem.py +++ b/src/grimp/adaptors/filesystem.py @@ -2,7 +2,8 @@ import tokenize from typing import Iterator, List, Tuple -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import AbstractFileSystem, BasicFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] class FileSystem(AbstractFileSystem): @@ -44,3 +45,6 @@ def write(self, file_name: str, contents: str) -> None: os.makedirs(dirname) with open(file_name, "w") as file: print(contents, file=file) + + def convert_to_basic(self) -> BasicFileSystem: + return rust.RealBasicFileSystem() diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/adaptors/importscanner.py index a405facc..655782de 100644 --- a/src/grimp/adaptors/importscanner.py +++ b/src/grimp/adaptors/importscanner.py @@ -1,231 +1,3 @@ -from __future__ import annotations - -import re -import logging -from dataclasses import dataclass -from typing import Dict, Optional, Set - -from grimp import exceptions -from grimp.application.ports.importscanner import AbstractImportScanner -from grimp.application.ports.modulefinder import FoundPackage -from grimp.domain.valueobjects import DirectImport, Module from grimp import _rustgrimp as rust # type: ignore[attr-defined] - -logger = logging.getLogger(__name__) - -_LEADING_DOT_REGEX = re.compile(r"^(\.+)\w") - - -@dataclass(frozen=True) -class _ImportedObject: - name: str - line_number: int - line_contents: str - typechecking_only: bool - - -class ImportScanner(AbstractImportScanner): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - - self._found_packages_by_module: Dict[Module, FoundPackage] = { - module_file.module: package - for package in self.found_packages - for module_file in package.module_files - } - - def scan_for_imports( - self, module: Module, *, exclude_type_checking_imports: bool = False - ) -> Set[DirectImport]: - """ - Note: this method only analyses the module in question and will not load any other - code, so it relies on self.modules to deduce which modules it imports. (This is - because you can't know whether "from foo.bar import baz" is importing a module - called `baz`, or a function `baz` from the module `bar`.) - """ - found_package = self._found_package_for_module(module) - module_filename = self._determine_module_filename(module, found_package) - module_contents = self._read_module_contents(module_filename) - - try: - imported_objects = self._get_raw_imported_objects(module_contents) - except rust.ParseError as e: - raise exceptions.SourceSyntaxError( - filename=module_filename, - lineno=e.line_number, - text=e.text, - ) - - is_package = self._module_is_package(module_filename) - - imports = set() - for imported_object in imported_objects: - # Filter on `exclude_type_checking_imports`. - if exclude_type_checking_imports and imported_object.typechecking_only: - continue - - # Resolve relative imports. - imported_object_name = self._get_absolute_imported_object_name( - module=module, is_package=is_package, imported_object_name=imported_object.name - ) - - # Resolve imported module. - imported_module = self._get_internal_module(imported_object_name, modules=self.modules) - if imported_module is None: - # => External import. - - # Filter on `self.include_external_packages`. - if not self.include_external_packages: - continue - - # Distill module. - imported_module = self._distill_external_module( - Module(imported_object_name), found_packages=self.found_packages - ) - if imported_module is None: - continue - - imports.add( - DirectImport( - importer=module, - imported=imported_module, - line_number=imported_object.line_number, - line_contents=imported_object.line_contents, - ) - ) - return imports - - def _found_package_for_module(self, module: Module) -> FoundPackage: - try: - return self._found_packages_by_module[module] - except KeyError: - raise ValueError(f"No found package for module {module}.") - - def _determine_module_filename(self, module: Module, found_package: FoundPackage) -> str: - """ - Work out the full filename of the given module. - - Any given module can either be a straight Python file (foo.py) or else a package - (in which case the file is an __init__.py within a directory). - """ - top_level_components = found_package.name.split(".") - module_components = module.name.split(".") - leaf_components = module_components[len(top_level_components) :] - package_directory = found_package.directory - - filename_root = self.file_system.join(package_directory, *leaf_components) - candidate_filenames = ( - f"{filename_root}.py", - self.file_system.join(filename_root, "__init__.py"), - ) - for candidate_filename in candidate_filenames: - if self.file_system.exists(candidate_filename): - return candidate_filename - raise FileNotFoundError(f"Could not find module {module}.") - - def _read_module_contents(self, module_filename: str) -> str: - """ - Read the file contents of the module. - """ - return self.file_system.read(module_filename) - - def _module_is_package(self, module_filename: str) -> bool: - """ - Whether or not the supplied module filename is a package. - """ - return self.file_system.split(module_filename)[-1] == "__init__.py" - - @staticmethod - def _get_raw_imported_objects(module_contents: str) -> Set[_ImportedObject]: - imported_object_dicts = rust.parse_imported_objects_from_code(module_contents) - return {_ImportedObject(**d) for d in imported_object_dicts} - - @staticmethod - def _get_absolute_imported_object_name( - *, module: Module, is_package: bool, imported_object_name: str - ) -> str: - leading_dot_match = _LEADING_DOT_REGEX.match(imported_object_name) - if leading_dot_match is None: - return imported_object_name - - n_leading_dots = len(leading_dot_match.group(1)) - if is_package: - if n_leading_dots == 1: - imported_object_name_base = module.name - else: - imported_object_name_base = ".".join( - module.name.split(".")[: -(n_leading_dots - 1)] - ) - else: - imported_object_name_base = ".".join(module.name.split(".")[:-n_leading_dots]) - return imported_object_name_base + "." + imported_object_name[n_leading_dots:] - - @staticmethod - def _get_internal_module(object_name: str, *, modules: Set[Module]) -> Optional[Module]: - candidate_module = Module(object_name) - if candidate_module in modules: - return candidate_module - - try: - candidate_module = candidate_module.parent - except ValueError: - return None - else: - if candidate_module in modules: - return candidate_module - else: - return None - - @staticmethod - def _distill_external_module( - module: Module, *, found_packages: Set[FoundPackage] - ) -> Optional[Module]: - """ - Given a module that we already know is external, turn it into a module to add to the graph. - - The 'distillation' process involves removing any unwanted subpackages. For example, - Module("django.models.db") should be turned into simply Module("django"). - - The process is more complex for potential namespace packages, as it's not possible to - determine the portion package simply from name. Rather than adding the overhead of a - filesystem read, we just get the shallowest component that does not clash with an internal - module namespace. Take, for example, a Module("foo.blue.alpha.one"). If one of the found - packages is foo.blue.beta, the module will be distilled to Module("foo.blue.alpha"). - Alternatively, if the found package is foo.green, the distilled module will - be Module("foo.blue"). - """ - # If it's a module that is a parent of one of the internal packages, return None - # as it doesn't make sense and is probably an import of a namespace package. - if any(Module(package.name).is_descendant_of(module) for package in found_packages): - return None - - # If it shares a namespace with an internal module, get the shallowest component that does - # not clash with an internal module namespace. - candidate_portions: Set[Module] = set() - for found_package in sorted(found_packages, key=lambda p: p.name, reverse=True): - root_module = Module(found_package.name) - if root_module.is_descendant_of(module.root): - ( - internal_path_components, - external_path_components, - ) = root_module.name.split( - "." - ), module.name.split(".") - external_namespace_components = [] - while external_path_components[0] == internal_path_components[0]: - external_namespace_components.append(external_path_components[0]) - external_path_components = external_path_components[1:] - internal_path_components = internal_path_components[1:] - external_namespace_components.append(external_path_components[0]) - candidate_portions.add(Module(".".join(external_namespace_components))) - - if candidate_portions: - # If multiple found packages share a namespace with this module, use the deepest one - # as we know that that will be a namespace too. - deepest_candidate_portion = sorted( - candidate_portions, key=lambda p: len(p.name.split(".")) - )[-1] - return deepest_candidate_portion - else: - return module.root +ImportScanner = rust.ImportScanner diff --git a/src/grimp/application/ports/filesystem.py b/src/grimp/application/ports/filesystem.py index b11125be..bfe129a2 100644 --- a/src/grimp/application/ports/filesystem.py +++ b/src/grimp/application/ports/filesystem.py @@ -1,5 +1,7 @@ +from __future__ import annotations import abc from typing import Iterator, List, Tuple +from typing import Protocol class AbstractFileSystem(abc.ABC): @@ -80,3 +82,35 @@ def write(self, file_name: str, contents: str) -> None: Write the contents to a file. """ raise NotImplementedError + + @abc.abstractmethod + def convert_to_basic(self) -> BasicFileSystem: + """ + Convert this file system to a BasicFileSystem. + """ + raise NotImplementedError + + +class BasicFileSystem(Protocol): + """ + A more limited file system, used by the ImportScanner. + + Having two different file system APIs is an interim approach, allowing us to + implement BasicFileSystem in Rust, for use with the ImportScanner, without needing + to implement the full range of methods used by other parts of Grimp. + + Eventually we'll move the full implementation to Rust, and have a single + FileSystem interface. + """ + + def join(self, *components: str) -> str: + ... + + def split(self, file_name: str) -> Tuple[str, str]: + ... + + def read(self, file_name: str) -> str: + ... + + def exists(self, file_name: str) -> bool: + ... diff --git a/src/grimp/application/ports/importscanner.py b/src/grimp/application/ports/importscanner.py index 34da5be9..82cb21bc 100644 --- a/src/grimp/application/ports/importscanner.py +++ b/src/grimp/application/ports/importscanner.py @@ -1,19 +1,18 @@ -import abc -from typing import Set +from typing import Set, Protocol -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import BasicFileSystem from grimp.application.ports.modulefinder import FoundPackage from grimp.domain.valueobjects import DirectImport, Module -class AbstractImportScanner(abc.ABC): +class AbstractImportScanner(Protocol): """ Statically analyses some Python modules for import statements within their shared package. """ def __init__( self, - file_system: AbstractFileSystem, + file_system: BasicFileSystem, found_packages: Set[FoundPackage], include_external_packages: bool = False, ) -> None: @@ -26,16 +25,7 @@ def __init__( modules not contained in modules_by_package_directory) in the results. """ - self.file_system = file_system - self.include_external_packages = include_external_packages - self.found_packages = found_packages - # Flatten all the modules into a set. - self.modules: Set[Module] = set() - for package in self.found_packages: - self.modules |= {mf.module for mf in package.module_files} - - @abc.abstractmethod def scan_for_imports( self, module: Module, *, exclude_type_checking_imports: bool = False ) -> Set[DirectImport]: @@ -43,4 +33,3 @@ def scan_for_imports( Statically analyses the given module and returns an iterable of Modules that it imports. """ - raise NotImplementedError diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index eee3286c..1447fa4f 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -3,10 +3,9 @@ """ from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection -import math -import joblib # type: ignore +from .ports.filesystem import BasicFileSystem from ..application.ports import caching from ..application.ports.filesystem import AbstractFileSystem from ..application.ports.graph import ImportGraph @@ -15,21 +14,12 @@ from ..application.ports.packagefinder import AbstractPackageFinder from ..domain.valueobjects import DirectImport, Module from .config import settings -import os class NotSupplied: pass -# Calling code can set this environment variable if it wants to tune when to switch to -# multiprocessing, or set it to a large number to disable it altogether. -MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES" -# This is an arbitrary number, but setting it too low slows down our functional tests considerably. -# If you change this, update docs/usage.rst too! -DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50 - - def build_graph( package_name, *additional_package_names, @@ -59,7 +49,6 @@ def build_graph( "mypackage", "anotherpackage", "onemore", include_external_packages=True, ) """ - file_system: AbstractFileSystem = settings.FILE_SYSTEM found_packages = _find_packages( @@ -145,7 +134,7 @@ def _scan_packages( imports_by_module_file.update( _scan_imports( remaining_module_files_to_scan, - file_system=file_system, + file_system=file_system.convert_to_basic(), found_packages=found_packages, include_external_packages=include_external_packages, exclude_type_checking_imports=exclude_type_checking_imports, @@ -213,52 +202,7 @@ def _read_imports_from_cache( def _scan_imports( module_files: Collection[ModuleFile], *, - file_system: AbstractFileSystem, - found_packages: Set[FoundPackage], - include_external_packages: bool, - exclude_type_checking_imports: bool, -) -> Dict[ModuleFile, Set[DirectImport]]: - chunks = _create_chunks(module_files) - return _scan_chunks( - chunks, - file_system, - found_packages, - include_external_packages, - exclude_type_checking_imports, - ) - - -def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFile, ...], ...]: - """ - Split the module files into chunks, each to be worked on by a separate OS process. - """ - module_files_tuple = tuple(module_files) - - number_of_module_files = len(module_files_tuple) - n_chunks = _decide_number_of_processes(number_of_module_files) - chunk_size = math.ceil(number_of_module_files / n_chunks) - - return tuple( - module_files_tuple[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks) - ) - - -def _decide_number_of_processes(number_of_module_files: int) -> int: - min_number_of_modules = int( - os.environ.get( - MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME, - DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, - ) - ) - if number_of_module_files < min_number_of_modules: - # Don't incur the overhead of multiple processes. - return 1 - return min(joblib.cpu_count(), number_of_module_files) - - -def _scan_chunks( - chunks: Collection[Collection[ModuleFile]], - file_system: AbstractFileSystem, + file_system: BasicFileSystem, found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, @@ -266,29 +210,20 @@ def _scan_chunks( import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS( file_system=file_system, found_packages=found_packages, - include_external_packages=include_external_packages, - ) - - number_of_processes = len(chunks) - import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)( - joblib.delayed(_scan_chunk)(import_scanner, exclude_type_checking_imports, chunk) - for chunk in chunks + # Ensure that the passed exclude_type_checking_imports is definitely a boolean, + # otherwise the Rust class will error. + include_external_packages=bool(include_external_packages), ) - - imports_by_module_file = {} - for chunk_imports_by_module_file in import_scanning_jobs: - imports_by_module_file.update(chunk_imports_by_module_file) - return imports_by_module_file - - -def _scan_chunk( - import_scanner: AbstractImportScanner, - exclude_type_checking_imports: bool, - chunk: Iterable[ModuleFile], -) -> Dict[ModuleFile, Set[DirectImport]]: return { module_file: import_scanner.scan_for_imports( module_file.module, exclude_type_checking_imports=exclude_type_checking_imports ) - for module_file in chunk + for module_file in module_files } + + +def _decide_number_of_processes(number_of_module_files: int) -> int: + # The Rust-based RealBasicFileSystem doesn't support pickling, + # which means we can't use multiprocessing. But we won't need to use + # multiprocessing shortly so we can live with this. + return 1 diff --git a/src/grimp/main.py b/src/grimp/main.py index 70622b05..d60533a8 100644 --- a/src/grimp/main.py +++ b/src/grimp/main.py @@ -13,7 +13,7 @@ settings.configure( MODULE_FINDER=ModuleFinder(), FILE_SYSTEM=FileSystem(), - IMPORT_SCANNER_CLASS=ImportScanner, + IMPORT_SCANNER_CLASS=ImportScanner, # type: ignore[has-type] IMPORT_GRAPH_CLASS=ImportGraph, PACKAGE_FINDER=ImportLibPackageFinder(), CACHE_CLASS=Cache, diff --git a/tests/adaptors/filesystem.py b/tests/adaptors/filesystem.py index 6eac8cca..74854da0 100644 --- a/tests/adaptors/filesystem.py +++ b/tests/adaptors/filesystem.py @@ -1,8 +1,10 @@ from typing import Any, Dict, Generator, List, Optional, Tuple + import yaml -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import AbstractFileSystem, BasicFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] DEFAULT_MTIME = 10000.0 @@ -43,6 +45,7 @@ def __init__( i.e. last modified times. """ self.contents = self._parse_contents(contents) + self._raw_contents = contents self.content_map = content_map if content_map else {} self.mtime_map: Dict[str, float] = mtime_map if mtime_map else {} @@ -186,3 +189,12 @@ def get_mtime(self, file_name: str) -> float: def write(self, file_name: str, contents: str) -> None: self.content_map[file_name] = contents self.mtime_map[file_name] = DEFAULT_MTIME + + def convert_to_basic(self) -> BasicFileSystem: + """ + Convert this file system to a BasicFileSystem. + """ + return rust.FakeBasicFileSystem( + contents=self._raw_contents, + content_map=self.content_map, + ) diff --git a/tests/functional/test_build_and_use_graph.py b/tests/functional/test_build_and_use_graph.py index e061a82e..35261567 100644 --- a/tests/functional/test_build_and_use_graph.py +++ b/tests/functional/test_build_and_use_graph.py @@ -1,8 +1,6 @@ from grimp import build_graph from typing import Set, Tuple, Optional import pytest -from unittest.mock import patch -from grimp.application import usecases """ @@ -56,33 +54,6 @@ def test_modules(): } -@patch.object(usecases, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0) -def test_modules_multiprocessing(): - """ - This test runs relatively slowly, but it's important we cover the multiprocessing code. - """ - graph = build_graph("testpackage", cache_dir=None) - - assert graph.modules == { - "testpackage", - "testpackage.one", - "testpackage.one.alpha", - "testpackage.one.beta", - "testpackage.one.gamma", - "testpackage.one.delta", - "testpackage.one.delta.blue", - "testpackage.two", - "testpackage.two.alpha", - "testpackage.two.beta", - "testpackage.two.gamma", - "testpackage.utils", - "testpackage.three", - "testpackage.three.beta", - "testpackage.three.gamma", - "testpackage.three.alpha", - } - - def test_add_module(): graph = build_graph("testpackage", cache_dir=None) number_of_modules = len(graph.modules) diff --git a/tests/unit/adaptors/test_filesystem.py b/tests/unit/adaptors/test_filesystem.py index a59a173c..899d10f6 100644 --- a/tests/unit/adaptors/test_filesystem.py +++ b/tests/unit/adaptors/test_filesystem.py @@ -1,13 +1,148 @@ from copy import copy - +from typing import Type import pytest # type: ignore - +from grimp.application.ports.filesystem import BasicFileSystem from tests.adaptors.filesystem import FakeFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] + + +class _Base: + """ + Tests for methods that AbstractFileSystem and BasicFileSystem share. + """ + + file_system_cls: Type[BasicFileSystem] + + @pytest.mark.parametrize("path", ("/path/to", "/path/to/")) + def test_join(self, path): + file_system = self.file_system_cls() + assert "/path/to/mypackage/file.py" == file_system.join(path, "mypackage", "file.py") + + def test_split(self): + file_system = self.file_system_cls() + assert ("/path/to/mypackage", "file.py") == file_system.split("/path/to/mypackage/file.py") + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/mypackage", False), + ("/path/to/mypackage/", False), + ("/path/to/mypackage/readme.txt", True), + ("/path/to/mypackage/foo/one.txt", True), + ("/path/to/mypackage/foo/two/green.txt", True), + ("/path/to/nonexistent.txt", False), + ("/path/to/mypackage/purple.txt", False), + ], + ) + def test_exists_content_only(self, file_name, expected): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """ + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/", False), + ("/path/to/file.txt", True), + ("/path/to/a/deeper/file.txt", True), + ("/path/to/nonexistent.txt", False), + ], + ) + def test_exists_content_map_only(self, file_name, expected): + file_system = self.file_system_cls( + content_map={ + "/path/to/file.txt": "hello", + "/path/to/a/deeper/file.txt": "hello", + } + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/file.txt", True), + ("/path/to/mypackage/foo/two/green.txt", True), + ("/path/to/nonexistent.txt", False), + ], + ) + def test_exists_content_map_and_content(self, file_name, expected): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """, + content_map={ + "/path/to/file.txt": "hello", + "/path/to/a/deeper/file.txt": "hello", + }, + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected_contents", + ( + ("/path/to/mypackage/readme.txt", "Lorem"), + ("/path/to/mypackage/foo/one.txt", "Ipsum"), + # Listed in contents, but not in content_map. + ("/path/to/mypackage/foo/two/green.txt", ""), + # Listed in content_map, but not in contents. + ("/path/to/mypackage/foo/two/blue.txt", "Dolor sic"), + ( + "/path/to/mypackage/indented.txt", + "This is indented text.\n We should dedent it.", + ), + ("/path/to/mypackage/nonexistent", FileNotFoundError), + ("/path/to/mypackage/foo/three/nonexistent", FileNotFoundError), + # TODO - should we raise an exception if we attempt to read a directory? + ), + ) + def test_read(self, file_name, expected_contents): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """, + content_map={ + "/path/to/mypackage/readme.txt": "Lorem", + "/path/to/mypackage/foo/one.txt": "Ipsum", + "/path/to/mypackage/foo/two/blue.txt": "Dolor sic", + "/path/to/mypackage/indented.txt": """ + This is indented text. + We should dedent it. + """, + }, + ) + if isinstance(expected_contents, type) and issubclass(expected_contents, Exception): + with pytest.raises(expected_contents): + file_system.read(file_name) + else: + assert file_system.read(file_name) == expected_contents + +class TestFakeFileSystem(_Base): + file_system_cls = FakeFileSystem -class TestFakeFileSystem: def test_walk(self): - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ __init__.py @@ -29,7 +164,7 @@ def test_walk(self): ] == list(file_system.walk("/path/to/mypackage")) def test_empty_if_directory_does_not_exist(self): - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ __init__.py @@ -38,18 +173,9 @@ def test_empty_if_directory_does_not_exist(self): assert [] == list(file_system.walk("/path/to/nonexistent/package")) def test_dirname(self): - file_system = FakeFileSystem() + file_system = self.file_system_cls() assert "/path/to" == file_system.dirname("/path/to/file.txt") - @pytest.mark.parametrize("path", ("/path/to", "/path/to/")) - def test_join(self, path): - file_system = FakeFileSystem() - assert "/path/to/mypackage/file.py" == file_system.join(path, "mypackage", "file.py") - - def test_split(self): - file_system = FakeFileSystem() - assert ("/path/to/mypackage", "file.py") == file_system.split("/path/to/mypackage/file.py") - def test_dirnames_can_be_modified_in_place(self): """ From the os.walk docs: @@ -59,7 +185,7 @@ def test_dirnames_can_be_modified_in_place(self): of visiting, or even to inform walk() about directories the caller creates or renames before it resumes walk() again. """ - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ foo/ @@ -90,38 +216,6 @@ def test_dirnames_can_be_modified_in_place(self): assert expected_tuples == actual_tuples - @pytest.mark.parametrize( - "file_name, expected_contents", - ( - ("/path/to/mypackage/readme.txt", "Lorem"), - ("/path/to/mypackage/foo/one.txt", "Ipsum"), - # Listed in contents, but not in content_map. - ("/path/to/mypackage/foo/two/green.txt", ""), - # Listed in content_map, but not in contents. - ("/path/to/mypackage/foo/two/blue.txt", "Dolor sic"), - ("/path/to/mypackage/nonexistent", FileNotFoundError), - ("/path/to/mypackage/foo/three/nonexistent", FileNotFoundError), - # TODO - should we raise an exception if we attempt to read a directory? - ), - ) - def test_read(self, file_name, expected_contents): - file_system = FakeFileSystem( - contents=""" - /path/to/mypackage/ - readme.txt - foo/ - one.txt - two/ - green.txt - """, - content_map={ - "/path/to/mypackage/readme.txt": "Lorem", - "/path/to/mypackage/foo/one.txt": "Ipsum", - "/path/to/mypackage/foo/two/blue.txt": "Dolor sic", - }, - ) - if isinstance(expected_contents, type) and issubclass(expected_contents, Exception): - with pytest.raises(expected_contents): - file_system.read(file_name) - else: - assert expected_contents == file_system.read(file_name) + +class TestFakeBasicFileSystem(_Base): + file_system_cls = rust.FakeBasicFileSystem diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index 54d3cccf..4307ca3a 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -3,10 +3,10 @@ import pytest # type: ignore -from grimp.adaptors.importscanner import ImportScanner from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module -from tests.adaptors.filesystem import FakeFileSystem + +from grimp import _rustgrimp as rust # type: ignore[attr-defined] @pytest.mark.parametrize( @@ -50,7 +50,7 @@ ) def test_absolute_imports(include_external_packages, expected_result): all_modules = {Module("foo.one"), Module("foo.two")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": """ import foo.two @@ -61,7 +61,7 @@ def test_absolute_imports(include_external_packages, expected_result): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -81,7 +81,7 @@ def test_absolute_imports(include_external_packages, expected_result): def test_non_ascii(): blue_module = Module("mypackage.blue") non_ascii_modules = {Module("mypackage.ñon_ascii_变"), Module("mypackage.ñon_ascii_变.ラーメン")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/mypackage/blue.py": """ from ñon_ascii_变 import * @@ -93,7 +93,7 @@ def test_non_ascii(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="mypackage", @@ -152,7 +152,7 @@ def test_single_namespace_package_portion(): MODULE_ALPHA, } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/one.py": """ import namespace.foo.two @@ -166,7 +166,7 @@ def test_single_namespace_package_portion(): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -235,7 +235,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): MODULE_FOO_ONE, MODULE_FOO_BLUE, } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/one.py": """ import namespace.bar.one.orange @@ -253,7 +253,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -415,7 +415,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): Module("foo.two.yellow"), Module("foo.three"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -442,7 +442,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -459,15 +459,25 @@ def test_absolute_from_imports(include_external_packages, expected_result): assert expected_result == result -def test_relative_from_imports(): +@pytest.mark.parametrize("module_to_scan_is_package", (True, False)) +def test_relative_from_imports(module_to_scan_is_package): all_modules = { + Module("foo"), + Module("foo.one"), Module("foo.one.blue"), Module("foo.one.green"), Module("foo.two.brown"), Module("foo.two.yellow"), Module("foo.three"), } - file_system = FakeFileSystem( + if module_to_scan_is_package: + module_to_scan = Module("foo.one") + module_filename = "/path/to/foo/one/__init__.py" + else: + module_to_scan = Module("foo.one.blue") + module_filename = "/path/to/foo/one/blue.py" + + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -482,7 +492,7 @@ def test_relative_from_imports(): three.py """, content_map={ - "/path/to/foo/one/blue.py": """ + module_filename: """ from . import green from ..two import yellow from .. import three @@ -491,7 +501,7 @@ def test_relative_from_imports(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -502,23 +512,23 @@ def test_relative_from_imports(): file_system=file_system, ) - result = import_scanner.scan_for_imports(Module("foo.one.blue")) + result = import_scanner.scan_for_imports(module_to_scan) assert result == { DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.one.green"), line_number=1, line_contents="from . import green", ), DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.two.yellow"), line_number=2, line_contents="from ..two import yellow", ), DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.three"), line_number=3, line_contents="from .. import three", @@ -537,7 +547,7 @@ def test_trims_to_known_modules(import_source): Module("foo.two"), Module("foo.two.yellow"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -549,7 +559,7 @@ def test_trims_to_known_modules(import_source): content_map={"/path/to/foo/one.py": import_source}, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -580,7 +590,7 @@ def test_trims_to_known_modules_within_init_file(): Module("foo.one.blue"), Module("foo.one.blue.alpha"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -597,7 +607,7 @@ def test_trims_to_known_modules_within_init_file(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -633,7 +643,7 @@ def test_trims_to_known_modules_within_init_file(): def test_trims_whitespace_from_start_of_line_contents(): all_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -648,7 +658,7 @@ def my_function(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -706,13 +716,13 @@ def my_function(): def test_external_package_imports_for_namespace_packages(statement, expected_module_name): module_to_scan = Module("namespace.foo.blue.alpha") - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/blue/alpha.py": statement, } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo.blue", @@ -751,7 +761,7 @@ def test_external_package_imports_for_namespace_packages(statement, expected_mod def test_scans_multiple_packages(statement): foo_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} bar_modules = {Module("bar"), Module("bar.green"), Module("bar.blue")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" import foo.two @@ -763,7 +773,7 @@ def test_scans_multiple_packages(statement): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -819,7 +829,7 @@ def test_exclude_type_checking_imports( Module("foo.four"), Module("foo.five"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" import foo.two @@ -832,7 +842,7 @@ def test_exclude_type_checking_imports( } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", diff --git a/tests/unit/application/test_usecases.py b/tests/unit/application/test_usecases.py index a8f14207..0c417f01 100644 --- a/tests/unit/application/test_usecases.py +++ b/tests/unit/application/test_usecases.py @@ -1,8 +1,6 @@ -import os from typing import Dict, Optional, Set -from unittest.mock import sentinel, patch +from unittest.mock import sentinel -import joblib # type: ignore import pytest # type: ignore from grimp.application import usecases @@ -11,7 +9,6 @@ from grimp.domain.valueobjects import DirectImport, Module from tests.adaptors.filesystem import FakeFileSystem from tests.adaptors.packagefinder import BaseFakePackageFinder -from tests.adaptors.modulefinder import BaseFakeModuleFinder from tests.config import override_settings SOME_CPU_COUNT = 8 @@ -137,74 +134,36 @@ def write( kwargs["cache_dir"] = supplied_cache_dir usecases.build_graph("mypackage", **kwargs) - @patch.object(usecases, "_scan_chunks", return_value={}) - @patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT) - @pytest.mark.parametrize( - "number_of_modules, fake_environ, expected_number_of_chunks", - [ - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1, - {}, - 1, - ), - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, - {}, - SOME_CPU_COUNT, - ), - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1, - {}, - SOME_CPU_COUNT, - ), - ( - 149, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - 1, - ), - ( - 150, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - SOME_CPU_COUNT, - ), - ( - 151, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - SOME_CPU_COUNT, - ), - ], - ) - def test_scanning_multiprocessing_respects_min_number_of_modules( - self, - mock_cpu_count, - mock_scan_chunks, - number_of_modules, - fake_environ, - expected_number_of_chunks, - ): + def test_forgives_wrong_type_being_passed_to_include_external_packages(self): + file_system = FakeFileSystem( + contents=""" + /path/to/mypackage/ + __init__.py + foo/ + __init__.py + one.py + """, + content_map={ + "/path/to/mypackage/foo/one.py": ( + "import mypackage.foo.two.green\nfrom .. import Something" + ), + }, + ) + class FakePackageFinder(BaseFakePackageFinder): directory_map = {"mypackage": "/path/to/mypackage"} - class FakeModuleFinder(BaseFakeModuleFinder): - module_files_by_package_name = { - "mypackage": frozenset( - { - ModuleFile( - module=Module(f"mypackage.mod_{i}"), - mtime=999, - ) - for i in range(number_of_modules) - } - ) - } - - with override_settings( - FILE_SYSTEM=FakeFileSystem(), - PACKAGE_FINDER=FakePackageFinder(), - MODULE_FINDER=FakeModuleFinder(), - ), patch.object(os, "environ", fake_environ): - usecases.build_graph("mypackage", cache_dir=None) + with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): + graph = usecases.build_graph( + "mypackage", + # Note: this should be a bool, but we want to tolerate it, + # as Import Linter currently has a bug where it will pass it as None. + include_external_packages=None, + ) - [call] = mock_scan_chunks.call_args_list - chunks = call.args[0] - assert len(chunks) == expected_number_of_chunks + expected_modules = { + "mypackage", + "mypackage.foo", + "mypackage.foo.one", + } + assert expected_modules == graph.modules diff --git a/tox.ini b/tox.ini index 3027d938..19c2553d 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,6 @@ envlist = check, docs, {py39,py310,py311,py312,py13}, - py13-joblib-earliest, [base] deps = @@ -26,7 +25,6 @@ basepython = py311: {env:TOXPYTHON:python3.11} py312: {env:TOXPYTHON:python3.12} py313: {env:TOXPYTHON:python3.13} - py313-joblib-earliest: {env:TOXPYTHON:python3.13} {clean,check,docs,report}: {env:TOXPYTHON:python3} setenv = PYTHONPATH={toxinidir}/tests @@ -36,15 +34,9 @@ passenv = usedevelop = false deps = {[base]deps} - joblib==1.4.2 commands = {posargs:pytest --cov --cov-report=term-missing --benchmark-skip -vv tests} -[testenv:py313-joblib-earliest] -deps = - {[base]deps} - joblib==1.3.0 - [testenv:check] basepython = py313 deps = @@ -119,4 +111,4 @@ python = 3.10: py310, report 3.11: py311, report 3.12: py312, report - 3.13: py313, py313-joblib-earliest, report, check, docs + 3.13: py313, report, check, docs