diff --git a/.gitignore b/.gitignore index 8898f647..95d38542 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,6 @@ rust/debug rust/target rust/flamegraph.svg target + +# UV +uv.lock diff --git a/rust/Cargo.lock b/rust/Cargo.lock index f4ae21c1..d5c457f5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -9,6 +9,7 @@ dependencies = [ "bimap", "const_format", "derive-new", + "encoding_rs", "getset", "indexmap 2.9.0", "itertools 0.14.0", @@ -21,11 +22,14 @@ dependencies = [ "ruff_python_parser", "ruff_source_file", "rustc-hash 2.1.1", + "serde", "serde_json", + "serde_yaml", "slotmap", "string-interner", "tap", "thiserror", + "unindent", ] [[package]] @@ -134,6 +138,15 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -666,6 +679,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.9.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "siphasher" version = "1.0.1" @@ -810,6 +836,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "version_check" version = "0.9.5" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8e5acab2..1b346c8a 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -25,6 +25,10 @@ const_format = "0.2.34" ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +unindent = "0.2.4" +encoding_rs = "0.8.35" [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs new file mode 100644 index 00000000..8d9c7848 --- /dev/null +++ b/rust/src/filesystem.rs @@ -0,0 +1,362 @@ +use pyo3::exceptions::{PyFileNotFoundError, PyUnicodeDecodeError}; +use pyo3::prelude::*; +use regex::Regex; +use std::collections::HashMap; +use std::fs; +use std::path::{Path, PathBuf}; +use unindent::unindent; + +pub trait FileSystem: Send + Sync { + fn sep(&self) -> String; + + fn join(&self, components: Vec) -> String; + + fn split(&self, file_name: &str) -> (String, String); + + fn exists(&self, file_name: &str) -> bool; + + fn read(&self, file_name: &str) -> PyResult; +} + +#[derive(Clone)] +#[pyclass] +pub struct RealBasicFileSystem {} + +// Implements a BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem) +// that actually reads files. +#[pyclass(name = "RealBasicFileSystem")] +pub struct PyRealBasicFileSystem { + pub inner: RealBasicFileSystem, +} + +impl FileSystem for RealBasicFileSystem { + fn sep(&self) -> String { + std::path::MAIN_SEPARATOR.to_string() + } + + fn join(&self, components: Vec) -> String { + let mut path = PathBuf::new(); + for component in components { + path.push(component); + } + path.to_str().unwrap().to_string() + } + + fn split(&self, file_name: &str) -> (String, String) { + let path = Path::new(file_name); + + // Get the "tail" part (the file name or last directory) + let tail = match path.file_name() { + Some(name) => PathBuf::from(name), + None => PathBuf::new(), // If there's no file name (e.g., path is a root), return empty + }; + + // Get the "head" part (the parent directory) + let head = match path.parent() { + Some(parent_path) => parent_path.to_path_buf(), + None => PathBuf::new(), // If there's no parent (e.g., just a filename), return empty + }; + + ( + head.to_str().unwrap().to_string(), + tail.to_str().unwrap().to_string(), + ) + } + + fn exists(&self, file_name: &str) -> bool { + Path::new(file_name).is_file() + } + + fn read(&self, file_name: &str) -> PyResult { + // Python files are assumed UTF-8 by default (PEP 686), but they can specify an alternative + // encoding, which we need to take into account here. + // See https://peps.python.org/pep-0263/ + + // This method was authored primarily by an LLM. + + let path = Path::new(file_name); + let bytes = fs::read(path).map_err(|e| { + PyFileNotFoundError::new_err(format!("Failed to read file {file_name}: {e}")) + })?; + + let s = String::from_utf8_lossy(&bytes); + let encoding_re = Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)").unwrap(); + + let mut detected_encoding: Option = None; + + // Coding specification needs to be in the first two lines, or it's ignored. + for line in s.lines().take(2) { + if let Some(captures) = encoding_re.captures(line) { + if let Some(encoding_name) = captures.get(1) { + detected_encoding = Some(encoding_name.as_str().to_string()); + break; + } + } + } + + if let Some(enc_name) = detected_encoding { + let encoding = + encoding_rs::Encoding::for_label(enc_name.as_bytes()).ok_or_else(|| { + PyUnicodeDecodeError::new_err(format!( + "Failed to decode file {file_name} (unknown encoding '{enc_name}')" + )) + })?; + let (decoded_s, _, had_errors) = encoding.decode(&bytes); + if had_errors { + Err(PyUnicodeDecodeError::new_err(format!( + "Failed to decode file {file_name} with encoding '{enc_name}'" + ))) + } else { + Ok(decoded_s.into_owned()) + } + } else { + // Default to UTF-8 if no encoding is specified + String::from_utf8(bytes).map_err(|e| { + PyUnicodeDecodeError::new_err(format!( + "Failed to decode file {file_name} as UTF-8: {e}" + )) + }) + } + } +} + +#[pymethods] +impl PyRealBasicFileSystem { + #[new] + fn new() -> Self { + PyRealBasicFileSystem { + inner: RealBasicFileSystem {}, + } + } + + #[getter] + fn sep(&self) -> String { + self.inner.sep() + } + + #[pyo3(signature = (*components))] + fn join(&self, components: Vec) -> String { + self.inner.join(components) + } + + fn split(&self, file_name: &str) -> (String, String) { + self.inner.split(file_name) + } + + fn exists(&self, file_name: &str) -> bool { + self.inner.exists(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + self.inner.read(file_name) + } +} + +type FileSystemContents = HashMap; + +#[derive(Clone)] +pub struct FakeBasicFileSystem { + contents: Box, +} + +// Implements BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem). +#[pyclass(name = "FakeBasicFileSystem")] +pub struct PyFakeBasicFileSystem { + pub inner: FakeBasicFileSystem, +} + +impl FakeBasicFileSystem { + fn new(contents: Option<&str>, content_map: Option>) -> PyResult { + let mut parsed_contents = match contents { + Some(contents) => parse_indented_file_system_string(contents), + None => HashMap::new(), + }; + if let Some(content_map) = content_map { + let unindented_map: HashMap = content_map + .into_iter() + .map(|(key, val)| (key, unindent(&val).trim().to_string())) + .collect(); + parsed_contents.extend(unindented_map); + }; + Ok(FakeBasicFileSystem { + contents: Box::new(parsed_contents), + }) + } +} + +impl FileSystem for FakeBasicFileSystem { + fn sep(&self) -> String { + "/".to_string() + } + + fn join(&self, components: Vec) -> String { + let sep = self.sep(); + components + .into_iter() + .map(|c| c.trim_end_matches(&sep).to_string()) + .collect::>() + .join(&sep) + } + + fn split(&self, file_name: &str) -> (String, String) { + let components: Vec<&str> = file_name.split('/').collect(); + + if components.is_empty() { + return ("".to_string(), "".to_string()); + } + + let tail = components.last().unwrap_or(&""); // Last component, or empty if components is empty (shouldn't happen from split) + + let head_components = &components[..components.len() - 1]; // All components except the last + + let head = if head_components.is_empty() { + // Case for single component paths like "filename.txt" or empty string "" + "".to_string() + } else if file_name.starts_with('/') + && head_components.len() == 1 + && head_components[0].is_empty() + { + // Special handling for paths starting with '/', e.g., "/" or "/filename.txt" + // If components were ["", ""], head_components is [""] -> should be "/" + // If components were ["", "file.txt"], head_components is [""] -> should be "/" + "/".to_string() + } else { + // Default joining for multiple components + head_components.join("/") + }; + + (head, tail.to_string()) + } + + /// Checks if a file or directory exists within the file system. + fn exists(&self, file_name: &str) -> bool { + self.contents.contains_key(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + match self.contents.get(file_name) { + Some(file_name) => Ok(file_name.clone()), + None => Err(PyFileNotFoundError::new_err("")), + } + } +} + +#[pymethods] +impl PyFakeBasicFileSystem { + #[pyo3(signature = (contents=None, content_map=None))] + #[new] + fn new(contents: Option<&str>, content_map: Option>) -> PyResult { + Ok(PyFakeBasicFileSystem { + inner: FakeBasicFileSystem::new(contents, content_map)?, + }) + } + + #[getter] + fn sep(&self) -> String { + self.inner.sep() + } + + #[pyo3(signature = (*components))] + fn join(&self, components: Vec) -> String { + self.inner.join(components) + } + + fn split(&self, file_name: &str) -> (String, String) { + self.inner.split(file_name) + } + + /// Checks if a file or directory exists within the file system. + fn exists(&self, file_name: &str) -> bool { + self.inner.exists(file_name) + } + + fn read(&self, file_name: &str) -> PyResult { + self.inner.read(file_name) + } +} + +/// Parses an indented string representing a file system structure +/// into a HashMap where keys are full file paths. +/// See tests.adaptors.filesystem.FakeFileSystem for the API. +pub fn parse_indented_file_system_string(file_system_string: &str) -> HashMap { + let mut file_paths_map: HashMap = HashMap::new(); + let mut path_stack: Vec = Vec::new(); // Stores current directory path components + let mut first_line = true; // Flag to handle the very first path component + + // Normalize newlines and split into lines + let buffer = file_system_string.replace("\r\n", "\n"); + let lines: Vec<&str> = buffer.split('\n').collect(); + + for line_raw in lines.clone() { + let line = line_raw.trim_end(); // Remove trailing whitespace + if line.is_empty() { + continue; // Skip empty lines + } + + let current_indent = line.chars().take_while(|&c| c.is_whitespace()).count(); + let trimmed_line = line.trim_start(); + + // Assuming 4 spaces per indentation level for calculating depth + // Adjust this if your indentation standard is different (e.g., 2 spaces, tabs) + let current_depth = current_indent / 4; + + if first_line { + // The first non-empty line sets the base path. + // It might be absolute (/a/b/) or relative (a/b/). + let root_component = trimmed_line.trim_end_matches('/').to_string(); + path_stack.push(root_component); + first_line = false; + } else { + // Adjust the path_stack based on indentation level + // Pop elements from the stack until we reach the correct parent directory depth + while path_stack.len() > current_depth { + path_stack.pop(); + } + + // If the current line is a file, append it to the path for inserting into map, + // then pop it off so that subsequent siblings are correctly handled. + // If it's a directory, append it and it stays on the stack for its children. + let component_to_add = trimmed_line.trim_end_matches('/').to_string(); + if !component_to_add.is_empty() { + // Avoid pushing empty strings due to lines like just "/" + path_stack.push(component_to_add); + } + } + + // Construct the full path + // Join components on the stack. If the first component started with '/', + // ensure the final path also starts with '/'. + let full_path = if !path_stack.is_empty() { + let mut joined = path_stack.join("/"); + // If the original root started with a slash, ensure the final path does too. + // But be careful not to double-slash if a component is e.g. "/root" + if lines[0].trim().starts_with('/') && !joined.starts_with('/') { + joined = format!("/{joined}"); + } + joined + } else { + "".to_string() + }; + + // If it's a file (doesn't end with '/'), add it to the map + // A file is not a directory, so its name should be removed from the stack after processing + // so that sibling items are at the correct level. + if !trimmed_line.ends_with('/') { + file_paths_map.insert(full_path, String::new()); // Value can be empty or actual content + if !path_stack.is_empty() { + path_stack.pop(); // Pop the file name off the stack + } + } + } + + // Edge case: If the very first line was a file and it ended up on the stack, it needs to be processed. + // This handles single-file inputs like "myfile.txt" + if !path_stack.is_empty() + && !path_stack.last().unwrap().ends_with('/') + && !file_paths_map.contains_key(&path_stack.join("/")) + { + file_paths_map.insert(path_stack.join("/"), String::new()); + } + + file_paths_map +} diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs new file mode 100644 index 00000000..66f07a62 --- /dev/null +++ b/rust/src/import_scanning.rs @@ -0,0 +1,364 @@ +use crate::errors::GrimpError; +use crate::filesystem::{FileSystem, PyFakeBasicFileSystem, PyRealBasicFileSystem}; +use crate::import_parsing; +use crate::module_finding::{FoundPackage, Module}; +use itertools::Itertools; +use pyo3::exceptions::{PyFileNotFoundError, PyTypeError}; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PySet}; +use std::collections::HashSet; + +#[pyclass] +pub struct ImportScanner { + file_system: Box, + found_packages: HashSet, + include_external_packages: bool, + modules: HashSet, +} + +#[derive(Debug, Hash, Eq, PartialEq)] +struct DirectImport { + importer: String, + imported: String, + line_number: usize, + line_contents: String, +} + +#[pymethods] +impl ImportScanner { + #[allow(unused_variables)] + #[new] + #[pyo3(signature = (file_system, found_packages, include_external_packages=false))] + fn new( + py: Python, + file_system: Bound<'_, PyAny>, + found_packages: Bound<'_, PyAny>, + include_external_packages: bool, + ) -> PyResult { + let file_system_boxed: Box; + + if let Ok(py_real) = file_system.extract::>() { + file_system_boxed = Box::new(py_real.inner.clone()); + } else if let Ok(py_fake) = file_system.extract::>() { + file_system_boxed = Box::new(py_fake.inner.clone()); + } else { + return Err(PyTypeError::new_err( + "file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem", + )); + } + let found_packages_rust = _py_found_packages_to_rust(found_packages); + let modules = _get_modules_from_found_packages(&found_packages_rust); + + Ok(ImportScanner { + file_system: file_system_boxed, + found_packages: found_packages_rust, + modules, + include_external_packages, + }) + } + + #[pyo3(signature = (module, exclude_type_checking_imports=false))] + fn scan_for_imports<'a>( + &self, + py: Python<'a>, + module: Bound<'_, PyAny>, + exclude_type_checking_imports: bool, + ) -> PyResult> { + let module_rust = module.extract().unwrap(); + let found_package_for_module = self._lookup_found_package_for_module(&module_rust); + let module_filename = + self._determine_module_filename(&module_rust, found_package_for_module)?; + let module_contents = self.file_system.read(&module_filename).unwrap(); + + let parse_result = import_parsing::parse_imports_from_code(&module_contents); + match parse_result { + Err(GrimpError::ParseError { + line_number, text, .. + }) => { + // TODO: define SourceSyntaxError using pyo3. + let exceptions_pymodule = PyModule::import(py, "grimp.exceptions").unwrap(); + let py_exception_class = exceptions_pymodule.getattr("SourceSyntaxError").unwrap(); + let exception = py_exception_class + .call1((module_filename, line_number, text)) + .unwrap(); + return Err(PyErr::from_value(exception)); + } + Err(e) => { + return Err(e.into()); + } + _ => (), + } + let imported_objects = parse_result.unwrap(); + let is_package = self._module_is_package(&module_filename); + + let mut imports: HashSet = HashSet::new(); + for imported_object in imported_objects { + // Don't include type checking imports, if specified. + if exclude_type_checking_imports && imported_object.typechecking_only { + continue; + } + + // Resolve relative imports. + let imported_object_name = self._get_absolute_imported_object_name( + &module_rust, + is_package, + &imported_object.name, + ); + + // Resolve imported module. + match self._get_internal_module(&imported_object_name) { + Some(imported_module) => { + // It's an internal import. + imports.insert(DirectImport { + importer: module_rust.name.to_string(), + imported: imported_module.name.to_string(), + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + None => { + // It's an external import. + if self.include_external_packages { + if let Some(imported_module) = + self._distill_external_module(&imported_object_name) + { + imports.insert(DirectImport { + importer: module_rust.name.to_string(), + imported: imported_module, + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + } + } + } + } + + Ok(self._to_py_direct_imports(py, &imports)) + } +} + +impl ImportScanner { + fn _lookup_found_package_for_module(&self, module: &Module) -> &FoundPackage { + // TODO: it's probably inefficient to do this every time we look up a module. + for found_package in &self.found_packages { + for module_file in &found_package.module_files { + if module_file.module == *module { + return found_package; + } + } + } + panic!("Could not lookup found package for module {module}"); + } + + fn _determine_module_filename( + &self, + module: &Module, + found_package: &FoundPackage, + ) -> PyResult { + // TODO: could we do all this with &str instead of String? + let top_level_components: Vec = + found_package.name.split(".").map(str::to_string).collect(); + let module_components: Vec = module.name.split(".").map(str::to_string).collect(); + let leaf_components = module_components[top_level_components.len()..].to_vec(); + + let mut filename_root_components: Vec = vec![found_package.directory.clone()]; + filename_root_components.extend(leaf_components); + let filename_root = self.file_system.join(filename_root_components); + let normal_module_path = format!("{filename_root}.py"); + let init_module_path = self + .file_system + .join(vec![filename_root, "__init__.py".to_string()]); + let candidate_filenames = [normal_module_path, init_module_path]; + for candidate_filename in candidate_filenames { + if self.file_system.exists(&candidate_filename) { + return Ok(candidate_filename); + } + } + Err(PyFileNotFoundError::new_err(format!( + "Could not find module {module}." + ))) + } + + fn _module_is_package(&self, module_filename: &str) -> bool { + self.file_system.split(module_filename).1 == "__init__.py" + } + + #[allow(unused_variables)] + fn _get_absolute_imported_object_name( + &self, + module: &Module, + is_package: bool, + imported_object_name: &str, + ) -> String { + let leading_dots_count = count_leading_dots(imported_object_name); + if leading_dots_count == 0 { + return imported_object_name.to_string(); + } + let imported_object_name_base: String; + if is_package { + if leading_dots_count == 1 { + imported_object_name_base = module.name.clone(); + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = + parts[0..parts.len() - leading_dots_count + 1].join("."); + } + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = parts[0..parts.len() - leading_dots_count].join("."); + } + + format!( + "{}.{}", + imported_object_name_base, + &imported_object_name[leading_dots_count..] + ) + } + + #[allow(unused_variables)] + fn _get_internal_module(&self, imported_object_name: &str) -> Option { + let candidate_module = Module { + name: imported_object_name.to_string(), + }; + if self.modules.contains(&candidate_module) { + return Some(candidate_module); + } + if let Some((parent_name, _)) = imported_object_name.rsplit_once('.') { + let parent = Module { + name: parent_name.to_string(), + }; + if self.modules.contains(&parent) { + return Some(parent); + } + } + None + } + + /// Given a module that we already know is external, turn it into a module to add to the graph. + // + // The 'distillation' process involves removing any unwanted subpackages. For example, + // django.models.db should be turned into simply django. + // The process is more complex for potential namespace packages, as it's not possible to + // determine the portion package simply from name. Rather than adding the overhead of a + // filesystem read, we just get the shallowest component that does not clash with an internal + // module namespace. Take, for example, foo.blue.alpha.one. If one of the found + // packages is foo.blue.beta, the module will be distilled to foo.blue.alpha. + // Alternatively, if the found package is foo.green, the distilled module will + // be foo.blue. + #[allow(unused_variables)] + fn _distill_external_module(&self, module_name: &str) -> Option { + let module_root = module_name.split(".").next().unwrap(); + // If it's a module that is a parent of one of the internal packages, return None + // as it doesn't make sense and is probably an import of a namespace package. + for package in &self.found_packages { + if module_is_descendant(&package.name, module_name) { + return None; + } + } + + // If it shares a namespace with an internal module, get the shallowest component that does + // not clash with an internal module namespace. + let mut candidate_portions: HashSet = HashSet::new(); + let mut sorted_found_packages: Vec<&FoundPackage> = self.found_packages.iter().collect(); + sorted_found_packages.sort_by_key(|package| &package.name); + sorted_found_packages.reverse(); + + for found_package in sorted_found_packages { + let root_module = &found_package.name; + if module_is_descendant(root_module, module_root) { + let mut internal_path_components: Vec<&str> = root_module.split(".").collect(); + let mut external_path_components: Vec<&str> = module_name.split(".").collect(); + let mut external_namespace_components: Vec<&str> = vec![]; + while external_path_components[0] == internal_path_components[0] { + external_namespace_components.push(external_path_components.remove(0)); + internal_path_components.remove(0); + } + external_namespace_components.push(external_path_components[0]); + + candidate_portions.insert(Module { + name: external_namespace_components.join("."), + }); + } + } + + if !candidate_portions.is_empty() { + // If multiple found packages share a namespace with this module, use the deepest one + // as we know that that will be a namespace too. + let deepest_candidate_portion = candidate_portions + .iter() + .sorted_by_key(|portion| portion.name.split(".").collect::>().len()) + .next_back() + .unwrap(); + Some(deepest_candidate_portion.name.clone()) + } else { + Some(module_name.split('.').next().unwrap().to_string()) + } + } + + fn _to_py_direct_imports<'a>( + &self, + py: Python<'a>, + rust_imports: &HashSet, + ) -> Bound<'a, PySet> { + // TODO: do this in the constructor. + let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap(); + let py_module_class = valueobjects_pymodule.getattr("Module").unwrap(); + let py_direct_import_class = valueobjects_pymodule.getattr("DirectImport").unwrap(); + + let pyset = PySet::empty(py).unwrap(); + + for rust_import in rust_imports { + let importer = py_module_class.call1((&rust_import.importer,)).unwrap(); + let imported = py_module_class.call1((&rust_import.imported,)).unwrap(); + let kwargs = PyDict::new(py); + kwargs.set_item("importer", &importer).unwrap(); + kwargs.set_item("imported", &imported).unwrap(); + kwargs + .set_item("line_number", rust_import.line_number) + .unwrap(); + kwargs + .set_item("line_contents", &rust_import.line_contents) + .unwrap(); + let py_direct_import = py_direct_import_class.call((), Some(&kwargs)).unwrap(); + pyset.add(&py_direct_import).unwrap(); + } + + pyset + } +} +fn _py_found_packages_to_rust(py_found_packages: Bound<'_, PyAny>) -> HashSet { + let py_set = py_found_packages + .downcast::() + .expect("Expected py_found_packages to be a Python set."); + + let mut rust_found_packages = HashSet::new(); + // Iterate over the elements in the Python set. + for py_found_package_any in py_set.iter() { + // Extract each Python 'FoundPackage' object into a Rust 'FoundPackage' struct. + // Panics if extraction fails, as specified. + let rust_found_package: FoundPackage = py_found_package_any + .extract() + .expect("Failed to extract Python FoundPackage to Rust FoundPackage."); + rust_found_packages.insert(rust_found_package); + } + rust_found_packages +} + +fn _get_modules_from_found_packages(found_packages: &HashSet) -> HashSet { + let mut modules = HashSet::new(); + for package in found_packages { + for module_file in &package.module_files { + modules.insert(module_file.module.clone()); + } + } + modules +} + +fn count_leading_dots(s: &str) -> usize { + s.chars().take_while(|&c| c == '.').count() +} + +fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool { + module_name.starts_with(&format!("{potential_ancestor}.")) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 18327cd2..7a102930 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,11 +3,15 @@ pub mod exceptions; pub mod graph; pub mod import_parsing; pub mod module_expressions; +mod filesystem; +mod import_scanning; +mod module_finding; use crate::errors::{GrimpError, GrimpResult}; use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; +use crate::import_scanning::ImportScanner; use crate::module_expressions::ModuleExpression; use derive_new::new; use itertools::Itertools; @@ -18,11 +22,15 @@ use pyo3::types::{IntoPyDict, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTu use rayon::prelude::*; use rustc_hash::FxHashSet; use std::collections::HashSet; +use crate::filesystem::{PyRealBasicFileSystem,PyFakeBasicFileSystem}; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; m.add( @@ -543,7 +551,7 @@ impl GraphWrapper { .unwrap() .into_iter() .map(|name| match container.clone() { - Some(container) => format!("{}.{}", container, name), + Some(container) => format!("{container}.{name}"), None => name, }) .filter_map(|name| match self.get_visible_module_by_name(&name) { diff --git a/rust/src/module_finding.rs b/rust/src/module_finding.rs new file mode 100644 index 00000000..4034b265 --- /dev/null +++ b/rust/src/module_finding.rs @@ -0,0 +1,60 @@ +use pyo3::{prelude::*, types::PyFrozenSet}; +use std::collections::BTreeSet; +use std::fmt; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, FromPyObject)] +pub struct ModuleFile { + pub module: Module, +} + +/// A Python module. +/// The string is the importable name, e.g. "mypackage.foo". +#[derive(Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash, FromPyObject)] +pub struct Module { + pub name: String, +} + +impl fmt::Display for Module { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.name) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// Set of modules found under a single package, together with metadata. +pub struct FoundPackage { + pub name: String, + pub directory: String, + // BTreeSet rather than HashSet is necessary to make FoundPackage hashable. + pub module_files: BTreeSet, +} + +/// Implements conversion from a Python 'FoundPackage' object to the Rust 'FoundPackage' struct. +/// It extracts 'name', 'directory', and converts the 'module_files' frozenset into a Rust BTreeSet. +impl<'py> FromPyObject<'py> for FoundPackage { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + // Extract the 'name' attribute. + let name: String = ob.getattr("name")?.extract()?; + // Extract the 'directory' attribute. + let directory: String = ob.getattr("directory")?.extract()?; + + // Access the 'module_files' attribute. + let module_files_py = ob.getattr("module_files")?; + // Downcast the PyAny object to a PyFrozenSet, as Python 'FrozenSet' maps to 'PyFrozenSet'. + let py_frozen_set = module_files_py.downcast::()?; + + let mut module_files = BTreeSet::new(); + // Iterate over the Python frozenset. + for py_module_file_any in py_frozen_set.iter() { + // Extract each element (PyAny) into a Rust 'ModuleFile'. + let module_file: ModuleFile = py_module_file_any.extract()?; + module_files.insert(module_file); + } + + Ok(FoundPackage { + name, + directory, + module_files, + }) + } +} diff --git a/src/grimp/adaptors/filesystem.py b/src/grimp/adaptors/filesystem.py index 9f637982..83b54436 100644 --- a/src/grimp/adaptors/filesystem.py +++ b/src/grimp/adaptors/filesystem.py @@ -2,7 +2,8 @@ import tokenize from typing import Iterator, List, Tuple -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import AbstractFileSystem, BasicFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] class FileSystem(AbstractFileSystem): @@ -44,3 +45,6 @@ def write(self, file_name: str, contents: str) -> None: os.makedirs(dirname) with open(file_name, "w") as file: print(contents, file=file) + + def convert_to_basic(self) -> BasicFileSystem: + return rust.RealBasicFileSystem() diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/adaptors/importscanner.py index a405facc..655782de 100644 --- a/src/grimp/adaptors/importscanner.py +++ b/src/grimp/adaptors/importscanner.py @@ -1,231 +1,3 @@ -from __future__ import annotations - -import re -import logging -from dataclasses import dataclass -from typing import Dict, Optional, Set - -from grimp import exceptions -from grimp.application.ports.importscanner import AbstractImportScanner -from grimp.application.ports.modulefinder import FoundPackage -from grimp.domain.valueobjects import DirectImport, Module from grimp import _rustgrimp as rust # type: ignore[attr-defined] - -logger = logging.getLogger(__name__) - -_LEADING_DOT_REGEX = re.compile(r"^(\.+)\w") - - -@dataclass(frozen=True) -class _ImportedObject: - name: str - line_number: int - line_contents: str - typechecking_only: bool - - -class ImportScanner(AbstractImportScanner): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - - self._found_packages_by_module: Dict[Module, FoundPackage] = { - module_file.module: package - for package in self.found_packages - for module_file in package.module_files - } - - def scan_for_imports( - self, module: Module, *, exclude_type_checking_imports: bool = False - ) -> Set[DirectImport]: - """ - Note: this method only analyses the module in question and will not load any other - code, so it relies on self.modules to deduce which modules it imports. (This is - because you can't know whether "from foo.bar import baz" is importing a module - called `baz`, or a function `baz` from the module `bar`.) - """ - found_package = self._found_package_for_module(module) - module_filename = self._determine_module_filename(module, found_package) - module_contents = self._read_module_contents(module_filename) - - try: - imported_objects = self._get_raw_imported_objects(module_contents) - except rust.ParseError as e: - raise exceptions.SourceSyntaxError( - filename=module_filename, - lineno=e.line_number, - text=e.text, - ) - - is_package = self._module_is_package(module_filename) - - imports = set() - for imported_object in imported_objects: - # Filter on `exclude_type_checking_imports`. - if exclude_type_checking_imports and imported_object.typechecking_only: - continue - - # Resolve relative imports. - imported_object_name = self._get_absolute_imported_object_name( - module=module, is_package=is_package, imported_object_name=imported_object.name - ) - - # Resolve imported module. - imported_module = self._get_internal_module(imported_object_name, modules=self.modules) - if imported_module is None: - # => External import. - - # Filter on `self.include_external_packages`. - if not self.include_external_packages: - continue - - # Distill module. - imported_module = self._distill_external_module( - Module(imported_object_name), found_packages=self.found_packages - ) - if imported_module is None: - continue - - imports.add( - DirectImport( - importer=module, - imported=imported_module, - line_number=imported_object.line_number, - line_contents=imported_object.line_contents, - ) - ) - return imports - - def _found_package_for_module(self, module: Module) -> FoundPackage: - try: - return self._found_packages_by_module[module] - except KeyError: - raise ValueError(f"No found package for module {module}.") - - def _determine_module_filename(self, module: Module, found_package: FoundPackage) -> str: - """ - Work out the full filename of the given module. - - Any given module can either be a straight Python file (foo.py) or else a package - (in which case the file is an __init__.py within a directory). - """ - top_level_components = found_package.name.split(".") - module_components = module.name.split(".") - leaf_components = module_components[len(top_level_components) :] - package_directory = found_package.directory - - filename_root = self.file_system.join(package_directory, *leaf_components) - candidate_filenames = ( - f"{filename_root}.py", - self.file_system.join(filename_root, "__init__.py"), - ) - for candidate_filename in candidate_filenames: - if self.file_system.exists(candidate_filename): - return candidate_filename - raise FileNotFoundError(f"Could not find module {module}.") - - def _read_module_contents(self, module_filename: str) -> str: - """ - Read the file contents of the module. - """ - return self.file_system.read(module_filename) - - def _module_is_package(self, module_filename: str) -> bool: - """ - Whether or not the supplied module filename is a package. - """ - return self.file_system.split(module_filename)[-1] == "__init__.py" - - @staticmethod - def _get_raw_imported_objects(module_contents: str) -> Set[_ImportedObject]: - imported_object_dicts = rust.parse_imported_objects_from_code(module_contents) - return {_ImportedObject(**d) for d in imported_object_dicts} - - @staticmethod - def _get_absolute_imported_object_name( - *, module: Module, is_package: bool, imported_object_name: str - ) -> str: - leading_dot_match = _LEADING_DOT_REGEX.match(imported_object_name) - if leading_dot_match is None: - return imported_object_name - - n_leading_dots = len(leading_dot_match.group(1)) - if is_package: - if n_leading_dots == 1: - imported_object_name_base = module.name - else: - imported_object_name_base = ".".join( - module.name.split(".")[: -(n_leading_dots - 1)] - ) - else: - imported_object_name_base = ".".join(module.name.split(".")[:-n_leading_dots]) - return imported_object_name_base + "." + imported_object_name[n_leading_dots:] - - @staticmethod - def _get_internal_module(object_name: str, *, modules: Set[Module]) -> Optional[Module]: - candidate_module = Module(object_name) - if candidate_module in modules: - return candidate_module - - try: - candidate_module = candidate_module.parent - except ValueError: - return None - else: - if candidate_module in modules: - return candidate_module - else: - return None - - @staticmethod - def _distill_external_module( - module: Module, *, found_packages: Set[FoundPackage] - ) -> Optional[Module]: - """ - Given a module that we already know is external, turn it into a module to add to the graph. - - The 'distillation' process involves removing any unwanted subpackages. For example, - Module("django.models.db") should be turned into simply Module("django"). - - The process is more complex for potential namespace packages, as it's not possible to - determine the portion package simply from name. Rather than adding the overhead of a - filesystem read, we just get the shallowest component that does not clash with an internal - module namespace. Take, for example, a Module("foo.blue.alpha.one"). If one of the found - packages is foo.blue.beta, the module will be distilled to Module("foo.blue.alpha"). - Alternatively, if the found package is foo.green, the distilled module will - be Module("foo.blue"). - """ - # If it's a module that is a parent of one of the internal packages, return None - # as it doesn't make sense and is probably an import of a namespace package. - if any(Module(package.name).is_descendant_of(module) for package in found_packages): - return None - - # If it shares a namespace with an internal module, get the shallowest component that does - # not clash with an internal module namespace. - candidate_portions: Set[Module] = set() - for found_package in sorted(found_packages, key=lambda p: p.name, reverse=True): - root_module = Module(found_package.name) - if root_module.is_descendant_of(module.root): - ( - internal_path_components, - external_path_components, - ) = root_module.name.split( - "." - ), module.name.split(".") - external_namespace_components = [] - while external_path_components[0] == internal_path_components[0]: - external_namespace_components.append(external_path_components[0]) - external_path_components = external_path_components[1:] - internal_path_components = internal_path_components[1:] - external_namespace_components.append(external_path_components[0]) - candidate_portions.add(Module(".".join(external_namespace_components))) - - if candidate_portions: - # If multiple found packages share a namespace with this module, use the deepest one - # as we know that that will be a namespace too. - deepest_candidate_portion = sorted( - candidate_portions, key=lambda p: len(p.name.split(".")) - )[-1] - return deepest_candidate_portion - else: - return module.root +ImportScanner = rust.ImportScanner diff --git a/src/grimp/application/ports/filesystem.py b/src/grimp/application/ports/filesystem.py index b11125be..bfe129a2 100644 --- a/src/grimp/application/ports/filesystem.py +++ b/src/grimp/application/ports/filesystem.py @@ -1,5 +1,7 @@ +from __future__ import annotations import abc from typing import Iterator, List, Tuple +from typing import Protocol class AbstractFileSystem(abc.ABC): @@ -80,3 +82,35 @@ def write(self, file_name: str, contents: str) -> None: Write the contents to a file. """ raise NotImplementedError + + @abc.abstractmethod + def convert_to_basic(self) -> BasicFileSystem: + """ + Convert this file system to a BasicFileSystem. + """ + raise NotImplementedError + + +class BasicFileSystem(Protocol): + """ + A more limited file system, used by the ImportScanner. + + Having two different file system APIs is an interim approach, allowing us to + implement BasicFileSystem in Rust, for use with the ImportScanner, without needing + to implement the full range of methods used by other parts of Grimp. + + Eventually we'll move the full implementation to Rust, and have a single + FileSystem interface. + """ + + def join(self, *components: str) -> str: + ... + + def split(self, file_name: str) -> Tuple[str, str]: + ... + + def read(self, file_name: str) -> str: + ... + + def exists(self, file_name: str) -> bool: + ... diff --git a/src/grimp/application/ports/importscanner.py b/src/grimp/application/ports/importscanner.py index 34da5be9..82cb21bc 100644 --- a/src/grimp/application/ports/importscanner.py +++ b/src/grimp/application/ports/importscanner.py @@ -1,19 +1,18 @@ -import abc -from typing import Set +from typing import Set, Protocol -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import BasicFileSystem from grimp.application.ports.modulefinder import FoundPackage from grimp.domain.valueobjects import DirectImport, Module -class AbstractImportScanner(abc.ABC): +class AbstractImportScanner(Protocol): """ Statically analyses some Python modules for import statements within their shared package. """ def __init__( self, - file_system: AbstractFileSystem, + file_system: BasicFileSystem, found_packages: Set[FoundPackage], include_external_packages: bool = False, ) -> None: @@ -26,16 +25,7 @@ def __init__( modules not contained in modules_by_package_directory) in the results. """ - self.file_system = file_system - self.include_external_packages = include_external_packages - self.found_packages = found_packages - # Flatten all the modules into a set. - self.modules: Set[Module] = set() - for package in self.found_packages: - self.modules |= {mf.module for mf in package.module_files} - - @abc.abstractmethod def scan_for_imports( self, module: Module, *, exclude_type_checking_imports: bool = False ) -> Set[DirectImport]: @@ -43,4 +33,3 @@ def scan_for_imports( Statically analyses the given module and returns an iterable of Modules that it imports. """ - raise NotImplementedError diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index eee3286c..35e830f3 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -59,7 +59,6 @@ def build_graph( "mypackage", "anotherpackage", "onemore", include_external_packages=True, ) """ - file_system: AbstractFileSystem = settings.FILE_SYSTEM found_packages = _find_packages( @@ -145,7 +144,6 @@ def _scan_packages( imports_by_module_file.update( _scan_imports( remaining_module_files_to_scan, - file_system=file_system, found_packages=found_packages, include_external_packages=include_external_packages, exclude_type_checking_imports=exclude_type_checking_imports, @@ -213,7 +211,6 @@ def _read_imports_from_cache( def _scan_imports( module_files: Collection[ModuleFile], *, - file_system: AbstractFileSystem, found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, @@ -221,7 +218,6 @@ def _scan_imports( chunks = _create_chunks(module_files) return _scan_chunks( chunks, - file_system, found_packages, include_external_packages, exclude_type_checking_imports, @@ -256,22 +252,40 @@ def _decide_number_of_processes(number_of_module_files: int) -> int: return min(joblib.cpu_count(), number_of_module_files) -def _scan_chunks( - chunks: Collection[Collection[ModuleFile]], - file_system: AbstractFileSystem, +def _scan_chunk( found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, + chunk: Iterable[ModuleFile], ) -> Dict[ModuleFile, Set[DirectImport]]: + file_system: AbstractFileSystem = settings.FILE_SYSTEM + basic_file_system = file_system.convert_to_basic() import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS( - file_system=file_system, + file_system=basic_file_system, found_packages=found_packages, - include_external_packages=include_external_packages, + # Ensure that the passed exclude_type_checking_imports is definitely a boolean, + # otherwise the Rust class will error. + include_external_packages=bool(include_external_packages), ) + return { + module_file: import_scanner.scan_for_imports( + module_file.module, exclude_type_checking_imports=exclude_type_checking_imports + ) + for module_file in chunk + } + +def _scan_chunks( + chunks: Collection[Collection[ModuleFile]], + found_packages: Set[FoundPackage], + include_external_packages: bool, + exclude_type_checking_imports: bool, +) -> Dict[ModuleFile, Set[DirectImport]]: number_of_processes = len(chunks) import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)( - joblib.delayed(_scan_chunk)(import_scanner, exclude_type_checking_imports, chunk) + joblib.delayed(_scan_chunk)( + found_packages, include_external_packages, exclude_type_checking_imports, chunk + ) for chunk in chunks ) @@ -279,16 +293,3 @@ def _scan_chunks( for chunk_imports_by_module_file in import_scanning_jobs: imports_by_module_file.update(chunk_imports_by_module_file) return imports_by_module_file - - -def _scan_chunk( - import_scanner: AbstractImportScanner, - exclude_type_checking_imports: bool, - chunk: Iterable[ModuleFile], -) -> Dict[ModuleFile, Set[DirectImport]]: - return { - module_file: import_scanner.scan_for_imports( - module_file.module, exclude_type_checking_imports=exclude_type_checking_imports - ) - for module_file in chunk - } diff --git a/src/grimp/main.py b/src/grimp/main.py index 70622b05..d60533a8 100644 --- a/src/grimp/main.py +++ b/src/grimp/main.py @@ -13,7 +13,7 @@ settings.configure( MODULE_FINDER=ModuleFinder(), FILE_SYSTEM=FileSystem(), - IMPORT_SCANNER_CLASS=ImportScanner, + IMPORT_SCANNER_CLASS=ImportScanner, # type: ignore[has-type] IMPORT_GRAPH_CLASS=ImportGraph, PACKAGE_FINDER=ImportLibPackageFinder(), CACHE_CLASS=Cache, diff --git a/tests/adaptors/filesystem.py b/tests/adaptors/filesystem.py index 6eac8cca..74854da0 100644 --- a/tests/adaptors/filesystem.py +++ b/tests/adaptors/filesystem.py @@ -1,8 +1,10 @@ from typing import Any, Dict, Generator, List, Optional, Tuple + import yaml -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import AbstractFileSystem, BasicFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] DEFAULT_MTIME = 10000.0 @@ -43,6 +45,7 @@ def __init__( i.e. last modified times. """ self.contents = self._parse_contents(contents) + self._raw_contents = contents self.content_map = content_map if content_map else {} self.mtime_map: Dict[str, float] = mtime_map if mtime_map else {} @@ -186,3 +189,12 @@ def get_mtime(self, file_name: str) -> float: def write(self, file_name: str, contents: str) -> None: self.content_map[file_name] = contents self.mtime_map[file_name] = DEFAULT_MTIME + + def convert_to_basic(self) -> BasicFileSystem: + """ + Convert this file system to a BasicFileSystem. + """ + return rust.FakeBasicFileSystem( + contents=self._raw_contents, + content_map=self.content_map, + ) diff --git a/tests/unit/adaptors/test_filesystem.py b/tests/unit/adaptors/test_filesystem.py index a59a173c..899d10f6 100644 --- a/tests/unit/adaptors/test_filesystem.py +++ b/tests/unit/adaptors/test_filesystem.py @@ -1,13 +1,148 @@ from copy import copy - +from typing import Type import pytest # type: ignore - +from grimp.application.ports.filesystem import BasicFileSystem from tests.adaptors.filesystem import FakeFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] + + +class _Base: + """ + Tests for methods that AbstractFileSystem and BasicFileSystem share. + """ + + file_system_cls: Type[BasicFileSystem] + + @pytest.mark.parametrize("path", ("/path/to", "/path/to/")) + def test_join(self, path): + file_system = self.file_system_cls() + assert "/path/to/mypackage/file.py" == file_system.join(path, "mypackage", "file.py") + + def test_split(self): + file_system = self.file_system_cls() + assert ("/path/to/mypackage", "file.py") == file_system.split("/path/to/mypackage/file.py") + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/mypackage", False), + ("/path/to/mypackage/", False), + ("/path/to/mypackage/readme.txt", True), + ("/path/to/mypackage/foo/one.txt", True), + ("/path/to/mypackage/foo/two/green.txt", True), + ("/path/to/nonexistent.txt", False), + ("/path/to/mypackage/purple.txt", False), + ], + ) + def test_exists_content_only(self, file_name, expected): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """ + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/", False), + ("/path/to/file.txt", True), + ("/path/to/a/deeper/file.txt", True), + ("/path/to/nonexistent.txt", False), + ], + ) + def test_exists_content_map_only(self, file_name, expected): + file_system = self.file_system_cls( + content_map={ + "/path/to/file.txt": "hello", + "/path/to/a/deeper/file.txt": "hello", + } + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected", + [ + ("/path/to/file.txt", True), + ("/path/to/mypackage/foo/two/green.txt", True), + ("/path/to/nonexistent.txt", False), + ], + ) + def test_exists_content_map_and_content(self, file_name, expected): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """, + content_map={ + "/path/to/file.txt": "hello", + "/path/to/a/deeper/file.txt": "hello", + }, + ) + + assert file_system.exists(file_name) == expected + + @pytest.mark.parametrize( + "file_name, expected_contents", + ( + ("/path/to/mypackage/readme.txt", "Lorem"), + ("/path/to/mypackage/foo/one.txt", "Ipsum"), + # Listed in contents, but not in content_map. + ("/path/to/mypackage/foo/two/green.txt", ""), + # Listed in content_map, but not in contents. + ("/path/to/mypackage/foo/two/blue.txt", "Dolor sic"), + ( + "/path/to/mypackage/indented.txt", + "This is indented text.\n We should dedent it.", + ), + ("/path/to/mypackage/nonexistent", FileNotFoundError), + ("/path/to/mypackage/foo/three/nonexistent", FileNotFoundError), + # TODO - should we raise an exception if we attempt to read a directory? + ), + ) + def test_read(self, file_name, expected_contents): + file_system = self.file_system_cls( + contents=""" + /path/to/mypackage/ + readme.txt + foo/ + one.txt + two/ + green.txt + """, + content_map={ + "/path/to/mypackage/readme.txt": "Lorem", + "/path/to/mypackage/foo/one.txt": "Ipsum", + "/path/to/mypackage/foo/two/blue.txt": "Dolor sic", + "/path/to/mypackage/indented.txt": """ + This is indented text. + We should dedent it. + """, + }, + ) + if isinstance(expected_contents, type) and issubclass(expected_contents, Exception): + with pytest.raises(expected_contents): + file_system.read(file_name) + else: + assert file_system.read(file_name) == expected_contents + +class TestFakeFileSystem(_Base): + file_system_cls = FakeFileSystem -class TestFakeFileSystem: def test_walk(self): - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ __init__.py @@ -29,7 +164,7 @@ def test_walk(self): ] == list(file_system.walk("/path/to/mypackage")) def test_empty_if_directory_does_not_exist(self): - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ __init__.py @@ -38,18 +173,9 @@ def test_empty_if_directory_does_not_exist(self): assert [] == list(file_system.walk("/path/to/nonexistent/package")) def test_dirname(self): - file_system = FakeFileSystem() + file_system = self.file_system_cls() assert "/path/to" == file_system.dirname("/path/to/file.txt") - @pytest.mark.parametrize("path", ("/path/to", "/path/to/")) - def test_join(self, path): - file_system = FakeFileSystem() - assert "/path/to/mypackage/file.py" == file_system.join(path, "mypackage", "file.py") - - def test_split(self): - file_system = FakeFileSystem() - assert ("/path/to/mypackage", "file.py") == file_system.split("/path/to/mypackage/file.py") - def test_dirnames_can_be_modified_in_place(self): """ From the os.walk docs: @@ -59,7 +185,7 @@ def test_dirnames_can_be_modified_in_place(self): of visiting, or even to inform walk() about directories the caller creates or renames before it resumes walk() again. """ - file_system = FakeFileSystem( + file_system = self.file_system_cls( """ /path/to/mypackage/ foo/ @@ -90,38 +216,6 @@ def test_dirnames_can_be_modified_in_place(self): assert expected_tuples == actual_tuples - @pytest.mark.parametrize( - "file_name, expected_contents", - ( - ("/path/to/mypackage/readme.txt", "Lorem"), - ("/path/to/mypackage/foo/one.txt", "Ipsum"), - # Listed in contents, but not in content_map. - ("/path/to/mypackage/foo/two/green.txt", ""), - # Listed in content_map, but not in contents. - ("/path/to/mypackage/foo/two/blue.txt", "Dolor sic"), - ("/path/to/mypackage/nonexistent", FileNotFoundError), - ("/path/to/mypackage/foo/three/nonexistent", FileNotFoundError), - # TODO - should we raise an exception if we attempt to read a directory? - ), - ) - def test_read(self, file_name, expected_contents): - file_system = FakeFileSystem( - contents=""" - /path/to/mypackage/ - readme.txt - foo/ - one.txt - two/ - green.txt - """, - content_map={ - "/path/to/mypackage/readme.txt": "Lorem", - "/path/to/mypackage/foo/one.txt": "Ipsum", - "/path/to/mypackage/foo/two/blue.txt": "Dolor sic", - }, - ) - if isinstance(expected_contents, type) and issubclass(expected_contents, Exception): - with pytest.raises(expected_contents): - file_system.read(file_name) - else: - assert expected_contents == file_system.read(file_name) + +class TestFakeBasicFileSystem(_Base): + file_system_cls = rust.FakeBasicFileSystem diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index f4d1795f..fe55a704 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -3,10 +3,10 @@ import pytest # type: ignore -from grimp.adaptors.importscanner import ImportScanner from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module -from tests.adaptors.filesystem import FakeFileSystem + +from grimp import _rustgrimp as rust # type: ignore[attr-defined] @pytest.mark.parametrize( @@ -50,7 +50,7 @@ ) def test_absolute_imports(include_external_packages, expected_result): all_modules = {Module("foo.one"), Module("foo.two")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": """ import foo.two @@ -61,7 +61,7 @@ def test_absolute_imports(include_external_packages, expected_result): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -81,7 +81,7 @@ def test_absolute_imports(include_external_packages, expected_result): def test_non_ascii(): blue_module = Module("mypackage.blue") non_ascii_modules = {Module("mypackage.ñon_ascii_变"), Module("mypackage.ñon_ascii_变.ラーメン")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/mypackage/blue.py": """ from ñon_ascii_变 import * @@ -93,7 +93,7 @@ def test_non_ascii(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="mypackage", @@ -152,7 +152,7 @@ def test_single_namespace_package_portion(): MODULE_ALPHA, } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/one.py": """ import namespace.foo.two @@ -166,7 +166,7 @@ def test_single_namespace_package_portion(): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -235,7 +235,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): MODULE_FOO_ONE, MODULE_FOO_BLUE, } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/one.py": """ import namespace.bar.one.orange @@ -253,7 +253,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -415,7 +415,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): Module("foo.two.yellow"), Module("foo.three"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -442,7 +442,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -459,15 +459,25 @@ def test_absolute_from_imports(include_external_packages, expected_result): assert expected_result == result -def test_relative_from_imports(): +@pytest.mark.parametrize("module_to_scan_is_package", (True, False)) +def test_relative_from_imports(module_to_scan_is_package): all_modules = { + Module("foo"), + Module("foo.one"), Module("foo.one.blue"), Module("foo.one.green"), Module("foo.two.brown"), Module("foo.two.yellow"), Module("foo.three"), } - file_system = FakeFileSystem( + if module_to_scan_is_package: + module_to_scan = Module("foo.one") + module_filename = "/path/to/foo/one/__init__.py" + else: + module_to_scan = Module("foo.one.blue") + module_filename = "/path/to/foo/one/blue.py" + + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -482,7 +492,7 @@ def test_relative_from_imports(): three.py """, content_map={ - "/path/to/foo/one/blue.py": """ + module_filename: """ from . import green from ..two import yellow from .. import three @@ -491,7 +501,7 @@ def test_relative_from_imports(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -502,23 +512,23 @@ def test_relative_from_imports(): file_system=file_system, ) - result = import_scanner.scan_for_imports(Module("foo.one.blue")) + result = import_scanner.scan_for_imports(module_to_scan) assert result == { DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.one.green"), line_number=1, line_contents="from . import green", ), DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.two.yellow"), line_number=2, line_contents="from ..two import yellow", ), DirectImport( - importer=Module("foo.one.blue"), + importer=module_to_scan, imported=Module("foo.three"), line_number=3, line_contents="from .. import three", @@ -537,7 +547,7 @@ def test_trims_to_known_modules(import_source): Module("foo.two"), Module("foo.two.yellow"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -549,7 +559,7 @@ def test_trims_to_known_modules(import_source): content_map={"/path/to/foo/one.py": import_source}, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -580,7 +590,7 @@ def test_trims_to_known_modules_within_init_file(): Module("foo.one.blue"), Module("foo.one.blue.alpha"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -597,7 +607,7 @@ def test_trims_to_known_modules_within_init_file(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -633,7 +643,7 @@ def test_trims_to_known_modules_within_init_file(): def test_trims_whitespace_from_start_of_line_contents(): all_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ __init__.py @@ -648,7 +658,7 @@ def my_function(): }, ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -706,13 +716,13 @@ def my_function(): def test_external_package_imports_for_namespace_packages(statement, expected_module_name): module_to_scan = Module("namespace.foo.blue.alpha") - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/blue/alpha.py": statement, } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="namespace.foo.blue", @@ -751,7 +761,7 @@ def test_external_package_imports_for_namespace_packages(statement, expected_mod def test_scans_multiple_packages(statement): foo_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} bar_modules = {Module("bar"), Module("bar.green"), Module("bar.blue")} - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" import foo.two @@ -763,7 +773,7 @@ def test_scans_multiple_packages(statement): } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -819,7 +829,7 @@ def test_exclude_type_checking_imports( Module("foo.four"), Module("foo.five"), } - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" import foo.two @@ -832,7 +842,7 @@ def test_exclude_type_checking_imports( } ) - import_scanner = ImportScanner( + import_scanner = rust.ImportScanner( found_packages={ FoundPackage( name="foo", diff --git a/tests/unit/application/test_usecases.py b/tests/unit/application/test_usecases.py index a8f14207..0b76e98e 100644 --- a/tests/unit/application/test_usecases.py +++ b/tests/unit/application/test_usecases.py @@ -208,3 +208,37 @@ class FakeModuleFinder(BaseFakeModuleFinder): [call] = mock_scan_chunks.call_args_list chunks = call.args[0] assert len(chunks) == expected_number_of_chunks + + def test_forgives_wrong_type_being_passed_to_include_external_packages(self): + file_system = FakeFileSystem( + contents=""" + /path/to/mypackage/ + __init__.py + foo/ + __init__.py + one.py + """, + content_map={ + "/path/to/mypackage/foo/one.py": ( + "import mypackage.foo.two.green\nfrom .. import Something" + ), + }, + ) + + class FakePackageFinder(BaseFakePackageFinder): + directory_map = {"mypackage": "/path/to/mypackage"} + + with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): + graph = usecases.build_graph( + "mypackage", + # Note: this should be a bool, but we want to tolerate it, + # as Import Linter currently has a bug where it will pass it as None. + include_external_packages=None, + ) + + expected_modules = { + "mypackage", + "mypackage.foo", + "mypackage.foo.one", + } + assert expected_modules == graph.modules