diff --git a/Cargo.lock b/Cargo.lock
index 4b66f9e..494f207 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -158,6 +158,15 @@ version = "2.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
 
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "bstr"
 version = "1.12.0"
@@ -311,6 +320,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "crc32fast"
 version = "1.5.0"
@@ -320,12 +338,57 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
 [[package]]
 name = "deflate64"
 version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b"
 
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
 [[package]]
 name = "dirs"
 version = "6.0.0"
@@ -347,6 +410,12 @@ dependencies = [
  "windows-sys 0.61.0",
 ]
 
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.35"
@@ -505,6 +574,16 @@ dependencies = [
  "slab",
 ]
 
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.16"
@@ -940,6 +1019,26 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "rayon"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
"368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -991,9 +1090,11 @@ dependencies = [ "glob", "grep", "infer", + "rayon", "rust-mcp-sdk", "serde", "serde_json", + "sha2", "similar", "tempfile", "thiserror", @@ -1152,6 +1253,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1341,6 +1453,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + [[package]] name = "unicode-ident" version = "1.0.19" @@ -1364,6 +1482,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 5d1e44b..c9b4a5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,8 @@ async_zip = { version = "0.0", features = ["full"] } grep = "0.3" base64 = "0.22" infer = "0.19.0" +rayon = "1.11.0" +sha2 = "0.10.9" [dev-dependencies] tempfile = "3.2" diff --git a/src/fs_service.rs b/src/fs_service.rs index 39ecb11..fad6788 100644 --- a/src/fs_service.rs +++ b/src/fs_service.rs @@ -2,6 +2,7 @@ pub mod file_info; pub mod utils; use crate::{ error::{ServiceError, ServiceResult}, + fs_service::utils::is_system_metadata_file, tools::EditOperation, }; use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter}; @@ -14,20 +15,23 @@ use grep::{ regex::RegexMatcherBuilder, searcher::{BinaryDetection, Searcher, sinks::UTF8}, }; +use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; use rust_mcp_sdk::schema::RpcError; use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; use similar::TextDiff; use std::{ - collections::HashSet, + collections::{HashMap, HashSet}, env, fs::{self}, - io::Write, + io::{SeekFrom, Write}, + os::unix::fs::MetadataExt, path::{Path, PathBuf}, sync::Arc, }; use tokio::{ fs::{File, metadata}, - io::{AsyncReadExt, AsyncWriteExt, BufReader}, + io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}, sync::RwLock, }; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; @@ -41,6 +45,11 @@ const SNIPPET_MAX_LENGTH: usize = 200; const SNIPPET_BACKWARD_CHARS: usize = 30; const MAX_CONCURRENT_FILE_READ: usize = 5; +#[cfg(windows)] +pub const OS_LINE_ENDING: &str = "\r\n"; +#[cfg(not(windows))] +pub const OS_LINE_ENDING: &str = "\n"; + type PathResultList = Vec>; pub struct FileSystemService { @@ -457,6 +466,22 @@ impl FileSystemService { Ok(kind) } + pub fn filesize_in_range( + &self, + file_size: u64, + min_bytes: Option, + max_bytes: Option, + ) -> bool { + if min_bytes.is_none() && max_bytes.is_none() { + return true; + } + 
diff --git a/src/fs_service.rs b/src/fs_service.rs
index 39ecb11..fad6788 100644
--- a/src/fs_service.rs
+++ b/src/fs_service.rs
@@ -2,6 +2,7 @@ pub mod file_info;
 pub mod utils;
 use crate::{
     error::{ServiceError, ServiceResult},
+    fs_service::utils::is_system_metadata_file,
     tools::EditOperation,
 };
 use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter};
@@ -14,20 +15,23 @@ use grep::{
     regex::RegexMatcherBuilder,
     searcher::{BinaryDetection, Searcher, sinks::UTF8},
 };
+use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
 use rust_mcp_sdk::schema::RpcError;
 use serde_json::{Value, json};
+use sha2::{Digest, Sha256};
 use similar::TextDiff;
 use std::{
-    collections::HashSet,
+    collections::{HashMap, HashSet},
     env,
     fs::{self},
-    io::Write,
+    io::{Read, SeekFrom, Write},
     path::{Path, PathBuf},
     sync::Arc,
 };
 use tokio::{
     fs::{File, metadata},
-    io::{AsyncReadExt, AsyncWriteExt, BufReader},
+    io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader},
     sync::RwLock,
 };
 use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
@@ -41,6 +45,11 @@ const SNIPPET_MAX_LENGTH: usize = 200;
 const SNIPPET_BACKWARD_CHARS: usize = 30;
 const MAX_CONCURRENT_FILE_READ: usize = 5;
 
+#[cfg(windows)]
+pub const OS_LINE_ENDING: &str = "\r\n";
+#[cfg(not(windows))]
+pub const OS_LINE_ENDING: &str = "\n";
+
 type PathResultList = Vec<ServiceResult<PathBuf>>;
 
 pub struct FileSystemService {
@@ -457,6 +466,22 @@ impl FileSystemService {
         Ok(kind)
     }
 
+    pub fn filesize_in_range(
+        &self,
+        file_size: u64,
+        min_bytes: Option<u64>,
+        max_bytes: Option<u64>,
+    ) -> bool {
+        if min_bytes.is_none() && max_bytes.is_none() {
+            return true;
+        }
+        match (min_bytes, max_bytes) {
+            (_, Some(max)) if file_size > max => false,
+            (Some(min), _) if file_size < min => false,
+            _ => true,
+        }
+    }
+
     pub async fn validate_file_size<P: AsRef<Path>>(
         &self,
         path: P,
@@ -600,9 +625,11 @@ impl FileSystemService {
         root_path: &Path,
         pattern: String,
         exclude_patterns: Vec<String>,
+        min_bytes: Option<u64>,
+        max_bytes: Option<u64>,
     ) -> ServiceResult<Vec<DirEntry>> {
         let result = self
-            .search_files_iter(root_path, pattern, exclude_patterns)
+            .search_files_iter(root_path, pattern, exclude_patterns, min_bytes, max_bytes)
             .await?;
         Ok(result.collect::<Vec<_>>())
     }
@@ -624,6 +651,8 @@
         root_path: &'a Path,
         pattern: String,
         exclude_patterns: Vec<String>,
+        min_bytes: Option<u64>,
+        max_bytes: Option<u64>,
     ) -> ServiceResult<impl Iterator<Item = DirEntry> + 'a> {
         let allowed_directories = self.allowed_directories().await;
         let valid_path = self.validate_path(root_path, allowed_directories.clone())?;
@@ -654,7 +683,7 @@
                 // Get the relative path from the root_path
                 let relative_path = full_path.strip_prefix(root_path).unwrap_or(full_path);
 
-                let should_exclude = exclude_patterns.iter().any(|pattern| {
+                let mut should_exclude = exclude_patterns.iter().any(|pattern| {
                     let glob_pattern = if pattern.contains('*') {
                         pattern.clone()
                     } else {
@@ -666,6 +695,20 @@
                         .unwrap_or(false)
                 });
 
+                // Enforce the optional min/max size bounds on files; directories
+                // are never excluded by size, so traversal can continue into them
+                if !should_exclude
+                    && dir_entry.file_type().is_file()
+                    && (min_bytes.is_some() || max_bytes.is_some())
+                {
+                    match dir_entry.metadata().ok() {
+                        Some(metadata) => {
+                            if !self.filesize_in_range(metadata.len(), min_bytes, max_bytes) {
+                                should_exclude = true;
+                            }
+                        }
+                        None => {
+                            should_exclude = true;
+                        }
+                    }
+                }
+
                 !should_exclude
             })
             .filter_map(|v| v.ok())
@@ -1112,6 +1155,7 @@
         result
     }
 
+    #[allow(clippy::too_many_arguments)]
     pub async fn search_files_content(
         &self,
         root_path: impl AsRef<Path>,
@@ -1119,12 +1163,16 @@
         query: &str,
         is_regex: bool,
         exclude_patterns: Option<Vec<String>>,
+        min_bytes: Option<u64>,
+        max_bytes: Option<u64>,
     ) -> ServiceResult<Vec<FileSearchResult>> {
         let files_iter = self
             .search_files_iter(
                 root_path.as_ref(),
                 pattern.to_string(),
                 exclude_patterns.to_owned().unwrap_or_default(),
+                min_bytes,
+                max_bytes,
             )
             .await?;
@@ -1137,4 +1185,401 @@
             .collect();
         Ok(results)
     }
+
+    /// Reads the first n lines from a text file, preserving line endings.
+    /// Args:
+    ///   file_path: Path to the file
+    ///   n: Number of lines to read
+    /// Returns a String containing the first n lines with original line endings or an error if the path is invalid or file cannot be read.
+    pub async fn head_file(&self, file_path: &Path, n: usize) -> ServiceResult<String> {
+        // Validate file path against allowed directories
+        let allowed_directories = self.allowed_directories().await;
+        let valid_path = self.validate_path(file_path, allowed_directories)?;
+
+        // Open file asynchronously and create a BufReader
+        let file = File::open(&valid_path).await?;
+        let mut reader = BufReader::new(file);
+        let mut result = String::with_capacity(n * 100); // Estimate capacity (avg 100 bytes/line)
+        let mut count = 0;
+
+        // Read lines asynchronously, preserving line endings
+        let mut line = Vec::new();
+        while count < n {
+            line.clear();
+            let bytes_read = reader.read_until(b'\n', &mut line).await?;
+            if bytes_read == 0 {
+                break; // Reached EOF
+            }
+            result.push_str(&String::from_utf8_lossy(&line));
+            count += 1;
+        }
+
+        Ok(result)
+    }
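
The min/max size bounds added above flow from `search_files` through `search_files_iter`, so range queries need no extra plumbing at call sites. A sketch of the intended behavior, assuming the `setup_service`/`create_temp_file` helpers from tests/common shown at the bottom of this diff (hypothetical test, not part of the change):

    #[tokio::test]
    async fn test_search_files_min_bytes() {
        let (dir_path, service, _temp) = setup_service(vec![]);
        create_temp_file(&dir_path, "small.txt", "ab"); // 2 bytes
        create_temp_file(&dir_path, "large.txt", &"x".repeat(1024)); // 1 KiB
        let result = service
            .search_files(&dir_path, "*.txt".to_string(), vec![], Some(100), None)
            .await
            .unwrap();
        // Only large.txt satisfies min_bytes = 100
        assert_eq!(result.len(), 1);
    }
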
+
+    /// Reads the last n lines from a text file, preserving line endings.
+    /// Args:
+    ///   file_path: Path to the file
+    ///   n: Number of lines to read
+    /// Returns a String containing the last n lines with original line endings or an error if the path is invalid or file cannot be read.
+    pub async fn tail_file(&self, file_path: &Path, n: usize) -> ServiceResult<String> {
+        // Validate file path against allowed directories
+        let allowed_directories = self.allowed_directories().await;
+        let valid_path = self.validate_path(file_path, allowed_directories)?;
+
+        // Open file asynchronously
+        let file = File::open(&valid_path).await?;
+        let file_size = file.metadata().await?.len();
+
+        // If file is empty or n is 0, return empty string
+        if file_size == 0 || n == 0 {
+            return Ok(String::new());
+        }
+
+        // Create a BufReader
+        let mut reader = BufReader::new(file);
+        let mut line_count = 0;
+        let mut pos = file_size;
+        let chunk_size = 8192; // 8KB chunks
+        let mut buffer = vec![0u8; chunk_size];
+        let mut newline_positions = Vec::new();
+
+        // Read backwards, collecting newline positions in descending order
+        while pos > 0 {
+            let read_size = chunk_size.min(pos as usize);
+            pos -= read_size as u64;
+            reader.seek(SeekFrom::Start(pos)).await?;
+            let read_bytes = reader.read_exact(&mut buffer[..read_size]).await?;
+
+            // Process chunk in reverse to find newlines
+            for (i, byte) in buffer[..read_bytes].iter().enumerate().rev() {
+                if *byte == b'\n' {
+                    newline_positions.push(pos + i as u64);
+                    line_count += 1;
+                }
+            }
+        }
+
+        // A file that does not end with a newline has one extra (partial) last line
+        let mut ends_with_newline = true;
+        reader.seek(SeekFrom::End(-1)).await?;
+        let mut last_byte = [0u8; 1];
+        reader.read_exact(&mut last_byte).await?;
+        if last_byte[0] != b'\n' {
+            ends_with_newline = false;
+            line_count += 1;
+        }
+
+        // Determine start position for reading the last n lines. In the
+        // descending newline list, entry n (file ends with a newline) or
+        // entry n - 1 (partial last line) is the newline that terminates
+        // the line just before the requested tail.
+        let start_pos = if line_count <= n {
+            0 // Read from start if fewer than n lines
+        } else {
+            let idx = if ends_with_newline { n } else { n - 1 };
+            newline_positions.get(idx).map(|p| p + 1).unwrap_or(0)
+        };
+
+        // Read forward from start_pos
+        reader.seek(SeekFrom::Start(start_pos)).await?;
+        let mut result = String::with_capacity(n * 100); // Estimate capacity
+        let mut line = Vec::new();
+        let mut lines_read = 0;
+
+        while lines_read < n {
+            line.clear();
+            let bytes_read = reader.read_until(b'\n', &mut line).await?;
+            if bytes_read == 0 {
+                break; // Reached EOF (read_until returns the partial last line before this)
+            }
+            result.push_str(&String::from_utf8_lossy(&line));
+            lines_read += 1;
+        }
+
+        Ok(result)
+    }
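
The trailing-newline case is where tail implementations usually slip: "a\nb\nc\n" and "a\nb\nc" should both report "b" and "c" as their last two lines, with the original endings preserved. A sketch pinning that down (hypothetical test, helper names assumed from tests/common):

    #[tokio::test]
    async fn test_tail_file_trailing_newline() {
        let (dir_path, service, _temp) = setup_service(vec![]);
        create_temp_file(&dir_path, "with_nl.txt", "a\nb\nc\n");
        create_temp_file(&dir_path, "without_nl.txt", "a\nb\nc");
        let tail = service
            .tail_file(&dir_path.join("with_nl.txt"), 2)
            .await
            .unwrap();
        assert_eq!(tail, "b\nc\n"); // line endings are preserved
        let tail = service
            .tail_file(&dir_path.join("without_nl.txt"), 2)
            .await
            .unwrap();
        assert_eq!(tail, "b\nc"); // no newline is invented at EOF
    }
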
+
+    /// Reads lines from a text file starting at the specified offset (0-based), preserving line endings.
+    /// Args:
+    ///   path: Path to the file
+    ///   offset: Number of lines to skip (0-based)
+    ///   limit: Optional maximum number of lines to read
+    /// Returns a String containing the selected lines with original line endings or an error if the path is invalid or file cannot be read.
+    pub async fn read_file_lines(
+        &self,
+        path: &Path,
+        offset: usize,
+        limit: Option<usize>,
+    ) -> ServiceResult<String> {
+        // Validate file path against allowed directories
+        let allowed_directories = self.allowed_directories().await;
+        let valid_path = self.validate_path(path, allowed_directories)?;
+
+        // Open file and get metadata before moving into BufReader
+        let file = File::open(&valid_path).await?;
+        let file_size = file.metadata().await?.len();
+        let mut reader = BufReader::new(file);
+
+        // If file is empty or limit is 0, return empty string
+        if file_size == 0 || limit == Some(0) {
+            return Ok(String::new());
+        }
+
+        // Skip offset lines (0-based indexing)
+        let mut buffer = Vec::new();
+        for _ in 0..offset {
+            buffer.clear();
+            if reader.read_until(b'\n', &mut buffer).await? == 0 {
+                return Ok(String::new()); // EOF before offset
+            }
+        }
+
+        // Read lines up to limit (or all remaining if limit is None)
+        let mut result = String::with_capacity(limit.unwrap_or(100) * 100); // Estimate capacity
+        match limit {
+            Some(max_lines) => {
+                for _ in 0..max_lines {
+                    buffer.clear();
+                    let bytes_read = reader.read_until(b'\n', &mut buffer).await?;
+                    if bytes_read == 0 {
+                        break; // Reached EOF
+                    }
+                    result.push_str(&String::from_utf8_lossy(&buffer));
+                }
+            }
+            None => {
+                loop {
+                    buffer.clear();
+                    let bytes_read = reader.read_until(b'\n', &mut buffer).await?;
+                    if bytes_read == 0 {
+                        break; // Reached EOF
+                    }
+                    result.push_str(&String::from_utf8_lossy(&buffer));
+                }
+            }
+        }
+
+        Ok(result)
+    }
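
Because `read_file_lines` takes a 0-based offset plus an optional limit, fixed-size pagination is just arithmetic on the caller's side. A hedged usage sketch (hypothetical wrapper, using only the types in this file):

    /// Read page `page` (0-based) of `page_size` lines from a text file.
    async fn read_page(
        service: &FileSystemService,
        path: &Path,
        page: usize,
        page_size: usize,
    ) -> ServiceResult<String> {
        service
            .read_file_lines(path, page * page_size, Some(page_size))
            .await
    }
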
+
+    /// Calculates the total size (in bytes) of all files within a directory tree.
+    ///
+    /// This function recursively searches the specified `root_path` for files,
+    /// filters out directories and non-file entries, and sums the sizes of all found files.
+    /// The size calculation is parallelized using Rayon for improved performance on large directories.
+    ///
+    /// # Arguments
+    /// * `root_path` - The root directory path to start the size calculation.
+    ///
+    /// # Returns
+    /// Returns a `ServiceResult<u64>` containing the total size in bytes of all files under the `root_path`.
+    ///
+    /// # Notes
+    /// - Only files are included in the size calculation; directories and other non-file entries are ignored.
+    /// - The search pattern is `"**/*"` (all files) and no exclusions are applied.
+    /// - Parallel iteration is used to speed up the metadata fetching and summation.
+    pub async fn calculate_directory_size(&self, root_path: &Path) -> ServiceResult<u64> {
+        let entries = self
+            .search_files_iter(root_path, "**/*".to_string(), vec![], None, None)
+            .await?
+            .filter(|e| e.file_type().is_file()); // Only process files
+
+        // Use rayon to parallelize size summation
+        let total_size: u64 = entries
+            .par_bridge() // Convert to parallel iterator
+            .filter_map(|entry| entry.metadata().ok().map(|meta| meta.len()))
+            .sum();
+
+        Ok(total_size)
+    }
+
+    /// Recursively finds all empty directories within the given root path.
+    ///
+    /// A directory is considered empty if it contains no files in itself or any of its subdirectories,
+    /// except OS metadata files: `.DS_Store` (macOS) and `Thumbs.db` (Windows).
+    /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in
+    /// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories).
+    ///
+    /// # Arguments
+    /// - `root_path`: The starting directory to search.
+    /// - `exclude_patterns`: Optional list of glob patterns to exclude from the search.
+    ///   Directories matching these patterns will be ignored.
+    ///
+    /// # Errors
+    /// Returns an error if the root path is invalid or inaccessible.
+    ///
+    /// # Returns
+    /// A list of paths to all empty directories, as strings, including parent directories that contain only empty subdirectories.
+    pub async fn find_empty_directories(
+        &self,
+        root_path: &Path,
+        exclude_patterns: Option<Vec<String>>,
+    ) -> ServiceResult<Vec<String>> {
+        let walker = self
+            .search_files_iter(
+                root_path,
+                "**/*".to_string(),
+                exclude_patterns.unwrap_or_default(),
+                None,
+                None,
+            )
+            .await?
+            .filter(|e| e.file_type().is_dir()); // Only directories
+
+        let mut empty_dirs = Vec::new();
+
+        // Check each directory for emptiness
+        for entry in walker {
+            // A directory is empty if no files are found in it or its subdirs;
+            // OS metadata files such as ".DS_Store" and "Thumbs.db" are ignored
+            let is_empty = WalkDir::new(entry.path())
+                .into_iter()
+                .filter_map(|e| e.ok())
+                .all(|e| !e.file_type().is_file() || is_system_metadata_file(e.file_name()));
+
+            if is_empty {
+                if let Some(path_str) = entry.path().to_str() {
+                    empty_dirs.push(path_str.to_string());
+                }
+            }
+        }
+
+        Ok(empty_dirs)
+    }
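
A parent directory that holds only empty children must itself be reported, which is why each candidate gets its own WalkDir pass. A sketch of that contract (hypothetical test, helper names assumed from tests/common):

    #[tokio::test]
    async fn test_find_empty_directories_nested() {
        let (dir_path, service, _temp) = setup_service(vec![]);
        create_sub_dir(&dir_path, "parent/child").await; // both empty
        create_temp_file(&dir_path, "full/file.txt", "content");
        let result = service
            .find_empty_directories(&dir_path, None)
            .await
            .unwrap();
        assert!(result.iter().any(|p| p.ends_with("parent")));
        assert!(result.iter().any(|p| p.ends_with("child")));
        assert!(!result.iter().any(|p| p.ends_with("full")));
    }
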
+
+    /// Finds groups of duplicate files within the given root path.
+    /// Returns a vector of vectors, where each inner vector contains paths to files with identical content.
+    /// Files are considered duplicates if they have the same size and SHA-256 hash.
+    pub async fn find_duplicate_files(
+        &self,
+        root_path: &Path,
+        pattern: Option<String>,
+        exclude_patterns: Option<Vec<String>>,
+        min_bytes: Option<u64>,
+        max_bytes: Option<u64>,
+    ) -> ServiceResult<Vec<Vec<String>>> {
+        // Validate root path against allowed directories
+        let allowed_directories = self.allowed_directories().await;
+        let valid_path = self.validate_path(root_path, allowed_directories)?;
+
+        // Step 1: Collect files and group by size
+        let mut size_map: HashMap<u64, Vec<String>> = HashMap::new();
+        let entries = self
+            .search_files_iter(
+                &valid_path,
+                pattern.unwrap_or("**/*".to_string()),
+                exclude_patterns.unwrap_or_default(),
+                min_bytes,
+                max_bytes,
+            )
+            .await?
+            .filter(|e| e.file_type().is_file()); // Only files
+
+        for entry in entries {
+            if let Ok(metadata) = entry.metadata() {
+                if let Some(path_str) = entry.path().to_str() {
+                    size_map
+                        .entry(metadata.len())
+                        .or_default()
+                        .push(path_str.to_string());
+                }
+            }
+        }
+
+        // Filter out sizes with only one file (no duplicates possible)
+        let size_groups: Vec<Vec<String>> = size_map
+            .into_iter()
+            .filter(|(_, paths)| paths.len() > 1)
+            .map(|(_, paths)| paths)
+            .collect();
+
+        // Step 2: Group by quick hash (first 4KB). Hashing runs on rayon's
+        // thread pool with blocking std::fs reads, so no async runtime is
+        // entered from worker threads.
+        let mut quick_hash_map: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
+        for paths in size_groups.into_iter() {
+            let quick_hashes: Vec<(String, Vec<u8>)> = paths
+                .into_par_iter()
+                .filter_map(|path| {
+                    let mut file = fs::File::open(&path).ok()?;
+                    let mut buffer = vec![0u8; 4096]; // Read first 4KB
+                    let bytes_read = file.read(&mut buffer).ok()?;
+                    let mut hasher = Sha256::new();
+                    hasher.update(&buffer[..bytes_read]);
+                    Some((path, hasher.finalize().to_vec()))
+                })
+                .collect();
+
+            for (path, hash) in quick_hashes {
+                quick_hash_map.entry(hash).or_default().push(path);
+            }
+        }
+
+        // Step 3: Group by full hash for groups with multiple files
+        let mut full_hash_map: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
+        let filtered_quick_hashes: Vec<(Vec<u8>, Vec<String>)> = quick_hash_map
+            .into_iter()
+            .filter(|(_, paths)| paths.len() > 1)
+            .collect();
+
+        for (_quick_hash, paths) in filtered_quick_hashes {
+            let full_hashes: Vec<(String, Vec<u8>)> = paths
+                .into_par_iter()
+                .filter_map(|path| {
+                    let mut file = fs::File::open(&path).ok()?;
+                    let mut hasher = Sha256::new();
+                    let mut buffer = vec![0u8; 8192]; // 8KB chunks
+                    loop {
+                        let bytes_read = file.read(&mut buffer).ok()?;
+                        if bytes_read == 0 {
+                            break;
+                        }
+                        hasher.update(&buffer[..bytes_read]);
+                    }
+                    Some((path, hasher.finalize().to_vec()))
+                })
+                .collect();
+
+            for (path, hash) in full_hashes {
+                full_hash_map.entry(hash).or_default().push(path);
+            }
+        }
+
+        // Collect groups of duplicates (only groups with more than one file)
+        let duplicates: Vec<Vec<String>> = full_hash_map
+            .into_values()
+            .filter(|group| group.len() > 1)
+            .collect();
+
+        Ok(duplicates)
+    }
 }
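
The three stages are strictly narrowing: only size-collided files get the 4 KB quick hash, and only quick-hash collisions pay for a full-content hash, so the common case never reads whole files. A sketch of the end-to-end contract (hypothetical test, helpers from tests/common below):

    #[tokio::test]
    async fn test_find_duplicate_files_basic() {
        let (dir_path, service, _temp) = setup_service(vec![]);
        create_temp_file(&dir_path, "a.txt", "same-bytes");
        create_temp_file(&dir_path, "b.txt", "same-bytes");
        create_temp_file(&dir_path, "c.txt", "unrelated");
        let groups = sort_duplicate_groups(
            service
                .find_duplicate_files(&dir_path, None, None, None, None)
                .await
                .unwrap(),
        );
        assert_eq!(groups.len(), 1); // a.txt and b.txt only
        assert_eq!(groups[0].len(), 2);
    }
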
diff --git a/src/fs_service/utils.rs b/src/fs_service/utils.rs
index 05c7ae2..a7ca00c 100644
--- a/src/fs_service/utils.rs
+++ b/src/fs_service/utils.rs
@@ -1,21 +1,27 @@
+use async_zip::{Compression, ZipEntryBuilder, error::ZipError, tokio::write::ZipFileWriter};
+use chrono::{DateTime, Local};
+use dirs::home_dir;
+use rust_mcp_sdk::macros::JsonSchema;
+#[cfg(unix)]
+use std::os::unix::fs::PermissionsExt;
+#[cfg(windows)]
+use std::os::windows::fs::MetadataExt;
 use std::{
+    ffi::OsStr,
     fs::{self},
     path::{Component, Path, PathBuf, Prefix},
     time::SystemTime,
 };
-
-use async_zip::{Compression, ZipEntryBuilder, error::ZipError, tokio::write::ZipFileWriter};
-use chrono::{DateTime, Local};
-use dirs::home_dir;
-
 use tokio::fs::File;
 use tokio::io::AsyncReadExt;
 
-#[cfg(unix)]
-use std::os::unix::fs::PermissionsExt;
-
-#[cfg(windows)]
-use std::os::windows::fs::MetadataExt;
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub enum OutputFormat {
+    #[serde(rename = "text")]
+    Text,
+    #[serde(rename = "json")]
+    Json,
+}
 
 pub fn format_system_time(system_time: SystemTime) -> String {
     // Convert SystemTime to DateTime<Local>
@@ -140,3 +146,14 @@ pub fn contains_symlink<P: AsRef<Path>>(path: P) -> std::io::Result<bool> {
 
     Ok(false)
 }
+
+/// Checks if a given filename is a system metadata file commonly
+/// used by operating systems to store folder metadata.
+///
+/// Specifically detects:
+/// - `.DS_Store` (macOS)
+/// - `Thumbs.db` (Windows)
+///
+pub fn is_system_metadata_file(filename: &OsStr) -> bool {
+    filename == ".DS_Store" || filename == "Thumbs.db"
+}
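
`OutputFormat` is shared by the new find_* tools and rides on the lowercase wire names. A quick serde round-trip sketch (assuming a test module next to utils.rs):

    #[test]
    fn output_format_wire_names() {
        let fmt: OutputFormat = serde_json::from_str("\"json\"").unwrap();
        assert!(matches!(fmt, OutputFormat::Json));
        assert_eq!(
            serde_json::to_string(&OutputFormat::Text).unwrap(),
            "\"text\""
        );
    }
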
diff --git a/src/handler.rs b/src/handler.rs
index 144d01b..1855372 100644
--- a/src/handler.rs
+++ b/src/handler.rs
@@ -1,8 +1,6 @@
-use std::cmp::Ordering;
-use std::sync::Arc;
-
 use crate::cli::CommandArguments;
 use crate::error::ServiceError;
+use crate::invoke_tools;
 use crate::{error::ServiceResult, fs_service::FileSystemService, tools::*};
 use async_trait::async_trait;
 use rust_mcp_sdk::McpServer;
@@ -12,6 +10,8 @@ use rust_mcp_sdk::schema::{
     CallToolRequest, CallToolResult, InitializeRequest, InitializeResult, ListToolsRequest,
     ListToolsResult, RpcError, schema_utils::CallToolError,
 };
+use std::cmp::Ordering;
+use std::sync::Arc;
 
 pub struct FileSystemHandler {
     readonly: bool,
@@ -193,61 +193,33 @@ impl ServerHandler for FileSystemHandler {
             self.assert_write_access()?;
         }
 
-        match tool_params {
-            FileSystemTools::ReadMediaFileTool(params) => {
-                ReadMediaFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ReadMultipleMediaFilesTool(params) => {
-                ReadMultipleMediaFilesTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ReadTextFileTool(params) => {
-                ReadTextFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ReadMultipleTextFilesTool(params) => {
-                ReadMultipleTextFilesTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::WriteFileTool(params) => {
-                WriteFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::EditFileTool(params) => {
-                EditFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::CreateDirectoryTool(params) => {
-                CreateDirectoryTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ListDirectoryTool(params) => {
-                ListDirectoryTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::DirectoryTreeTool(params) => {
-                DirectoryTreeTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::MoveFileTool(params) => {
-                MoveFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::SearchFilesTool(params) => {
-                SearchFilesTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::GetFileInfoTool(params) => {
-                GetFileInfoTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ListAllowedDirectoriesTool(params) => {
-                ListAllowedDirectoriesTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ZipFilesTool(params) => {
-                ZipFilesTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::UnzipFileTool(params) => {
-                UnzipFileTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ZipDirectoryTool(params) => {
-                ZipDirectoryTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::SearchFilesContentTool(params) => {
-                SearchFilesContentTool::run_tool(params, &self.fs_service).await
-            }
-            FileSystemTools::ListDirectoryWithSizesTool(params) => {
-                ListDirectoryWithSizesTool::run_tool(params, &self.fs_service).await
-            }
-        }
+        invoke_tools!(
+            tool_params,
+            &self.fs_service,
+            ReadMediaFile,
+            ReadMultipleMediaFiles,
+            ReadTextFile,
+            ReadMultipleTextFiles,
+            WriteFile,
+            EditFile,
+            CreateDirectory,
+            ListDirectory,
+            DirectoryTree,
+            MoveFile,
+            SearchFiles,
+            GetFileInfo,
+            ListAllowedDirectories,
+            ZipFiles,
+            UnzipFile,
+            ZipDirectory,
+            SearchFilesContent,
+            ListDirectoryWithSizes,
+            HeadFile,
+            TailFile,
+            ReadFileLines,
+            FindEmptyDirectories,
+            CalculateDirectorySize,
+            FindDuplicateFiles
+        )
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 7adbe2f..9a5775c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,5 +2,6 @@ pub mod cli;
 pub mod error;
 pub mod fs_service;
 pub mod handler;
+pub mod macros;
 pub mod server;
 pub mod tools;
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644
index 0000000..908a3fd
--- /dev/null
+++ b/src/macros.rs
@@ -0,0 +1,52 @@
+/// Generates a `match` expression for dispatching `FileSystemTools` variants to their respective `run_tool` methods.
+///
+/// This macro reduces boilerplate in matching `FileSystemTools` enum variants by generating a `match` arm
+/// for each specified tool. Each arm calls the tool's `run_tool` method with the provided parameters and
+/// filesystem service, handling the async dispatch uniformly.
+///
+/// # Parameters
+/// - `$params:expr`: The expression to match against, expected to be a `FileSystemTools` enum value.
+/// - `$fs_service:expr`: The filesystem service reference (e.g., `&self.fs_service`) to pass to each tool's `run_tool` method.
+/// - `$($tool:ident),*`: A comma-separated list of tool identifiers (e.g., `ReadMediaFile`, `WriteFile`) corresponding to
+///   `FileSystemTools` variants and their associated types.
+///
+/// # Usage
+/// The macro is typically used within a method that dispatches filesystem operations based on a `FileSystemTools` enum.
+/// Each tool must have a `run_tool` method with the signature:
+/// ```rust
+/// async fn run_tool(params: Self, fs_service: &FileSystemService) -> Result<CallToolResult, CallToolError>
+/// ```
+/// where each tool type is both the variant payload and the implementor of `run_tool`.
+///
+/// # Example
+/// ```rust
+/// invoke_tools!(
+///     tool_params,
+///     &self.fs_service,
+///     ReadMediaFile,
+///     WriteFile
+/// )
+/// ```
+/// This expands to:
+/// ```rust
+/// match tool_params {
+///     FileSystemTools::ReadMediaFile(params) => ReadMediaFile::run_tool(params, &self.fs_service).await,
+///     FileSystemTools::WriteFile(params) => WriteFile::run_tool(params, &self.fs_service).await,
+/// }
+/// ```
+///
+/// # Notes
+/// - Ensure each tool identifier matches a variant of the `FileSystemTools` enum and has a corresponding `run_tool` method.
+/// - The macro assumes all `run_tool` methods are `async` and return `Result<CallToolResult, CallToolError>`.
+/// - To add a new tool, include its identifier in the macro invocation list.
+/// - If a tool has a different `run_tool` signature, a separate macro or manual `match` arm may be needed.
+#[macro_export]
+macro_rules! invoke_tools {
+    ($params:expr, $fs_service:expr, $($tool:ident),* $(,)?) => {
+        match $params {
+            $(
+                FileSystemTools::$tool(params) => $tool::run_tool(params, $fs_service).await,
+            )*
+        }
+    };
+}
diff --git a/src/tools.rs b/src/tools.rs
index 3733b2a..d7c3c98 100644
--- a/src/tools.rs
+++ b/src/tools.rs
@@ -1,60 +1,77 @@
+mod calculate_directory_size;
 mod create_directory;
 mod directory_tree;
 mod edit_file;
+mod find_duplicate_files;
+mod find_empty_directories;
 mod get_file_info;
+mod head_file;
 mod list_allowed_directories;
 mod list_directory;
 mod list_directory_with_sizes;
 mod move_file;
+mod read_file_lines;
 mod read_media_file;
 mod read_multiple_media_files;
 mod read_multiple_text_files;
 mod read_text_file;
 mod search_file;
 mod search_files_content;
+mod tail_file;
 mod write_file;
 mod zip_unzip;
 
-pub use create_directory::CreateDirectoryTool;
-pub use directory_tree::DirectoryTreeTool;
-pub use edit_file::{EditFileTool, EditOperation};
-pub use get_file_info::GetFileInfoTool;
-pub use list_allowed_directories::ListAllowedDirectoriesTool;
-pub use list_directory::ListDirectoryTool;
-pub use list_directory_with_sizes::ListDirectoryWithSizesTool;
-pub use move_file::MoveFileTool;
-pub use read_media_file::ReadMediaFileTool;
-pub use read_multiple_media_files::ReadMultipleMediaFilesTool;
-pub use read_multiple_text_files::ReadMultipleTextFilesTool;
-pub use read_text_file::ReadTextFileTool;
+pub use calculate_directory_size::{CalculateDirectorySize, FileSizeOutputFormat};
+pub use create_directory::CreateDirectory;
+pub use directory_tree::DirectoryTree;
+pub use edit_file::{EditFile, EditOperation};
+pub use find_duplicate_files::FindDuplicateFiles;
+pub use find_empty_directories::FindEmptyDirectories;
+pub use get_file_info::GetFileInfo;
+pub use head_file::HeadFile;
+pub use list_allowed_directories::ListAllowedDirectories;
+pub use list_directory::ListDirectory;
+pub use list_directory_with_sizes::ListDirectoryWithSizes;
+pub use move_file::MoveFile;
+pub use read_file_lines::ReadFileLines;
+pub use read_media_file::ReadMediaFile;
+pub use read_multiple_media_files::ReadMultipleMediaFiles;
+pub use read_multiple_text_files::ReadMultipleTextFiles;
+pub use read_text_file::ReadTextFile;
 pub use rust_mcp_sdk::tool_box;
-pub use search_file::SearchFilesTool;
-pub use search_files_content::SearchFilesContentTool;
-pub use write_file::WriteFileTool;
-pub use zip_unzip::{UnzipFileTool, ZipDirectoryTool, ZipFilesTool};
-
+pub use search_file::SearchFiles;
+pub use search_files_content::SearchFilesContent;
+pub use tail_file::TailFile;
+pub use write_file::WriteFile;
+pub use zip_unzip::{UnzipFile, ZipDirectory, ZipFiles};
 //Generate FileSystemTools enum , tools() function, and TryFrom trait implementation
 tool_box!(
     FileSystemTools,
     [
-        ReadTextFileTool,
-        CreateDirectoryTool,
-        DirectoryTreeTool,
-        EditFileTool,
-        GetFileInfoTool,
-        ListAllowedDirectoriesTool,
-        ListDirectoryTool,
-        MoveFileTool,
-        ReadMultipleTextFilesTool,
-        SearchFilesTool,
-        WriteFileTool,
-        ZipFilesTool,
-        UnzipFileTool,
-        ZipDirectoryTool,
-        SearchFilesContentTool,
-        ListDirectoryWithSizesTool,
-        ReadMediaFileTool,
-        ReadMultipleMediaFilesTool
+        ReadTextFile,
+        CreateDirectory,
+        DirectoryTree,
+        EditFile,
+        GetFileInfo,
+        ListAllowedDirectories,
+        ListDirectory,
+        MoveFile,
+        ReadMultipleTextFiles,
+        SearchFiles,
+        WriteFile,
+        ZipFiles,
+        UnzipFile,
+        ZipDirectory,
+        SearchFilesContent,
+        ListDirectoryWithSizes,
+        ReadMediaFile,
+        ReadMultipleMediaFiles,
+        HeadFile,
+        TailFile,
+        ReadFileLines,
+        FindEmptyDirectories,
+        CalculateDirectorySize,
+        FindDuplicateFiles
    ]
 );
@@ -63,24 +80,30 @@ impl FileSystemTools {
     // Returns `true` for tools that modify files or directories, and `false` otherwise.
     pub fn require_write_access(&self) -> bool {
         match self {
-            FileSystemTools::CreateDirectoryTool(_)
-            | FileSystemTools::MoveFileTool(_)
-            | FileSystemTools::WriteFileTool(_)
-            | FileSystemTools::EditFileTool(_)
-            | FileSystemTools::ZipFilesTool(_)
-            | FileSystemTools::UnzipFileTool(_)
-            | FileSystemTools::ZipDirectoryTool(_) => true,
-            FileSystemTools::ReadTextFileTool(_)
-            | FileSystemTools::DirectoryTreeTool(_)
-            | FileSystemTools::GetFileInfoTool(_)
-            | FileSystemTools::ListAllowedDirectoriesTool(_)
-            | FileSystemTools::ListDirectoryTool(_)
-            | FileSystemTools::ReadMultipleTextFilesTool(_)
-            | FileSystemTools::SearchFilesContentTool(_)
-            | FileSystemTools::ListDirectoryWithSizesTool(_)
-            | FileSystemTools::ReadMediaFileTool(_)
-            | FileSystemTools::ReadMultipleMediaFilesTool(_)
-            | FileSystemTools::SearchFilesTool(_) => false,
+            FileSystemTools::CreateDirectory(_)
+            | FileSystemTools::MoveFile(_)
+            | FileSystemTools::WriteFile(_)
+            | FileSystemTools::EditFile(_)
+            | FileSystemTools::ZipFiles(_)
+            | FileSystemTools::UnzipFile(_)
+            | FileSystemTools::ZipDirectory(_) => true,
+            FileSystemTools::ReadTextFile(_)
+            | FileSystemTools::DirectoryTree(_)
+            | FileSystemTools::GetFileInfo(_)
+            | FileSystemTools::ListAllowedDirectories(_)
+            | FileSystemTools::ListDirectory(_)
+            | FileSystemTools::ReadMultipleTextFiles(_)
+            | FileSystemTools::SearchFilesContent(_)
+            | FileSystemTools::ListDirectoryWithSizes(_)
+            | FileSystemTools::ReadMediaFile(_)
+            | FileSystemTools::HeadFile(_)
+            | FileSystemTools::ReadMultipleMediaFiles(_)
+            | FileSystemTools::TailFile(_)
+            | FileSystemTools::ReadFileLines(_)
+            | FileSystemTools::FindEmptyDirectories(_)
+            | FileSystemTools::CalculateDirectorySize(_)
+            | FileSystemTools::FindDuplicateFiles(_)
+            | FileSystemTools::SearchFiles(_) => false,
         }
     }
 }
diff --git a/src/tools/calculate_directory_size.rs b/src/tools/calculate_directory_size.rs
new file mode 100644
index 0000000..af8d56c
--- /dev/null
+++ b/src/tools/calculate_directory_size.rs
@@ -0,0 +1,59 @@
+use crate::fs_service::{FileSystemService, utils::format_bytes};
+use rust_mcp_sdk::{
+    macros::{JsonSchema, mcp_tool},
+    schema::{CallToolResult, TextContent, schema_utils::CallToolError},
+};
+use std::path::Path;
+
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub enum FileSizeOutputFormat {
+    #[serde(rename = "human-readable")]
+    HumanReadable,
+    #[serde(rename = "bytes")]
+    Bytes,
+}
+
+#[mcp_tool(
+    name = "calculate_directory_size",
+    title = "Calculate Directory Size",
+    description = concat!("Calculates the total size of a directory specified by `root_path`. ",
+        "It recursively searches for files and sums their sizes. ",
+        "The result can be returned in either a `human-readable` format or as `bytes`, depending on the specified `output_format` argument. ",
+        "Only works within allowed directories."),
+    destructive_hint = false,
+    idempotent_hint = false,
+    open_world_hint = false,
+    read_only_hint = true
+)]
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub struct CalculateDirectorySize {
+    /// The root directory path to start the size calculation.
+    pub root_path: String,
+    /// Defines the output format, which can be either `human-readable` or `bytes`.
+    #[json_schema(default = "human-readable")]
+    pub output_format: Option<FileSizeOutputFormat>,
+}
+
+impl CalculateDirectorySize {
+    pub async fn run_tool(
+        params: Self,
+        context: &FileSystemService,
+    ) -> std::result::Result<CallToolResult, CallToolError> {
+        let total_bytes = context
+            .calculate_directory_size(Path::new(&params.root_path))
+            .await
+            .map_err(CallToolError::new)?;
+
+        let output_content = match params
+            .output_format
+            .unwrap_or(FileSizeOutputFormat::HumanReadable)
+        {
+            FileSizeOutputFormat::HumanReadable => format_bytes(total_bytes),
+            FileSizeOutputFormat::Bytes => format!("{total_bytes}"),
+        };
+
+        Ok(CallToolResult::text_content(vec![TextContent::from(
+            output_content,
+        )]))
+    }
+}
diff --git a/src/tools/create_directory.rs b/src/tools/create_directory.rs
index 36dcbbc..6fcbbc1 100644
--- a/src/tools/create_directory.rs
+++ b/src/tools/create_directory.rs
@@ -20,12 +20,12 @@ use crate::fs_service::FileSystemService;
     read_only_hint = false
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct CreateDirectoryTool {
+pub struct CreateDirectory {
     /// The path where the directory will be created.
     pub path: String,
 }
 
-impl CreateDirectoryTool {
+impl CreateDirectory {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
diff --git a/src/tools/directory_tree.rs b/src/tools/directory_tree.rs
index a347ea2..33cf29d 100644
--- a/src/tools/directory_tree.rs
+++ b/src/tools/directory_tree.rs
@@ -21,13 +21,13 @@ use crate::fs_service::FileSystemService;
     read_only_hint = true
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct DirectoryTreeTool {
+pub struct DirectoryTree {
     /// The root path of the directory tree to generate.
     pub path: String,
     /// Limits the depth of directory traversal
     pub max_depth: Option<usize>,
 }
 
-impl DirectoryTreeTool {
+impl DirectoryTree {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
diff --git a/src/tools/edit_file.rs b/src/tools/edit_file.rs
index 0d63fbe..e59ceff 100644
--- a/src/tools/edit_file.rs
+++ b/src/tools/edit_file.rs
@@ -30,7 +30,7 @@ pub struct EditOperation {
     read_only_hint = false
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct EditFileTool {
+pub struct EditFile {
     /// The path of the file to edit.
     pub path: String,
@@ -45,7 +45,7 @@ pub struct EditFileTool {
     pub dry_run: Option<bool>,
 }
 
-impl EditFileTool {
+impl EditFile {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
diff --git a/src/tools/find_duplicate_files.rs b/src/tools/find_duplicate_files.rs
new file mode 100644
index 0000000..15465c0
--- /dev/null
+++ b/src/tools/find_duplicate_files.rs
@@ -0,0 +1,105 @@
+use crate::fs_service::{FileSystemService, utils::OutputFormat};
+use rust_mcp_sdk::{
+    macros::{JsonSchema, mcp_tool},
+    schema::{CallToolResult, TextContent, schema_utils::CallToolError},
+};
+use std::path::Path;
+use std::{collections::BTreeMap, fmt::Write};
+
+#[mcp_tool(
+    name = "find_duplicate_files",
+    title = "Find Duplicate Files",
+    description = concat!("Finds duplicate files within a directory and returns the list of duplicated files in text or JSON format. ",
+        "An optional `pattern` argument can be used to narrow the file search down to a specific glob pattern. ",
+        "Optional `exclude_patterns` can be used to exclude certain files matching a glob. ",
+        "`min_bytes` and `max_bytes` are optional arguments that can be used to restrict the search to files with sizes within a specified range. ",
+        "The `output_format` argument specifies the format of the output and accepts either `text` or `json` (default: text). ",
+        "Only works within allowed directories."),
+    destructive_hint = false,
+    idempotent_hint = false,
+    open_world_hint = false,
+    read_only_hint = true
+)]
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub struct FindDuplicateFiles {
+    /// The root directory path to start the search.
+    pub root_path: String,
+    /// Optional glob pattern that can be used to match target files.
+    pub pattern: Option<String>,
+    /// Optional list of glob patterns to exclude from the search. Files matching these patterns will be ignored.
+    pub exclude_patterns: Option<Vec<String>>,
+    /// Minimum file size (in bytes) to include in the search (defaults to 1).
+    #[json_schema(default = "1")]
+    pub min_bytes: Option<u64>,
+    /// Maximum file size (in bytes) to include in the search (optional).
+    pub max_bytes: Option<u64>,
+    /// Specify the output format, accepts either `text` or `json` (default: text).
+    #[json_schema(default = "text")]
+    pub output_format: Option<OutputFormat>,
+}
+
+impl FindDuplicateFiles {
+    fn format_output(
+        duplicate_files: Vec<Vec<String>>,
+        output_format: OutputFormat,
+    ) -> std::result::Result<String, CallToolError> {
+        match output_format {
+            OutputFormat::Text => {
+                let mut output = String::new();
+
+                let header = if duplicate_files.is_empty() {
+                    "No duplicate files were found.".to_string()
+                } else {
+                    format!("Found {} sets of duplicate files:\n", duplicate_files.len())
+                };
+                output.push_str(&header);
+
+                for (i, group) in duplicate_files.iter().enumerate() {
+                    writeln!(output, "\nDuplicated Group {}:", i + 1)
+                        .map_err(CallToolError::new)?;
+                    for file in group {
+                        writeln!(output, "  {file}").map_err(CallToolError::new)?;
+                    }
+                }
+                Ok(output)
+            }
+            OutputFormat::Json => {
+                // Use a map to hold string keys and array values
+                let mut map = BTreeMap::new();
+
+                for (i, group) in duplicate_files.into_iter().enumerate() {
+                    map.insert(i.to_string(), group);
+                }
+
+                // Serialize the map to a pretty JSON string
+                Ok(serde_json::to_string_pretty(&map).map_err(CallToolError::new)?)
+            }
+        }
+    }
+
+    pub async fn run_tool(
+        params: Self,
+        context: &FileSystemService,
+    ) -> std::result::Result<CallToolResult, CallToolError> {
+        let duplicate_files = context
+            .find_duplicate_files(
+                Path::new(&params.root_path),
+                params.pattern.clone(),
+                params.exclude_patterns.clone(),
+                params.min_bytes.or(Some(1)),
+                params.max_bytes,
+            )
+            .await
+            .map_err(CallToolError::new)?;
+
+        let result_content = Self::format_output(
+            duplicate_files,
+            params.output_format.unwrap_or(OutputFormat::Text),
+        )?;
+
+        Ok(CallToolResult::text_content(vec![TextContent::from(
+            result_content,
+        )]))
+    }
+}
diff --git a/src/tools/find_empty_directories.rs b/src/tools/find_empty_directories.rs
new file mode 100644
index 0000000..0b35b4e
--- /dev/null
+++ b/src/tools/find_empty_directories.rs
@@ -0,0 +1,88 @@
+use rust_mcp_sdk::{
+    macros::{JsonSchema, mcp_tool},
+    schema::{CallToolResult, TextContent, schema_utils::CallToolError},
+};
+use std::fmt::Write;
+use std::path::Path;
+
+use crate::fs_service::{FileSystemService, utils::OutputFormat};
+
+#[mcp_tool(
+    name = "find_empty_directories",
+    title = "Find Empty Directories",
+    description = concat!("Recursively finds all empty directories within the given root path. ",
+        "A directory is considered empty if it contains no files in itself or any of its subdirectories. ",
+        "Operating system metadata files `.DS_Store` (macOS) and `Thumbs.db` (Windows) will be ignored. ",
+        "The optional `exclude_patterns` argument accepts glob-style patterns to exclude specific paths from the search. ",
+        "Only works within allowed directories."),
+    destructive_hint = false,
+    idempotent_hint = false,
+    open_world_hint = false,
+    read_only_hint = true
+)]
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub struct FindEmptyDirectories {
+    /// The root directory path to search.
+    pub path: String,
+    /// Optional list of glob patterns to exclude from the search. Directories matching these patterns will be ignored.
+    pub exclude_patterns: Option<Vec<String>>,
+    /// Specify the output format, accepts either `text` or `json` (default: text).
+    pub output_format: Option<OutputFormat>,
+}
+
+impl FindEmptyDirectories {
+    pub async fn run_tool(
+        params: Self,
+        context: &FileSystemService,
+    ) -> std::result::Result<CallToolResult, CallToolError> {
+        let result = context
+            .find_empty_directories(Path::new(&params.path), params.exclude_patterns)
+            .await
+            .map_err(CallToolError::new)?;
+
+        let content =
+            Self::format_output(result, params.output_format.unwrap_or(OutputFormat::Text))?;
+
+        Ok(CallToolResult::text_content(vec![TextContent::from(
+            content,
+        )]))
+    }
+
+    fn format_output(
+        empty_dirs: Vec<String>,
+        output_format: OutputFormat,
+    ) -> std::result::Result<String, CallToolError> {
+        let output = match output_format {
+            OutputFormat::Text => {
+                let mut output = String::new();
+
+                let header = if empty_dirs.is_empty() {
+                    "No empty directories were found.".to_string()
+                } else {
+                    format!(
+                        "Found {} empty {}:\n",
+                        empty_dirs.len(),
+                        (if empty_dirs.len() == 1 {
+                            "directory"
+                        } else {
+                            "directories"
+                        }),
+                    )
+                };
+                output.push_str(&header);
+
+                for dir in empty_dirs {
+                    writeln!(output, "  {dir}").map_err(CallToolError::new)?;
+                }
+                output
+            }
+            OutputFormat::Json => {
+                serde_json::to_string_pretty(&empty_dirs).map_err(CallToolError::new)?
+            }
+        };
+
+        Ok(output)
+    }
+}
+ } + }; + + Ok(output) + } +} diff --git a/src/tools/get_file_info.rs b/src/tools/get_file_info.rs index 4d5d42b..8d29309 100644 --- a/src/tools/get_file_info.rs +++ b/src/tools/get_file_info.rs @@ -20,12 +20,12 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct GetFileInfoTool { +pub struct GetFileInfo { /// The path of the file to get information for. pub path: String, } -impl GetFileInfoTool { +impl GetFileInfo { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/head_file.rs b/src/tools/head_file.rs new file mode 100644 index 0000000..e5402e3 --- /dev/null +++ b/src/tools/head_file.rs @@ -0,0 +1,45 @@ +use std::path::Path; + +use rust_mcp_sdk::{ + macros::{JsonSchema, mcp_tool}, + schema::{CallToolResult, TextContent, schema_utils::CallToolError}, +}; + +use crate::fs_service::FileSystemService; + +// head_file +#[mcp_tool( + name = "head_file", + title="Head file", + description = concat!("Reads and returns the first N lines of a text file.", + "This is useful for quickly previewing file contents without loading the entire file into memory.", + "If the file has fewer than N lines, the entire file will be returned.", + "Only works within allowed directories."), + destructive_hint = false, + idempotent_hint = false, + open_world_hint = false, + read_only_hint = true +)] +#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] +pub struct HeadFile { + /// The path of the file to get information for. + pub path: String, + /// The number of lines to read from the beginning of the file. + pub lines: u64, +} + +impl HeadFile { + pub async fn run_tool( + params: Self, + context: &FileSystemService, + ) -> std::result::Result { + let result = context + .head_file(Path::new(¶ms.path), params.lines as usize) + .await + .map_err(CallToolError::new)?; + + Ok(CallToolResult::text_content(vec![TextContent::from( + result, + )])) + } +} diff --git a/src/tools/list_allowed_directories.rs b/src/tools/list_allowed_directories.rs index bf401ea..40ed8ee 100644 --- a/src/tools/list_allowed_directories.rs +++ b/src/tools/list_allowed_directories.rs @@ -17,9 +17,9 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ListAllowedDirectoriesTool {} +pub struct ListAllowedDirectories {} -impl ListAllowedDirectoriesTool { +impl ListAllowedDirectories { pub async fn run_tool( _: Self, context: &FileSystemService, diff --git a/src/tools/list_directory.rs b/src/tools/list_directory.rs index 2e7afbb..e86ca3f 100644 --- a/src/tools/list_directory.rs +++ b/src/tools/list_directory.rs @@ -19,12 +19,12 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ListDirectoryTool { +pub struct ListDirectory { /// The path of the directory to list. 
pub path: String, } -impl ListDirectoryTool { +impl ListDirectory { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/list_directory_with_sizes.rs b/src/tools/list_directory_with_sizes.rs index 803d290..91bf2a8 100644 --- a/src/tools/list_directory_with_sizes.rs +++ b/src/tools/list_directory_with_sizes.rs @@ -20,12 +20,12 @@ use crate::fs_service::utils::format_bytes; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ListDirectoryWithSizesTool { +pub struct ListDirectoryWithSizes { /// The path of the directory to list. pub path: String, } -impl ListDirectoryWithSizesTool { +impl ListDirectoryWithSizes { async fn format_directory_entries( &self, mut entries: Vec, diff --git a/src/tools/move_file.rs b/src/tools/move_file.rs index 3ea5096..12d59ca 100644 --- a/src/tools/move_file.rs +++ b/src/tools/move_file.rs @@ -20,14 +20,14 @@ use crate::fs_service::FileSystemService; read_only_hint = false )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct MoveFileTool { +pub struct MoveFile { /// The source path of the file to move. pub source: String, /// The destination path to move the file to. pub destination: String, } -impl MoveFileTool { +impl MoveFile { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/read_file_lines.rs b/src/tools/read_file_lines.rs new file mode 100644 index 0000000..86109d5 --- /dev/null +++ b/src/tools/read_file_lines.rs @@ -0,0 +1,51 @@ +use std::path::Path; + +use rust_mcp_sdk::{ + macros::{JsonSchema, mcp_tool}, + schema::{CallToolResult, TextContent, schema_utils::CallToolError}, +}; + +use crate::fs_service::FileSystemService; + +// head_file +#[mcp_tool( + name = "read_file_lines", + title="Read File Lines", + description = concat!("Reads lines from a text file starting at a specified line offset (0-based) and continues for the specified number of lines if a limit is provided.", + "This function skips the first 'offset' lines and then reads up to 'limit' lines if specified, or reads until the end of the file otherwise.", + "It's useful for partial reads, pagination, or previewing sections of large text files.", + "Only works within allowed directories."), + destructive_hint = false, + idempotent_hint = false, + open_world_hint = false, + read_only_hint = true +)] +#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] +pub struct ReadFileLines { + /// The path of the file to get information for. + pub path: String, + /// Number of lines to skip from the start (0-based). + pub offset: u64, + /// Optional maximum number of lines to read after the offset. 
+ pub limit: Option, +} + +impl ReadFileLines { + pub async fn run_tool( + params: Self, + context: &FileSystemService, + ) -> std::result::Result { + let result = context + .read_file_lines( + Path::new(¶ms.path), + params.offset as usize, + params.limit.map(|v| v as usize), + ) + .await + .map_err(CallToolError::new)?; + + Ok(CallToolResult::text_content(vec![TextContent::from( + result, + )])) + } +} diff --git a/src/tools/read_media_file.rs b/src/tools/read_media_file.rs index 7c606d4..28a288d 100644 --- a/src/tools/read_media_file.rs +++ b/src/tools/read_media_file.rs @@ -20,14 +20,14 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadMediaFileTool { +pub struct ReadMediaFile { /// The path of the file to read. pub path: String, /// Maximum allowed file size (in bytes) to be read. pub max_bytes: Option, } -impl ReadMediaFileTool { +impl ReadMediaFile { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/read_multiple_media_files.rs b/src/tools/read_multiple_media_files.rs index 00f5c79..38b64f7 100644 --- a/src/tools/read_multiple_media_files.rs +++ b/src/tools/read_multiple_media_files.rs @@ -17,14 +17,14 @@ use rust_mcp_sdk::schema::{CallToolResult, schema_utils::CallToolError}; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadMultipleMediaFilesTool { +pub struct ReadMultipleMediaFiles { /// The list of media file paths to read. pub paths: Vec, /// Maximum allowed file size (in bytes) to be read. pub max_bytes: Option, } -impl ReadMultipleMediaFilesTool { +impl ReadMultipleMediaFiles { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/read_multiple_text_files.rs b/src/tools/read_multiple_text_files.rs index 640a101..0389c78 100644 --- a/src/tools/read_multiple_text_files.rs +++ b/src/tools/read_multiple_text_files.rs @@ -19,12 +19,12 @@ use std::path::Path; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadMultipleTextFilesTool { +pub struct ReadMultipleTextFiles { /// The list of file paths to read. pub paths: Vec, } -impl ReadMultipleTextFilesTool { +impl ReadMultipleTextFiles { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/read_text_file.rs b/src/tools/read_text_file.rs index 06d6acf..3872625 100644 --- a/src/tools/read_text_file.rs +++ b/src/tools/read_text_file.rs @@ -19,12 +19,12 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadTextFileTool { +pub struct ReadTextFile { /// The path of the file to read. pub path: String, } -impl ReadTextFileTool { +impl ReadTextFile { pub async fn run_tool( params: Self, context: &FileSystemService, diff --git a/src/tools/search_file.rs b/src/tools/search_file.rs index 510ae99..0bdd81c 100644 --- a/src/tools/search_file.rs +++ b/src/tools/search_file.rs @@ -9,10 +9,12 @@ use crate::fs_service::FileSystemService; name = "search_files", title="Search Files", description = concat!("Recursively search for files and directories matching a pattern. ", - "Searches through all subdirectories from the starting path. The search ", -"is case-insensitive and matches partial names. Returns full paths to all ", -"matching items. 
Great for finding files when you don't know their exact location. ", -"Only searches within allowed directories."), + "Searches through all subdirectories from the starting path. The search is case-insensitive ", + "and matches partial names. Returns full paths to all matching items.", + "Optional 'min_bytes' and 'max_bytes' arguments can be used to filter files by size, ", + "ensuring that only files within the specified byte range are included in the search. ", + "This tool is great for finding files when you don't know their exact location or find files by their size.", + "Only searches within allowed directories."), destructive_hint = false, idempotent_hint = false, open_world_hint = false, @@ -21,16 +23,20 @@ use crate::fs_service::FileSystemService; #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] /// A tool for searching files based on a path and pattern. -pub struct SearchFilesTool { +pub struct SearchFiles { /// The directory path to search in. pub path: String, - /// The file glob pattern to match (e.g., "*.rs"). + /// Glob pattern used to match target files (e.g., "*.rs"). pub pattern: String, #[serde(rename = "excludePatterns")] /// Optional list of patterns to exclude from the search. pub exclude_patterns: Option>, + /// Minimum file size (in bytes) to include in the search (optional). + pub min_bytes: Option, + /// Maximum file size (in bytes) to include in the search (optional). + pub max_bytes: Option, } -impl SearchFilesTool { +impl SearchFiles { pub async fn run_tool( params: Self, context: &FileSystemService, @@ -40,6 +46,8 @@ impl SearchFilesTool { Path::new(¶ms.path), params.pattern, params.exclude_patterns.unwrap_or_default(), + params.min_bytes, + params.max_bytes, ) .await .map_err(CallToolError::new)?; diff --git a/src/tools/search_files_content.rs b/src/tools/search_files_content.rs index f5cf9cf..85a371c 100644 --- a/src/tools/search_files_content.rs +++ b/src/tools/search_files_content.rs @@ -10,6 +10,8 @@ use std::fmt::Write; description = concat!("Searches for text or regex patterns in the content of files matching matching a GLOB pattern.", "Returns detailed matches with file path, line number, column number and a preview of matched text.", "By default, it performs a literal text search; if the 'is_regex' parameter is set to true, it performs a regular expression (regex) search instead.", + "Optional 'min_bytes' and 'max_bytes' arguments can be used to filter files by size, ", + "ensuring that only files within the specified byte range are included in the search. ", "Ideal for finding specific code, comments, or text when you don’t know their exact location."), destructive_hint = false, idempotent_hint = false, @@ -19,7 +21,7 @@ use std::fmt::Write; #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] /// A tool for searching content of one or more files based on a path and pattern. -pub struct SearchFilesContentTool { +pub struct SearchFilesContent { /// The file or directory path to search in. pub path: String, /// The file glob pattern to match (e.g., "*.rs"). @@ -31,9 +33,13 @@ pub struct SearchFilesContentTool { #[serde(rename = "excludePatterns")] /// Optional list of patterns to exclude from the search. pub exclude_patterns: Option>, + /// Minimum file size (in bytes) to include in the search (optional). + pub min_bytes: Option, + /// Maximum file size (in bytes) to include in the search (optional). 
diff --git a/src/tools/search_files_content.rs b/src/tools/search_files_content.rs
index f5cf9cf..85a371c 100644
--- a/src/tools/search_files_content.rs
+++ b/src/tools/search_files_content.rs
@@ -10,6 +10,8 @@ use std::fmt::Write;
     description = concat!("Searches for text or regex patterns in the content of files matching a GLOB pattern. ",
     "Returns detailed matches with file path, line number, column number and a preview of matched text. ",
     "By default, it performs a literal text search; if the 'is_regex' parameter is set to true, it performs a regular expression (regex) search instead. ",
+    "Optional 'min_bytes' and 'max_bytes' arguments can be used to filter files by size, ",
+    "ensuring that only files within the specified byte range are included in the search. ",
     "Ideal for finding specific code, comments, or text when you don’t know their exact location."),
     destructive_hint = false,
     idempotent_hint = false,
@@ -19,7 +21,7 @@ use std::fmt::Write;
 
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
 /// A tool for searching content of one or more files based on a path and pattern.
-pub struct SearchFilesContentTool {
+pub struct SearchFilesContent {
     /// The file or directory path to search in.
     pub path: String,
     /// The file glob pattern to match (e.g., "*.rs").
@@ -31,9 +33,13 @@ pub struct SearchFilesContentTool {
     #[serde(rename = "excludePatterns")]
     /// Optional list of patterns to exclude from the search.
     pub exclude_patterns: Option<Vec<String>>,
+    /// Minimum file size (in bytes) to include in the search (optional).
+    pub min_bytes: Option<u64>,
+    /// Maximum file size (in bytes) to include in the search (optional).
+    pub max_bytes: Option<u64>,
 }
 
-impl SearchFilesContentTool {
+impl SearchFilesContent {
     fn format_result(&self, results: Vec) -> String {
         // TODO: improve capacity estimation
         let estimated_capacity = 2048;
@@ -72,6 +78,8 @@ impl SearchFilesContentTool {
                 &params.query,
                 is_regex,
                 params.exclude_patterns.to_owned(),
+                params.min_bytes,
+                params.max_bytes,
             )
             .await
         {
diff --git a/src/tools/tail_file.rs b/src/tools/tail_file.rs
new file mode 100644
index 0000000..b808660
--- /dev/null
+++ b/src/tools/tail_file.rs
@@ -0,0 +1,45 @@
+use std::path::Path;
+
+use rust_mcp_sdk::{
+    macros::{JsonSchema, mcp_tool},
+    schema::{CallToolResult, TextContent, schema_utils::CallToolError},
+};
+
+use crate::fs_service::FileSystemService;
+
+// tail_file
+#[mcp_tool(
+    name = "tail_file",
+    title="Tail file",
+    description = concat!("Reads and returns the last N lines of a text file. ",
+    "This is useful for quickly previewing file contents without loading the entire file into memory. ",
+    "If the file has fewer than N lines, the entire file will be returned. ",
+    "Only works within allowed directories."),
+    destructive_hint = false,
+    idempotent_hint = false,
+    open_world_hint = false,
+    read_only_hint = true
+)]
+#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
+pub struct TailFile {
+    /// The path of the file to read.
+    pub path: String,
+    /// The number of lines to read from the end of the file.
+    pub lines: u64,
+}
+
+impl TailFile {
+    pub async fn run_tool(
+        params: Self,
+        context: &FileSystemService,
+    ) -> std::result::Result<CallToolResult, CallToolError> {
+        let result = context
+            .tail_file(Path::new(&params.path), params.lines as usize)
+            .await
+            .map_err(CallToolError::new)?;
+
+        Ok(CallToolResult::text_content(vec![TextContent::from(
+            result,
+        )]))
+    }
+}
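
The new file only wires the request through to `FileSystemService::tail_file`, whose body is not part of this diff. A minimal sketch of the behavior the new tests pin down (last N lines, original line endings kept, at most one trailing line terminator stripped), written with synchronous std I/O and a buffer bounded to N lines; the names here are assumptions, not the crate's actual implementation:

```rust
use std::collections::VecDeque;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

// Return the last `n` lines of `path`, preserving each line's original ending
// and stripping at most one trailing line terminator from the final output.
fn tail_lines(path: &Path, n: usize) -> std::io::Result<String> {
    if n == 0 {
        return Ok(String::new());
    }
    let mut reader = BufReader::new(File::open(path)?);
    let mut last: VecDeque<Vec<u8>> = VecDeque::with_capacity(n);
    loop {
        let mut line = Vec::new();
        // read_until keeps the delimiter, so "\r\n" endings survive intact.
        if reader.read_until(b'\n', &mut line)? == 0 {
            break; // EOF
        }
        if last.len() == n {
            last.pop_front(); // retain only the most recent n lines
        }
        last.push_back(line);
    }
    let mut out: Vec<u8> = last.into_iter().flatten().collect();
    if out.last() == Some(&b'\n') {
        out.pop();
        if out.last() == Some(&b'\r') {
            out.pop();
        }
    }
    String::from_utf8(out)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
```

Memory use here is bounded by the N retained lines rather than the file size, which matches the "without loading the entire file into memory" claim in the tool description.
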
diff --git a/src/tools/write_file.rs b/src/tools/write_file.rs
index 52797cb..3d09d53 100644
--- a/src/tools/write_file.rs
+++ b/src/tools/write_file.rs
@@ -19,14 +19,14 @@ use crate::fs_service::FileSystemService;
     read_only_hint = false
 )]
 #[derive(Debug, Clone, ::serde::Deserialize, ::serde::Serialize, JsonSchema)]
-pub struct WriteFileTool {
+pub struct WriteFile {
     /// The path of the file to write to.
     pub path: String,
     /// The content to write to the file.
     pub content: String,
 }
 
-impl WriteFileTool {
+impl WriteFile {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
diff --git a/src/tools/zip_unzip.rs b/src/tools/zip_unzip.rs
index 88f77a1..b81e885 100644
--- a/src/tools/zip_unzip.rs
+++ b/src/tools/zip_unzip.rs
@@ -16,14 +16,14 @@ use crate::fs_service::FileSystemService;
     read_only_hint = false
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct ZipFilesTool {
+pub struct ZipFiles {
     /// The list of files to include in the ZIP archive.
     pub input_files: Vec<String>,
     /// Path to save the resulting ZIP file, including filename and .zip extension
     pub target_zip_file: String,
 }
 
-impl ZipFilesTool {
+impl ZipFiles {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
@@ -48,14 +48,14 @@ The tool decompresses all files and directories stored in the ZIP, recreating th
 Both the source ZIP file and the target directory should reside within allowed directories."
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct UnzipFileTool {
+pub struct UnzipFile {
     /// A filesystem path to an existing ZIP file to be extracted.
     pub zip_file: String,
     /// Path to the target directory where the contents of the ZIP file will be extracted.
     pub target_path: String,
 }
 
-impl UnzipFileTool {
+impl UnzipFile {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
@@ -79,7 +79,7 @@ It takes a path to the folder and a glob pattern to identify files to compress a
 Both the source directory and the target ZIP file should reside within allowed directories."
 )]
 #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)]
-pub struct ZipDirectoryTool {
+pub struct ZipDirectory {
     /// Path to the directory to zip
     pub input_directory: String,
     /// An optional glob pattern to match files and subdirectories to zip; defaults to "**/*"
@@ -88,7 +88,7 @@ pub struct ZipDirectoryTool {
     pub target_zip_file: String,
 }
 
-impl ZipDirectoryTool {
+impl ZipDirectory {
     pub async fn run_tool(
         params: Self,
         context: &FileSystemService,
diff --git a/tests/common/common.rs b/tests/common/common.rs
index 8650026..d73a750 100644
--- a/tests/common/common.rs
+++ b/tests/common/common.rs
@@ -35,9 +35,20 @@ pub fn setup_service(dirs: Vec<String>) -> (PathBuf, FileSystemService, Arc
 
+// Normalize duplicate-file groups so assertions are order-independent.
+pub fn sort_duplicate_groups(mut groups: Vec<Vec<String>>) -> Vec<Vec<String>> {
+    groups.iter_mut().for_each(|group| group.sort());
+    groups.sort();
+    groups
+}
+
 // Helper to create a temporary file
 pub fn create_temp_file(dir: &Path, name: &str, content: &str) -> PathBuf {
     let file_path = dir.join(name);
+
+    // Create the parent directory if it doesn't exist
+    fs::create_dir_all(file_path.parent().unwrap()).unwrap();
+
     File::create(&file_path)
         .unwrap()
         .write_all(content.as_bytes())
@@ -82,7 +93,40 @@ pub fn create_temp_dir() -> (TempDir, FileInfo) {
     (dir, file_info)
 }
 
+// Helper to create a directory in a temp folder
+pub async fn create_sub_dir(temp_dir: &Path, dir_name: &str) -> PathBuf {
+    let dir_path = temp_dir.join(dir_name);
+    tokio::fs::create_dir_all(&dir_path).await.unwrap();
+    dir_path
+}
+
 // Helper function to try to parse arguments and return the result
 pub fn parse_args(args: &[&str]) -> Result<CommandArguments, clap::Error> {
     CommandArguments::try_parse_from(args)
 }
+
+// Helper to create a file with multiple lines
+pub async fn create_test_file(
+    temp_dir: &Path,
+    file_name: &str,
+    lines: Vec<&str>,
+) -> std::path::PathBuf {
+    let content = lines.join("\n");
+    create_temp_file(temp_dir, file_name, &content)
+}
+
+pub async fn create_test_file_with_line_ending(
+    temp_dir: &Path,
+    file_name: &str,
+    lines: Vec<&str>,
+    line_ending: &str,
+) -> PathBuf {
+    let file_path = temp_dir.join(file_name);
+    tokio::fs::create_dir_all(file_path.parent().unwrap())
+        .await
+        .unwrap();
+    let mut file = File::create(&file_path).unwrap();
+    let content = lines.join(line_ending);
+    file.write_all(content.as_bytes()).unwrap();
+    file_path
+}
diff --git a/tests/test_fs_service.rs b/tests/test_fs_service.rs
index eef6b26..df80c70 100644
--- a/tests/test_fs_service.rs
+++ b/tests/test_fs_service.rs
@@ -21,6 +21,10 @@ use std::time::SystemTime;
 use tokio::fs as tokio_fs;
 use tokio_util::compat::TokioAsyncReadCompatExt;
 
+use crate::common::create_sub_dir;
+use crate::common::create_test_file;
+use crate::common::create_test_file_with_line_ending;
+use crate::common::sort_duplicate_groups;
 #[cfg(unix)]
 use std::os::unix::fs::PermissionsExt;
 
@@ -284,7 +288,7 @@ async fn test_search_files() {
     create_temp_file(&dir_path, "test1.txt", "content");
     create_temp_file(&dir_path, "test2.doc", "content");
     let result = service
-        .search_files(&dir_path, "*.txt".to_string(), vec![])
+        .search_files(&dir_path, "*.txt".to_string(), vec![], None, None)
        .await
        .unwrap();
     let names: Vec<_> = result
@@ -305,6 +309,8 @@ async fn test_search_files_with_exclude() {
         &dir_path,
"*.txt".to_string(), vec!["test2.txt".to_string()], + None, + None, ) .await .unwrap(); @@ -1091,6 +1097,8 @@ async fn search_files_content() { query, true, None, + None, + None, ) .await .unwrap(); @@ -1099,6 +1107,339 @@ async fn search_files_content() { assert_eq!(results[1].matches.len(), 2); } +#[tokio::test] +async fn test_head_file_normal() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_test_file_with_line_ending( + &temp_dir, + "dir1/test.txt", + vec!["line1", "line2", "line3", "line4", "line5"], + "\n", + ) + .await; + + let result = service.head_file(&file_path, 3).await.unwrap(); + assert_eq!(result, "line1\nline2\nline3\n"); +} + +#[tokio::test] +async fn test_head_file_empty_file() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = + create_test_file_with_line_ending(&temp_dir, "dir1/empty.txt", vec![], "\n").await; + + let result = service.head_file(&file_path, 5).await.unwrap(); + assert_eq!(result, ""); +} + +#[tokio::test] +async fn test_head_file_n_zero() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_test_file_with_line_ending( + &temp_dir, + "dir1/test.txt", + vec!["line1", "line2", "line3"], + "\n", + ) + .await; + + let result = service.head_file(&file_path, 0).await.unwrap(); + assert_eq!(result, ""); +} + +#[tokio::test] +async fn test_head_file_n_larger_than_file() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = + create_test_file_with_line_ending(&temp_dir, "dir1/test.txt", vec!["line1", "line2"], "\n") + .await; + + let result = service.head_file(&file_path, 5).await.unwrap(); + assert_eq!(result, "line1\nline2"); +} + +#[tokio::test] +async fn test_head_file_no_trailing_newline() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + // Create file without trailing newline + let file_path = temp_dir.join("dir1/test.txt"); + tokio::fs::create_dir_all(file_path.parent().unwrap()) + .await + .unwrap(); + let mut file = File::create(&file_path).unwrap(); + file.write_all(b"line1\nline2\nline3").unwrap(); + + let result = service.head_file(&file_path, 3).await.unwrap(); + assert_eq!(result, "line1\nline2\nline3"); +} + +#[tokio::test] +async fn test_head_file_single_line() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = + create_test_file_with_line_ending(&temp_dir, "dir1/test.txt", vec!["line1"], "\n").await; + + let result = service.head_file(&file_path, 1).await.unwrap(); + assert_eq!(result, "line1"); +} + +#[tokio::test] +async fn test_head_file_windows_line_endings() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_test_file_with_line_ending( + &temp_dir, + "dir1/test.txt", + vec!["line1", "line2", "line3"], + "\r\n", + ) + .await; + + let result = service.head_file(&file_path, 2).await.unwrap(); + assert_eq!(result, "line1\r\nline2\r\n"); +} + +#[tokio::test] +async fn test_head_file_invalid_path() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let invalid_path = temp_dir.join("dir2/test.txt"); // Outside allowed_dirs + + let result = service.head_file(&invalid_path, 3).await; + assert!(result.is_err(), "Expected error for invalid path"); +} + +#[tokio::test] +async fn test_tail_file_normal() { + let (temp_dir, service, _allowed_dirs) = 
setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file_with_line_ending(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2", "line3", "line4", "line5"],
+        "\n",
+    )
+    .await;
+
+    let result = service.tail_file(&file_path, 3).await.unwrap();
+    assert_eq!(result, "line3\nline4\nline5"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_tail_file_empty_file() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path =
+        create_test_file_with_line_ending(&temp_dir, "dir1/empty.txt", vec![], "\n").await;
+
+    let result = service.tail_file(&file_path, 5).await.unwrap();
+    assert_eq!(result, "");
+}
+
+#[tokio::test]
+async fn test_tail_file_n_zero() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file_with_line_ending(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2", "line3"],
+        "\n",
+    )
+    .await;
+
+    let result = service.tail_file(&file_path, 0).await.unwrap();
+    assert_eq!(result, "");
+}
+
+#[tokio::test]
+async fn test_tail_file_n_larger_than_file() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file_with_line_ending(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2"],
+        "\n",
+    )
+    .await;
+
+    let result = service.tail_file(&file_path, 5).await.unwrap();
+    assert_eq!(result, "line1\nline2"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_tail_file_no_newline_at_end() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_temp_file(
+        &temp_dir.join("dir1"),
+        "test.txt",
+        "line1\nline2\nline3", // No newline at end
+    );
+
+    let result = service.tail_file(&file_path, 2).await.unwrap();
+    assert_eq!(result, "line2\nline3");
+}
+
+#[tokio::test]
+async fn test_tail_file_single_line() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path =
+        create_test_file_with_line_ending(&temp_dir, "dir1/test.txt", vec!["line1"], "\n").await;
+
+    let result = service.tail_file(&file_path, 1).await.unwrap();
+    assert_eq!(result, "line1"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_tail_file_windows_line_endings() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file_with_line_ending(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2", "line3"],
+        "\r\n",
+    )
+    .await;
+
+    let result = service.tail_file(&file_path, 2).await.unwrap();
+    assert_eq!(result, "line2\r\nline3"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_tail_file_invalid_path() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let invalid_path = temp_dir.join("dir2/test.txt"); // Outside allowed_dirs
+
+    let result = service.tail_file(&invalid_path, 3).await;
+    assert!(result.is_err(), "Expected error for invalid path");
+}
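
The `read_file_lines` tests that follow fix the contract of the tool added earlier in this diff: `offset` is a 0-based line index, `limit` caps the number of returned lines (all remaining lines when `None`), and source line endings pass through untouched. A hypothetical reference implementation of that windowing, in synchronous std I/O:

```rust
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

// Skip `offset` lines, then return up to `limit` lines (all remaining lines
// when `limit` is None), keeping each line's original ending.
fn read_lines_window(path: &Path, offset: usize, limit: Option<usize>) -> std::io::Result<String> {
    let mut reader = BufReader::new(File::open(path)?);
    let mut out = Vec::new();
    let mut index = 0usize;
    loop {
        let mut line = Vec::new();
        if reader.read_until(b'\n', &mut line)? == 0 {
            break; // EOF
        }
        if index >= offset {
            if let Some(limit) = limit {
                if index - offset >= limit {
                    break; // window is full
                }
            }
            out.extend_from_slice(&line);
        }
        index += 1;
    }
    String::from_utf8(out)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
```

Note the consequence the first test below asserts: when the window ends before the file does, the last returned line still carries its newline, so the result can legitimately end in `\n`.
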
+
+#[tokio::test]
+async fn test_read_file_lines_normal() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2", "line3", "line4", "line5"],
+    )
+    .await;
+
+    let result = service
+        .read_file_lines(&file_path, 1, Some(2))
+        .await
+        .unwrap();
+    assert_eq!(result, "line2\nline3\n"); // Trailing newline kept: line3 is not the last line
+}
+
+#[tokio::test]
+async fn test_read_file_lines_empty_file() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file(&temp_dir, "dir1/empty.txt", vec![]).await;
+
+    let result = service
+        .read_file_lines(&file_path, 0, Some(5))
+        .await
+        .unwrap();
+    assert_eq!(result, "");
+}
+
+#[tokio::test]
+async fn test_read_file_lines_offset_beyond_file() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file(&temp_dir, "dir1/test.txt", vec!["line1", "line2"]).await;
+
+    let result = service
+        .read_file_lines(&file_path, 5, Some(3))
+        .await
+        .unwrap();
+    assert_eq!(result, "");
+}
+
+#[tokio::test]
+async fn test_read_file_lines_no_limit() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_test_file(
+        &temp_dir,
+        "dir1/test.txt",
+        vec!["line1", "line2", "line3", "line4"],
+    )
+    .await;
+
+    let result = service.read_file_lines(&file_path, 2, None).await.unwrap();
+    assert_eq!(result, "line3\nline4"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_read_file_lines_limit_zero() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path =
+        create_test_file(&temp_dir, "dir1/test.txt", vec!["line1", "line2", "line3"]).await;
+
+    let result = service
+        .read_file_lines(&file_path, 1, Some(0))
+        .await
+        .unwrap();
+    assert_eq!(result, "");
+}
+
+#[tokio::test]
+async fn test_read_file_lines_exact_file_length() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path =
+        create_test_file(&temp_dir, "dir1/test.txt", vec!["line1", "line2", "line3"]).await;
+
+    let result = service
+        .read_file_lines(&file_path, 0, Some(3))
+        .await
+        .unwrap();
+    assert_eq!(result, "line1\nline2\nline3"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_read_file_lines_no_newline_at_end() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_temp_file(
+        &temp_dir.join("dir1"),
+        "test.txt",
+        "line1\nline2\nline3", // No newline at end
+    );
+
+    let result = service
+        .read_file_lines(&file_path, 1, Some(2))
+        .await
+        .unwrap();
+    assert_eq!(result, "line2\nline3"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_read_file_lines_windows_line_endings() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+
+    // Use \r\n explicitly
+    let file_path = create_temp_file(
+        &temp_dir.join("dir1"),
+        "test.txt",
+        "line1\r\nline2\r\nline3",
+    );
+
+    let result = service
+        .read_file_lines(&file_path, 1, Some(2))
+        .await
+        .unwrap();
+    assert_eq!(result, "line2\r\nline3"); // No trailing newline
+}
+
+#[tokio::test]
+async fn test_read_file_lines_invalid_path() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let invalid_path = temp_dir.join("dir2/test.txt"); // Outside allowed_dirs
+
+    let result = service.read_file_lines(&invalid_path, 0, Some(3)).await;
+    assert!(result.is_err(), "Expected error for invalid path");
+}
+
 #[test]
 fn test_extract_snippet_bug_37() {
     let (_, service, _) = setup_service(vec!["dir_search".to_string()]);
@@ -1130,3 +1471,352 @@ fn test_extract_snippet_bug_37() {
 
     println!("Snippet: {result}");
 }
+
+#[tokio::test]
+async fn test_calculate_directory_size_normal() {
+    let (temp_dir, service, _allowed_dirs) = 
setup_service(vec!["dir1".to_string()]); + create_temp_file(&temp_dir.join("dir1"), "file1.txt", "content1"); + create_temp_file(&temp_dir.join("dir1"), "file2.txt", "content22"); + + let size = service + .calculate_directory_size(&temp_dir.join("dir1")) + .await + .unwrap(); + assert_eq!(size, 17); // "content1" (8 bytes) + "content22" (9 bytes) = 17 bytes +} + +#[tokio::test] +async fn test_calculate_directory_size_empty_dir() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_sub_dir(&temp_dir, "dir1").await; + + let size = service + .calculate_directory_size(&temp_dir.join("dir1")) + .await + .unwrap(); + assert_eq!(size, 0); +} + +#[tokio::test] +async fn test_calculate_directory_size_nested_files() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_temp_file(&temp_dir.join("dir1"), "file1.txt", "content1"); + create_temp_file(&temp_dir.join("dir1/subdir"), "file2.txt", "content22"); + + let size = service + .calculate_directory_size(&temp_dir.join("dir1")) + .await + .unwrap(); + assert_eq!(size, 17); // "content1" (8 bytes) + "content22" (9 bytes) = 17 bytes +} + +#[tokio::test] +async fn test_calculate_directory_size_invalid_path() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let invalid_path = temp_dir.join("dir2"); + + let result = service.calculate_directory_size(&invalid_path).await; + assert!(result.is_err(), "Expected error for invalid path"); +} + +#[tokio::test] +async fn test_find_empty_directories_normal() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_sub_dir(&temp_dir, "dir1/empty1").await; + create_sub_dir(&temp_dir, "dir1/empty2").await; + create_temp_file(&temp_dir.join("dir1/non_empty"), "file.txt", "content"); + + let result = service + .find_empty_directories(&temp_dir.join("dir1"), None) + .await + .unwrap(); + let expected = [ + temp_dir.join("dir1/empty1").to_str().unwrap().to_string(), + temp_dir.join("dir1/empty2").to_str().unwrap().to_string(), + ]; + assert_eq!(result.len(), 2); + assert!(result.iter().all(|path| expected.contains(path))); +} + +#[tokio::test] +async fn test_find_empty_directories_no_empty_dirs() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_temp_file(&temp_dir.join("dir1/dir1"), "file.txt", "content"); + create_temp_file(&temp_dir.join("dir1/dir2"), "file.txt", "content"); + + let result = service + .find_empty_directories(&temp_dir.join("dir1"), None) + .await + .unwrap(); + assert_eq!(result, Vec::::new()); +} + +#[tokio::test] +async fn test_find_empty_directories_empty_root() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_sub_dir(&temp_dir, "dir1").await; + + let result = service + .find_empty_directories(&temp_dir.join("dir1"), None) + .await + .unwrap(); + assert_eq!(result, Vec::::new()); +} + +#[tokio::test] +async fn test_find_empty_directories_invalid_path() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let invalid_path = temp_dir.join("dir2"); + + let result = service.find_empty_directories(&invalid_path, None).await; + assert!(result.is_err(), "Expected error for invalid path"); +} + +#[tokio::test] +async fn test_find_duplicate_files_normal() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let content = "same content"; + let file1 = create_temp_file(&temp_dir.join("dir1"), 
"file1.txt", content); + let file2 = create_temp_file(&temp_dir.join("dir1"), "file2.txt", content); + let _file3 = create_temp_file(&temp_dir.join("dir1"), "file3.txt", "different"); + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*".to_string()), + None, + None, + None, + ) + .await + .unwrap(); + let expected = vec![vec![ + file1.to_str().unwrap().to_string(), + file2.to_str().unwrap().to_string(), + ]]; + + assert_eq!(result.len(), 1); + assert_eq!( + sort_duplicate_groups(result), + sort_duplicate_groups(expected) + ); +} + +#[tokio::test] +async fn test_find_duplicate_files_no_duplicates() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_temp_file(&temp_dir.join("dir1"), "file1.txt", "content1"); + create_temp_file(&temp_dir.join("dir1"), "file2.txt", "content2"); + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*".to_string()), + None, + None, + None, + ) + .await + .unwrap(); + assert_eq!(result, Vec::>::new()); +} + +#[tokio::test] +async fn test_find_duplicate_files_with_pattern() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let content = "same content"; + create_temp_file(&temp_dir.join("dir1"), "file1.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file2.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file3.log", content); + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*.txt".to_string()), + None, + None, + None, + ) + .await + .unwrap(); + assert_eq!(result.len(), 1); + assert!(result[0].iter().all(|p| p.ends_with(".txt"))); +} + +#[tokio::test] +async fn test_find_duplicate_files_with_exclude_patterns() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let content = "same content"; + create_temp_file(&temp_dir.join("dir1"), "file1.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file2.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file3.log", content); + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*".to_string()), + Some(vec!["*.log".to_string()]), + None, + None, + ) + .await + .unwrap(); + assert_eq!(result.len(), 1); + assert!(result[0].iter().all(|p| !p.ends_with(".log"))); +} + +#[tokio::test] +async fn test_find_duplicate_files_size_filters() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let content = "same content"; // 12 bytes + create_temp_file(&temp_dir.join("dir1"), "file1.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file2.txt", content); + create_temp_file(&temp_dir.join("dir1"), "file3.txt", "short"); // 5 bytes + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*".to_string()), + None, + Some(10), // min 10 bytes + Some(15), // max 15 bytes + ) + .await + .unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result[0].len(), 2); // file1.txt and file2.txt +} + +#[tokio::test] +async fn test_find_duplicate_files_empty_dir() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + create_sub_dir(&temp_dir, "dir1").await; + + let result = service + .find_duplicate_files( + &temp_dir.join("dir1"), + Some("*".to_string()), + None, + None, + None, + ) + .await + .unwrap(); + assert_eq!(result, Vec::>::new()); +} + +#[tokio::test] +async fn test_find_duplicate_files_invalid_path() { + let (temp_dir, service, _allowed_dirs) = 
setup_service(vec!["dir1".to_string()]);
+    let invalid_path = temp_dir.join("dir2");
+
+    let result = service
+        .find_duplicate_files(&invalid_path, Some("*".to_string()), None, None, None)
+        .await;
+    assert!(result.is_err(), "Expected error for invalid path");
+}
+
+#[tokio::test]
+async fn test_find_duplicate_files_nested_duplicates() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let content = "same content";
+    let file1 = create_temp_file(&temp_dir.join("dir1"), "file1.txt", content);
+    let file2 = create_temp_file(&temp_dir.join("dir1/subdir"), "file2.txt", content);
+
+    let result = service
+        .find_duplicate_files(
+            &temp_dir.join("dir1"),
+            Some("*".to_string()),
+            None,
+            None,
+            None,
+        )
+        .await
+        .unwrap();
+    let expected = vec![vec![
+        file1.to_str().unwrap().to_string(),
+        file2.to_str().unwrap().to_string(),
+    ]];
+    assert_eq!(result.len(), 1);
+    assert_eq!(
+        sort_duplicate_groups(result),
+        sort_duplicate_groups(expected)
+    );
+}
+
+#[tokio::test]
+async fn test_find_empty_directories_exclude_patterns() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let dir1 = temp_dir.join("dir1");
+
+    // Create empty directory that should be included
+    let empty1 = dir1.join("empty1");
+    tokio::fs::create_dir_all(&empty1).await.unwrap();
+
+    // Create empty directory that matches the exclude pattern
+    let empty2 = dir1.join("empty2");
+    tokio::fs::create_dir_all(&empty2).await.unwrap();
+
+    // Create non-empty directory
+    let non_empty = dir1.join("non_empty");
+    tokio::fs::create_dir_all(&non_empty).await.unwrap();
+    create_temp_file(&non_empty, "file.txt", "content");
+
+    // Ensure root dir1 exists
+    tokio::fs::create_dir_all(&dir1).await.unwrap();
+
+    // Call with exclude_patterns to exclude "*2*"
+    let result = service
+        .find_empty_directories(&dir1, Some(vec!["*2*".to_string()]))
+        .await
+        .unwrap();
+
+    // Expect only empty1, not empty2 or non_empty
+    let expected = vec![empty1.to_str().unwrap().to_string()];
+    assert_eq!(result.len(), 1);
+    assert_eq!(result, expected);
+}
+
+#[tokio::test]
+async fn test_find_empty_directories_exclude_patterns_2() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let root_path = temp_dir.join("dir1");
+
+    // Create empty directories
+    tokio::fs::create_dir_all(&root_path.join("empty1"))
+        .await
+        .unwrap();
+    tokio::fs::create_dir_all(&root_path.join("empty2.log"))
+        .await
+        .unwrap();
+    tokio::fs::create_dir_all(&root_path.join("empty3"))
+        .await
+        .unwrap();
+
+    // Create a non-empty directory to ensure it's not returned
+    tokio::fs::create_dir_all(&root_path.join("non_empty"))
+        .await
+        .unwrap();
+    tokio::fs::write(&root_path.join("non_empty/file.txt"), b"content")
+        .await
+        .unwrap();
+
+    // Test with exclude pattern "*.log"
+    let exclude_patterns = Some(vec!["*.log".to_string()]);
+    let result = service
+        .find_empty_directories(&root_path, exclude_patterns)
+        .await
+        .unwrap();
+
+    let expected = [
+        root_path.join("empty1").to_str().unwrap().to_string(),
+        root_path.join("empty3").to_str().unwrap().to_string(),
+    ];
+
+    assert_eq!(result.len(), 2);
+    assert!(result.iter().all(|path| expected.contains(path)));
+    assert!(!result.iter().any(|path| path.contains("empty2.log")));
+}
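
Both exclude-pattern tests above treat patterns such as `*2*` and `*.log` as name globs. A hypothetical predicate with that behavior, sketched with the `glob` crate; the service's actual matching code is not shown in this diff:

```rust
use glob::Pattern;

// Return true when a directory name matches any exclude pattern; invalid
// patterns are skipped rather than treated as errors.
fn is_excluded(name: &str, exclude_patterns: &[String]) -> bool {
    exclude_patterns
        .iter()
        .filter_map(|p| Pattern::new(p).ok())
        .any(|p| p.matches(name))
}

fn main() {
    let excludes = vec!["*.log".to_string()];
    assert!(is_excluded("empty2.log", &excludes));
    assert!(!is_excluded("empty1", &excludes));
}
```
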
diff --git a/tests/test_tools.rs b/tests/test_tools.rs
index b79b519..ef702cf 100644
--- a/tests/test_tools.rs
+++ b/tests/test_tools.rs
@@ -10,11 +10,11 @@ use std::fs;
 async fn test_create_directory_new_directory() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
     let new_dir = temp_dir.join("dir1").join("new_dir");
-    let params = CreateDirectoryTool {
+    let params = CreateDirectory {
         path: new_dir.to_str().unwrap().to_string(),
     };
 
-    let result = CreateDirectoryTool::run_tool(params, &service).await;
+    let result = CreateDirectory::run_tool(params, &service).await;
     assert!(result.is_ok());
 
     let call_result = result.unwrap();
@@ -42,11 +42,11 @@ async fn test_create_directory_existing_directory() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
     let existing_dir = temp_dir.join("dir1").join("existing_dir");
     fs::create_dir_all(&existing_dir).unwrap();
-    let params = CreateDirectoryTool {
+    let params = CreateDirectory {
         path: existing_dir.to_str().unwrap().to_string(),
     };
 
-    let result = CreateDirectoryTool::run_tool(params, &service).await;
+    let result = CreateDirectory::run_tool(params, &service).await;
     assert!(result.is_ok());
 
     let call_result = result.unwrap();
@@ -73,11 +73,11 @@ async fn test_create_directory_nested() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
     let nested_dir = temp_dir.join("dir1").join("nested/subdir");
-    let params = CreateDirectoryTool {
+    let params = CreateDirectory {
         path: nested_dir.to_str().unwrap().to_string(),
     };
 
-    let result = CreateDirectoryTool::run_tool(params, &service).await;
+    let result = CreateDirectory::run_tool(params, &service).await;
     assert!(result.is_ok());
 
     let call_result = result.unwrap();
@@ -102,11 +102,11 @@ async fn test_create_directory_outside_allowed() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
     let outside_dir = temp_dir.join("dir2").join("forbidden");
-    let params = CreateDirectoryTool {
+    let params = CreateDirectory {
         path: outside_dir.to_str().unwrap().to_string(),
     };
 
-    let result = CreateDirectoryTool::run_tool(params, &service).await;
+    let result = CreateDirectory::run_tool(params, &service).await;
     assert!(result.is_err());
     let err = result.unwrap_err();
     assert!(matches!(err, CallToolError { .. }));
@@ -117,13 +117,13 @@ async fn test_create_directory_invalid_path() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
     let invalid_path = temp_dir.join("dir1").join("invalid\0dir");
-    let params = CreateDirectoryTool {
+    let params = CreateDirectory {
         path: invalid_path
             .to_str()
             .map_or("invalid\0dir".to_string(), |s| s.to_string()),
     };
 
-    let result = CreateDirectoryTool::run_tool(params, &service).await;
+    let result = CreateDirectory::run_tool(params, &service).await;
     assert!(result.is_err());
     let err = result.unwrap_err();
     assert!(matches!(err, CallToolError { .. }));