diff --git a/Cargo.lock b/Cargo.lock index a177319..4b66f9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,6 +175,12 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.10.1" @@ -202,6 +208,17 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-if" version = "1.0.3" @@ -380,6 +397,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "futures" version = "0.3.31" @@ -639,6 +662,15 @@ dependencies = [ "cc", ] +[[package]] +name = "infer" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] + [[package]] name = "io-uring" version = "0.7.10" @@ -951,12 +983,14 @@ version = "0.2.3" dependencies = [ "async-trait", "async_zip", + "base64", "chrono", "clap", "dirs", "futures", "glob", "grep", + "infer", "rust-mcp-sdk", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 40043de..5d1e44b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,8 @@ futures = "0.3" tokio-util = "0.7" async_zip = { version = "0.0", features = ["full"] } grep = "0.3" +base64 = "0.22" +infer = "0.19.0" [dev-dependencies] tempfile = "3.2" diff --git a/src/error.rs b/src/error.rs index 2675360..d4977e0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -34,4 +34,10 @@ pub enum ServiceError { ZipError(#[from] ZipError), #[error("{0}")] GlobPatternError(#[from] PatternError), + #[error("File size exceeds the maximum allowed limit of {0} bytes")] + FileTooLarge(usize), + #[error("File size is below the minimum required limit of {0} bytes")] + FileTooSmall(usize), + #[error("The file is either not an image/audio type or is unsupported (mime:{0}).")] + InvalidMediaFile(String), } diff --git a/src/fs_service.rs b/src/fs_service.rs index ef32c94..39ecb11 100644 --- a/src/fs_service.rs +++ b/src/fs_service.rs @@ -5,7 +5,9 @@ use crate::{ tools::EditOperation, }; use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter}; +use base64::{engine::general_purpose, write::EncoderWriter}; use file_info::FileInfo; +use futures::{StreamExt, stream}; use glob::Pattern; use grep::{ matcher::{Match, Matcher}, @@ -19,12 +21,13 @@ use std::{ collections::HashSet, env, fs::{self}, + io::Write, path::{Path, PathBuf}, sync::Arc, }; use tokio::{ - fs::File, - io::{AsyncWriteExt, BufReader}, + fs::{File, metadata}, + io::{AsyncReadExt, AsyncWriteExt, BufReader}, sync::RwLock, }; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; @@ -36,6 +39,7 @@ use walkdir::WalkDir; const SNIPPET_MAX_LENGTH: usize = 200; const SNIPPET_BACKWARD_CHARS: usize = 30; +const MAX_CONCURRENT_FILE_READ: usize = 5; type PathResultList = Vec>; @@ -432,7 +436,108 @@ impl FileSystemService { Ok(result_message) } - pub async fn read_file(&self, file_path: &Path) -> ServiceResult { + pub fn mime_from_path(&self, path: &Path) -> ServiceResult { + let is_svg = path + .extension() + .is_some_and(|e| e.to_str().is_some_and(|s| s == "svg")); + // consider it is a svg file as we cannot detect svg from bytes pattern + if is_svg { + return Ok(infer::Type::new( + infer::MatcherType::Image, + "image/svg+xml", + "svg", + |_: &[u8]| true, + )); + + // infer::Type::new(infer::MatcherType::Image, "", "svg",); + } + let kind = infer::get_from_path(path)?.ok_or(ServiceError::FromString( + "File tyle is unknown!".to_string(), + ))?; + Ok(kind) + } + + pub async fn validate_file_size>( + &self, + path: P, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult<()> { + if min_bytes.is_none() && max_bytes.is_none() { + return Ok(()); + } + + let file_size = metadata(&path).await?.len() as usize; + + match (min_bytes, max_bytes) { + (_, Some(max)) if file_size > max => Err(ServiceError::FileTooLarge(max)), + (Some(min), _) if file_size < min => Err(ServiceError::FileTooSmall(min)), + _ => Ok(()), + } + } + + pub async fn read_media_files( + &self, + paths: Vec, + max_bytes: Option, + ) -> ServiceResult> { + let results = stream::iter(paths) + .map(|path| async { + self.read_media_file(Path::new(&path), max_bytes) + .await + .map_err(|e| (path, e)) + }) + .buffer_unordered(MAX_CONCURRENT_FILE_READ) // Process up to MAX_CONCURRENT_FILE_READ files concurrently + .filter_map(|result| async move { result.ok() }) + .collect::>() + .await; + Ok(results) + } + + pub async fn read_media_file( + &self, + file_path: &Path, + max_bytes: Option, + ) -> ServiceResult<(infer::Type, String)> { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + self.validate_file_size(&valid_path, None, max_bytes) + .await?; + let kind = self.mime_from_path(&valid_path)?; + let content = self.read_file_as_base64(&valid_path).await?; + Ok((kind, content)) + } + + // reads file as base64 efficiently in a streaming manner + async fn read_file_as_base64(&self, file_path: &Path) -> ServiceResult { + let file = File::open(file_path).await?; + let mut reader = BufReader::new(file); + + let mut output = Vec::new(); + { + // Wrap output Vec in a Base64 encoder writer + let mut encoder = EncoderWriter::new(&mut output, &general_purpose::STANDARD); + + let mut buffer = [0u8; 8192]; + loop { + let n = reader.read(&mut buffer).await?; + if n == 0 { + break; + } + // Write raw bytes to the Base64 encoder + encoder.write_all(&buffer[..n])?; + } + // Make sure to flush any remaining bytes + encoder.flush()?; + } // drop encoder before consuming output + + // Convert the Base64 bytes to String (safe UTF-8) + let base64_string = + String::from_utf8(output).map_err(|err| ServiceError::FromString(format!("{err}")))?; + Ok(base64_string) + } + + pub async fn read_text_file(&self, file_path: &Path) -> ServiceResult { let allowed_directories = self.allowed_directories().await; let valid_path = self.validate_path(file_path, allowed_directories)?; let content = tokio::fs::read_to_string(valid_path).await?; diff --git a/src/handler.rs b/src/handler.rs index 822e81e..144d01b 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -88,47 +88,45 @@ impl FileSystemHandler { let fs_service = self.fs_service.clone(); let mcp_roots_support = self.mcp_roots_support; // retrieve roots from the client and update the allowed directories accordingly - tokio::spawn(async move { - let roots = match runtime.clone().list_roots(None).await { - Ok(roots_result) => roots_result.roots, - Err(_err) => { - vec![] - } - }; - - let valid_roots = if roots.is_empty() { + let roots = match runtime.clone().list_roots(None).await { + Ok(roots_result) => roots_result.roots, + Err(_err) => { vec![] - } else { - let roots: Vec<_> = roots.iter().map(|v| v.uri.as_str()).collect(); - - match fs_service.valid_roots(roots) { - Ok((roots, skipped)) => { - if let Some(message) = skipped { - let _ = runtime.stderr_message(message.to_string()).await; - } - roots + } + }; + + let valid_roots = if roots.is_empty() { + vec![] + } else { + let roots: Vec<_> = roots.iter().map(|v| v.uri.as_str()).collect(); + + match fs_service.valid_roots(roots) { + Ok((roots, skipped)) => { + if let Some(message) = skipped { + let _ = runtime.stderr_message(message.to_string()).await; } - Err(_err) => vec![], + roots } - }; + Err(_err) => vec![], + } + }; - if valid_roots.is_empty() && !mcp_roots_support { - let message = if allowed_directories.is_empty() { - "Server cannot operate: No allowed directories available. Server was started without command-line directories and client provided empty roots. Please either: 1) Start server with directory arguments, or 2) Use a client that supports MCP roots protocol and provides valid root directories." - } else { - "Client provided empty roots. Allowed directories passed from command-line will be used." - }; - let _ = runtime.stderr_message(message.to_string()).await; + if valid_roots.is_empty() && !mcp_roots_support { + let message = if allowed_directories.is_empty() { + "Server cannot operate: No allowed directories available. Server was started without command-line directories and client provided empty roots. Please either: 1) Start server with directory arguments, or 2) Use a client that supports MCP roots protocol and provides valid root directories." } else { - let num_valid_roots = valid_roots.len(); + "Client provided empty roots. Allowed directories passed from command-line will be used." + }; + let _ = runtime.stderr_message(message.to_string()).await; + } else { + let num_valid_roots = valid_roots.len(); - fs_service.update_allowed_paths(valid_roots).await; - let message = format!( - "Updated allowed directories from MCP roots: {num_valid_roots} valid directories", - ); - let _ = runtime.stderr_message(message.to_string()).await; - } - }); + fs_service.update_allowed_paths(valid_roots).await; + let message = format!( + "Updated allowed directories from MCP roots: {num_valid_roots} valid directories", + ); + let _ = runtime.stderr_message(message.to_string()).await; + } } } } @@ -196,11 +194,17 @@ impl ServerHandler for FileSystemHandler { } match tool_params { - FileSystemTools::ReadFileTool(params) => { - ReadFileTool::run_tool(params, &self.fs_service).await + FileSystemTools::ReadMediaFileTool(params) => { + ReadMediaFileTool::run_tool(params, &self.fs_service).await + } + FileSystemTools::ReadMultipleMediaFilesTool(params) => { + ReadMultipleMediaFilesTool::run_tool(params, &self.fs_service).await + } + FileSystemTools::ReadTextFileTool(params) => { + ReadTextFileTool::run_tool(params, &self.fs_service).await } - FileSystemTools::ReadMultipleFilesTool(params) => { - ReadMultipleFilesTool::run_tool(params, &self.fs_service).await + FileSystemTools::ReadMultipleTextFilesTool(params) => { + ReadMultipleTextFilesTool::run_tool(params, &self.fs_service).await } FileSystemTools::WriteFileTool(params) => { WriteFileTool::run_tool(params, &self.fs_service).await diff --git a/src/tools.rs b/src/tools.rs index fea0583..3733b2a 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -6,8 +6,10 @@ mod list_allowed_directories; mod list_directory; mod list_directory_with_sizes; mod move_file; -mod read_files; -mod read_multiple_files; +mod read_media_file; +mod read_multiple_media_files; +mod read_multiple_text_files; +mod read_text_file; mod search_file; mod search_files_content; mod write_file; @@ -21,8 +23,10 @@ pub use list_allowed_directories::ListAllowedDirectoriesTool; pub use list_directory::ListDirectoryTool; pub use list_directory_with_sizes::ListDirectoryWithSizesTool; pub use move_file::MoveFileTool; -pub use read_files::ReadFileTool; -pub use read_multiple_files::ReadMultipleFilesTool; +pub use read_media_file::ReadMediaFileTool; +pub use read_multiple_media_files::ReadMultipleMediaFilesTool; +pub use read_multiple_text_files::ReadMultipleTextFilesTool; +pub use read_text_file::ReadTextFileTool; pub use rust_mcp_sdk::tool_box; pub use search_file::SearchFilesTool; pub use search_files_content::SearchFilesContentTool; @@ -33,7 +37,7 @@ pub use zip_unzip::{UnzipFileTool, ZipDirectoryTool, ZipFilesTool}; tool_box!( FileSystemTools, [ - ReadFileTool, + ReadTextFileTool, CreateDirectoryTool, DirectoryTreeTool, EditFileTool, @@ -41,14 +45,16 @@ tool_box!( ListAllowedDirectoriesTool, ListDirectoryTool, MoveFileTool, - ReadMultipleFilesTool, + ReadMultipleTextFilesTool, SearchFilesTool, WriteFileTool, ZipFilesTool, UnzipFileTool, ZipDirectoryTool, SearchFilesContentTool, - ListDirectoryWithSizesTool + ListDirectoryWithSizesTool, + ReadMediaFileTool, + ReadMultipleMediaFilesTool ] ); @@ -64,14 +70,16 @@ impl FileSystemTools { | FileSystemTools::ZipFilesTool(_) | FileSystemTools::UnzipFileTool(_) | FileSystemTools::ZipDirectoryTool(_) => true, - FileSystemTools::ReadFileTool(_) + FileSystemTools::ReadTextFileTool(_) | FileSystemTools::DirectoryTreeTool(_) | FileSystemTools::GetFileInfoTool(_) | FileSystemTools::ListAllowedDirectoriesTool(_) | FileSystemTools::ListDirectoryTool(_) - | FileSystemTools::ReadMultipleFilesTool(_) + | FileSystemTools::ReadMultipleTextFilesTool(_) | FileSystemTools::SearchFilesContentTool(_) | FileSystemTools::ListDirectoryWithSizesTool(_) + | FileSystemTools::ReadMediaFileTool(_) + | FileSystemTools::ReadMultipleMediaFilesTool(_) | FileSystemTools::SearchFilesTool(_) => false, } } diff --git a/src/tools/read_media_file.rs b/src/tools/read_media_file.rs new file mode 100644 index 0000000..7c606d4 --- /dev/null +++ b/src/tools/read_media_file.rs @@ -0,0 +1,61 @@ +use std::path::Path; + +use rust_mcp_sdk::macros::{JsonSchema, mcp_tool}; +use rust_mcp_sdk::schema::{AudioContent, ImageContent}; +use rust_mcp_sdk::schema::{CallToolResult, schema_utils::CallToolError}; + +use crate::error::ServiceError; +use crate::fs_service::FileSystemService; + +#[mcp_tool( + name = "read_media_file", + title="Read an Image or Audio file", + description = concat!("Reads an image or audio file and returns its Base64-encoded content along with the corresponding MIME type. ", + "The max_bytes argument could be used to enforce an upper limit on the size of a file to read ", + "if the media file exceeds this limit, the operation will return an error instead of reading the media file. ", + "Access is restricted to files within allowed directories only."), + destructive_hint = false, + idempotent_hint = false, + open_world_hint = false, + read_only_hint = true +)] +#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] +pub struct ReadMediaFileTool { + /// The path of the file to read. + pub path: String, + /// Maximum allowed file size (in bytes) to be read. + pub max_bytes: Option, +} + +impl ReadMediaFileTool { + pub async fn run_tool( + params: Self, + context: &FileSystemService, + ) -> std::result::Result { + let (kind, content) = context + .read_media_file( + Path::new(¶ms.path), + params.max_bytes.map(|v| v as usize), + ) + .await + .map_err(CallToolError::new)?; + let mime_type = kind.mime_type().to_string(); + let call_result = match kind.matcher_type() { + infer::MatcherType::Image => { + let image_content: ImageContent = ImageContent::new(content, mime_type, None, None); + CallToolResult::image_content(vec![image_content]) + } + infer::MatcherType::Audio => { + let audio_content: AudioContent = AudioContent::new(content, mime_type, None, None); + CallToolResult::audio_content(vec![audio_content]) + } + _ => { + return Err(CallToolError::from_message( + ServiceError::InvalidMediaFile(mime_type).to_string(), + )); + } + }; + + Ok(call_result) + } +} diff --git a/src/tools/read_multiple_media_files.rs b/src/tools/read_multiple_media_files.rs new file mode 100644 index 0000000..00f5c79 --- /dev/null +++ b/src/tools/read_multiple_media_files.rs @@ -0,0 +1,61 @@ +use crate::fs_service::FileSystemService; +use rust_mcp_sdk::macros::{JsonSchema, mcp_tool}; +use rust_mcp_sdk::schema::{AudioContent, ContentBlock, ImageContent}; +use rust_mcp_sdk::schema::{CallToolResult, schema_utils::CallToolError}; + +#[mcp_tool( + name = "read_multiple_media_files", + title="Read Multiple Media (Image/Audio) Files", + description = concat!("Reads multiple image or audio files and returns their Base64-encoded contents along with corresponding MIME types. ", + "This method is more efficient than reading files individually. ", + "The max_bytes argument could be used to enforce an upper limit on the size of a file to read ", + "Failed reads for specific files are skipped without interrupting the entire operation. ", + "Only works within allowed directories."), + destructive_hint = false, + idempotent_hint = false, + open_world_hint = false, + read_only_hint = true +)] +#[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] +pub struct ReadMultipleMediaFilesTool { + /// The list of media file paths to read. + pub paths: Vec, + /// Maximum allowed file size (in bytes) to be read. + pub max_bytes: Option, +} + +impl ReadMultipleMediaFilesTool { + pub async fn run_tool( + params: Self, + context: &FileSystemService, + ) -> std::result::Result { + let result = context + .read_media_files(params.paths, params.max_bytes.map(|v| v as usize)) + .await + .map_err(CallToolError::new)?; + + let content: Vec<_> = result + .into_iter() + .filter_map(|(kind, content)| { + let mime_type = kind.mime_type().to_string(); + + match kind.matcher_type() { + infer::MatcherType::Image => Some(ContentBlock::ImageContent( + ImageContent::new(content, mime_type, None, None), + )), + infer::MatcherType::Audio => Some(ContentBlock::AudioContent( + AudioContent::new(content, mime_type, None, None), + )), + _ => None, + } + }) + .collect(); + + Ok(CallToolResult { + content, + is_error: None, + meta: None, + structured_content: None, + }) + } +} diff --git a/src/tools/read_multiple_files.rs b/src/tools/read_multiple_text_files.rs similarity index 84% rename from src/tools/read_multiple_files.rs rename to src/tools/read_multiple_text_files.rs index fc903c2..640a101 100644 --- a/src/tools/read_multiple_files.rs +++ b/src/tools/read_multiple_text_files.rs @@ -1,16 +1,14 @@ -use std::path::Path; - +use crate::fs_service::FileSystemService; use futures::future::join_all; use rust_mcp_sdk::macros::{JsonSchema, mcp_tool}; use rust_mcp_sdk::schema::TextContent; use rust_mcp_sdk::schema::{CallToolResult, schema_utils::CallToolError}; - -use crate::fs_service::FileSystemService; +use std::path::Path; #[mcp_tool( - name = "read_multiple_files", - title="Read Multiple Files", - description = concat!("Read the contents of multiple files simultaneously. ", + name = "read_multiple_text_files", + title="Read Multiple Text Files", + description = concat!("Read the contents of multiple text files simultaneously as text. ", "This is more efficient than reading files one by one when you need to analyze ", "or compare multiple files. Each file's content is returned with its ", "path as a reference. Failed reads for individual files won't stop ", @@ -21,12 +19,12 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadMultipleFilesTool { +pub struct ReadMultipleTextFilesTool { /// The list of file paths to read. pub paths: Vec, } -impl ReadMultipleFilesTool { +impl ReadMultipleTextFilesTool { pub async fn run_tool( params: Self, context: &FileSystemService, @@ -37,7 +35,7 @@ impl ReadMultipleFilesTool { .map(|path| async move { { let content = context - .read_file(Path::new(&path)) + .read_text_file(Path::new(&path)) .await .map_err(CallToolError::new); diff --git a/src/tools/read_files.rs b/src/tools/read_text_file.rs similarity index 80% rename from src/tools/read_files.rs rename to src/tools/read_text_file.rs index a6c91d1..06d6acf 100644 --- a/src/tools/read_files.rs +++ b/src/tools/read_text_file.rs @@ -7,9 +7,9 @@ use rust_mcp_sdk::schema::{CallToolResult, schema_utils::CallToolError}; use crate::fs_service::FileSystemService; #[mcp_tool( - name = "read_file", - title="Read File", - description = concat!("Read the complete contents of a file from the file system. ", + name = "read_text_file", + title="Read a text file", + description = concat!("Read the complete contents of a text file from the file system as text. ", "Handles various text encodings and provides detailed error messages if the ", "file cannot be read. Use this tool when you need to examine the contents of ", "a single file. Only works within allowed directories."), @@ -19,18 +19,18 @@ use crate::fs_service::FileSystemService; read_only_hint = true )] #[derive(::serde::Deserialize, ::serde::Serialize, Clone, Debug, JsonSchema)] -pub struct ReadFileTool { +pub struct ReadTextFileTool { /// The path of the file to read. pub path: String, } -impl ReadFileTool { +impl ReadTextFileTool { pub async fn run_tool( params: Self, context: &FileSystemService, ) -> std::result::Result { let content = context - .read_file(Path::new(¶ms.path)) + .read_text_file(Path::new(¶ms.path)) .await .map_err(CallToolError::new)?; diff --git a/tests/test_fs_service.rs b/tests/test_fs_service.rs index 0262115..eef6b26 100644 --- a/tests/test_fs_service.rs +++ b/tests/test_fs_service.rs @@ -226,7 +226,7 @@ async fn test_unzip_file_non_existent() { async fn test_read_file() { let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "test.txt", "content"); - let content = service.read_file(&file_path).await.unwrap(); + let content = service.read_text_file(&file_path).await.unwrap(); assert_eq!(content, "content"); } diff --git a/tests/test_tools.rs b/tests/test_tools.rs index 542fff1..b79b519 100644 --- a/tests/test_tools.rs +++ b/tests/test_tools.rs @@ -128,3 +128,6 @@ async fn test_create_directory_invalid_path() { let err = result.unwrap_err(); assert!(matches!(err, CallToolError { .. })); } + +#[tokio::test] +async fn adhoc() {}