-
Notifications
You must be signed in to change notification settings - Fork 5.5k
[mcp-server] Expose fuzzy file search in MCP #2677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1c5d0d1
1fb779c
08a42c3
872cc13
eff9d22
cfc275f
8eb2d7b
72d1866
55cdb4a
edc25f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,84 @@ | ||||||
use std::num::NonZero; | ||||||
use std::num::NonZeroUsize; | ||||||
use std::path::PathBuf; | ||||||
use std::sync::Arc; | ||||||
use std::sync::atomic::AtomicBool; | ||||||
|
||||||
use codex_file_search as file_search; | ||||||
use codex_protocol::mcp_protocol::FuzzyFileSearchResult; | ||||||
use tokio::task::JoinSet; | ||||||
use tracing::warn; | ||||||
|
||||||
/// Per-root limit handed to `file_search::run` (converted to a `NonZero` first).
const LIMIT_PER_ROOT: usize = 50; | ||||||
/// Upper bound on the thread budget, applied on top of the detected core count.
const MAX_THREADS: usize = 12; | ||||||
/// Passed as the final argument to `file_search::run`; presumably asks it to
/// populate the `indices` of each match -- confirm against `codex_file_search`.
const COMPUTE_INDICES: bool = true; | ||||||
|
||||||
pub(crate) async fn run_fuzzy_file_search( | ||||||
query: String, | ||||||
roots: Vec<String>, | ||||||
cancellation_flag: Arc<AtomicBool>, | ||||||
) -> Vec<FuzzyFileSearchResult> { | ||||||
#[expect(clippy::expect_used)] | ||||||
let limit_per_root = | ||||||
NonZero::new(LIMIT_PER_ROOT).expect("LIMIT_PER_ROOT should be a valid non-zero usize"); | ||||||
|
||||||
let cores = std::thread::available_parallelism() | ||||||
.map(std::num::NonZero::get) | ||||||
.unwrap_or(1); | ||||||
let threads = cores.min(MAX_THREADS); | ||||||
let threads_per_root = (threads / roots.len()).max(1); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we want max here! It's primarily to catch the case where roots.len() > threads, and make sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You want 5.min(1) -> 5 and 0.min(1) -> 1 right? Isn't 5.max(1) -> 1 and 0.max(1) -> 0? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Other way around! Here's a repl to make sure we're on the same page: ![]() |
||||||
let threads = NonZero::new(threads_per_root).unwrap_or(NonZeroUsize::MIN); | ||||||
|
||||||
let mut files: Vec<FuzzyFileSearchResult> = Vec::new(); | ||||||
let mut join_set = JoinSet::new(); | ||||||
|
||||||
for root in roots { | ||||||
let search_dir = PathBuf::from(&root); | ||||||
let query = query.clone(); | ||||||
let cancel_flag = cancellation_flag.clone(); | ||||||
join_set.spawn_blocking(move || { | ||||||
match file_search::run( | ||||||
query.as_str(), | ||||||
limit_per_root, | ||||||
&search_dir, | ||||||
Vec::new(), | ||||||
threads, | ||||||
cancel_flag, | ||||||
COMPUTE_INDICES, | ||||||
) { | ||||||
Ok(res) => Ok((root, res)), | ||||||
Err(err) => Err((root, err)), | ||||||
} | ||||||
}); | ||||||
} | ||||||
|
||||||
while let Some(res) = join_set.join_next().await { | ||||||
match res { | ||||||
Ok(Ok((root, res))) => { | ||||||
for m in res.matches { | ||||||
let result = FuzzyFileSearchResult { | ||||||
root: root.clone(), | ||||||
path: m.path, | ||||||
score: m.score, | ||||||
indices: m.indices, | ||||||
}; | ||||||
files.push(result); | ||||||
} | ||||||
} | ||||||
Ok(Err((root, err))) => { | ||||||
warn!("fuzzy-file-search in dir '{root}' failed: {err}"); | ||||||
} | ||||||
Err(err) => { | ||||||
warn!("fuzzy-file-search join_next failed: {err}"); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
files.sort_by(file_search::cmp_by_score_desc_then_path_asc::< | ||||||
FuzzyFileSearchResult, | ||||||
_, | ||||||
_, | ||||||
>(|f| f.score, |f| f.path.as_str())); | ||||||
|
||||||
files | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
use mcp_test_support::McpProcess; | ||
use mcp_types::JSONRPCResponse; | ||
use mcp_types::RequestId; | ||
use pretty_assertions::assert_eq; | ||
use serde_json::json; | ||
use tempfile::TempDir; | ||
use tokio::time::timeout; | ||
|
||
/// Timeout wrapped around each awaited MCP step in these tests (initialization
/// and reading a response), so a stuck server fails the test instead of hanging.
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); | ||
|
||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] | ||
async fn test_fuzzy_file_search_sorts_and_includes_indices() { | ||
// Prepare a temporary Codex home and a separate root with test files. | ||
let codex_home = TempDir::new().expect("create temp codex home"); | ||
let root = TempDir::new().expect("create temp search root"); | ||
|
||
// Create files designed to have deterministic ordering for query "abc". | ||
std::fs::write(root.path().join("abc"), "x").expect("write file abc"); | ||
std::fs::write(root.path().join("abcde"), "x").expect("write file abcx"); | ||
std::fs::write(root.path().join("abexy"), "x").expect("write file abcx"); | ||
std::fs::write(root.path().join("zzz.txt"), "x").expect("write file zzz"); | ||
|
||
// Start MCP server and initialize. | ||
let mut mcp = McpProcess::new(codex_home.path()).await.expect("spawn mcp"); | ||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()) | ||
.await | ||
.expect("init timeout") | ||
.expect("init failed"); | ||
|
||
let root_path = root.path().to_string_lossy().to_string(); | ||
// Send fuzzyFileSearch request. | ||
let request_id = mcp | ||
.send_fuzzy_file_search_request("abe", vec![root_path.clone()], None) | ||
.await | ||
.expect("send fuzzyFileSearch"); | ||
|
||
// Read response and verify shape and ordering. | ||
let resp: JSONRPCResponse = timeout( | ||
DEFAULT_READ_TIMEOUT, | ||
mcp.read_stream_until_response_message(RequestId::Integer(request_id)), | ||
) | ||
.await | ||
.expect("fuzzyFileSearch timeout") | ||
.expect("fuzzyFileSearch resp"); | ||
|
||
let value = resp.result; | ||
assert_eq!( | ||
value, | ||
json!({ | ||
"files": [ | ||
{ "root": root_path.clone(), "path": "abexy", "score": 88, "indices": [0, 1, 2] }, | ||
{ "root": root_path.clone(), "path": "abcde", "score": 74, "indices": [0, 1, 4] }, | ||
] | ||
}) | ||
); | ||
} | ||
|
||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] | ||
async fn test_fuzzy_file_search_accepts_cancellation_token() { | ||
let codex_home = TempDir::new().expect("create temp codex home"); | ||
let root = TempDir::new().expect("create temp search root"); | ||
|
||
std::fs::write(root.path().join("alpha.txt"), "contents").expect("write alpha"); | ||
|
||
let mut mcp = McpProcess::new(codex_home.path()).await.expect("spawn mcp"); | ||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()) | ||
.await | ||
.expect("init timeout") | ||
.expect("init failed"); | ||
|
||
let root_path = root.path().to_string_lossy().to_string(); | ||
let request_id = mcp | ||
.send_fuzzy_file_search_request("alp", vec![root_path.clone()], None) | ||
.await | ||
.expect("send fuzzyFileSearch"); | ||
|
||
let request_id_2 = mcp | ||
.send_fuzzy_file_search_request( | ||
"alp", | ||
vec![root_path.clone()], | ||
Some(request_id.to_string()), | ||
) | ||
.await | ||
.expect("send fuzzyFileSearch"); | ||
|
||
let resp: JSONRPCResponse = timeout( | ||
DEFAULT_READ_TIMEOUT, | ||
mcp.read_stream_until_response_message(RequestId::Integer(request_id_2)), | ||
) | ||
.await | ||
.expect("fuzzyFileSearch timeout") | ||
.expect("fuzzyFileSearch resp"); | ||
|
||
let files = resp | ||
.result | ||
.get("files") | ||
.and_then(|value| value.as_array()) | ||
.cloned() | ||
.expect("files array"); | ||
|
||
assert_eq!(files.len(), 1); | ||
assert_eq!(files[0]["root"], root_path); | ||
assert_eq!(files[0]["path"], "alpha.txt"); | ||
} |
Uh oh!
There was an error while loading. Please reload this page.