Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/clients.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,25 @@ pub trait KnowledgeClient: Send + Sync {
pub trait DigestClient: Send + Sync {
fn generate_daily_digest(&self) -> Result<Value>;
}

/// Low-level repository analysis (no async, no external state).
///
/// Both methods are synchronous and take the workspace location as a plain `&str`
/// path. The `Send + Sync` supertraits let an implementor be shared with or moved
/// into worker threads.
///
/// NOTE(review): `Result` is whatever alias this module has in scope; the visible
/// implementor returns `anyhow::Result`, so it is presumably `anyhow::Result` — confirm.
pub trait RepoAnalyzer: Send + Sync {
/// Computes a hash string for the workspace located at `path`.
///
/// Callers compare the returned hash for equality with the `file_hash` of the
/// latest stored snapshot to decide whether a non-git workspace is dirty.
fn compute_workspace_hash(&self, path: &str) -> Result<String>;
/// Analyzes the repository at `path`.
///
/// `upstream_url` and `default_branch` are optional hints; callers take them from
/// the repo's primary remote when one exists.
///
/// The returned tuple is consumed by callers as `(status, ahead, behind)`, in the
/// same positions as a stored health record's `status`, `ahead` and `behind`.
/// Callers treat a status of `"dirty"` or `"changed"` as a dirty workspace.
fn analyze_repo(
&self,
path: &str,
upstream_url: Option<&str>,
default_branch: Option<&str>,
) -> Result<(String, usize, usize)>;
}

/// Tantivy search operations exposed to MCP tools.
///
/// Every method takes the index location explicitly as a `&Path`. The `Send + Sync`
/// supertraits let an implementor be shared with or moved into worker threads.
pub trait SearchClient: Send + Sync {
/// Reports whether the search index at `path` is empty.
///
/// Callers use a `true` result (or an error) as the signal to skip the Tantivy
/// search and fall back to scanning every repo.
fn index_is_empty_at(&self, path: &std::path::Path) -> Result<bool>;
/// Runs `query` against the search index at `path`.
///
/// Each returned pair starts with a `String` that callers look up as a repo id.
///
/// NOTE(review): the `f32` is presumably the relevance score and `limit` presumably
/// caps the number of hits — confirm against the implementation.
fn search_repos_at(
&self,
path: &std::path::Path,
query: &str,
limit: usize,
) -> Result<Vec<(String, f32)>>;
}
18 changes: 18 additions & 0 deletions src/health.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,24 @@ impl HealthClient for AppContext {
}
}

/// Stateless implementation of [`crate::clients::RepoAnalyzer`] for use in
/// `spawn_blocking` closures.
///
/// A unit struct with no fields: its trait methods forward to this module's free
/// functions `compute_workspace_hash` and `analyze_repo`.
pub struct RepoAnalyzerImpl;

impl crate::clients::RepoAnalyzer for RepoAnalyzerImpl {
    /// Forwards to this module's free function `compute_workspace_hash`, converting
    /// the string path into a `&Path` first.
    fn compute_workspace_hash(&self, path: &str) -> anyhow::Result<String> {
        let workspace_dir = std::path::Path::new(path);
        compute_workspace_hash(workspace_dir)
    }

    /// Forwards to this module's free function `analyze_repo`.
    ///
    /// The free function returns its tuple directly rather than a `Result`, so this
    /// method never produces an `Err`; the `Result` wrapper exists only to satisfy
    /// the trait signature.
    fn analyze_repo(
        &self,
        path: &str,
        upstream_url: Option<&str>,
        default_branch: Option<&str>,
    ) -> anyhow::Result<(String, usize, usize)> {
        let report = analyze_repo(path, upstream_url, default_branch);
        Ok(report)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
59 changes: 43 additions & 16 deletions src/mcp/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,38 +398,62 @@ fn mock_repo(
}

#[test]
fn test_nl_filter_repos_empty_query_returns_empty() {
fn test_nl_filter_repos_empty_query_returns_empty() -> anyhow::Result<()> {
let _guard = NL_FILTER_TEST_LOCK.lock().unwrap();
let conn = crate::registry::WorkspaceRegistry::init_in_memory().unwrap();
let conn = crate::registry::WorkspaceRegistry::init_in_memory()?;
let repos: Vec<crate::registry::RepoEntry> = vec![];
let results = crate::mcp::tools::repo::nl_filter_repos("", &repos, &conn).unwrap();
let backend = crate::storage::TempStorageBackend::new();
let index_path = backend.index_path()?;
let searcher = crate::search::SearchClientImpl;
let analyzer = crate::health::RepoAnalyzerImpl;
let results = crate::mcp::tools::repo::nl_filter_repos_at(
&index_path,
"",
&repos,
&conn,
&searcher,
&analyzer,
)?;
assert!(results.is_empty());
Ok(())
}

#[test]
fn test_nl_filter_repos_fallback_finds_by_language() {
fn test_nl_filter_repos_fallback_finds_by_language() -> anyhow::Result<()> {
let _guard = NL_FILTER_TEST_LOCK.lock().unwrap();
let conn = crate::registry::WorkspaceRegistry::init_in_memory().unwrap();
let conn = crate::registry::WorkspaceRegistry::init_in_memory()?;
let repos = vec![
mock_repo("repo1", Some("rust"), vec!["cli"], Some(10)),
mock_repo("repo2", Some("python"), vec!["web"], Some(5)),
];
let results = crate::mcp::tools::repo::nl_filter_repos("rust cli tool", &repos, &conn).unwrap();
let backend = crate::storage::TempStorageBackend::new();
let index_path = backend.index_path()?;
let searcher = crate::search::SearchClientImpl;
let analyzer = crate::health::RepoAnalyzerImpl;
let results = crate::mcp::tools::repo::nl_filter_repos_at(
&index_path,
"rust cli tool",
&repos,
&conn,
&searcher,
&analyzer,
)?;
assert_eq!(results.len(), 1);
assert_eq!(results[0].id, "repo1");
Ok(())
}

#[test]
fn test_nl_filter_repos_tantivy_finds_devbase() {
fn test_nl_filter_repos_tantivy_finds_devbase() -> anyhow::Result<()> {
let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new());
let index_path = backend.index_path().unwrap();
let index_path = backend.index_path()?;

// Ensure DB schema exists
let conn = crate::registry::WorkspaceRegistry::init_db_with(&*backend).unwrap();
let conn = crate::registry::WorkspaceRegistry::init_db_with(&*backend)?;

// Populate Tantivy index with devbase doc
let (index, _reader) = crate::search::init_index_at(&index_path).unwrap();
let mut writer = crate::search::get_writer(&index).unwrap();
let (index, _reader) = crate::search::init_index_at(&index_path)?;
let mut writer = crate::search::get_writer(&index)?;
let schema = index.schema();
crate::search::add_repo_doc(
&mut writer,
Expand All @@ -438,9 +462,8 @@ fn test_nl_filter_repos_tantivy_finds_devbase() {
"devbase developer workspace manager",
"rust, cli, workspace, developer",
&["rust".to_string(), "cli".to_string()],
)
.unwrap();
crate::search::commit_writer(&mut writer).unwrap();
)?;
crate::search::commit_writer(&mut writer)?;

let repos = vec![crate::registry::RepoEntry {
id: "devbase".to_string(),
Expand All @@ -455,15 +478,19 @@ fn test_nl_filter_repos_tantivy_finds_devbase() {
remotes: vec![],
}];

let searcher = crate::search::SearchClientImpl;
let analyzer = crate::health::RepoAnalyzerImpl;
let results = crate::mcp::tools::repo::nl_filter_repos_at(
&index_path,
"developer workspace",
&repos,
&conn,
)
.unwrap();
&searcher,
&analyzer,
)?;
assert!(!results.is_empty(), "tantivy path should find devbase");
assert_eq!(results[0].id, "devbase");
Ok(())
}

#[test]
Expand Down
77 changes: 37 additions & 40 deletions src/mcp/tools/repo.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2026 juice094
use super::super::McpTool;
use crate::clients::{HealthClient, KnowledgeClient, ScanClient, SyncClient};
use crate::clients::{
HealthClient, KnowledgeClient, RepoAnalyzer, ScanClient, SearchClient, SyncClient,
};
use crate::health::RepoAnalyzerImpl;
use crate::registry::RepoEntry;
use crate::repository::health::HealthRepository;
use crate::repository::repo::RepoRepository;
use crate::repository::workspace::WorkspaceRepository;
use crate::storage::{AppContext, StorageBackend};
use crate::search::SearchClientImpl;
use crate::storage::AppContext;
use anyhow::Context;

#[derive(Clone)]
Expand Down Expand Up @@ -309,6 +313,7 @@ Returns: JSON array of repo objects. Each includes: id, local_path, language, ta
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(50) as usize;

let pool = ctx.pool();
let analyzer = RepoAnalyzerImpl;
tokio::task::spawn_blocking(move || {
let conn = pool.get()?;
let repos = RepoRepository::new(&conn).list_repos(None)?;
Expand All @@ -332,12 +337,13 @@ Returns: JSON array of repo objects. Each includes: id, local_path, language, ta
let (ahead, behind, dirty) = if repo.workspace_type == "git" {
let (st, ah, bh) = match HealthRepository::new(&conn).get_health(&repo.id)? {
Some(health) => (health.status.clone(), health.ahead, health.behind),
None => analyze_repo_for_repo(&repo),
None => analyze_repo_for_repo(&repo, &analyzer)?,
};
let dirty = st == "dirty" || st == "changed";
(ah, bh, dirty)
} else {
let dirty = match crate::health::compute_workspace_hash(&repo.local_path) {
let path_str = repo.local_path.to_string_lossy();
let dirty = match analyzer.compute_workspace_hash(&path_str) {
Ok(current_hash) => {
match WorkspaceRepository::new(&conn).get_latest_snapshot(&repo.id)? {
Some(prev) => prev.file_hash != current_hash,
Expand Down Expand Up @@ -440,10 +446,14 @@ Returns: JSON array of matching repos with metadata, same format as devkit_query
let query = query.to_string();

let pool = ctx.pool();
let index_path = ctx.storage.index_path()?;
tokio::task::spawn_blocking(move || {
let conn = pool.get()?;
let repos = RepoRepository::new(&conn).list_repos(None)?;
let filtered = nl_filter_repos(&query, &repos, &conn)?;
let searcher = SearchClientImpl;
let analyzer = RepoAnalyzerImpl;
let filtered =
nl_filter_repos_at(&index_path, &query, &repos, &conn, &searcher, &analyzer)?;

let results: Vec<serde_json::Value> = filtered
.into_iter()
Expand All @@ -469,12 +479,13 @@ Returns: JSON array of matching repos with metadata, same format as devkit_query
.map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))?
}
}
fn apply_nl_filters(
fn apply_nl_filters<A: RepoAnalyzer>(
repo: &RepoEntry,
q: &str,
stars_cond: Option<(char, u64)>,
explicit_tag: Option<&str>,
conn: &rusqlite::Connection,
analyzer: &A,
) -> anyhow::Result<bool> {
// Language filter: only apply if query explicitly mentions a language keyword
let lang_keywords = [
Expand Down Expand Up @@ -527,7 +538,7 @@ fn apply_nl_filters(
{
let (st, ah, bh) = match HealthRepository::new(conn).get_health(&repo.id)? {
Some(h) => (h.status.clone(), h.ahead, h.behind),
None => analyze_repo_for_repo(repo),
None => analyze_repo_for_repo(repo, analyzer)?,
};
let dirty = st == "dirty" || st == "changed";

Expand All @@ -550,38 +561,14 @@ fn apply_nl_filters(

Ok(true)
}
/// Filter `repos` by a natural-language `query`, resolving the search index path
/// from the default storage backend.
///
/// When the index path resolves, the work is delegated to `nl_filter_repos_at`.
/// When it does not, a warning is logged and every repo is checked with
/// `apply_nl_filters` against the lowercased query instead; an error from any
/// single check aborts the whole call.
pub(crate) fn nl_filter_repos(
    query: &str,
    repos: &[RepoEntry],
    conn: &rusqlite::Connection,
) -> anyhow::Result<Vec<RepoEntry>> {
    let storage = crate::storage::DefaultStorageBackend {};
    let resolve_err = match storage.index_path() {
        // Normal case: hand over to the index-aware implementation.
        Ok(index_path) => return nl_filter_repos_at(&index_path, query, repos, conn),
        Err(err) => err,
    };

    tracing::warn!("Failed to resolve index path: {}", resolve_err);

    // Fallback to non-Tantivy path
    let lowered = query.to_lowercase();
    let stars_cond = parse_stars_condition(&lowered);
    let explicit_tag = extract_tag_from_query(&lowered);
    let tag = explicit_tag.as_deref();

    let mut kept = Vec::new();
    for candidate in repos {
        let passes = apply_nl_filters(candidate, &lowered, stars_cond, tag, conn)?;
        if passes {
            kept.push(candidate.clone());
        }
    }
    Ok(kept)
}

/// Filter repos using an explicit Tantivy index path, bypassing global storage backend.
pub(crate) fn nl_filter_repos_at(
pub(crate) fn nl_filter_repos_at<S: SearchClient, A: RepoAnalyzer>(
index_path: &std::path::Path,
query: &str,
repos: &[RepoEntry],
conn: &rusqlite::Connection,
searcher: &S,
analyzer: &A,
) -> anyhow::Result<Vec<RepoEntry>> {
let q = query.to_lowercase();
let stars_cond = parse_stars_condition(&q);
Expand All @@ -597,7 +584,7 @@ pub(crate) fn nl_filter_repos_at(
|| q.contains("uptodate");

// Try Tantivy search first if index is not empty
let use_tantivy = match crate::search::index_is_empty_at(index_path) {
let use_tantivy = match searcher.index_is_empty_at(index_path) {
Ok(empty) => !empty,
Err(e) => {
tracing::warn!("Failed to check search index: {}", e);
Expand All @@ -607,7 +594,7 @@ pub(crate) fn nl_filter_repos_at(

if use_tantivy && !query.trim().is_empty() {
let limit = repos.len().max(1000);
match crate::search::search_repos_at(index_path, query, limit) {
match searcher.search_repos_at(index_path, query, limit) {
Ok(search_results) => {
let repo_map: std::collections::HashMap<_, _> =
repos.iter().map(|r| (r.id.clone(), r)).collect();
Expand All @@ -618,7 +605,14 @@ pub(crate) fn nl_filter_repos_at(
continue;
}
if let Some(repo) = repo_map.get(&id)
&& apply_nl_filters(repo, &q, stars_cond, explicit_tag.as_deref(), conn)?
&& apply_nl_filters(
repo,
&q,
stars_cond,
explicit_tag.as_deref(),
conn,
analyzer,
)?
{
results.push((*repo).clone());
}
Expand All @@ -640,7 +634,7 @@ pub(crate) fn nl_filter_repos_at(
// Fallback: iterate all repos with hardcoded regex logic
let mut results = Vec::new();
for repo in repos {
if apply_nl_filters(repo, &q, stars_cond, explicit_tag.as_deref(), conn)? {
if apply_nl_filters(repo, &q, stars_cond, explicit_tag.as_deref(), conn, analyzer)? {
results.push(repo.clone());
}
}
Expand Down Expand Up @@ -677,12 +671,15 @@ fn extract_tag_from_query(q: &str) -> Option<String> {
None
}
}
fn analyze_repo_for_repo(repo: &RepoEntry) -> (String, usize, usize) {
fn analyze_repo_for_repo<A: RepoAnalyzer>(
repo: &RepoEntry,
analyzer: &A,
) -> anyhow::Result<(String, usize, usize)> {
let path = repo.local_path.to_string_lossy();
let primary = repo.primary_remote();
let upstream_url = primary.and_then(|r| r.upstream_url.as_deref());
let default_branch = primary.and_then(|r| r.default_branch.as_deref());
crate::health::analyze_repo(&path, upstream_url, default_branch)
analyzer.analyze_repo(&path, upstream_url, default_branch)
}

#[cfg(test)]
Expand Down
18 changes: 18 additions & 0 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,24 @@ fn open_index_at(path: &std::path::Path) -> Result<(Index, Schema), TantivyError
Ok((idx, schema))
}

/// Stateless implementation of [`crate::clients::SearchClient`] for use in
/// `spawn_blocking` closures.
///
/// A unit struct with no fields: its trait methods forward to this module's free
/// functions `index_is_empty_at` and `search_repos_at`, converting their errors
/// into `anyhow::Error`.
pub struct SearchClientImpl;

impl crate::clients::SearchClient for SearchClientImpl {
    /// Forwards to this module's free function `index_is_empty_at`, converting any
    /// failure into an `anyhow::Error`.
    fn index_is_empty_at(&self, path: &std::path::Path) -> anyhow::Result<bool> {
        let emptiness = index_is_empty_at(path);
        emptiness.map_err(|cause| anyhow::anyhow!(cause))
    }

    /// Forwards to this module's free function `search_repos_at`, converting any
    /// failure into an `anyhow::Error`.
    fn search_repos_at(
        &self,
        path: &std::path::Path,
        query: &str,
        limit: usize,
    ) -> anyhow::Result<Vec<(String, f32)>> {
        let hits = search_repos_at(path, query, limit);
        hits.map_err(|cause| anyhow::anyhow!(cause))
    }
}

/// Crate-visible mutex that exists only in test builds.
///
/// NOTE(review): presumably held by search-related tests to serialize access to
/// shared index state — confirm against the tests that lock it.
#[cfg(test)]
pub(crate) static SEARCH_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

Expand Down
Loading