Skip to content
Open
8 changes: 6 additions & 2 deletions .claude/settings.local.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
{
"enabledMcpjsonServers": ["codeembed"],
"enabledMcpjsonServers": [
"codeembed"
],
"permissions": {
"allow": ["mcp__codeembed__search"]
"allow": [
"mcp__codeembed__search"
]
}
}
6 changes: 4 additions & 2 deletions .mcp.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{
"mcpServers": {
"codeembed": {
"command": "uv",
"args": ["run", "codeembed", "serve"]
"command": "codeembed",
"args": [
"serve"
]
}
}
}
6 changes: 4 additions & 2 deletions .vscode/mcp.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{
"servers": {
"codeembed": {
"command": "uv",
"args": ["run", "codeembed", "serve"]
"command": "codeembed",
"args": [
"serve"
]
}
}
}
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CodeEmbed

Embeds your codebase into a local vector database and exposes it as an MCP tool, giving AI assistants like Claude Code fast semantic search over your code.
Embeds your codebase into a local vector and graph database and exposes it as an MCP tool, giving AI assistants like Claude Code fast semantic search over your code using Graph RAG.

Particularly useful for questions like:

Expand All @@ -12,7 +12,7 @@ For other questions, the agent will fall back to normal lookups.
CodeEmbed can improve lookup speed and accuracy, especially for finding existing implementations before writing new ones.
Note that the biggest bottleneck in coding agents is LLM thinking and token generation — solid prompts and follow-up questions still matter.

Uses [ChromaDB](https://github.com/chroma-core/chroma) for local vector storage and either [Ollama](https://github.com/ollama/ollama) or OpenAI (including OpenAI models via Azure AI Foundry) for LLM analysis.
Uses [ChromaDB](https://github.com/chroma-core/chroma) for vector storage, SQLite for graph storage, and either [Ollama](https://github.com/ollama/ollama) or OpenAI (including OpenAI models via Azure AI Foundry) for LLM analysis.

## Prerequisites

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "codeembed"
version = "0.1.1"
version = "0.2.0b3"
description = "Embeds your codebase and makes it available for quick LLM lookups via MCP."
readme = "README.md"
requires-python = ">=3.11"
Expand Down
21 changes: 17 additions & 4 deletions src/codeembed/bootstrap/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from codeembed.doc_embedder.doc_embedder import DocEmbedder
from codeembed.doc_provider.local_doc_provider import LocalDocProvider
from codeembed.doc_search_service.doc_search_service import DocSearchService
from codeembed.graph_db.sqlite_adapter import SqliteGraphDb
from codeembed.llm.base import LLMServiceBase
from codeembed.llm.ollama_adapter import OllamaLLMService
from codeembed.vector_db.chromadb_adapter import ChromaDbAdapter
Expand All @@ -23,10 +24,21 @@
_DEFAULT_SLEEP_INTERVAL = 60


@lru_cache(maxsize=1)
def get_vector_db() -> ChromaDbAdapter:
    """Return the ChromaDB adapter bound to the ``codebase`` collection.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    adapter is built on the first call and the same instance is handed back
    on every later call.
    """
    adapter = ChromaDbAdapter(collection_name="codebase")
    return adapter


@lru_cache(maxsize=1)
def get_graph_db() -> SqliteGraphDb:
    """Return the SQLite-backed graph database stored at ``.codeembed/graph.db``.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    database object is built on the first call and the same instance is
    handed back on every later call.
    """
    # NOTE(review): the path is relative — presumably resolved against the
    # current working directory; confirm in SqliteGraphDb.
    graph_store = SqliteGraphDb(db_path=".codeembed/graph.db")
    return graph_store


@lru_cache(maxsize=1)
def get_search_service() -> DocSearchService:
    """Return the search service wired to the shared vector and graph stores.

    Both stores come from their cached accessors, so the search service uses
    the same adapter instances as every other caller of ``get_vector_db`` and
    ``get_graph_db``. Previously the body also built a private
    ``ChromaDbAdapter`` and a one-argument ``DocSearchService`` that were
    overwritten straight away; that dead construction has been removed.

    The result itself is memoized by ``lru_cache``, so repeated calls return
    the same ``DocSearchService`` instance.
    """
    vector_db = get_vector_db()
    graph_db = get_graph_db()
    return DocSearchService(vector_db, graph_db)


Expand Down Expand Up @@ -195,10 +207,11 @@ def get_embedder_service() -> DocEmbedder:
base_path=".",
supported_file_extensions=_SUPPORTED_FILE_EXTENSIONS,
)
vector_db = ChromaDbAdapter(collection_name="codebase")
vector_db = get_vector_db()
llm_service = get_llm_service()
graph_db = get_graph_db()
embedder = DocEmbedder(
doc_provider, vector_db, llm_service, llm_model=config.llm_model, debounce_seconds=config.debounce
doc_provider, vector_db, graph_db, llm_service, llm_model=config.llm_model, debounce_seconds=config.debounce
)
return embedder

Expand Down
12 changes: 12 additions & 0 deletions src/codeembed/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,18 @@ def serve():
mcp.run(transport="stdio")


@app.command()
def search(
    query: str = typer.Argument(..., help="Natural-language search query"),
    top_n: int = typer.Option(10, "--top-n", "-n", help="Number of results to return"),
):
    """Search the embedded codebase using semantic similarity."""
    # The service factory is imported inside the command rather than at module
    # level — presumably so other CLI commands do not pay for loading it.
    from codeembed.bootstrap.services import get_search_service

    service = get_search_service()
    # Print whatever the service returns for this query, limited by top_n.
    typer.echo(service.search(query, top_n))


@app.command()
def embed():
"""Embed codebase into the vector database."""
Expand Down
6 changes: 4 additions & 2 deletions src/codeembed/delta_computer/delta_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, doc_provider: DocProviderBase, vector_db: VectorDbBase, debou
self._vector_db = vector_db
self._debounce_seconds = debounce_seconds

def compute_deltas(self) -> Tuple[Set[UUID], Set[str]]:
def compute_deltas(self) -> Tuple[Set[UUID], Set[str], Set[str]]:
"""
Returns chunk IDs to delete, file paths to process, and file paths that no longer exist and should be deleted.

Expand All @@ -26,6 +26,7 @@ def compute_deltas(self) -> Tuple[Set[UUID], Set[str]]:

file_path_to_chunk_ids: Dict[str, List[UUID]] = {}
chunk_ids_to_delete: Set[UUID] = set()
file_paths_to_delete: Set[str] = set()

# Collect modified_at stored in our database.
old_modified_at: Dict[str, datetime] = {}
Expand Down Expand Up @@ -71,5 +72,6 @@ def compute_deltas(self) -> Tuple[Set[UUID], Set[str]]:
if file_path not in current:
for chunk_id in file_path_to_chunk_ids.get(file_path, []):
chunk_ids_to_delete.add(chunk_id)
file_paths_to_delete.add(file_path)

return chunk_ids_to_delete, file_paths_to_update
return chunk_ids_to_delete, file_paths_to_update, file_paths_to_delete
Loading