diff --git a/CHANGELOG.md b/CHANGELOG.md index b358a63..cc4844c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.1] - 2026-04-18 + +### Added +- **Capability-based permission engine** (PR #72): Framework-independent engine that replaces the ConfirmationPlugin-based HITL. Tools declare capabilities (e.g. `filesystem.write(path=...)`, `http.read`, `shell.exec`) via `@register_tool(capabilities=...)`. Rules are evaluated from four sources — builtin defaults, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, and in-memory session. When no rule matches, the user is prompted with `Allow once / for session / always (save to project) / Deny`; "always" grants persist into project settings. +- **Matchers**: `PathMatcher` (with `**` glob), `URLMatcher`, `ShellMatcher`, and `StringGlobMatcher`. +- **ADK `PermissionPlugin`**: capability gating for ADK agents. +- **LangGraph `wrap_tool_for_permission`**: per-tool permission wrapping for LangGraph `ToolNode`. +- **`permissions` and `permissions_enabled` settings** for runtime control. +- **`EXEMPT` sentinel**: opts a tool out of the permission engine (used for backend state tools and ADK `transfer_to_agent`). +- **Real BM25 backends**: `bm25s` (preferred) and `rank_bm25` fallback. + +### Changed +- **`@register_tool`** now requires the `capabilities=` kwarg. +- **Default grants broadened**: `filesystem.*` auto-extends to the parent directory; `memory.*` and `kb.*` are allowed by default. +- **Ask prompt** now shows the directory-scope target. +- **Workflow init**: `_ensure_managers_initialized` runs in a worker thread to avoid blocking the event loop. +- **HITL confirmation** extracted into a backend-neutral module during the permissions migration. 
+- **Memory tools**: deduped registry-bound and factory-bound entry points. + +### Fixed +- Matcher canonicalize/matches preserves the `*` wildcard. +- `KnowledgeBaseManager` concurrency contract tightened. + +### Removed +- **`PermissionLevel`** (SAFE / CAUTION / DANGEROUS), `ConfirmationPlugin`, `_wrap_for_confirmation`, `hitl_tools`, and the `hitl_enabled` setting — superseded by the permission engine. + ## [0.5.0] - 2026-04-17 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 44dd3ef..ddece9f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,7 +47,7 @@ agentic-cli/ │ │ ├── persistence/ # Checkpointers, stores │ │ └── tools/ # LangChain-compatible wrappers │ ├── tools/ -│ │ ├── registry.py # ToolRegistry, @register_tool, ToolCategory, PermissionLevel +│ │ ├── registry.py # ToolRegistry, @register_tool, ToolCategory │ │ ├── executor.py # SafePythonExecutor │ │ ├── knowledge_tools.py # kb_search, kb_ingest, kb_list, kb_read │ │ ├── arxiv_tools.py # search_arxiv, fetch_arxiv_paper, analyze_arxiv_paper @@ -62,7 +62,7 @@ agentic-cli/ │ │ ├── memory_tools.py # save_memory, search_memory + MemoryStore │ │ ├── planning_tools.py # save_plan, get_plan + PlanStore │ │ ├── task_tools.py # save_tasks, get_tasks + TaskStore -│ │ ├── hitl_tools.py # request_approval + ApprovalManager, HITLConfig +│ │ ├── reflection_tools.py # save_reflection + ToolReflectionStore │ │ ├── shell/ # 8-layer shell security │ │ └── webfetch/ # Fetcher, converter, validator, robots │ ├── knowledge_base/ @@ -114,6 +114,9 @@ Workflow: 2. For features: create `feature/` (or `fix/` or `refactor/`) from `develop`, work there, merge back to `develop` 3. When ready to release: merge `develop` → `main` and tag the release +### What NOT to commit +- `docs/` is gitignored on purpose (see `.gitignore`). It is a scratchpad for review notes, plans, and internal analysis. 
**Never `git add docs/…` or suggest committing anything under `docs/`.** If a document belongs in the repo, it lives elsewhere (README, CHANGELOG, top-level `*.md`). + ## Development Principles ### Code Style @@ -130,7 +133,8 @@ Workflow: ### Key Design Patterns - **Tool error handling**: All tools return `{"success": bool, ...}` dicts. Never raise `ToolError`. -- **Tool registration**: Use `@register_tool(category=..., permission_level=..., description=...)` decorator. Tools are auto-discovered via the global `ToolRegistry`. +- **Tool registration**: Use `@register_tool(category=..., capabilities=..., description=...)` decorator. `capabilities=` is required — pass `EXEMPT` for tools that need no permission check or a list of `Capability(name, target_arg=...)` tuples the engine matches against rules. Tools are auto-discovered via the global `ToolRegistry`. +- **Permissions**: `workflow/permissions/` holds a framework-independent engine that evaluates declared capabilities against rules from four sources (builtin, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, in-memory session). ADK + LangGraph gate tool calls via `workflow/adk/permission_plugin.py::PermissionPlugin` and `workflow/langgraph/permission_wrap.py::wrap_tool_for_permission`. See `docs/superpowers/specs/2026-04-18-permissions-system-design.md`. - **Service registry**: Tools access services and shared state via `get_service(key)` from `workflow.service_registry`. A single ContextVar holds a `dict[str, Any]` set by the workflow manager during processing. Complex services (KBManager, SandboxManager, MemoryStore) are lazily created; simple state (plan string, task list) lives directly in the registry dict. - **Manager detection**: Tools decorated with `@requires("kb_manager")` etc. are scanned by `BaseWorkflowManager._detect_required_managers()` which lazily creates only the needed services. 
- **Atomic writes**: Use `atomic_write_json`/`atomic_write_text` from `persistence/_utils.py` for file persistence. diff --git a/README.md b/README.md index 7ee0ce5..6f72800 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Agentic CLI provides the core infrastructure for building interactive CLI applic ├─────────────────────────────┬───────────────────────────────────────┤ │ GoogleADKWorkflowManager │ LangGraphWorkflowManager │ │ (default) │ (optional: langgraph extra) │ -│ + ConfirmationPlugin │ + confirmation tool wrapper │ +│ + PermissionPlugin │ + wrap_tool_for_permission │ │ + LLMLoggingPlugin │ + native ToolNode │ │ + TaskProgressPlugin │ │ └─────────────────────────────┴───────────────────────────────────────┘ @@ -145,7 +145,7 @@ manager = GoogleADKWorkflowManager( ``` ADK integrations: -- **ConfirmationPlugin** — intercepts `DANGEROUS` tools and prompts the user for approval +- **PermissionPlugin** — evaluates each tool's declared capabilities against the permission engine and prompts the user when no rule matches - **LLMLoggingPlugin** — structured logging of LLM requests/responses - **TaskProgressPlugin** — streams the task checklist into its own thinking box @@ -171,7 +171,7 @@ Features: - **Explicit provider support**: Uses `langchain-google-genai` for Gemini (not VertexAI) - **Thinking mode**: Native support for Claude and Gemini thinking/reasoning - **Retry policies**: Automatic retry with exponential backoff -- **Confirmation wrapper**: Wraps `DANGEROUS` tools to request HITL approval +- **Permission wrapper**: Wraps each tool to gate execution through the permission engine - **Event streaming**: Real-time workflow events via `WorkflowEvent` Requires: `pip install agentic-cli[langgraph]` @@ -186,7 +186,7 @@ Requires: `pip install agentic-cli[langgraph]` | State persistence | In-memory | Memory, PostgreSQL, or SQLite | | Thinking support | Native (Gemini) | Native (Claude & Gemini) | | Retry handling | Built-in | Built-in with backoff | -| 
HITL confirmation | ConfirmationPlugin | Tool wrapper | +| Permission gate | PermissionPlugin | wrap_tool_for_permission | | Context trimming | Native | Native | ### Auto-selection via Settings @@ -320,11 +320,12 @@ configs = [coordinator, researcher, analyst] Tools are regular Python functions with type hints and docstrings. **All tools return `{"success": bool, ...}` dicts** — never raise exceptions. ```python -from agentic_cli.tools import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools import register_tool, ToolCategory +from agentic_cli.workflow.permissions import Capability, EXEMPT @register_tool( - category=ToolCategory.DATA, - permission_level=PermissionLevel.SAFE, + category=ToolCategory.NETWORK, + capabilities=[Capability("http.read")], description="Search the database for matching records.", ) def search_database(query: str, limit: int = 10) -> dict: @@ -341,15 +342,14 @@ def search_database(query: str, limit: int = 10) -> dict: return {"success": True, "results": results, "count": len(results)} ``` -Registering via `@register_tool` is optional — you can pass raw callables into `AgentConfig.tools`. Registering gives the tool metadata for the registry, permission-aware HITL wrapping, and tool-summary formatting. +Registering via `@register_tool` is optional — you can pass raw callables into `AgentConfig.tools`. Registering gives the tool metadata for the registry, capability-aware permission gating, and tool-summary formatting. -### Permission Levels +### Capabilities -| Level | Behavior | -|-------|----------| -| `SAFE` | Runs silently | -| `CAUTION` | Runs; result is surfaced prominently | -| `DANGEROUS` | Intercepted by HITL — user must approve before execution | +Tool access is gated by the **permission engine** (see the HITL section below). Each registered tool declares what it touches via `capabilities=`: + +- **`Capability("namespace.action", target_arg="...")`** — e.g. 
`Capability("filesystem.write", target_arg="path")`, `Capability("http.read", target_arg="url")`, `Capability("shell.exec", target_arg="command")`. The engine resolves `target_arg` against the actual tool-call arguments and matches against rules. +- **`EXEMPT`** — opts the tool out of the engine entirely. Reserved for backend-internal tools (e.g. ADK `transfer_to_agent`, backend state tools). ### Built-in Tools @@ -430,7 +430,7 @@ list_dir("src/", include_hidden=False) diff_compare(source_a="old.txt", source_b="new.txt") ``` -**WRITE tools (caution)** +**WRITE tools** ```python from agentic_cli.tools import write_file, edit_file @@ -534,15 +534,9 @@ Each tool keeps at most N reflections (FIFO eviction). Reflections can be inject #### HITL (Human-in-the-Loop) -Two mechanisms: - -1. **Automatic confirmation for `DANGEROUS` tools** — handled by ADK's `ConfirmationPlugin` and LangGraph's tool wrapper. No code changes needed; just mark the tool `DANGEROUS`. -2. **Explicit `request_approval` tool** — for domain-level checkpoints: +Tool calls are gated by the **permission engine** (`workflow/permissions/`). Each tool declares a list of capabilities (e.g. `filesystem.write(path=...)`); the engine evaluates them against rules from four sources (builtin defaults, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, in-memory session). When no rule matches, the user is prompted with `Allow once / Allow for session / Allow always (save to project) / Deny`. Always-grants persist into the project settings file so the next run picks them up automatically. - ```python - from agentic_cli.tools import hitl_tools - # request_approval(message, options) -> {"success": True, "choice": "..."} - ``` +See `docs/superpowers/specs/2026-04-18-permissions-system-design.md` for the full design. 
## CLI Commands @@ -718,15 +712,24 @@ agentic-cli/ │ │ ├── adk/ │ │ │ ├── manager.py # GoogleADKWorkflowManager │ │ │ ├── event_processor.py -│ │ │ ├── plugins.py # ConfirmationPlugin, LLMLoggingPlugin +│ │ │ ├── plugins.py # LLMLoggingPlugin +│ │ │ ├── permission_plugin.py # PermissionPlugin (capability gating) │ │ │ └── task_progress_plugin.py -│ │ └── langgraph/ -│ │ ├── manager.py # LangGraphWorkflowManager -│ │ ├── graph_builder.py -│ │ ├── state.py -│ │ └── persistence/ # Checkpointers and stores +│ │ ├── langgraph/ +│ │ │ ├── manager.py # LangGraphWorkflowManager +│ │ │ ├── graph_builder.py +│ │ │ ├── permission_wrap.py # wrap_tool_for_permission +│ │ │ ├── state.py +│ │ │ └── persistence/ # Checkpointers and stores +│ │ └── permissions/ # Framework-independent permission engine +│ │ ├── capabilities.py # Capability, ResolvedCapability, EXEMPT +│ │ ├── rules.py # Rule, Effect, RuleSource, CheckResult, AskScope +│ │ ├── matchers.py # Path/URL/Shell/StringGlob matchers +│ │ ├── store.py # PermissionContext, BUILTIN_RULES, JSON load/save +│ │ ├── prompt.py # build_request + parse_response +│ │ └── engine.py # PermissionEngine │ ├── tools/ -│ │ ├── registry.py # ToolRegistry, ToolCategory, PermissionLevel +│ │ ├── registry.py # ToolRegistry, ToolCategory, register_tool │ │ ├── factories.py # Backend-aware tool factories │ │ ├── executor.py # SafePythonExecutor │ │ ├── arxiv_tools.py # search_arxiv, fetch_arxiv_paper, ingest_arxiv_paper @@ -744,7 +747,6 @@ agentic-cli/ │ │ ├── pdf_utils.py # PDF text extraction helpers │ │ ├── memory_tools.py # save/search/update/delete + MemoryStore │ │ ├── reflection_tools.py # save_reflection + ToolReflectionStore -│ │ ├── hitl_tools.py # request_approval │ │ ├── _core/ # Shared planning/task logic │ │ │ ├── planning.py │ │ │ └── tasks.py diff --git a/pyproject.toml b/pyproject.toml index 152c1f2..e2630e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = 
"agentic-cli" -version = "0.5.0" +version = "0.5.1" description = "A framework for building domain-specific agentic CLI applications" readme = "README.md" license = "MIT" @@ -46,6 +46,8 @@ kb = [ "torch>=2.2.0", "sentence-transformers>=2.0.0,<3.0.0", "faiss-cpu>=1.7.0", + "bm25s>=0.2.0", + "rank-bm25>=0.2.2", ] langgraph = [ "langgraph>=0.2.0", diff --git a/src/agentic_cli/__init__.py b/src/agentic_cli/__init__.py index 849ec3c..0eee686 100644 --- a/src/agentic_cli/__init__.py +++ b/src/agentic_cli/__init__.py @@ -92,4 +92,4 @@ def __getattr__(name: str): "CLISettingsMixin", ] -__version__ = "0.5.0" +__version__ = "0.5.1" diff --git a/src/agentic_cli/knowledge_base/_bm25_backends.py b/src/agentic_cli/knowledge_base/_bm25_backends.py new file mode 100644 index 0000000..08625cf --- /dev/null +++ b/src/agentic_cli/knowledge_base/_bm25_backends.py @@ -0,0 +1,137 @@ +"""Real BM25 index backends (bm25s, rank_bm25). + +Both implement the same interface as MockBM25Index so create_bm25_index() +can return any of them interchangeably. Tokenization is lowercase whitespace +split to match MockBM25Index; the scoring model is the library's own BM25. + +Neither underlying library supports true incremental add/remove, so we keep +tokenized docs + chunk_ids in memory and rebuild the model lazily on search. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +from agentic_cli.file_utils import atomic_write_json + + +def _tokenize(text: str) -> list[str]: + return text.lower().split() + + +class _BM25BackendBase: + _INDEX_FILE: str = "" + + def __init__(self): + self._chunk_ids: list[str] = [] + self._tokenized: list[list[str]] = [] + self._model = None + + @property + def size(self) -> int: + return len(self._chunk_ids) + + def add_documents(self, chunk_ids: list[str], texts: list[str]) -> None: + for cid, text in zip(chunk_ids, texts): + self._chunk_ids.append(cid) + self._tokenized.append(_tokenize(text)) + self._model = None + + def remove_documents(self, chunk_ids: list[str]) -> None: + remove_set = set(chunk_ids) + keep = [ + (cid, toks) + for cid, toks in zip(self._chunk_ids, self._tokenized) + if cid not in remove_set + ] + if keep: + self._chunk_ids, self._tokenized = map(list, zip(*keep)) + else: + self._chunk_ids, self._tokenized = [], [] + self._model = None + + def rebuild(self, chunk_ids: list[str], texts: list[str]) -> None: + self._chunk_ids = [] + self._tokenized = [] + self._model = None + self.add_documents(chunk_ids, texts) + + def save(self, path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + atomic_write_json( + path / self._INDEX_FILE, + {"chunk_ids": self._chunk_ids, "tokenized": self._tokenized}, + ) + + def load(self, path: Path) -> None: + index_path = path / self._INDEX_FILE + if not index_path.exists(): + return + data = json.loads(index_path.read_text()) + self._chunk_ids = data["chunk_ids"] + self._tokenized = data["tokenized"] + self._model = None + + +class RankBM25Index(_BM25BackendBase): + """BM25 backed by rank_bm25.BM25Plus (pure Python). + + BM25Plus rather than BM25Okapi because Okapi's IDF can go zero or + negative when a term appears in most of the corpus; BM25Plus adds a + delta offset that guarantees positive contributions on real matches. 
+ """ + + _INDEX_FILE = "bm25_rank.json" + + def search(self, query: str, top_k: int = 10) -> list[tuple[str, float]]: + if not self._chunk_ids: + return [] + query_tokens = _tokenize(query) + if not query_tokens: + return [] + query_set = set(query_tokens) + if self._model is None: + from rank_bm25 import BM25Plus + + self._model = BM25Plus(self._tokenized) + scores = self._model.get_scores(query_tokens) + scored: list[tuple[str, float]] = [] + for cid, doc_tokens, score in zip( + self._chunk_ids, self._tokenized, scores + ): + if query_set.intersection(doc_tokens): + scored.append((cid, float(score))) + scored.sort(key=lambda x: x[1], reverse=True) + return scored[:top_k] + + +class BM25sIndex(_BM25BackendBase): + """BM25 backed by bm25s (NumPy/C-accelerated).""" + + _INDEX_FILE = "bm25s_sidecar.json" + + def _build_model(self) -> None: + import bm25s + + model = bm25s.BM25() + model.index(self._tokenized, show_progress=False) + self._model = model + + def search(self, query: str, top_k: int = 10) -> list[tuple[str, float]]: + if not self._chunk_ids: + return [] + query_tokens = _tokenize(query) + if not query_tokens: + return [] + if self._model is None: + self._build_model() + k = min(top_k, len(self._chunk_ids)) + docs, scores = self._model.retrieve( + [query_tokens], k=k, show_progress=False + ) + results: list[tuple[str, float]] = [] + for idx, score in zip(docs[0], scores[0]): + if score > 0: + results.append((self._chunk_ids[int(idx)], float(score))) + return results diff --git a/src/agentic_cli/knowledge_base/manager.py b/src/agentic_cli/knowledge_base/manager.py index 25ab021..5964871 100644 --- a/src/agentic_cli/knowledge_base/manager.py +++ b/src/agentic_cli/knowledge_base/manager.py @@ -96,6 +96,26 @@ class KnowledgeBaseManager: """Main interface for knowledge base operations. Provides document ingestion, semantic search, and document management. 
+ + Concurrency model + ----------------- + Two lock surfaces with different semantics: + + * ``_lock`` (``threading.Lock``) — guards every mutation to the + in-memory document/chunk/vector/BM25 state and the check-and-set + of ``_backfill_running``. The sync mutation API + (``ingest_document``, ``search``, ``delete_document``, ``clear``) + is safe to call from any thread. + * ``_sidecar_locks`` (``dict[str, asyncio.Lock]``) — per-doc locks + that serialize concurrent LLM-backed sidecar generation for the + same document. Always obtain these through + :meth:`get_or_create_sidecar_lock`; never index the dict directly. + + The async sidecar path (``backfill_sidecars`` and the lazy + ``kb_read`` fallback in ``tools/knowledge_tools``) is safe within a + single asyncio event loop. Sharing a manager across multiple event + loops is NOT supported — the per-doc ``asyncio.Lock`` is bound to + the loop it was created on. """ def __init__( @@ -302,6 +322,23 @@ def _delete_sidecar(self, doc_id: str) -> None: if path.exists(): path.unlink() + def get_or_create_sidecar_lock(self, doc_id: str) -> asyncio.Lock: + """Return the per-doc async lock used to serialize sidecar writes. + + Same lock is returned for the same ``doc_id`` across repeat calls, + so ``backfill_sidecars`` and the lazy ``kb_read`` fallback cannot + double-generate a sidecar for the same document. + + The dict insert is guarded by ``_lock`` so two threads racing to + create the lock for a new doc will not produce two distinct + ``asyncio.Lock`` instances. + """ + existing = self._sidecar_locks.get(doc_id) + if existing is not None: + return existing + with self._lock: + return self._sidecar_locks.setdefault(doc_id, asyncio.Lock()) + def _index_md_path(self) -> Path: return self.kb_dir / "index.md" @@ -1028,35 +1065,40 @@ async def backfill_sidecars( Returns: Number of sidecars written this call. 
""" - if self._backfill_running: - raise BackfillAlreadyRunning( - "backfill_sidecars is already running on this KB" - ) - self._backfill_running = True - try: - written = 0 - # Snapshot the docs that need backfilling up front so the - # progress denominator is stable even if ingest/delete runs - # concurrently. + # Check-and-set the running flag and snapshot the work list under + # one lock acquisition so a second thread cannot slip past the + # guard between the read and the write. + with self._lock: + if self._backfill_running: + raise BackfillAlreadyRunning( + "backfill_sidecars is already running on this KB" + ) + self._backfill_running = True todo = [ doc - for doc in list(self._documents.values()) + for doc in self._documents.values() if not self._sidecar_path(doc.id).exists() ] + try: + written = 0 total = len(todo) for i, doc in enumerate(todo): # Guard against delete-during-backfill — doc may have - # been removed since the snapshot was taken. - if doc.id not in self._documents: - continue - lock = self._sidecar_locks.setdefault(doc.id, asyncio.Lock()) + # been removed since the snapshot was taken. Checked + # under the lock so a concurrent delete_document cannot + # interleave between the check and use. + with self._lock: + if doc.id not in self._documents: + continue + lock = self.get_or_create_sidecar_lock(doc.id) async with lock: # Double-check inside the lock — another task may # have written it (e.g. lazy kb_read fallback). 
if self._sidecar_path(doc.id).exists(): continue - if doc.id not in self._documents: - continue + with self._lock: + if doc.id not in self._documents: + continue if progress_cb is not None: progress_cb(i, total, doc) payload = await self.generate_sidecar_payload( @@ -1066,7 +1108,8 @@ async def backfill_sidecars( written += 1 return written finally: - self._backfill_running = False + with self._lock: + self._backfill_running = False class BackfillAlreadyRunning(RuntimeError): diff --git a/src/agentic_cli/tools/__init__.py b/src/agentic_cli/tools/__init__.py index 443094f..43cd318 100644 --- a/src/agentic_cli/tools/__init__.py +++ b/src/agentic_cli/tools/__init__.py @@ -10,7 +10,6 @@ Framework Tools: - memory_tools: Working and long-term memory tools - - hitl_tools: Human-in-the-loop approval tools - web_search: Web search with pluggable backends (Tavily, Brave) For resilience patterns, use tenacity, pybreaker, aiolimiter directly. @@ -48,7 +47,6 @@ from agentic_cli.tools.webfetch_tool import web_fetch from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, ToolDefinition, ToolRegistry, get_registry, @@ -61,7 +59,6 @@ __all__ = [ # Registry classes "ToolCategory", - "PermissionLevel", "ToolDefinition", "ToolRegistry", "get_registry", @@ -103,7 +100,6 @@ "ask_clarification", # Framework tool modules (lazy loaded) "memory_tools", - "hitl_tools", "sandbox_tools", "reflection_tools", ] @@ -112,7 +108,6 @@ # Lazy loading for framework tool modules _lazy_tool_modules = { "memory_tools": "agentic_cli.tools.memory_tools", - "hitl_tools": "agentic_cli.tools.hitl_tools", "sandbox_tools": "agentic_cli.tools.sandbox", "reflection_tools": "agentic_cli.tools.reflection_tools", } diff --git a/src/agentic_cli/tools/adk/state_tools.py b/src/agentic_cli/tools/adk/state_tools.py index 212423b..00f3b42 100644 --- a/src/agentic_cli/tools/adk/state_tools.py +++ b/src/agentic_cli/tools/adk/state_tools.py @@ -13,8 +13,11 @@ from agentic_cli.tools._core.planning import 
summarize_checkboxes from agentic_cli.tools._core.tasks import validate_tasks, normalize_tasks, filter_tasks +from agentic_cli.tools.registry import ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_plan(content: str, tool_context: ToolContext) -> dict[str, Any]: """Save or update the execution plan as markdown with checkboxes. @@ -33,6 +36,7 @@ def save_plan(content: str, tool_context: ToolContext) -> dict[str, Any]: return {"success": True, "message": message} +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_plan(tool_context: ToolContext) -> dict[str, Any]: """Retrieve the current execution plan. @@ -47,6 +51,7 @@ def get_plan(tool_context: ToolContext) -> dict[str, Any]: return {"success": True, "content": plan} +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_tasks( tasks: list[dict[str, Any]], tool_context: ToolContext ) -> dict[str, Any]: @@ -86,6 +91,7 @@ def save_tasks( } +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_tasks( status: str = "", priority: str = "", diff --git a/src/agentic_cli/tools/arxiv_tools.py b/src/agentic_cli/tools/arxiv_tools.py index f10b937..2b195e6 100644 --- a/src/agentic_cli/tools/arxiv_tools.py +++ b/src/agentic_cli/tools/arxiv_tools.py @@ -20,8 +20,8 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( ARXIV_SOURCE, KB_MANAGER, @@ -122,7 +122,8 @@ async def _fetch_arxiv_paper_with_source(source, arxiv_id: str) -> dict[str, Any @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + + capabilities=[Capability("http.read")], description="Search arXiv for academic papers by query, category, or date range. 
Use this to find research papers on a topic.", ) def search_arxiv( @@ -170,7 +171,8 @@ def search_arxiv( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + + capabilities=[Capability("http.read")], description="Fetch metadata for a specific arXiv paper by ID or URL. Returns title, authors, abstract, categories, and PDF URL.", ) async def fetch_arxiv_paper( @@ -296,7 +298,8 @@ async def _ingest_arxiv_paper_with_services( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + + capabilities=[Capability("http.read"), Capability("kb.write")], description="Download an arXiv paper's PDF, extract text, and ingest it into the knowledge base. Use this to add a specific arXiv paper to long-term storage so it can be searched later.", ) async def ingest_arxiv_paper( diff --git a/src/agentic_cli/tools/execution_tools.py b/src/agentic_cli/tools/execution_tools.py index ccc8160..7d723f1 100644 --- a/src/agentic_cli/tools/execution_tools.py +++ b/src/agentic_cli/tools/execution_tools.py @@ -9,13 +9,13 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("python.exec")], description="Stateless Python scratchpad for quick calculations, formula checks, and mathematical reasoning. Each call starts fresh — no state persists. Only whitelisted modules (math, numpy, pandas, json, etc.) are available. 
Use sandbox_execute instead for stateful work.", ) def execute_python( diff --git a/src/agentic_cli/tools/factories.py b/src/agentic_cli/tools/factories.py index cf9b354..a437554 100644 --- a/src/agentic_cli/tools/factories.py +++ b/src/agentic_cli/tools/factories.py @@ -25,6 +25,11 @@ def make_memory_tools(memory_store, embedding_service=None) -> list[Callable]: """Create memory tools bound to a MemoryStore. + Thin closure-bound wrappers around the shared helpers in + ``tools.memory_tools``. The helpers take the store as an explicit arg + so the closure-bound and module-level (registry-bound) tool versions + stay in lockstep. + Args: memory_store: A MemoryStore instance. embedding_service: Optional EmbeddingService (unused, kept for interface compat). @@ -32,108 +37,55 @@ def make_memory_tools(memory_store, embedding_service=None) -> list[Callable]: Returns: [save_memory, search_memory, update_memory, delete_memory] """ + from agentic_cli.tools.memory_tools import ( + _SENTINEL, + _delete_memory_with_store, + _save_memory_with_store, + _search_memory_with_store, + _update_memory_with_store, + save_memory as _orig_save, + search_memory as _orig_search, + update_memory as _orig_update, + delete_memory as _orig_delete, + ) def save_memory( content: str, tags: list[str] | None = None, importance: int = 5, ) -> dict[str, Any]: - """Save information to persistent memory. - - Use this to remember important facts, preferences, or learnings - that should persist across sessions. - - Args: - content: The content to store. - tags: Optional tags for categorization. - importance: Importance rating 1-10 (default 5). Higher = more important. - - Returns: - A dict with the stored item ID and any similar existing memories. 
- """ - result = memory_store.store_with_similarity_check(content, tags=tags, importance=importance) - return { - "success": True, - "item_id": result["item_id"], - "message": "Saved to persistent memory", - "similar_existing": result["similar_existing"], - } + return _save_memory_with_store(memory_store, content, tags, importance) def search_memory( query: str, limit: int = 10, include_archived: bool = False, ) -> dict[str, Any]: - """Search persistent memory for stored information. - - Args: - query: The search query. - limit: Maximum number of results to return. - include_archived: If True, include archived (soft-deleted) memories. - - Returns: - A dict with matching memory items. - """ - results = memory_store.search(query, limit=limit, include_archived=include_archived) - items = [ - { - "id": item.id, - "content": item.content, - "tags": item.tags, - "importance": item.importance, - } - for item in results - ] - return { - "success": True, - "query": query, - "items": items, - "count": len(items), - } + return _search_memory_with_store( + memory_store, query, limit, include_archived + ) def update_memory( item_id: str, content: str | None = None, - tags: list[str] | None = None, + tags: list[str] | None = _SENTINEL, ) -> dict[str, Any]: - """Update an existing memory item. - - Args: - item_id: ID of the memory to update. - content: New content (optional). - tags: New tags (optional). Pass explicitly to update; omit (None) to leave unchanged. - - Returns: - A dict indicating success. - """ - # Don't forward tags to store.update() unless explicitly provided, - # since the store's sentinel default means "leave unchanged" but None means "clear tags". 
- if tags is None: - updated = memory_store.update(item_id, content=content) - else: - updated = memory_store.update(item_id, content=content, tags=tags) - return {"success": True, "updated": updated} + return _update_memory_with_store(memory_store, item_id, content, tags) def delete_memory( item_id: str, purge: bool = False, ) -> dict[str, Any]: - """Delete a memory item (soft-delete by default). - - Args: - item_id: ID of the memory to delete. - purge: If True, permanently remove. If False, archive. - - Returns: - A dict indicating success. - """ - deleted = memory_store.delete(item_id, purge=purge) - return {"success": True, "deleted": deleted} + return _delete_memory_with_store(memory_store, item_id, purge) save_memory.__name__ = "save_memory" + save_memory.__doc__ = _orig_save.__doc__ search_memory.__name__ = "search_memory" + search_memory.__doc__ = _orig_search.__doc__ update_memory.__name__ = "update_memory" + update_memory.__doc__ = _orig_update.__doc__ delete_memory.__name__ = "delete_memory" + delete_memory.__doc__ = _orig_delete.__doc__ return [save_memory, search_memory, update_memory, delete_memory] diff --git a/src/agentic_cli/tools/file_read.py b/src/agentic_cli/tools/file_read.py index 6746064..7d88f8c 100644 --- a/src/agentic_cli/tools/file_read.py +++ b/src/agentic_cli/tools/file_read.py @@ -3,8 +3,6 @@ Provides safe, read-only tools for file system access: - read_file: Read file contents with optional offset/limit - diff_compare: Compare two text sources - -All tools in this module have PermissionLevel.SAFE. """ import difflib @@ -13,14 +11,14 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Read the contents of a file at the given path. 
Use this to examine source code, config files, or any text file. For finding files by name pattern use glob instead; for searching file contents use grep.", ) def read_file( @@ -110,7 +108,7 @@ def read_file( @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="source_a"), Capability("filesystem.read", target_arg="source_b")], description="Compare two text sources (files or strings) and show differences. Use this to see what changed between two versions of content.", ) def diff_compare( diff --git a/src/agentic_cli/tools/file_write.py b/src/agentic_cli/tools/file_write.py index 3c01a01..4a0a4a5 100644 --- a/src/agentic_cli/tools/file_write.py +++ b/src/agentic_cli/tools/file_write.py @@ -4,7 +4,6 @@ - write_file: Write content to a file (creates or overwrites) - edit_file: Replace text in a file (sed-like operation) -All tools in this module have PermissionLevel.CAUTION. Delete, move, and copy operations should use the shell tool. """ @@ -15,14 +14,14 @@ from agentic_cli.file_utils import atomic_write_text from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.WRITE, - permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("filesystem.write", target_arg="path")], description="Write content to a file (creates or overwrites). Use this to create new files or replace entire file contents. For partial modifications use edit_file instead.", ) def write_file( @@ -87,7 +86,7 @@ def write_file( @register_tool( category=ToolCategory.WRITE, - permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("filesystem.read", target_arg="path"), Capability("filesystem.write", target_arg="path")], description="Replace specific text in an existing file. Use this for targeted edits (find-and-replace). 
For creating or fully rewriting files use write_file instead.", ) def edit_file( diff --git a/src/agentic_cli/tools/glob_tool.py b/src/agentic_cli/tools/glob_tool.py index 980ca4a..057e03e 100644 --- a/src/agentic_cli/tools/glob_tool.py +++ b/src/agentic_cli/tools/glob_tool.py @@ -2,8 +2,6 @@ Provides file discovery using glob patterns: - glob: Find files matching a pattern (also serves as directory listing) - -This is a read-only tool with PermissionLevel.SAFE. """ import os @@ -13,14 +11,14 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Find files by name pattern (e.g. '**/*.py'). Use this to discover files in a directory tree. For searching inside file contents, use grep instead.", ) def glob( @@ -137,7 +135,7 @@ def glob( @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="List directory contents organized by type (directories first, then files). Use this when you need a structured overview of a directory; for pattern-based file search use glob.", ) def list_dir( diff --git a/src/agentic_cli/tools/grep_tool.py b/src/agentic_cli/tools/grep_tool.py index 58fea08..98c4384 100644 --- a/src/agentic_cli/tools/grep_tool.py +++ b/src/agentic_cli/tools/grep_tool.py @@ -2,8 +2,6 @@ Provides pattern-based content search across files: - grep: Search for patterns in files (ripgrep-like interface) - -This is a read-only tool with PermissionLevel.SAFE. 
""" import functools @@ -14,14 +12,14 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Search for text patterns inside file contents (regex or literal). Use this to find code references, function definitions, or any text across files. For finding files by name, use glob instead.", ) def grep( diff --git a/src/agentic_cli/tools/hitl_tools.py b/src/agentic_cli/tools/hitl_tools.py deleted file mode 100644 index 962a907..0000000 --- a/src/agentic_cli/tools/hitl_tools.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Human-in-the-loop tools for agentic workflows. - -Provides ApprovalManager for audit history of HITL confirmation decisions. -Actual confirmation is handled at the framework level by ADK ConfirmationPlugin -and LangGraph's _wrap_for_confirmation wrapper. - -Example: - from agentic_cli.tools.hitl_tools import ApprovalManager, ApprovalResult - - manager = ApprovalManager() - result = manager.record("req-1", approved=True) -""" - -from dataclasses import dataclass, field -from datetime import datetime - - -# --------------------------------------------------------------------------- -# Data classes -# --------------------------------------------------------------------------- - - -@dataclass -class ApprovalResult: - """Result of an approval decision.""" - - request_id: str - approved: bool - reason: str | None = None - decided_at: datetime = field(default_factory=datetime.now) - - -# --------------------------------------------------------------------------- -# ApprovalManager -# --------------------------------------------------------------------------- - - -class ApprovalManager: - """Manages approval request records. 
- - The actual blocking is handled by framework-level confirmation hooks - (ADK ConfirmationPlugin / LangGraph _wrap_for_confirmation). This manager - records decisions for audit/history purposes. - - Example: - manager = ApprovalManager() - result = manager.record(request_id, approved=True) - """ - - def __init__(self) -> None: - """Initialize approval manager.""" - self._history: list[ApprovalResult] = [] - - def record( - self, - request_id: str, - approved: bool, - reason: str | None = None, - ) -> ApprovalResult: - """Record an approval decision. - - Args: - request_id: The request ID. - approved: Whether the request was approved. - reason: Optional reason for the decision. - - Returns: - ApprovalResult with the recorded decision. - """ - result = ApprovalResult( - request_id=request_id, - approved=approved, - reason=reason, - ) - self._history.append(result) - return result - - @property - def history(self) -> list[ApprovalResult]: - """Get all recorded approval decisions.""" - return list(self._history) diff --git a/src/agentic_cli/tools/interaction_tools.py b/src/agentic_cli/tools/interaction_tools.py index e910c9c..1b8e6c6 100644 --- a/src/agentic_cli/tools/interaction_tools.py +++ b/src/agentic_cli/tools/interaction_tools.py @@ -8,13 +8,13 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import EXEMPT @register_tool( category=ToolCategory.INTERACTION, - permission_level=PermissionLevel.SAFE, + capabilities=EXEMPT, description="Ask the user a clarifying question and wait for their response. 
Use this when requirements are ambiguous or you need user input to proceed.", ) async def ask_clarification( diff --git a/src/agentic_cli/tools/knowledge_tools.py b/src/agentic_cli/tools/knowledge_tools.py index c58f428..dfa76e1 100644 --- a/src/agentic_cli/tools/knowledge_tools.py +++ b/src/agentic_cli/tools/knowledge_tools.py @@ -29,8 +29,8 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( get_service, require_service, @@ -413,8 +413,7 @@ async def _read_document_from_kbs( # Sidecar mode (default). Lazily generate if missing. sidecar_path = source_kb._sidecar_path(doc.id) if not sidecar_path.exists(): - import asyncio as _asyncio - lock = source_kb._sidecar_locks.setdefault(doc.id, _asyncio.Lock()) + lock = source_kb.get_or_create_sidecar_lock(doc.id) async with lock: if not sidecar_path.exists(): # Resolve content the same way full=True does — extract from @@ -599,7 +598,7 @@ def _find_document_in_kbs(doc_id_or_title: str) -> tuple: @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description="Search the local knowledge base for relevant documents using semantic similarity. Use this when you need to find previously ingested papers, notes, or documents.", ) def kb_search( @@ -626,7 +625,7 @@ def kb_search( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("kb.write")], description=( "Ingest a document into the knowledge base. " "REQUIRED: provide either 'content' (text) or 'url_or_path' (file path or URL). " @@ -685,7 +684,7 @@ async def kb_ingest( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description=( "Read a stored document by ID or title. 
Returns the per-document " "sidecar (summary, key claims, entities) by default. Pass full=True " @@ -713,7 +712,7 @@ async def kb_read( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description="List documents in the knowledge base with summaries. Filter by query or source type. Returns summaries, not full content.", ) def kb_list( @@ -740,7 +739,7 @@ def kb_list( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("kb.write")], description=( "Save an agent-curated concept page summarizing what the KB " "knows about a topic. Pages live at concepts/{slug}.md and are " @@ -778,7 +777,7 @@ async def kb_write_concept( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description=( "Search concept pages (agent-curated synthesis notes). " "Case-insensitive substring match; title hits rank above body " diff --git a/src/agentic_cli/tools/langgraph/state_tools.py b/src/agentic_cli/tools/langgraph/state_tools.py index cef95db..9c049f6 100644 --- a/src/agentic_cli/tools/langgraph/state_tools.py +++ b/src/agentic_cli/tools/langgraph/state_tools.py @@ -17,8 +17,11 @@ from agentic_cli.tools._core.planning import summarize_checkboxes from agentic_cli.tools._core.tasks import validate_tasks, normalize_tasks, filter_tasks +from agentic_cli.tools.registry import ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_plan( content: str, tool_call_id: Annotated[str, InjectedToolCallId], @@ -40,6 +43,7 @@ def save_plan( }) +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_plan( state: Annotated[dict, InjectedState], tool_call_id: Annotated[str, InjectedToolCallId], @@ -58,6 +62,7 @@ def get_plan( }) 
+@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_tasks( tasks: list[dict[str, Any]], tool_call_id: Annotated[str, InjectedToolCallId], @@ -103,6 +108,7 @@ def save_tasks( }) +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_tasks( status: str = "", priority: str = "", diff --git a/src/agentic_cli/tools/memory_tools.py b/src/agentic_cli/tools/memory_tools.py index 6e3ea5f..2bc0a74 100644 --- a/src/agentic_cli/tools/memory_tools.py +++ b/src/agentic_cli/tools/memory_tools.py @@ -31,8 +31,8 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import require_service, MEMORY_STORE @@ -428,9 +428,84 @@ def apply_forgetting(self, policy: ForgettingPolicy) -> dict[str, Any]: } +# --------------------------------------------------------------------------- +# Shared helpers — the single source of truth for tool behavior. +# +# Both the @register_tool wrappers below (service-registry bound) and the +# make_memory_tools() closures in tools/factories.py delegate to these +# helpers, so the two entry points cannot drift. 
+# --------------------------------------------------------------------------- + + +def _save_memory_with_store( + store: "MemoryStore", + content: str, + tags: list[str] | None, + importance: int, +) -> dict[str, Any]: + result = store.store_with_similarity_check( + content, tags=tags, importance=importance + ) + return { + "success": True, + "item_id": result["item_id"], + "message": "Saved to persistent memory", + "similar_existing": result["similar_existing"], + } + + +def _search_memory_with_store( + store: "MemoryStore", + query: str, + limit: int, + include_archived: bool, +) -> dict[str, Any]: + results = store.search(query, limit=limit, include_archived=include_archived) + items = [ + { + "id": item.id, + "content": item.content, + "tags": item.tags, + "importance": item.importance, + } + for item in results + ] + return { + "success": True, + "query": query, + "items": items, + "count": len(items), + } + + +def _update_memory_with_store( + store: "MemoryStore", + item_id: str, + content: str | None, + tags: list[str] | None | object, +) -> dict[str, Any]: + updated = store.update(item_id, content=content, tags=tags) + return {"success": True, "updated": updated} + + +def _delete_memory_with_store( + store: "MemoryStore", + item_id: str, + purge: bool, +) -> dict[str, Any]: + deleted = store.delete(item_id, purge=purge) + return {"success": True, "deleted": deleted} + + +# --------------------------------------------------------------------------- +# Registry-bound tool functions (@register_tool) — thin wrappers that resolve +# the store through the service registry and delegate to the helpers above. +# --------------------------------------------------------------------------- + + @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.write")], description="Save information to persistent memory that survives across sessions. 
Use this to remember user preferences, important facts, or learnings for future conversations.", ) def save_memory( @@ -440,71 +515,43 @@ def save_memory( ) -> dict[str, Any]: """Save information to persistent memory. - Use this to remember important facts, preferences, or learnings - that should persist across sessions. - Args: content: The content to store. tags: Optional tags for categorization. importance: Importance rating 1-10 (default 5). - - Returns: - A dict with the stored item ID. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - result = store.store_with_similarity_check(content, tags=tags, importance=importance) - return { - "success": True, - "item_id": result["item_id"], - "message": "Saved to persistent memory", - "similar_existing": result["similar_existing"], - } + return _save_memory_with_store(store, content, tags, importance) @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.read")], description="Search persistent memory by keyword/substring. Use this to recall previously saved facts, preferences, or learnings.", ) def search_memory( query: str, limit: int = 10, + include_archived: bool = False, ) -> dict[str, Any]: """Search persistent memory for stored information. Args: query: The search query (substring match, case-insensitive). limit: Maximum number of results to return. - - Returns: - A dict with matching memory items. + include_archived: If True, include archived (soft-deleted) memories. 
""" store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - results = store.search(query, limit=limit) - items = [ - { - "id": item.id, - "content": item.content, - "tags": item.tags, - } - for item in results - ] - - return { - "success": True, - "query": query, - "items": items, - "count": len(items), - } + return _search_memory_with_store(store, query, limit, include_archived) @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.write")], description="Update an existing memory item", ) def update_memory( @@ -517,21 +564,17 @@ def update_memory( Args: item_id: ID of the memory to update. content: New content (optional). - tags: New tags (optional). Pass explicitly to update; omit to leave unchanged. - - Returns: - A dict indicating success. + tags: New tags. Omit to leave unchanged; pass None to clear. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - updated = store.update(item_id, content=content, tags=tags) - return {"success": True, "updated": updated} + return _update_memory_with_store(store, item_id, content, tags) @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("memory.write")], description="Delete a memory item", ) def delete_memory( @@ -543,12 +586,8 @@ def delete_memory( Args: item_id: ID of the memory to delete. purge: If True, permanently remove. If False, archive. - - Returns: - A dict indicating success. 
""" store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - deleted = store.delete(item_id, purge=purge) - return {"success": True, "deleted": deleted} + return _delete_memory_with_store(store, item_id, purge) diff --git a/src/agentic_cli/tools/reflection_tools.py b/src/agentic_cli/tools/reflection_tools.py index a099e5e..2909802 100644 --- a/src/agentic_cli/tools/reflection_tools.py +++ b/src/agentic_cli/tools/reflection_tools.py @@ -14,7 +14,8 @@ from agentic_cli.file_utils import atomic_write_json from agentic_cli.logging import get_logger -from agentic_cli.tools.registry import PermissionLevel, ToolCategory, register_tool +from agentic_cli.tools.registry import ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT from agentic_cli.workflow.service_registry import require_service if TYPE_CHECKING: @@ -116,7 +117,7 @@ def format_for_prompt(self, tool_name: str) -> str: @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, + capabilities=EXEMPT, description="Save a learned heuristic from a tool failure", ) def save_reflection( diff --git a/src/agentic_cli/tools/registry.py b/src/agentic_cli/tools/registry.py index 8a09e4e..9803200 100644 --- a/src/agentic_cli/tools/registry.py +++ b/src/agentic_cli/tools/registry.py @@ -5,11 +5,18 @@ - ToolRegistry: Registry for tool discovery and management """ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum from typing import Any, Callable import inspect +from agentic_cli.workflow.permissions.capabilities import ( + Capability, + CapabilitiesSpec, + EXEMPT, +) +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt + class ToolCategory(Enum): """Categories for organizing tools. @@ -50,17 +57,6 @@ class ToolCategory(Enum): OTHER = "other" -class PermissionLevel(Enum): - """Permission levels for tool safety classification. 
- - Used to determine whether user confirmation is needed before tool execution. - """ - - SAFE = "safe" # No confirmation needed (read operations) - CAUTION = "caution" # May need allowlisting (write operations) - DANGEROUS = "dangerous" # Always requires confirmation (delete, shell) - - @dataclass class ToolDefinition: """Metadata-rich tool definition. @@ -69,7 +65,7 @@ class ToolDefinition: name: Tool name (defaults to function name) description: Human-readable description category: Tool category for organization (READ, WRITE, NETWORK, etc.) - permission_level: Safety classification (SAFE, CAUTION, DANGEROUS) + capabilities: Capability declarations for the permission engine is_async: Whether the tool is async func: The actual tool function """ @@ -77,8 +73,8 @@ class ToolDefinition: name: str description: str func: Callable[..., Any] + capabilities: CapabilitiesSpec category: ToolCategory = ToolCategory.OTHER - permission_level: PermissionLevel = PermissionLevel.SAFE is_async: bool = False def __post_init__(self): @@ -87,6 +83,35 @@ def __post_init__(self): self.is_async = True +def _validate_capabilities(caps: Any, tool_name: str) -> CapabilitiesSpec: + """Validate and return a capabilities value for a tool registration. + + - ``EXEMPT`` (or any ``_CapabilityExempt`` instance) passes through unchanged. + - A non-empty ``list`` of ``Capability`` instances is returned as-is. + - An empty list raises ``ValueError`` (use ``EXEMPT`` to opt out explicitly). + - Any other type raises ``TypeError``. + """ + if isinstance(caps, _CapabilityExempt): + return caps + if isinstance(caps, list): + if not caps: + raise ValueError( + f"Tool {tool_name!r}: capabilities=[] is not allowed. " + "Use capabilities=EXEMPT to opt out explicitly." + ) + for item in caps: + if not isinstance(item, Capability): + raise TypeError( + f"Tool {tool_name!r}: capabilities list items must be " + f"Capability instances, got {type(item)!r}." 
+ ) + return caps + raise TypeError( + f"Tool {tool_name!r}: capabilities must be EXEMPT or a list of Capability " + f"instances, got {type(caps)!r}." + ) + + class ToolRegistry: """Registry for managing and discovering tools. @@ -106,29 +131,30 @@ def register( name: str | None = None, description: str | None = None, category: ToolCategory = ToolCategory.OTHER, - permission_level: PermissionLevel = PermissionLevel.SAFE, + capabilities: CapabilitiesSpec, ) -> Callable[..., Any]: """Register a tool function. Can be used as a decorator: - @registry.register(category=ToolCategory.READ, permission_level=PermissionLevel.SAFE) + @registry.register(category=ToolCategory.READ, capabilities=[Capability(...)]) def my_tool(query: str) -> dict: ... Or called directly: - registry.register(my_tool, category=ToolCategory.READ) + registry.register(my_tool, category=ToolCategory.READ, capabilities=EXEMPT) """ def decorator(f: Callable[..., Any]) -> Callable[..., Any]: tool_name = name or f.__name__ tool_desc = description or (f.__doc__ or "").split("\n")[0].strip() + validated_caps = _validate_capabilities(capabilities, tool_name) definition = ToolDefinition( name=tool_name, description=tool_desc, func=f, + capabilities=validated_caps, category=category, - permission_level=permission_level, ) self._tools[tool_name] = definition @@ -176,13 +202,13 @@ def register_tool( name: str | None = None, description: str | None = None, category: ToolCategory = ToolCategory.OTHER, - permission_level: PermissionLevel = PermissionLevel.SAFE, + capabilities: CapabilitiesSpec, ) -> Callable[..., Any]: """Register a tool with the default registry. - DANGEROUS tools are no longer wrapped at registration time. Instead, - confirmation is handled at the framework level by ADK ConfirmationPlugin - and LangGraph's _wrap_for_confirmation wrapper. + ``capabilities`` is a required keyword argument. 
Pass ``EXEMPT`` to opt out + of the permission engine, or a list of ``Capability`` instances to declare + the resources this tool accesses. """ def _outer(f: Callable[..., Any]) -> Callable[..., Any]: @@ -191,11 +217,9 @@ def _outer(f: Callable[..., Any]) -> Callable[..., Any]: name=name, description=description, category=category, - permission_level=permission_level, + capabilities=capabilities, ) if func is not None: return _outer(func) return _outer - - diff --git a/src/agentic_cli/tools/sandbox/__init__.py b/src/agentic_cli/tools/sandbox/__init__.py index 40b0e50..994ba14 100644 --- a/src/agentic_cli/tools/sandbox/__init__.py +++ b/src/agentic_cli/tools/sandbox/__init__.py @@ -6,13 +6,14 @@ from typing import Any -from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools.registry import register_tool, ToolCategory from agentic_cli.workflow.service_registry import require_service, SANDBOX_MANAGER +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("python.exec")], description=( "Execute Python code in a stateful sandbox session. " "State (variables, imports) persists across calls within the same session. " diff --git a/src/agentic_cli/tools/search.py b/src/agentic_cli/tools/search.py index 8460cb4..700022f 100644 --- a/src/agentic_cli/tools/search.py +++ b/src/agentic_cli/tools/search.py @@ -26,8 +26,8 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability @dataclass @@ -198,7 +198,7 @@ def _get_backend(settings: Any) -> SearchBackend: @register_tool( category=ToolCategory.NETWORK, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Search the web for current information using the configured backend (Tavily or Brave). 
Use this for questions about recent events, documentation, or facts not in your training data.", ) async def web_search( diff --git a/src/agentic_cli/tools/shell/executor.py b/src/agentic_cli/tools/shell/executor.py index f18ddcd..fa85176 100644 --- a/src/agentic_cli/tools/shell/executor.py +++ b/src/agentic_cli/tools/shell/executor.py @@ -22,8 +22,8 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability # ============================================================================= # SHELL TOOL DISABLED @@ -202,7 +202,7 @@ def _execute_command( @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("shell.exec", target_arg="command")], description="Execute a shell command with layered security and HITL approval", ) def shell_executor( @@ -390,7 +390,7 @@ def shell_executor( "command": command, "risk_level": risk.overall_risk.value, "risk_factors": risk.risk_factors, - "message": "Command requires approval but no ApprovalManager configured", + "message": "Command requires approval but no approval manager configured", "error": None, } diff --git a/src/agentic_cli/tools/webfetch_tool.py b/src/agentic_cli/tools/webfetch_tool.py index 17c5553..b37fa07 100644 --- a/src/agentic_cli/tools/webfetch_tool.py +++ b/src/agentic_cli/tools/webfetch_tool.py @@ -9,7 +9,8 @@ from typing import Any from agentic_cli.config import get_settings -from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools.registry import register_tool, ToolCategory +from agentic_cli.workflow.permissions import Capability from agentic_cli.tools.webfetch import ( ContentFetcher, URLValidator, @@ -75,7 +76,7 @@ def get_or_create_fetcher(settings=None) -> ContentFetcher: @register_tool( category=ToolCategory.NETWORK, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read", 
target_arg="url")], description="Fetch a web page, convert it to markdown, and summarize it using an LLM based on your prompt. Use this to extract specific information from a URL (e.g., documentation, articles).", ) async def web_fetch(url: str, prompt: str, timeout: int = 30) -> dict[str, Any]: diff --git a/src/agentic_cli/workflow/adk/__init__.py b/src/agentic_cli/workflow/adk/__init__.py index 2afa6fc..6513ad0 100644 --- a/src/agentic_cli/workflow/adk/__init__.py +++ b/src/agentic_cli/workflow/adk/__init__.py @@ -3,7 +3,7 @@ This submodule contains ADK-specific components: - manager.py: GoogleADKWorkflowManager - event_processor.py: ADKEventProcessor -- plugins.py: ADK plugins (ConfirmationPlugin, LLMLoggingPlugin) +- plugins.py: ADK plugins (LLMLoggingPlugin) Note: GoogleADKWorkflowManager is NOT re-exported here to avoid circular imports. Import it directly from agentic_cli.workflow.adk.manager. diff --git a/src/agentic_cli/workflow/adk/manager.py b/src/agentic_cli/workflow/adk/manager.py index 7e851f7..763c355 100644 --- a/src/agentic_cli/workflow/adk/manager.py +++ b/src/agentic_cli/workflow/adk/manager.py @@ -24,7 +24,8 @@ from agentic_cli.workflow.events import WorkflowEvent, EventType from agentic_cli.workflow.config import AgentConfig from agentic_cli.workflow.adk.event_processor import ADKEventProcessor -from agentic_cli.workflow.adk.plugins import ConfirmationPlugin, LLMLoggingPlugin +from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin +from agentic_cli.workflow.adk.plugins import LLMLoggingPlugin from agentic_cli.config import ( BaseSettings, @@ -400,7 +401,7 @@ def _init_plugins(self) -> list: """ from agentic_cli.workflow.adk.task_progress_plugin import TaskProgressPlugin - plugins: list = [ConfirmationPlugin()] + plugins: list = [PermissionPlugin()] # Task progress tracking via ToolContext.state self._task_progress_plugin = TaskProgressPlugin() diff --git a/src/agentic_cli/workflow/adk/permission_plugin.py 
b/src/agentic_cli/workflow/adk/permission_plugin.py new file mode 100644 index 0000000..6524895 --- /dev/null +++ b/src/agentic_cli/workflow/adk/permission_plugin.py @@ -0,0 +1,72 @@ +"""ADK plugin that gates tool calls via PermissionEngine. + +Adapter check order (mirrors LangGraph wrapper for consistency): +1. EXEMPT tool → allow, no engine call. +2. Tool has no capability declaration → deny (author error, loud). +3. Engine absent from service registry → allow (test/dev fallback). +4. Otherwise call engine.check() and return None on allow, error dict on deny. +""" + +from __future__ import annotations + +from typing import Any, TYPE_CHECKING + +from google.adk.plugins.base_plugin import BasePlugin + +from agentic_cli.logging import Loggers +from agentic_cli.tools.registry import ToolCategory, get_registry, register_tool +from agentic_cli.workflow.permissions import EXEMPT +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE, get_service + +if TYPE_CHECKING: + from google.adk.tools import BaseTool + from google.adk.tools.tool_context import ToolContext + +logger = Loggers.workflow() + + +# ADK auto-injects ``transfer_to_agent`` into coordinator agents that declare +# ``sub_agents``. It's an internal routing primitive, not an external side +# effect, so register it with EXEMPT so the plugin lets it through. 
+try: + from google.adk.tools.transfer_to_agent_tool import transfer_to_agent + + register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING)(transfer_to_agent) +except ImportError: + pass + + +class PermissionPlugin(BasePlugin): + """ADK plugin: gates every tool call through :class:`PermissionEngine`.""" + + def __init__(self) -> None: + super().__init__(name="permission") + + async def before_tool_callback( + self, + *, + tool: "BaseTool", + tool_args: dict[str, Any], + tool_context: "ToolContext | None", + ) -> dict | None: + defn = get_registry().get(tool.name) + caps = defn.capabilities if defn else None + + if isinstance(caps, _CapabilityExempt): + return None + if not caps: + logger.warning("permission_undeclared", tool=tool.name) + return { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + engine = get_service(PERMISSION_ENGINE) + if engine is None: + return None # test/dev fallback + + result = await engine.check(tool.name, caps, tool_args) + if result.allowed: + return None + return {"success": False, "error": f"Permission denied: {result.reason}"} diff --git a/src/agentic_cli/workflow/adk/plugins.py b/src/agentic_cli/workflow/adk/plugins.py index afec1e5..b3069b8 100644 --- a/src/agentic_cli/workflow/adk/plugins.py +++ b/src/agentic_cli/workflow/adk/plugins.py @@ -1,7 +1,6 @@ """ADK Plugins for agentic-cli. 
Provides framework-level cross-cutting concerns as ADK Plugins: -- ConfirmationPlugin: HITL confirmation for DANGEROUS tools - LLMLoggingPlugin: Raw LLM traffic logging for debugging """ @@ -9,7 +8,6 @@ import json import time -import uuid from collections import deque from datetime import datetime, timezone from pathlib import Path @@ -17,109 +15,15 @@ from google.adk.plugins.base_plugin import BasePlugin -from agentic_cli.tools.registry import get_registry, PermissionLevel -from agentic_cli.workflow.service_registry import get_service, WORKFLOW -from agentic_cli.workflow.events import WorkflowEvent, UserInputRequest, InputType +from agentic_cli.workflow.events import WorkflowEvent from agentic_cli.logging import Loggers if TYPE_CHECKING: from google.adk.agents.callback_context import CallbackContext from google.adk.models import LlmRequest, LlmResponse - from google.adk.tools import BaseTool - from google.adk.tools.tool_context import ToolContext logger = Loggers.workflow() -_APPROVED_RESPONSES = ("yes", "y", "approve", "true") - - -def is_dangerous(tool_name: str) -> bool: - """Check if a tool is registered as DANGEROUS permission level. - - Uses the ToolRegistry's O(1) lookup directly — no caching needed. - """ - defn = get_registry().get(tool_name) - if defn is None: - return False - return defn.permission_level == PermissionLevel.DANGEROUS - - -async def request_tool_confirmation( - tool_name: str, tool_args: dict[str, Any] -) -> bool | None: - """Prompt user for confirmation of a dangerous tool call. - - Shared by ConfirmationPlugin (ADK) and _wrap_for_confirmation (LangGraph). - - Returns: - True if approved, False if denied, None if no workflow/callback available. 
- """ - workflow = get_service(WORKFLOW) - if workflow is None: - return None - - arg_summary = ", ".join( - f"{k}={repr(v)[:50]}" for k, v in list(tool_args.items())[:3] - ) - request = UserInputRequest( - request_id=str(uuid.uuid4())[:8], - tool_name=tool_name, - prompt=( - f"Tool requires approval: {tool_name}({arg_summary})\n\n" - f"Allow this operation? (yes/no)" - ), - input_type=InputType.CONFIRM, - default="no", - ) - - try: - response = await workflow.request_user_input(request) - except RuntimeError: - return None - - return response.strip().lower() in _APPROVED_RESPONSES - - -class ConfirmationPlugin(BasePlugin): - """ADK Plugin that requires user confirmation for DANGEROUS tools. - - Uses the workflow manager's request_user_input callback to prompt - the user before executing any tool with PermissionLevel.DANGEROUS. - - Replaces the old _wrap_dangerous decorator pattern with a single - framework-level hook that applies to all agents globally. - """ - - def __init__(self) -> None: - super().__init__(name="confirmation") - - async def before_tool_callback( - self, - *, - tool: "BaseTool", - tool_args: dict[str, Any], - tool_context: "ToolContext", - ) -> dict | None: - """Intercept DANGEROUS tool calls and request user confirmation.""" - if not is_dangerous(tool.name): - return None - - approved = await request_tool_confirmation(tool.name, tool_args) - - if approved is None: - logger.warning("confirmation_plugin.no_workflow_or_callback", tool=tool.name) - return None - - if approved: - logger.debug("confirmation_plugin.approved", tool=tool.name) - return None - - logger.info("confirmation_plugin.denied", tool=tool.name) - return { - "success": False, - "error": f"User denied approval for {tool.name}", - } - class LLMLoggingPlugin(BasePlugin): """ADK Plugin that captures raw LLM request/response traffic for debugging. 
diff --git a/src/agentic_cli/workflow/base_manager.py b/src/agentic_cli/workflow/base_manager.py index 3c79bb0..e4b2d4b 100644 --- a/src/agentic_cli/workflow/base_manager.py +++ b/src/agentic_cli/workflow/base_manager.py @@ -22,13 +22,14 @@ from agentic_cli.workflow.models import ModelRegistry from agentic_cli.workflow.service_registry import ( set_service_registry, + ARXIV_SOURCE, KB_MANAGER, - USER_KB_MANAGER, - SANDBOX_MANAGER, LLM_SUMMARIZER, MEMORY_STORE, + PERMISSION_ENGINE, REFLECTION_STORE, - ARXIV_SOURCE, + SANDBOX_MANAGER, + USER_KB_MANAGER, WORKFLOW, ) from agentic_cli.logging import Loggers @@ -364,6 +365,15 @@ def _ensure_managers_initialized(self) -> None: from agentic_cli.tools.arxiv_source import ArxivSearchSource s[ARXIV_SOURCE] = ArxivSearchSource() + # Always construct the PermissionEngine (all agents may need it) + if PERMISSION_ENGINE not in s: + from pathlib import Path + from agentic_cli.workflow.permissions import PermissionContext, PermissionEngine + ctx = PermissionContext(workdir=Path.cwd(), home=Path.home()) + s[PERMISSION_ENGINE] = PermissionEngine( + settings=self._settings, workflow=self, ctx=ctx, + ) + # Always ensure workflow reference is available s[WORKFLOW] = self @@ -497,7 +507,13 @@ async def initialize_services(self, validate: bool = True) -> None: # Create services BEFORE backend init so _build_tools() can # produce factory-bound tools during agent/graph creation. - self._ensure_managers_initialized() + # Offloaded to a worker thread because constructors here may + # load heavy dependencies (e.g. the sentence-transformers model + # inside EmbeddingService) that would otherwise block the event + # loop — which keeps the prompt unresponsive at startup. 
+ import asyncio as _asyncio + + await _asyncio.to_thread(self._ensure_managers_initialized) await self._do_initialize() self._initialized = True diff --git a/src/agentic_cli/workflow/langgraph/graph_builder.py b/src/agentic_cli/workflow/langgraph/graph_builder.py index 58b6044..95787a8 100644 --- a/src/agentic_cli/workflow/langgraph/graph_builder.py +++ b/src/agentic_cli/workflow/langgraph/graph_builder.py @@ -6,13 +6,11 @@ from __future__ import annotations -import asyncio -import functools from collections import deque from typing import Any, Callable, TYPE_CHECKING from agentic_cli.workflow.config import AgentConfig -from agentic_cli.workflow.adk.plugins import is_dangerous, request_tool_confirmation +from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission from agentic_cli.logging import Loggers if TYPE_CHECKING: @@ -122,9 +120,7 @@ def _get_tools(config: AgentConfig) -> list: if tools: tool_node_name = f"{config.name}_tools" tool_map[config.name] = tool_node_name - wrapped_tools = [ - self._wrap_for_confirmation(t) for t in tools - ] + wrapped_tools = [wrap_tool_for_permission(t) for t in tools] tool_node = ToolNode( wrapped_tools, name=tool_node_name, handle_tool_errors=True, ) @@ -304,45 +300,6 @@ def get_retry_policy(self): backoff_factor=self._settings.retry_backoff_factor, ) - @staticmethod - def _wrap_for_confirmation(tool: Callable) -> Callable: - """Wrap a DANGEROUS tool with user confirmation logic. - - Safe tools are returned as-is. DANGEROUS tools get an async wrapper - that uses the workflow manager's request_user_input callback to prompt - the user before executing. - - The wrapper is always async regardless of the original tool, because - the confirmation prompt requires awaiting request_user_input(). This - is safe because LangGraph's ToolNode calls ainvoke() for all tools. - - Args: - tool: The tool function to potentially wrap. - - Returns: - The original function (if safe) or a confirmation-wrapped version. 
- """ - tool_name = getattr(tool, "__name__", str(tool)) - if not is_dangerous(tool_name): - return tool - - is_async = asyncio.iscoroutinefunction(tool) - - @functools.wraps(tool) - async def _confirmed(*args: Any, **kwargs: Any) -> Any: - approved = await request_tool_confirmation(tool_name, kwargs) - if approved is False: - return { - "success": False, - "error": f"User denied approval for {tool_name}", - } - # approved is True or None (no workflow/callback) → proceed - if is_async: - return await tool(*args, **kwargs) - return tool(*args, **kwargs) - - return _confirmed - def _create_agent_node( self, config: AgentConfig, default_model: str, tools: list | None = None, ) -> Callable: diff --git a/src/agentic_cli/workflow/langgraph/permission_wrap.py b/src/agentic_cli/workflow/langgraph/permission_wrap.py new file mode 100644 index 0000000..08600fa --- /dev/null +++ b/src/agentic_cli/workflow/langgraph/permission_wrap.py @@ -0,0 +1,58 @@ +# src/agentic_cli/workflow/langgraph/permission_wrap.py +"""LangGraph tool wrapper that gates calls via PermissionEngine. + +Adapter check order matches ADK's PermissionPlugin: +1. EXEMPT tool → returned unwrapped (no engine call ever). +2. Tool has no capability declaration → wrapper returns deny dict at call time. +3. Engine absent from service registry → wrapper runs the original tool (fallback). +4. Otherwise call engine.check(); return on allow, deny dict on deny. +""" + +from __future__ import annotations + +import asyncio +import functools +from typing import Any, Callable + +from agentic_cli.logging import Loggers +from agentic_cli.tools.registry import get_registry +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE, get_service + +logger = Loggers.workflow() + + +def wrap_tool_for_permission(tool: Callable[..., Any]) -> Callable[..., Any]: + """Wrap ``tool`` so every call goes through ``PermissionEngine.check``. 
+ + Exempt tools (``capabilities=EXEMPT``) are returned unmodified. + Tools that never declared capabilities (``defn is None``) get a wrapper + that returns a deny error dict, mirroring ADK. + """ + name = getattr(tool, "__name__", str(tool)) + defn = get_registry().get(name) + caps = defn.capabilities if defn else None + + if isinstance(caps, _CapabilityExempt): + return tool + + is_async = asyncio.iscoroutinefunction(tool) + + @functools.wraps(tool) + async def _guarded(*args: Any, **kwargs: Any) -> Any: + if not caps: + logger.warning("permission_undeclared", tool=name) + return { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + engine = get_service(PERMISSION_ENGINE) + if engine is not None: + result = await engine.check(name, caps, kwargs) + if not result.allowed: + return {"success": False, "error": f"Permission denied: {result.reason}"} + if is_async: + return await tool(*args, **kwargs) + return tool(*args, **kwargs) + + return _guarded diff --git a/src/agentic_cli/workflow/permissions/__init__.py b/src/agentic_cli/workflow/permissions/__init__.py new file mode 100644 index 0000000..d148e1a --- /dev/null +++ b/src/agentic_cli/workflow/permissions/__init__.py @@ -0,0 +1,37 @@ +# src/agentic_cli/workflow/permissions/__init__.py +"""Centralised permission system. + +See docs/superpowers/specs/2026-04-18-permissions-system-design.md for the +full design. Public API re-exported here; framework-bindings live under +``workflow/adk/permission_plugin.py`` and ``workflow/langgraph/permission_wrap.py``. 
+""" + +from agentic_cli.workflow.permissions.capabilities import ( + Capability, + CapabilitiesSpec, + EXEMPT, + ResolvedCapability, +) +from agentic_cli.workflow.permissions.engine import PermissionEngine +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) +from agentic_cli.workflow.permissions.store import PermissionContext + +__all__ = [ + "AskScope", + "Capability", + "CapabilitiesSpec", + "CheckResult", + "EXEMPT", + "Effect", + "PermissionContext", + "PermissionEngine", + "ResolvedCapability", + "Rule", + "RuleSource", +] diff --git a/src/agentic_cli/workflow/permissions/capabilities.py b/src/agentic_cli/workflow/permissions/capabilities.py new file mode 100644 index 0000000..0cb1de8 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/capabilities.py @@ -0,0 +1,44 @@ +"""Capability declarations for tools. + +A ``Capability`` describes one side effect a tool may perform, as a +``(namespace.action, target_source)`` pair. ``ResolvedCapability`` is the +concrete form emitted at call time, with the target extracted from the +tool's arguments and canonicalized by the namespace's matcher. + +``EXEMPT`` is a sentinel used in ``@register_tool(capabilities=EXEMPT)`` +to mark tools that explicitly require no permission check. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final + + +@dataclass(frozen=True) +class Capability: + """A capability a tool needs. Resolved against args at call time.""" + + name: str # e.g. 
"filesystem.read" + target_arg: str | None = None # arg name holding the target; None → target "*" + + +@dataclass(frozen=True) +class ResolvedCapability: + """Capability + concrete, canonicalized target.""" + + name: str + target: str + + +@dataclass(frozen=True) +class _CapabilityExempt: + """Sentinel type; see ``EXEMPT`` for the singleton value.""" + + def __bool__(self) -> bool: # truthy so `not caps` ≠ "missing" + return True + + +EXEMPT: Final[_CapabilityExempt] = _CapabilityExempt() + +CapabilitiesSpec = list[Capability] | _CapabilityExempt diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py new file mode 100644 index 0000000..998410c --- /dev/null +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -0,0 +1,226 @@ +# src/agentic_cli/workflow/permissions/engine.py +"""Permission engine. + +See docs/superpowers/specs/2026-04-18-permissions-system-design.md §4 for +the full decision flow. This file implements: + +1. Rule loading from builtin + user + project JSON + in-memory session. +2. ``permissions_enabled=False`` short-circuit. +3. Full check() flow (Tasks 15–17). 
+""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import TYPE_CHECKING + +from agentic_cli.logging import Loggers +from agentic_cli.settings_persistence import ( + get_project_config_path, + get_user_config_path, +) +from agentic_cli.workflow.permissions.capabilities import Capability, ResolvedCapability +from agentic_cli.workflow.permissions.matchers import get_matcher +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) +from agentic_cli.workflow.permissions.store import ( + BUILTIN_RULES, + PermissionContext, + load_rules, +) + +if TYPE_CHECKING: + from agentic_cli.config import BaseSettings + from agentic_cli.workflow.base_manager import BaseWorkflowManager + +logger = Loggers.workflow() + + +def broaden_target_for_grant(cap: ResolvedCapability) -> str: + """Widen a resolved target before synthesising a session/persistent rule. + + For ``filesystem.*`` capabilities we broaden to the parent directory + (glob) so one grant covers every file the agent writes/reads there — + otherwise each new file in the same directory would prompt again. + Other namespaces keep the exact resolved target (URL, command, etc.), + and the wildcard sentinel ``"*"`` passes through unchanged. + + Used by both the engine (when installing a rule) and the prompt + builder (when describing the pending grant) so the displayed scope + always matches what will actually be stored. + """ + if cap.target == "*": + return "*" + if cap.name.startswith("filesystem."): + p = Path(cap.target) + parent = p.parent + # Already at the root: nothing to widen to. + if str(p) == str(parent): + return cap.target + # Avoid "//**" when the parent is the filesystem root. + if str(parent) == "/": + return "/**" + return f"{parent}/**" + return cap.target + + +class PermissionEngine: + """Evaluate tool invocations against rules from four sources. 
+ + Concurrency: one ``asyncio.Lock`` around the ask prompt only (rule + matching is pure). See spec §4.4. + """ + + def __init__( + self, + settings: "BaseSettings", + workflow: "BaseWorkflowManager", + ctx: PermissionContext, + ) -> None: + self._settings = settings + self._workflow = workflow + self._ctx = ctx + self._session_rules: list[Rule] = [] + self._ask_lock = asyncio.Lock() + self._base_rules: list[Rule] = self._load_all_rules() + + # ------------------------------------------------------------------ + # Rule loading + # ------------------------------------------------------------------ + + def _load_all_rules(self) -> list[Rule]: + rules: list[Rule] = [] + for r in BUILTIN_RULES: + # Canonicalise each builtin rule's target (which may use ${workdir}/${home}). + rules.append( + Rule( + capability=r.capability, + target=get_matcher(r.capability).canonicalize(r.target, self._ctx), + effect=r.effect, + source=r.source, + ) + ) + app = self._settings.app_name + rules += load_rules(get_user_config_path(app), RuleSource.USER, self._ctx) + rules += load_rules(get_project_config_path(app), RuleSource.PROJECT, self._ctx) + return rules + + @property + def rules(self) -> list[Rule]: + """All currently-active rules, in source order (builtin→user→project→session).""" + return list(self._base_rules) + list(self._session_rules) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def check( + self, + tool_name: str, + capabilities: list[Capability], + args: dict, + ) -> CheckResult: + """Evaluate whether ``tool_name`` may run with ``args``.""" + if not self._settings.permissions_enabled: + return CheckResult(True, "permissions disabled") + + resolved = self._resolve(capabilities, args) + outcomes = self._evaluate(resolved) + + # DENY wins. 
+        deny_hits = [(c, r) for c, r in outcomes if r is not None and r.effect is Effect.DENY]
+        if deny_hits:
+            c, r = deny_hits[0]
+            return CheckResult(False, self._fmt_rule_reason(r, c))
+
+        # All allowed?
+        if all(r is not None and r.effect is Effect.ALLOW for _, r in outcomes):
+            any_c, any_r = outcomes[0]
+            return CheckResult(True, self._fmt_rule_reason(any_r, any_c))
+
+        # No DENY and at least one capability unmatched: prompt the user.
+        return await self._ask_and_apply(tool_name, resolved, outcomes)
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _resolve(
+        self, capabilities: list[Capability], args: dict
+    ) -> list[ResolvedCapability]:
+        resolved: list[ResolvedCapability] = []
+        for cap in capabilities:
+            raw = "*" if cap.target_arg is None else str(args.get(cap.target_arg, ""))
+            target = "*" if cap.target_arg is None else get_matcher(cap.name).canonicalize(raw, self._ctx)
+            resolved.append(ResolvedCapability(cap.name, target))
+        return resolved
+
+    def _evaluate(
+        self, resolved: list[ResolvedCapability]
+    ) -> list[tuple[ResolvedCapability, Rule | None]]:
+        """For each resolved capability, return the strongest matched rule or None."""
+        from agentic_cli.workflow.permissions.matchers import _cap_matches
+        all_rules = self.rules
+        out: list[tuple[ResolvedCapability, Rule | None]] = []
+        for cap in resolved:
+            matcher = get_matcher(cap.name)
+            matched: list[Rule] = [
+                r for r in all_rules
+                if _cap_matches(r.capability, cap.name)
+                and matcher.matches(r.target, cap.target)
+            ]
+            if not matched:
+                out.append((cap, None))
+                continue
+            # DENY wins per capability; otherwise any ALLOW. 
+ deny = next((r for r in matched if r.effect is Effect.DENY), None) + out.append((cap, deny or matched[0])) + return out + + @staticmethod + def _fmt_rule_reason(rule: Rule, cap: ResolvedCapability) -> str: + return f"rule: {rule.source.value}/{rule.effect.value} {cap.name} {rule.target}" + + + async def _ask_and_apply( + self, + tool_name: str, + resolved: list[ResolvedCapability], + outcomes: list[tuple[ResolvedCapability, Rule | None]], + ) -> CheckResult: + from agentic_cli.workflow.permissions.prompt import build_request, parse_response + from agentic_cli.workflow.permissions.store import append_project_rule + + unmatched = [cap for cap, r in outcomes if r is None] + async with self._ask_lock: + request = build_request(tool_name, resolved) + response = await self._workflow.request_user_input(request) + scope = parse_response(response) + + if scope is AskScope.DENY: + logger.info( + "permission_denied_by_user", + tool=tool_name, + capabilities=[(c.name, c.target) for c in resolved], + ) + return CheckResult(False, "no rule + user denied") + + if scope is AskScope.ONCE: + return CheckResult(True, "no rule + user allowed (once)") + + source = RuleSource.SESSION if scope is AskScope.SESSION else RuleSource.PROJECT + for cap in unmatched: + target = broaden_target_for_grant(cap) + rule = Rule(cap.name, target, Effect.ALLOW, source) + self._session_rules.append(rule) + if source is RuleSource.PROJECT: + append_project_rule(self._settings.app_name, rule) + + label = "session" if source is RuleSource.SESSION else "always, saved to project" + return CheckResult(True, f"no rule + user allowed ({label})") diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py new file mode 100644 index 0000000..b4b1c28 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -0,0 +1,193 @@ +# src/agentic_cli/workflow/permissions/matchers.py +"""Matchers for capability targets. 
+ +Each capability namespace (``filesystem.*``, ``http.*``, ``shell.*``) has a +Matcher that knows how to canonicalize targets + patterns and compare them. +Unknown namespaces fall back to ``StringGlobMatcher``. +""" + +from __future__ import annotations + +import fnmatch +import re +from functools import lru_cache +from pathlib import Path +from typing import Protocol, runtime_checkable +from urllib.parse import urlsplit, urlunsplit + +from agentic_cli.workflow.permissions.store import PermissionContext + + +@runtime_checkable +class Matcher(Protocol): + """Canonicalise and match targets for a capability namespace.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: ... + + def matches(self, pattern: str, target: str) -> bool: ... + + +class StringGlobMatcher: + """Default fallback: ``${...}`` substitution + ``fnmatch``.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" + return ctx.substitute(s).strip() + + def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True + return fnmatch.fnmatchcase(target, pattern) + + +@lru_cache(maxsize=512) +def _glob_to_regex(pattern: str) -> re.Pattern[str]: + """Translate a ``**``-aware glob into a compiled regex. + + * ``*`` matches one path segment (no ``/``). + * ``**`` matches zero or more complete path segments (including their + separating ``/`` characters). + * ``?`` matches one non-``/`` character within a segment. + """ + segments = pattern.split("/") + # Build per-segment regex pieces. Each piece is the regex for that + # segment *without* surrounding slashes; we join with "/" afterwards, + # but "**" segments need special junction treatment. 
+    pieces: list[str] = []
+    for seg in segments:
+        if seg == "**":
+            pieces.append("**")  # placeholder; resolved during join below
+            continue
+        seg_re = ""
+        for ch in seg:
+            if ch == "*":
+                seg_re += "[^/]*"
+            elif ch == "?":
+                seg_re += "[^/]"
+            else:
+                seg_re += re.escape(ch)
+        pieces.append(seg_re)
+
+    # Join pieces, collapsing "**" placeholders.
+    # A "**" segment means "zero or more complete path segments". It is
+    # emitted as `(?:/[^/]+)*`, where each repetition carries its own
+    # leading "/", so with zero repetitions a/**/z reduces to a/z — the
+    # "/" before z comes from the ordinary join of the following piece.
+    #
+    # Strategy: iterate and build the body string, handling ** specially.
+    body = ""
+    for i, piece in enumerate(pieces):
+        if piece == "**":
+            # Emit `(?:/[^/]+)*` without prepending the usual "/" join
+            # separator (the separator is normally added when the *next*
+            # piece is processed). An empty segment (e.g. "a//b") can
+            # still leave a trailing "/" in body; strip it so the "/"
+            # is not doubled before the junction regex. 
+ if i > 0 and body.endswith("/"): + body = body[:-1] + # Zero or more / sequences + body += "(?:/[^/]+)*" + else: + if i > 0: + body += "/" + body += piece + + return re.compile(f"^{body}$") + + +class PathMatcher: + """Matcher for ``filesystem.*`` capabilities.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" + s = ctx.substitute(s) + p = Path(s).expanduser() + if not p.is_absolute(): + p = ctx.workdir / p + return str(Path(p).resolve(strict=False)) + + def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True + return bool(_glob_to_regex(pattern).match(target)) + + +class URLMatcher: + """Matcher for ``http.*`` capabilities.""" + + _DEFAULT_PORTS = {"http": 80, "https": 443} + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" + s = ctx.substitute(s) + parts = urlsplit(s if "://" in s else f"https://{s}") + scheme = parts.scheme.lower() or "https" + host = (parts.hostname or "").lower() + port = parts.port + if port is not None and port == self._DEFAULT_PORTS.get(scheme): + netloc = host + elif port is not None: + netloc = f"{host}:{port}" + else: + netloc = host + path = parts.path + query = parts.query + # Drop fragment; reassemble + return urlunsplit((scheme, netloc, path, query, "")) + + def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True + pp = urlsplit(pattern if "://" in pattern else f"https://{pattern}") + tt = urlsplit(target) + if pp.scheme.lower() != tt.scheme.lower(): + return False + if (pp.hostname or "").lower() != (tt.hostname or "").lower(): + return False + if pp.port and pp.port != tt.port: + return False + if not _glob_to_regex(pp.path or "/").match(tt.path or "/"): + return False + if pp.query and not fnmatch.fnmatchcase(tt.query, pp.query): + return False + return True + + +class ShellMatcher: + """Matcher for ``shell.*`` capabilities.""" + + def canonicalize(self, s: str, ctx: 
PermissionContext) -> str: + if s == "*": + return "*" + return ctx.substitute(s).strip() + + def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True + return fnmatch.fnmatchcase(target, pattern) + + +_MATCHERS: dict[str, Matcher] = { + "filesystem": PathMatcher(), + "http": URLMatcher(), + "shell": ShellMatcher(), +} +_FALLBACK: Matcher = StringGlobMatcher() + + +def get_matcher(capability: str) -> Matcher: + """Return the matcher for a capability name, or the string-glob fallback.""" + ns = capability.split(".", 1)[0] + return _MATCHERS.get(ns, _FALLBACK) + + +def _cap_matches(rule_cap: str, target_cap: str) -> bool: + """Capability-name glob: exact / ``ns.*`` / ``*``.""" + if rule_cap == "*" or rule_cap == target_cap: + return True + if rule_cap.endswith(".*"): + return target_cap.startswith(rule_cap[:-1]) + return False diff --git a/src/agentic_cli/workflow/permissions/prompt.py b/src/agentic_cli/workflow/permissions/prompt.py new file mode 100644 index 0000000..4bbdbe6 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/prompt.py @@ -0,0 +1,66 @@ +"""UserInputRequest construction + response parsing for the permission engine.""" + +from __future__ import annotations + +import uuid + +from agentic_cli.workflow.events import InputType, UserInputRequest +from agentic_cli.workflow.permissions.capabilities import ResolvedCapability +from agentic_cli.workflow.permissions.engine import broaden_target_for_grant +from agentic_cli.workflow.permissions.rules import AskScope + +# Strings kept module-level so the UI and parser stay in sync. 
+ALLOW_ONCE_CHOICE = "Allow once" +ALLOW_SESSION_CHOICE = "Allow for this session" +ALLOW_ALWAYS_CHOICE = "Allow always (save to project)" +DENY_CHOICE = "Deny" + +_CHOICE_TO_SCOPE = { + ALLOW_ONCE_CHOICE: AskScope.ONCE, + ALLOW_SESSION_CHOICE: AskScope.SESSION, + ALLOW_ALWAYS_CHOICE: AskScope.PROJECT, + DENY_CHOICE: AskScope.DENY, +} + + +def build_request(tool_name: str, capabilities: list[ResolvedCapability]) -> UserInputRequest: + """Construct a ``UserInputRequest`` (CHOICE) describing the pending grant. + + The displayed target is the **effective grant scope** — i.e. what will be + stored as a rule if the user picks Session or Always. For ``filesystem.*`` + that's the parent directory (``/foo/**``) rather than the exact file, so + one grant covers every sibling/nested file. + """ + lines = [f"Tool `{tool_name}` wants:"] + has_broadened_filesystem = False + for cap in capabilities: + display_target = broaden_target_for_grant(cap) + if not display_target: + display_target = "*" + lines.append(f" • {cap.name} → {display_target}") + if cap.name.startswith("filesystem.") and display_target != cap.target: + has_broadened_filesystem = True + lines.append("") + if has_broadened_filesystem: + lines.append("(Grant scope widened to the parent directory.)") + lines.append("Allow?") + prompt = "\n".join(lines) + + return UserInputRequest( + request_id=f"perm-{uuid.uuid4().hex[:8]}", + tool_name=tool_name, + prompt=prompt, + input_type=InputType.CHOICE, + choices=[ + ALLOW_ONCE_CHOICE, + ALLOW_SESSION_CHOICE, + ALLOW_ALWAYS_CHOICE, + DENY_CHOICE, + ], + default=DENY_CHOICE, + ) + + +def parse_response(text: str) -> AskScope: + """Parse a choice string into an ``AskScope``. 
Unknown values deny."""
+    return _CHOICE_TO_SCOPE.get((text or "").strip(), AskScope.DENY)
diff --git a/src/agentic_cli/workflow/permissions/rules.py b/src/agentic_cli/workflow/permissions/rules.py
new file mode 100644
index 0000000..1636b57
--- /dev/null
+++ b/src/agentic_cli/workflow/permissions/rules.py
@@ -0,0 +1,39 @@
+"""Rule, decision, and scope types for the permission engine."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+
+
+class Effect(str, Enum):
+    ALLOW = "allow"
+    DENY = "deny"
+
+
+class RuleSource(str, Enum):
+    BUILTIN = "builtin"
+    USER = "user"
+    PROJECT = "project"
+    SESSION = "session"
+
+
+class AskScope(str, Enum):
+    ONCE = "once"
+    SESSION = "session"
+    PROJECT = "project"
+    DENY = "deny"
+
+
+@dataclass(frozen=True)
+class Rule:
+    capability: str
+    target: str
+    effect: Effect
+    source: RuleSource
+
+
+@dataclass
+class CheckResult:
+    allowed: bool
+    reason: str
diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py
new file mode 100644
index 0000000..2667aa1
--- /dev/null
+++ b/src/agentic_cli/workflow/permissions/store.py
@@ -0,0 +1,114 @@
+# src/agentic_cli/workflow/permissions/store.py
+"""Context, JSON persistence, and builtin rules for the permission engine.
+
+Provides ``PermissionContext``, the builtin rule set, and the JSON
+load/append helpers for user- and project-level settings files.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+from agentic_cli.file_utils import atomic_write_text
+from agentic_cli.settings_persistence import get_project_config_path
+from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource
+
+
+@dataclass(frozen=True)
+class PermissionContext:
+    """Static per-run context exposed to matchers and substitution.
+
+    Attributes:
+        workdir: Absolute current working directory.
+        home: Absolute home directory. 
+ """ + + workdir: Path + home: Path + + def substitute(self, s: str) -> str: + """Expand ${workdir} and ${home} in a pattern string.""" + return ( + s.replace("${workdir}", str(self.workdir)) + .replace("${home}", str(self.home)) + ) + + +BUILTIN_RULES: list[Rule] = [ + # Routine reads inside workdir: allowed without prompt. + Rule("filesystem.read", "${workdir}/**", Effect.ALLOW, RuleSource.BUILTIN), + + # Agent-internal stores — reads and writes to the user's own knowledge base + # and memory are routine workflow activity, not external side effects. + Rule("memory.*", "*", Effect.ALLOW, RuleSource.BUILTIN), + Rule("kb.*", "*", Effect.ALLOW, RuleSource.BUILTIN), + + # System locations — writes always denied. + Rule("filesystem.write", "/etc/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/usr/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/bin/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/sbin/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/boot/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/System/**", Effect.DENY, RuleSource.BUILTIN), # macOS + + # Credential directories. + Rule("filesystem.write", "${home}/.ssh/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "${home}/.aws/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "${home}/.gnupg/**", Effect.DENY, RuleSource.BUILTIN), +] + + +def load_rules(path: Path, source: RuleSource, ctx: PermissionContext) -> list[Rule]: + """Load rules from a settings.json file's ``permissions`` section. + + Returns an empty list when the file is absent or has no ``permissions`` + key. Raises ``ValueError`` if the file is not valid JSON. + """ + # Local import to avoid circular dependency: matchers.py imports PermissionContext from here. 
+ from agentic_cli.workflow.permissions.matchers import get_matcher # noqa: PLC0415 + + if not path.exists(): + return [] + try: + data = json.loads(path.read_text()) + except json.JSONDecodeError as exc: + raise ValueError(f"Malformed JSON in {path}: {exc}") from exc + + section = data.get("permissions") or {} + rules: list[Rule] = [] + for effect_name, effect in (("allow", Effect.ALLOW), ("deny", Effect.DENY)): + for entry in section.get(effect_name) or []: + cap = entry["capability"] + target_raw = entry["target"] + target = get_matcher(cap).canonicalize(target_raw, ctx) + rules.append(Rule(cap, target, effect, source)) + return rules + + +def append_project_rule(app_name: str, rule: Rule) -> None: + """Append ``rule`` to the project ``settings.json`` permissions section. + + Creates the file (and its parent directory) if absent; preserves every + other settings key; dedupes by exact ``(capability, target)``. Atomic + rewrite via ``file_utils.atomic_write_text``. + + Only ``Rule`` instances with ``source == RuleSource.PROJECT`` should be + passed here — this helper doesn't validate (engine enforces the + invariant). 
+ """ + path = get_project_config_path(app_name) + try: + data = json.loads(path.read_text()) if path.exists() else {} + except json.JSONDecodeError as exc: + raise ValueError(f"Malformed JSON in {path}: {exc}") from exc + + key = "allow" if rule.effect is Effect.ALLOW else "deny" + section = data.setdefault("permissions", {}).setdefault(key, []) + entry = {"capability": rule.capability, "target": rule.target} + if entry not in section: + section.append(entry) + + path.parent.mkdir(parents=True, exist_ok=True) + atomic_write_text(path, json.dumps(data, indent=2)) diff --git a/src/agentic_cli/workflow/service_registry.py b/src/agentic_cli/workflow/service_registry.py index 3040089..cbad534 100644 --- a/src/agentic_cli/workflow/service_registry.py +++ b/src/agentic_cli/workflow/service_registry.py @@ -13,13 +13,14 @@ # ---- Well-known registry keys ---- +ARXIV_SOURCE = "arxiv_source" KB_MANAGER = "kb_manager" -USER_KB_MANAGER = "user_kb_manager" -SANDBOX_MANAGER = "sandbox_manager" LLM_SUMMARIZER = "llm_summarizer" MEMORY_STORE = "memory_store" +PERMISSION_ENGINE = "permission_engine" REFLECTION_STORE = "reflection_store" -ARXIV_SOURCE = "arxiv_source" +SANDBOX_MANAGER = "sandbox_manager" +USER_KB_MANAGER = "user_kb_manager" WORKFLOW = "workflow" diff --git a/src/agentic_cli/workflow/settings.py b/src/agentic_cli/workflow/settings.py index 3839624..1246380 100644 --- a/src/agentic_cli/workflow/settings.py +++ b/src/agentic_cli/workflow/settings.py @@ -10,7 +10,7 @@ from enum import Enum from typing import Literal, TYPE_CHECKING -from pydantic import Field +from pydantic import BaseModel, Field from agentic_cli.workflow.models import ModelFamily, ModelRegistry @@ -21,6 +21,20 @@ THINKING_EFFORT_LEVELS = ModelRegistry.THINKING_EFFORT_LEVELS +class PermissionRuleConfig(BaseModel): + """A single permission rule — serialised to settings.json as JSON.""" + + capability: str + target: str + + +class PermissionsConfig(BaseModel): + """Nested settings section holding 
user-editable permission rules.""" + + allow: list[PermissionRuleConfig] = [] + deny: list[PermissionRuleConfig] = [] + + class OrchestratorType(str, Enum): """Types of workflow orchestrators available.""" @@ -285,13 +299,19 @@ class WorkflowSettingsMixin: json_schema_extra={"ui_order": 132}, ) - # HITL (Human-in-the-Loop) settings - hitl_enabled: bool = Field( - default=True, - title="HITL Enabled", - description="Enable human-in-the-loop features (approvals)", + # Permissions + permissions: PermissionsConfig = Field( + default_factory=PermissionsConfig, + title="Permissions", + description="Declarative allow/deny rules for tool capabilities.", json_schema_extra={"ui_order": 135}, ) + permissions_enabled: bool = Field( + default=True, + title="Permissions Enabled", + description="Master switch; when False, all tool calls are allowed.", + json_schema_extra={"ui_order": 136}, + ) # Persistence settings (LangGraph) postgres_uri: str | None = Field( diff --git a/tests/integration/test_permission_adk.py b/tests/integration/test_permission_adk.py new file mode 100644 index 0000000..5d36ade --- /dev/null +++ b/tests/integration/test_permission_adk.py @@ -0,0 +1,141 @@ +# tests/integration/test_permission_adk.py (unit-level for Task 20) +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions import Capability, EXEMPT +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE + + +@pytest.fixture +def stub_engine(): + from agentic_cli.workflow.permissions.rules import CheckResult + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(True, "rule: test/allow")) + return engine + + +class TestPermissionPluginUnit: + @pytest.mark.asyncio + async def test_exempt_tool_passes_through(self, monkeypatch, stub_engine): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( 
+ "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == PERMISSION_ENGINE else None, + ) + reg = get_registry() + + @reg.register(name="exempt_x", capabilities=EXEMPT) + def exempt_x(): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="exempt_x"), tool_args={}, tool_context=None, + ) + assert result is None + stub_engine.check.assert_not_called() + + @pytest.mark.asyncio + async def test_missing_declaration_denies(self, monkeypatch, stub_engine): + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == PERMISSION_ENGINE else None, + ) + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="never_registered"), + tool_args={}, + tool_context=None, + ) + assert result == { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + @pytest.mark.asyncio + async def test_allow_calls_engine_and_passes(self, monkeypatch, stub_engine): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == PERMISSION_ENGINE else None, + ) + reg = get_registry() + + @reg.register( + name="reader_x", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def reader_x(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="reader_x"), + tool_args={"path": "/tmp/x"}, + tool_context=None, + ) + assert result is None + stub_engine.check.assert_awaited_once() + + @pytest.mark.asyncio + async def test_deny_returns_error_dict(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + 
from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + from agentic_cli.workflow.permissions.rules import CheckResult + + denying_engine = MagicMock() + denying_engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: denying_engine if k == PERMISSION_ENGINE else None, + ) + reg = get_registry() + + @reg.register( + name="writer_x", + capabilities=[Capability("filesystem.write", target_arg="path")], + ) + def writer_x(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="writer_x"), + tool_args={"path": "/etc/x"}, + tool_context=None, + ) + assert result == {"success": False, "error": "Permission denied: rule: builtin/deny"} + + @pytest.mark.asyncio + async def test_engine_absent_allows(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: None, + ) + reg = get_registry() + + @reg.register( + name="reader_y", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def reader_y(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="reader_y"), + tool_args={"path": "/tmp/x"}, + tool_context=None, + ) + assert result is None # fallback allow diff --git a/tests/integration/test_permission_langgraph.py b/tests/integration/test_permission_langgraph.py new file mode 100644 index 0000000..a9c4e45 --- /dev/null +++ b/tests/integration/test_permission_langgraph.py @@ -0,0 +1,110 @@ +# tests/integration/test_permission_langgraph.py +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions import Capability, EXEMPT +from 
agentic_cli.workflow.service_registry import PERMISSION_ENGINE + + +class TestWrapToolForPermissionUnit: + def test_exempt_tool_returned_unmodified(self): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + reg = get_registry() + + @reg.register(name="ex_lg", capabilities=EXEMPT) + def ex_lg(): + return {"success": True} + + wrapped = wrap_tool_for_permission(ex_lg) + assert wrapped is ex_lg + + @pytest.mark.asyncio + async def test_missing_declaration_denies(self, monkeypatch): + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + def unregistered(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(unregistered) + result = await wrapped(path="/x") + assert result == { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + @pytest.mark.asyncio + async def test_engine_allow_runs_tool(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + from agentic_cli.workflow.permissions.rules import CheckResult + + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(True, "rule: test/allow")) + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: engine if k == PERMISSION_ENGINE else None, + ) + + reg = get_registry() + + @reg.register( + name="read_lg", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def read_lg(path: str): + return {"content": f"read:{path}"} + + wrapped = wrap_tool_for_permission(read_lg) + result = await wrapped(path="/abs/x") + assert result == {"content": "read:/abs/x"} + + @pytest.mark.asyncio + async def test_engine_deny_returns_error_dict(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import 
wrap_tool_for_permission + from agentic_cli.workflow.permissions.rules import CheckResult + + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: engine if k == PERMISSION_ENGINE else None, + ) + + reg = get_registry() + + @reg.register( + name="write_lg", + capabilities=[Capability("filesystem.write", target_arg="path")], + ) + def write_lg(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(write_lg) + result = await wrapped(path="/etc/x") + assert result == {"success": False, "error": "Permission denied: rule: builtin/deny"} + + @pytest.mark.asyncio + async def test_engine_absent_allows(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: None, + ) + reg = get_registry() + + @reg.register( + name="read_lg2", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def read_lg2(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(read_lg2) + result = await wrapped(path="/x") + assert result == {"ok": True} diff --git a/tests/permissions/__init__.py b/tests/permissions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/permissions/test_capabilities.py b/tests/permissions/test_capabilities.py new file mode 100644 index 0000000..8a5eb36 --- /dev/null +++ b/tests/permissions/test_capabilities.py @@ -0,0 +1,47 @@ +"""Tests for capability declarations.""" + +import pytest + +from agentic_cli.workflow.permissions.capabilities import ( + EXEMPT, + Capability, + ResolvedCapability, + _CapabilityExempt, +) + + +class TestCapability: + def test_targeted_capability(self): + cap = Capability("filesystem.read", target_arg="path") + assert 
cap.name == "filesystem.read" + assert cap.target_arg == "path" + + def test_targetless_capability(self): + cap = Capability("python.exec") + assert cap.name == "python.exec" + assert cap.target_arg is None + + def test_capability_is_hashable(self): + a = Capability("filesystem.read", target_arg="path") + b = Capability("filesystem.read", target_arg="path") + assert a == b + assert hash(a) == hash(b) + + +class TestResolvedCapability: + def test_carries_name_and_target(self): + r = ResolvedCapability(name="filesystem.read", target="/abs/path") + assert r.name == "filesystem.read" + assert r.target == "/abs/path" + + +class TestExempt: + def test_exempt_is_singleton_of_sentinel_type(self): + assert isinstance(EXEMPT, _CapabilityExempt) + # Same instance referenced everywhere + from agentic_cli.workflow.permissions.capabilities import EXEMPT as also_exempt + assert EXEMPT is also_exempt + + def test_exempt_is_truthy(self): + # So `not caps` in adapters doesn't misclassify EXEMPT as "missing". 
+ assert bool(EXEMPT) diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py new file mode 100644 index 0000000..98fa4b4 --- /dev/null +++ b/tests/permissions/test_engine.py @@ -0,0 +1,435 @@ +"""Tests for PermissionEngine.""" + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions.capabilities import Capability +from agentic_cli.workflow.permissions.engine import PermissionEngine +from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource +from agentic_cli.workflow.permissions.store import PermissionContext + + +def _stub_settings(*, enabled: bool = True, app_name: str = "agentic") -> MagicMock: + s = MagicMock() + s.permissions_enabled = enabled + s.app_name = app_name + return s + + +def _stub_workflow() -> MagicMock: + w = MagicMock() + w.request_user_input = AsyncMock(return_value="Deny") + return w + + +@pytest.fixture +def ctx(tmp_path: Path) -> PermissionContext: + return PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + + +class TestEngineInit: + def test_loads_builtin_rules(self, ctx): + engine = PermissionEngine( + settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx, + ) + builtin = [r for r in engine.rules if r.source is RuleSource.BUILTIN] + assert any(r.capability == "filesystem.read" for r in builtin) + + def test_loads_user_and_project_from_files(self, ctx, tmp_path, monkeypatch): + import json + # Prepare user + project files at their canonical locations. + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("HOME", str(tmp_path)) + (tmp_path / ".agentic").mkdir() + (tmp_path / ".agentic/settings.json").write_text(json.dumps({ + "permissions": {"allow": [{"capability": "http.read", "target": "https://x.test/**"}]}, + })) + # User file lives at ~/.agentic/settings.json (which is tmp_path/.agentic/...) 
+ # — same path since HOME == tmp_path; test merging via overlap + + engine = PermissionEngine( + settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx, + ) + # Exactly one http.read allow should be loaded + http = [r for r in engine.rules if r.capability == "http.read"] + assert len(http) >= 1 + + +class TestEngineDisabled: + @pytest.mark.asyncio + async def test_short_circuits_when_disabled(self, ctx): + engine = PermissionEngine( + settings=_stub_settings(enabled=False), + workflow=_stub_workflow(), + ctx=ctx, + ) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/any/where"}, + ) + assert result.allowed is True + assert "disabled" in result.reason.lower() + + +class TestEngineRuleBased: + @pytest.mark.asyncio + async def test_builtin_allow_reads_in_workdir(self, ctx, tmp_path): + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "read_file", + [Capability("filesystem.read", target_arg="path")], + {"path": str(tmp_path / "foo.py")}, + ) + assert result.allowed is True + assert "builtin" in result.reason and "allow" in result.reason + + @pytest.mark.asyncio + async def test_builtin_deny_writes_to_etc(self, ctx): + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/etc/passwd"}, + ) + assert result.allowed is False + assert "deny" in result.reason.lower() + + @pytest.mark.asyncio + async def test_deny_wins_across_capabilities(self, ctx, tmp_path): + """copy_file-style: one cap allowed, one denied → overall deny.""" + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "copy_file", + [ + Capability("filesystem.read", target_arg="src"), # inside workdir: allow + Capability("filesystem.write", 
target_arg="dest"), # /etc: deny + ], + {"src": str(tmp_path / "a"), "dest": "/etc/out"}, + ) + assert result.allowed is False + + @pytest.mark.asyncio + async def test_deny_wins_across_sources(self, ctx, tmp_path, monkeypatch): + """Add a session allow for filesystem.write → still blocked by builtin deny.""" + monkeypatch.chdir(tmp_path) + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + # Inject a session ALLOW for /etc/** — canonicalised so it matches on macOS + from agentic_cli.workflow.permissions.matchers import get_matcher + engine._session_rules.append( + Rule( + "filesystem.write", + get_matcher("filesystem.write").canonicalize("/etc/**", ctx), + Effect.ALLOW, + RuleSource.SESSION, + ) + ) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/etc/x"}, + ) + assert result.allowed is False # DENY wins + + @pytest.mark.asyncio + async def test_targetless_capability_uses_star(self, ctx): + """A rule with target='*' matches any invocation of a targetless capability.""" + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + engine._session_rules.append( + Rule("python.exec", "*", Effect.ALLOW, RuleSource.SESSION) + ) + result = await engine.check( + "execute_python", + [Capability("python.exec")], # no target_arg + {"code": "print('hi')"}, + ) + assert result.allowed is True + + +class TestEngineAskFlow: + @pytest.mark.asyncio + async def test_user_allow_once_no_rule_installed(self, ctx, tmp_path): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow once") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/"}, + ) + assert result.allowed is True + assert "once" in result.reason + assert not any(r.source is RuleSource.SESSION for r in engine.rules) + 
+ @pytest.mark.asyncio + async def test_user_allow_session_installs_session_rule(self, ctx, tmp_path): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result.allowed is True + session = [r for r in engine.rules if r.source is RuleSource.SESSION] + assert any(r.capability == "http.read" and "example.com" in r.target for r in session) + + # Second call for the same target → no new prompt. + w.request_user_input.reset_mock() + result2 = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result2.allowed is True + w.request_user_input.assert_not_called() + + @pytest.mark.asyncio + async def test_user_allow_always_writes_project_file(self, ctx, tmp_path, monkeypatch): + import json + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result.allowed is True + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + allow = data["permissions"]["allow"] + assert len(allow) == 1 + assert allow[0]["capability"] == "http.read" + assert "example.com" in allow[0]["target"] + + @pytest.mark.asyncio + async def test_user_denies(self, ctx): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Deny") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + 
assert result.allowed is False + assert "denied" in result.reason.lower() + + @pytest.mark.asyncio + async def test_only_unmatched_capabilities_become_session_rules(self, ctx, tmp_path): + """copy_file with src inside workdir (allowed by builtin) and dest outside. + Only the write rule should be synthesised.""" + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "copy_file", + [ + Capability("filesystem.read", target_arg="src"), + Capability("filesystem.write", target_arg="dest"), + ], + {"src": str(tmp_path / "a"), "dest": str(tmp_path / "out")}, + ) + assert result.allowed is True + session = [r for r in engine.rules if r.source is RuleSource.SESSION] + assert len(session) == 1 + assert session[0].capability == "filesystem.write" + + +class TestEngineConcurrency: + @pytest.mark.asyncio + async def test_ask_is_serialised(self, ctx): + """Two simultaneous calls that both need to ask → one prompt at a time.""" + import asyncio + + ask_active = 0 + ask_peak = 0 + + async def fake_input(request): + nonlocal ask_active, ask_peak + ask_active += 1 + ask_peak = max(ask_peak, ask_active) + # Simulate think time + await asyncio.sleep(0.01) + ask_active -= 1 + return "Allow once" + + w = MagicMock() + w.request_user_input = fake_input + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + await asyncio.gather( + engine.check( + "web_fetch", [Capability("http.read", target_arg="url")], + {"url": "https://a.test/"}, + ), + engine.check( + "web_fetch", [Capability("http.read", target_arg="url")], + {"url": "https://b.test/"}, + ), + ) + assert ask_peak == 1 # never two asks in flight simultaneously + + +class TestTargetlessAllowAlwaysRegression: + """Regression: after 'Allow always' on a targetless capability (target_arg=None), + subsequent calls must not re-prompt. 
+ + Bug: matchers that canonicalise targets (URLMatcher, PathMatcher) were mangling + the ``"*"`` sentinel, so the just-installed rule didn't match the next + invocation's resolved target (also ``"*"``). Fix short-circuits ``"*"`` in + ``canonicalize`` and in ``matches``. + """ + + @pytest.mark.asyncio + async def test_http_read_allow_always_matches_next_call(self, ctx, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + # First call: no rule → ask → allow always (saves session + project rule) + result1 = await engine.check( + "web_search", + [Capability("http.read")], # targetless + {"query": "test"}, + ) + assert result1.allowed is True + assert w.request_user_input.await_count == 1 + + # Second call must match the session rule and NOT re-prompt. + result2 = await engine.check( + "web_search", + [Capability("http.read")], + {"query": "different query"}, + ) + assert result2.allowed is True + assert w.request_user_input.await_count == 1 + + # Third call with a DIFFERENT tool that declares the same capability + # is also covered — same cap, still target=='*'. 
+ result3 = await engine.check( + "search_arxiv", + [Capability("http.read")], + {"query": "papers"}, + ) + assert result3.allowed is True + assert w.request_user_input.await_count == 1 + + @pytest.mark.asyncio + async def test_filesystem_grant_broadens_to_parent_directory( + self, ctx, tmp_path, monkeypatch + ): + """When user picks 'Allow always' for filesystem.write to /foo/bar.txt, + the rule covers /foo/** — subsequent writes to /foo/baz.txt must not re-prompt.""" + monkeypatch.chdir(tmp_path) + outside = tmp_path / "out" + outside.mkdir() + + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + # First write → prompts → allow always. + result1 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(outside / "bar.txt")}, + ) + assert result1.allowed is True + assert w.request_user_input.await_count == 1 + + # The stored session rule should target the parent with ** glob. + session_write_rules = [ + r for r in engine.rules + if r.capability == "filesystem.write" and str(outside) in r.target + ] + assert any(r.target.endswith("/**") for r in session_write_rules) + + # Second write to a DIFFERENT file in the same directory → no prompt. + result2 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(outside / "baz.txt")}, + ) + assert result2.allowed is True + assert w.request_user_input.await_count == 1 + + # Write to a nested subdir also matches. 
+ sub = outside / "sub" + sub.mkdir() + result3 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(sub / "deep.txt")}, + ) + assert result3.allowed is True + assert w.request_user_input.await_count == 1 + + @pytest.mark.asyncio + async def test_filesystem_broaden_preserves_other_namespaces(self, ctx, tmp_path, monkeypatch): + """Non-filesystem capabilities keep their exact resolved target.""" + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/a"}, + ) + + http_session = [r for r in engine.rules if r.capability == "http.read" and r.source is RuleSource.SESSION] + assert http_session + # URL stays exact (host + path), NOT broadened. + assert all(r.target == "https://example.com/a" for r in http_session) + + @pytest.mark.asyncio + async def test_memory_and_kb_allowed_by_builtin(self, ctx, tmp_path): + """Memory and KB capabilities should be allowed without prompting.""" + w = _stub_workflow() + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + for cap_name in ("memory.read", "memory.write", "kb.read", "kb.write"): + result = await engine.check( + f"tool_using_{cap_name}", + [Capability(cap_name)], + {}, + ) + assert result.allowed is True, f"{cap_name} should be allowed by builtin" + w.request_user_input.assert_not_called() + + @pytest.mark.asyncio + async def test_reloaded_wildcard_rule_still_matches(self, ctx, tmp_path, monkeypatch): + """After the project JSON is reloaded (simulating next process run), + a rule stored with target='*' must still match a targetless capability.""" + monkeypatch.chdir(tmp_path) + + # Round 1: grant "allow always" so the rule is persisted. 
+ w1 = _stub_workflow() + w1.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine1 = PermissionEngine(settings=_stub_settings(), workflow=w1, ctx=ctx) + await engine1.check("web_search", [Capability("http.read")], {"query": "x"}) + + # Round 2: fresh engine reads rules from disk. + w2 = _stub_workflow() + w2.request_user_input = AsyncMock(return_value="Deny") # would deny if re-asked + engine2 = PermissionEngine(settings=_stub_settings(), workflow=w2, ctx=ctx) + result = await engine2.check( + "web_search", + [Capability("http.read")], + {"query": "y"}, + ) + assert result.allowed is True + w2.request_user_input.assert_not_called() diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py new file mode 100644 index 0000000..0291023 --- /dev/null +++ b/tests/permissions/test_matchers.py @@ -0,0 +1,175 @@ +# tests/permissions/test_matchers.py +"""Tests for permission matchers + cap-name glob.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions.matchers import ( + Matcher, + StringGlobMatcher, +) +from agentic_cli.workflow.permissions.store import PermissionContext + + +@pytest.fixture +def ctx(tmp_path: Path) -> PermissionContext: + return PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + + +class TestStringGlobMatcher: + def test_canonicalize_strips_and_substitutes(self, ctx): + m = StringGlobMatcher() + assert m.canonicalize(" ${workdir}/foo ", ctx) == f"{ctx.workdir}/foo" + + def test_matches_exact(self, ctx): + m = StringGlobMatcher() + assert m.matches("python.exec", "python.exec") + + def test_matches_glob_star(self, ctx): + m = StringGlobMatcher() + assert m.matches("*", "anything") + assert m.matches("foo*", "foobar") + assert not m.matches("foo*", "barbaz") + + +class TestMatcherProtocol: + def test_string_glob_matcher_satisfies_protocol(self): + assert isinstance(StringGlobMatcher(), Matcher) + + +class TestPathMatcher: + def 
test_canonicalize_substitutes_workdir(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.canonicalize("${workdir}/src/**", ctx) == f"{ctx.workdir}/src/**" + + def test_canonicalize_expands_home(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + # ctx.home is a fake absolute path; expanduser() won't change it, + # but an input like "~/x" should be expanded via the OS user's home + # only when there isn't a ${home} substitution — we use ${home}. + assert m.canonicalize("${home}/.cache", ctx) == "/fake/home/.cache" + + def test_canonicalize_anchors_relative_to_workdir(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.canonicalize("foo/bar", ctx) == f"{ctx.workdir}/foo/bar" + + def test_canonicalize_resolves_dotdot(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + # ${workdir} is tmp_path; ../x resolves out of it. 
+ out = m.canonicalize("${workdir}/a/../b", ctx) + assert out == f"{ctx.workdir}/b" + + def test_match_single_star_is_single_segment(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/*/c", "/a/b/c") + assert not m.matches("/a/*/c", "/a/b/x/c") + + def test_match_double_star_crosses_segments(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/**/z", "/a/z") + assert m.matches("/a/**/z", "/a/b/z") + assert m.matches("/a/**/z", "/a/b/c/z") + assert not m.matches("/a/**/z", "/x/b/z") + + def test_match_question_mark_single_char(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/?/c", "/a/b/c") + assert not m.matches("/a/?/c", "/a/bb/c") + + def test_match_case_sensitive(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert not m.matches("/A/b", "/a/b") + + +class TestURLMatcher: + def test_canonicalize_lowercases_scheme_and_host(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("HTTPS://API.GitHub.com/repos", ctx) == "https://api.github.com/repos" + + def test_canonicalize_strips_default_port(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("https://example.com:443/x", ctx) == "https://example.com/x" + assert m.canonicalize("http://example.com:80/x", ctx) == "http://example.com/x" + + def test_canonicalize_defaults_to_https(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("api.github.com/x", ctx) == "https://api.github.com/x" + + def test_canonicalize_drops_fragment(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert 
m.canonicalize("https://example.com/x#frag", ctx) == "https://example.com/x" + + def test_matches_host_exact(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.matches("https://api.github.com/**", "https://api.github.com/repos/foo") + assert not m.matches("https://api.github.com/**", "https://pypi.org/x") + + def test_matches_path_glob(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.matches("https://api.github.com/repos/**", "https://api.github.com/repos/owner/repo") + assert not m.matches("https://api.github.com/repos/**", "https://api.github.com/gists/1") + + +class TestShellMatcher: + def test_canonicalize_trims_and_substitutes(self, ctx): + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.canonicalize(" ls ${workdir} ", ctx) == f"ls {ctx.workdir}" + + def test_matches_whole_command_glob(self, ctx): + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.matches("git *", "git status") + assert m.matches("git commit*", "git commit -m 'x'") + assert not m.matches("git *", "svn up") + + def test_matches_does_not_tokenize(self, ctx): + """Shell matcher is intentionally naive; risk assessor handles semantics.""" + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.matches("git *", "git rm -rf /") # permission matcher allows + + +class TestMatcherRegistry: + def test_get_matcher_dispatches_by_namespace(self): + from agentic_cli.workflow.permissions.matchers import ( + PathMatcher, URLMatcher, ShellMatcher, StringGlobMatcher, get_matcher, + ) + assert isinstance(get_matcher("filesystem.read"), PathMatcher) + assert isinstance(get_matcher("http.read"), URLMatcher) + assert isinstance(get_matcher("shell.exec"), ShellMatcher) + assert isinstance(get_matcher("python.exec"), StringGlobMatcher) # fallback + assert 
isinstance(get_matcher("kb.read"), StringGlobMatcher) + + +class TestCapMatches: + def test_exact(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("filesystem.read", "filesystem.read") + assert not _cap_matches("filesystem.read", "filesystem.write") + + def test_namespace_glob(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("filesystem.*", "filesystem.read") + assert _cap_matches("filesystem.*", "filesystem.write") + assert not _cap_matches("filesystem.*", "http.read") + + def test_star_matches_everything(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("*", "filesystem.read") + assert _cap_matches("*", "anything") diff --git a/tests/permissions/test_prompt.py b/tests/permissions/test_prompt.py new file mode 100644 index 0000000..98409e0 --- /dev/null +++ b/tests/permissions/test_prompt.py @@ -0,0 +1,92 @@ +"""Tests for prompt build + response parse.""" + +import pytest + +from agentic_cli.workflow.events import InputType, UserInputRequest +from agentic_cli.workflow.permissions.capabilities import ResolvedCapability +from agentic_cli.workflow.permissions.prompt import ( + ALLOW_ALWAYS_CHOICE, + ALLOW_ONCE_CHOICE, + ALLOW_SESSION_CHOICE, + DENY_CHOICE, + build_request, + parse_response, +) +from agentic_cli.workflow.permissions.rules import AskScope + + +class TestBuildRequest: + def test_uses_choice_input_type(self): + req = build_request("write_file", [ResolvedCapability("filesystem.write", "/foo")]) + assert req.input_type is InputType.CHOICE + assert req.default == DENY_CHOICE + + def test_choices_in_fixed_order(self): + req = build_request("write_file", [ResolvedCapability("filesystem.write", "/foo")]) + assert req.choices == [ + ALLOW_ONCE_CHOICE, ALLOW_SESSION_CHOICE, ALLOW_ALWAYS_CHOICE, DENY_CHOICE + ] + + def test_prompt_mentions_tool_and_capabilities(self): + """For filesystem.* the prompt shows the broadened 
(parent-directory) + scope — matching what an Allow Session/Always would actually store.""" + req = build_request( + "copy_file", + [ + ResolvedCapability("filesystem.read", "/src/file_a.txt"), + ResolvedCapability("filesystem.write", "/dst/file_b.txt"), + ], + ) + assert "copy_file" in req.prompt + # Both entries show the parent-directory glob, not the exact file. + assert "filesystem.read → /src/**" in req.prompt + assert "filesystem.write → /dst/**" in req.prompt + # The exact filenames do NOT appear — the scope the user will actually + # grant is the directory. + assert "file_a.txt" not in req.prompt + assert "file_b.txt" not in req.prompt + # And a hint explaining the widening: + assert "parent directory" in req.prompt + + def test_prompt_preserves_exact_http_target(self): + """Non-filesystem capabilities keep their exact display target.""" + req = build_request( + "web_fetch", + [ResolvedCapability("http.read", "https://example.com/path")], + ) + assert "https://example.com/path" in req.prompt + # No broadening hint when no filesystem capability is involved. 
+ assert "parent directory" not in req.prompt + + def test_prompt_handles_root_parent(self): + """A file directly under /: broadening collapses '//**' to '/**'.""" + req = build_request( + "write_file", + [ResolvedCapability("filesystem.write", "/hello.txt")], + ) + assert "filesystem.write → /**" in req.prompt + assert "//**" not in req.prompt + + def test_request_id_has_perm_prefix(self): + req = build_request("x", []) + assert req.request_id.startswith("perm-") + + +class TestParseResponse: + @pytest.mark.parametrize("text, scope", [ + (ALLOW_ONCE_CHOICE, AskScope.ONCE), + (ALLOW_SESSION_CHOICE, AskScope.SESSION), + (ALLOW_ALWAYS_CHOICE, AskScope.PROJECT), + (DENY_CHOICE, AskScope.DENY), + ]) + def test_round_trip(self, text, scope): + assert parse_response(text) is scope + + def test_unknown_defaults_to_deny(self): + assert parse_response("whatever") is AskScope.DENY + + def test_empty_defaults_to_deny(self): + assert parse_response("") is AskScope.DENY + + def test_whitespace_tolerant(self): + assert parse_response(f" {ALLOW_ONCE_CHOICE} ") is AskScope.ONCE diff --git a/tests/permissions/test_rules.py b/tests/permissions/test_rules.py new file mode 100644 index 0000000..929ce57 --- /dev/null +++ b/tests/permissions/test_rules.py @@ -0,0 +1,47 @@ +"""Tests for rule/decision data types.""" + +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) + + +class TestEnums: + def test_effect_values(self): + assert Effect.ALLOW.value == "allow" + assert Effect.DENY.value == "deny" + + def test_rule_source_values(self): + assert {s.value for s in RuleSource} == {"builtin", "user", "project", "session"} + + def test_ask_scope_values(self): + assert {s.value for s in AskScope} == {"once", "session", "project", "deny"} + + +class TestRule: + def test_fields(self): + r = Rule("filesystem.read", "/abs/**", Effect.ALLOW, RuleSource.BUILTIN) + assert r.capability == "filesystem.read" + assert r.target == "/abs/**" + 
assert r.effect is Effect.ALLOW + assert r.source is RuleSource.BUILTIN + + def test_rule_is_hashable(self): + a = Rule("filesystem.read", "/x", Effect.ALLOW, RuleSource.BUILTIN) + b = Rule("filesystem.read", "/x", Effect.ALLOW, RuleSource.BUILTIN) + assert a == b + assert hash(a) == hash(b) + + +class TestCheckResult: + def test_allowed(self): + res = CheckResult(True, "rule: builtin/allow filesystem.read") + assert res.allowed is True + assert "allow" in res.reason + + def test_denied(self): + res = CheckResult(False, "user denied") + assert res.allowed is False diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py new file mode 100644 index 0000000..5f52707 --- /dev/null +++ b/tests/permissions/test_store.py @@ -0,0 +1,179 @@ +"""Tests for PermissionContext + JSON store helpers.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions.store import PermissionContext + + +class TestPermissionContext: + def test_fields(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.workdir == tmp_path + assert ctx.home == Path("/fake/home") + + def test_substitute_workdir(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + out = ctx.substitute("${workdir}/src/**") + assert out == f"{tmp_path}/src/**" + + def test_substitute_home(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.substitute("${home}/.cache") == "/fake/home/.cache" + + def test_substitute_multiple(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + out = ctx.substitute("${workdir}:${home}") + assert out == f"{tmp_path}:/fake/home" + + def test_substitute_unknown_variable_passes_through(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.substitute("${unknown}/x") == "${unknown}/x" + + +class TestBuiltinRules: + def 
test_has_expected_entries(self): + from agentic_cli.workflow.permissions.rules import Effect, RuleSource + from agentic_cli.workflow.permissions.store import BUILTIN_RULES + + allows = [r for r in BUILTIN_RULES if r.effect is Effect.ALLOW] + denies = [r for r in BUILTIN_RULES if r.effect is Effect.DENY] + + # Reads within workdir allowed: + assert any(r.capability == "filesystem.read" and "${workdir}" in r.target for r in allows) + # Memory + KB: agent-internal stores allowed by default. + assert any(r.capability == "memory.*" and r.target == "*" for r in allows) + assert any(r.capability == "kb.*" and r.target == "*" for r in allows) + # System dirs denied: + system_targets = {r.target for r in denies if r.capability == "filesystem.write"} + for path in ("/etc/**", "/usr/**", "/bin/**", "/sbin/**", "/boot/**", "/System/**"): + assert path in system_targets + # Credential dirs denied: + for path in ("${home}/.ssh/**", "${home}/.aws/**", "${home}/.gnupg/**"): + assert path in system_targets + + def test_all_builtin_have_builtin_source(self): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import BUILTIN_RULES + for rule in BUILTIN_RULES: + assert rule.source is RuleSource.BUILTIN + + +class TestLoadRules: + def test_missing_file_returns_empty(self, tmp_path: Path): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + rules = load_rules(tmp_path / "missing.json", RuleSource.USER, ctx) + assert rules == [] + + def test_missing_permissions_section_returns_empty(self, tmp_path: Path): + import json + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text(json.dumps({"default_model": "gpt-4"})) + ctx = 
PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert load_rules(path, RuleSource.USER, ctx) == [] + + def test_parses_allow_and_deny(self, tmp_path: Path): + import json + from agentic_cli.workflow.permissions.rules import Effect, RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text(json.dumps({ + "permissions": { + "allow": [{"capability": "filesystem.read", "target": "${workdir}/**"}], + "deny": [{"capability": "filesystem.write", "target": "/etc/**"}], + } + })) + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + rules = load_rules(path, RuleSource.PROJECT, ctx) + + assert len(rules) == 2 + allow = next(r for r in rules if r.effect is Effect.ALLOW) + deny = next(r for r in rules if r.effect is Effect.DENY) + assert allow.capability == "filesystem.read" + # PathMatcher canonicalises ${workdir} to the absolute path + /** + assert str(tmp_path) in allow.target and allow.target.endswith("/**") + assert allow.source is RuleSource.PROJECT + # PathMatcher resolves the path (resolves symlinks on macOS /etc -> /private/etc) + assert deny.target == str(Path("/etc/**").resolve(strict=False)) + + def test_malformed_json_raises(self, tmp_path: Path): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text("{not json") + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + with pytest.raises(ValueError): + load_rules(path, RuleSource.USER, ctx) + + +class TestAppendProjectRule: + def test_creates_file_when_absent(self, tmp_path, monkeypatch): + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, 
RuleSource.PROJECT) + append_project_rule("agentic", rule) + + import json + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["permissions"]["allow"] == [ + {"capability": "filesystem.write", "target": "/abs/foo"} + ] + + def test_preserves_other_settings_keys(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + (tmp_path / ".agentic").mkdir() + (tmp_path / ".agentic/settings.json").write_text(json.dumps({ + "default_model": "claude-sonnet-4", + "thinking_effort": "medium", + })) + + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, RuleSource.PROJECT) + append_project_rule("agentic", rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["default_model"] == "claude-sonnet-4" + assert data["thinking_effort"] == "medium" + assert data["permissions"]["allow"][0]["capability"] == "filesystem.write" + + def test_deduplicates_identical_rules(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, RuleSource.PROJECT) + append_project_rule("agentic", rule) + append_project_rule("agentic", rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert len(data["permissions"]["allow"]) == 1 + + def test_writes_deny_section_for_deny_effect(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/etc/foo", Effect.DENY, RuleSource.PROJECT) + append_project_rule("agentic", 
rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["permissions"]["deny"] == [ + {"capability": "filesystem.write", "target": "/etc/foo"} + ] + assert "allow" not in data["permissions"] or data["permissions"]["allow"] == [] diff --git a/tests/test_adk_plugins.py b/tests/test_adk_plugins.py index 3bf03ff..8530e83 100644 --- a/tests/test_adk_plugins.py +++ b/tests/test_adk_plugins.py @@ -1,235 +1,11 @@ -"""Tests for ADK Plugins (ConfirmationPlugin, LLMLoggingPlugin).""" +"""Tests for ADK Plugins (LLMLoggingPlugin).""" import json import pytest -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock -from agentic_cli.workflow.adk.plugins import ( - ConfirmationPlugin, LLMLoggingPlugin, is_dangerous, -) -from agentic_cli.workflow.events import EventType, UserInputRequest, WorkflowEvent - - -def _make_mock_tool(name: str) -> MagicMock: - """Create a mock ADK BaseTool with a given name.""" - tool = MagicMock() - tool.name = name - return tool - - -def _make_mock_tool_context() -> MagicMock: - """Create a mock ToolContext.""" - return MagicMock() - - -class TestIsDangerous: - """Tests for the is_dangerous helper.""" - - def test_dangerous_tool_detected(self): - """A tool with PermissionLevel.DANGEROUS should be detected.""" - from agentic_cli.tools.registry import ToolDefinition, PermissionLevel, ToolRegistry - - mock_registry = ToolRegistry() - mock_registry.register( - lambda: None, - name="risky_tool", - permission_level=PermissionLevel.DANGEROUS, - ) - - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): - result = is_dangerous("risky_tool") - assert result is True - - def test_safe_tool_not_dangerous(self): - """A tool with PermissionLevel.SAFE should not be dangerous.""" - from agentic_cli.tools.registry import ToolDefinition, PermissionLevel, ToolRegistry - - mock_registry = ToolRegistry() - mock_registry.register( - lambda: None, - name="safe_tool", 
- permission_level=PermissionLevel.SAFE, - ) - - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): - result = is_dangerous("safe_tool") - assert result is False - - def test_unknown_tool_not_dangerous(self): - """An unregistered tool should not be dangerous.""" - from agentic_cli.tools.registry import ToolRegistry - - mock_registry = ToolRegistry() - - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): - result = is_dangerous("unknown_tool") - assert result is False - - -class TestConfirmationPlugin: - """Tests for the ConfirmationPlugin.""" - - @pytest.fixture - def plugin(self): - return ConfirmationPlugin() - - async def test_safe_tool_passes_through(self, plugin): - """Safe tools should not be intercepted (returns None).""" - tool = _make_mock_tool("safe_tool") - tool_context = _make_mock_tool_context() - - with patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=False): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"query": "hello"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_dangerous_tool_approved(self, plugin): - """DANGEROUS tool should pass through when user approves.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="yes") - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"path": "/tmp/file"}, - tool_context=tool_context, - ) - - assert result is None - mock_workflow.request_user_input.assert_called_once() - # Verify the request was a CONFIRM type - request = mock_workflow.request_user_input.call_args[0][0] - assert isinstance(request, UserInputRequest) - assert 
request.tool_name == "dangerous_tool" - assert request.input_type.value == "confirm" - - async def test_dangerous_tool_denied(self, plugin): - """DANGEROUS tool should return error dict when user denies.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="no") - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"cmd": "rm -rf /"}, - tool_context=tool_context, - ) - - assert result is not None - assert result["success"] is False - assert "denied" in result["error"].lower() - assert "dangerous_tool" in result["error"] - - async def test_no_workflow_passes_through(self, plugin): - """When no workflow is available, tool should proceed (returns None).""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=None), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"cmd": "ls"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_no_callback_passes_through(self, plugin): - """When workflow has no callback, tool should proceed (returns None).""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock( - side_effect=RuntimeError("No callback registered") - ) - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - 
tool_args={"cmd": "ls"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_various_approval_responses(self, plugin): - """Multiple response strings should be treated as approval.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - for response in ["yes", "y", "YES", " Yes ", "approve", "true"]: - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value=response) - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={}, - tool_context=tool_context, - ) - - assert result is None, f"Expected approval for response {response!r}" - - async def test_arg_summary_truncation(self, plugin): - """Tool args should be summarized and truncated in the prompt.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="yes") - - long_args = { - "arg1": "x" * 100, - "arg2": "short", - "arg3": "included", - "arg4": "excluded_beyond_3", - } - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args=long_args, - tool_context=tool_context, - ) - - assert result is None - request = mock_workflow.request_user_input.call_args[0][0] - # arg4 should not appear (only first 3 args included) - assert "arg4" not in request.prompt - assert "arg1" in request.prompt - - def test_plugin_name(self, plugin): - """Plugin should have the correct name.""" - assert plugin.name == "confirmation" +from agentic_cli.workflow.adk.plugins import LLMLoggingPlugin +from agentic_cli.workflow.events import EventType, 
WorkflowEvent # ------------------------------------------------------------------------- diff --git a/tests/test_bm25_index.py b/tests/test_bm25_index.py index b53e5a6..35e4655 100644 --- a/tests/test_bm25_index.py +++ b/tests/test_bm25_index.py @@ -65,3 +65,124 @@ def test_rebuild(self, tmp_path): assert index.size == 2 results = index.search("new doc one", top_k=5) assert results[0][0] == "c1" + + +def _rank_bm25_cls(): + pytest.importorskip("rank_bm25") + from agentic_cli.knowledge_base._bm25_backends import RankBM25Index + return RankBM25Index + + +def _bm25s_cls(): + pytest.importorskip("bm25s") + from agentic_cli.knowledge_base._bm25_backends import BM25sIndex + return BM25sIndex + + +@pytest.fixture(params=["rank_bm25", "bm25s"]) +def bm25_cls(request): + if request.param == "rank_bm25": + return _rank_bm25_cls() + return _bm25s_cls() + + +class TestBM25BackendContract: + """Contract tests applied to every real BM25 backend. + + Same behavior as MockBM25Index — any backend that wants to plug into + create_bm25_index() must satisfy all of these. 
+ """ + + def test_add_and_search(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents( + ["chunk1", "chunk2", "chunk3"], + [ + "python programming language", + "java programming language", + "the weather is sunny", + ], + ) + results = index.search("python programming", top_k=2) + assert len(results) <= 2 + assert results[0][0] == "chunk1" + assert results[0][1] > 0 + + def test_search_returns_chunk_ids_and_scores(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["a", "b"], ["alpha beta", "gamma delta"]) + results = index.search("alpha", top_k=5) + assert results, "expected at least one result for matching query" + for chunk_id, score in results: + assert isinstance(chunk_id, str) + assert isinstance(score, float) + + def test_remove_documents(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["c1", "c2"], ["hello world", "goodbye world"]) + index.remove_documents(["c1"]) + assert index.size == 1 + results = index.search("hello", top_k=10) + chunk_ids = [cid for cid, _ in results] + assert "c1" not in chunk_ids + + def test_save_and_load(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents( + ["c1", "c2"], + ["test document content", "unrelated fruit basket"], + ) + index.save(tmp_path) + index2 = bm25_cls() + index2.load(tmp_path) + assert index2.size == 2 + results = index2.search("test document", top_k=5) + assert results + assert results[0][0] == "c1" + + def test_empty_search(self, tmp_path, bm25_cls): + index = bm25_cls() + results = index.search("anything", top_k=5) + assert results == [] + + def test_size_property(self, tmp_path, bm25_cls): + index = bm25_cls() + assert index.size == 0 + index.add_documents(["c1", "c2"], ["doc one", "doc two"]) + assert index.size == 2 + + def test_rebuild(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["old"], ["stale content"]) + index.rebuild(["c1", "c2"], ["new doc one", "new doc two"]) + assert index.size == 2 + results = 
index.search("new doc one", top_k=5) + assert results[0][0] == "c1" + + def test_load_missing_is_noop(self, tmp_path, bm25_cls): + index = bm25_cls() + # Loading from an empty dir should not raise; just leaves index empty. + index.load(tmp_path) + assert index.size == 0 + + +class TestCreateBM25IndexPicksRealBackend: + """When a real BM25 library is installed, create_bm25_index() must return it, + not the mock.""" + + def test_prefers_bm25s_when_available(self): + pytest.importorskip("bm25s") + from agentic_cli.knowledge_base.bm25_index import create_bm25_index + from agentic_cli.knowledge_base._bm25_backends import BM25sIndex + + index = create_bm25_index() + assert isinstance(index, BM25sIndex), ( + f"expected BM25sIndex, got {type(index).__name__}" + ) + + def test_use_mock_flag_returns_mock(self): + from agentic_cli.knowledge_base.bm25_index import create_bm25_index + from agentic_cli.knowledge_base._mock_bm25 import MockBM25Index + + index = create_bm25_index(use_mock=True) + assert isinstance(index, MockBM25Index) diff --git a/tests/test_config.py b/tests/test_config.py index 8a439a3..054733e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -417,14 +417,10 @@ def test_package_exports_new_modules(): # Memory from agentic_cli.tools.memory_tools import MemoryStore, MemoryItem - # HITL - from agentic_cli.tools.hitl_tools import ApprovalManager - # Tools from agentic_cli.tools import shell_executor, read_file, write_file, diff_compare # All imports should succeed assert MemoryStore is not None assert MemoryItem is not None - assert ApprovalManager is not None assert shell_executor is not None diff --git a/tests/test_hitl.py b/tests/test_hitl.py deleted file mode 100644 index 9db85be..0000000 --- a/tests/test_hitl.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Tests for Human-in-the-Loop module.""" - -import pytest - - -class TestApprovalManager: - """Tests for simplified ApprovalManager class.""" - - def test_empty_history(self): - """Test manager starts 
with empty history.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - assert manager.history == [] - - def test_record_approval(self): - """Test recording an approval.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - result = manager.record("req-1", approved=True) - - assert result.request_id == "req-1" - assert result.approved is True - assert result.reason is None - assert len(manager.history) == 1 - - def test_record_rejection(self): - """Test recording a rejection with reason.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - result = manager.record("req-1", approved=False, reason="Too risky") - - assert result.approved is False - assert result.reason == "Too risky" - - def test_history_accumulates(self): - """Test that history accumulates multiple records.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - manager.record("req-1", approved=True) - manager.record("req-2", approved=False, reason="No") - manager.record("req-3", approved=True) - - assert len(manager.history) == 3 - - -class TestHITLImports: - """Tests for hitl_tools module exports.""" - - def test_import_core_classes(self): - """Test core exports are available.""" - from agentic_cli.tools.hitl_tools import ( - ApprovalManager, - ApprovalResult, - ) - - assert ApprovalManager is not None - assert ApprovalResult is not None diff --git a/tests/test_kb_sidecar.py b/tests/test_kb_sidecar.py index 82b863d..174a51e 100644 --- a/tests/test_kb_sidecar.py +++ b/tests/test_kb_sidecar.py @@ -602,3 +602,74 @@ async def test_delete_removes_lock_entry(self, kb): kb.delete_document(d.id) assert d.id not in kb._sidecar_locks + + +class TestGetOrCreateSidecarLock: + """The public accessor replaces direct access to _sidecar_locks and is + the single path through which per-doc async locks are created, so + concurrent callers (backfill + 
lazy kb_read) always see the same lock. + """ + + @pytest.fixture + def kb(self, tmp_path): + return _make_kb(tmp_path) + + def test_returns_same_lock_on_repeat_calls(self, kb): + import asyncio + + lock1 = kb.get_or_create_sidecar_lock("doc-a") + lock2 = kb.get_or_create_sidecar_lock("doc-a") + assert lock1 is lock2 + assert isinstance(lock1, asyncio.Lock) + + def test_returns_distinct_locks_for_different_docs(self, kb): + lock_a = kb.get_or_create_sidecar_lock("doc-a") + lock_b = kb.get_or_create_sidecar_lock("doc-b") + assert lock_a is not lock_b + + async def test_lock_is_usable_for_serialization(self, kb): + """The returned lock actually serializes — two tasks that both + `async with` it run sequentially, not interleaved. + """ + import asyncio + + lock = kb.get_or_create_sidecar_lock("doc-a") + order: list[str] = [] + + async def worker(name: str, hold_ms: int): + async with lock: + order.append(f"{name}:start") + await asyncio.sleep(hold_ms / 1000) + order.append(f"{name}:end") + + await asyncio.gather(worker("A", 20), worker("B", 5)) + # A must fully finish before B starts (or vice versa) + assert order in ( + ["A:start", "A:end", "B:start", "B:end"], + ["B:start", "B:end", "A:start", "A:end"], + ) + + async def test_kb_read_lazy_path_uses_accessor(self, kb): + """The lazy sidecar generation path in knowledge_tools must go + through the public accessor, not reach into `_sidecar_locks` + directly. 
+ """ + from agentic_cli.tools.knowledge_tools import _read_document_from_kbs + from unittest.mock import patch + + doc = kb.ingest_document( + content="body", title="X", source_type=SourceType.USER, + ) + kb._sidecar_path(doc.id).unlink() + + original = kb.get_or_create_sidecar_lock + seen: list[str] = [] + + def spy(doc_id: str): + seen.append(doc_id) + return original(doc_id) + + with patch.object(kb, "get_or_create_sidecar_lock", side_effect=spy): + await _read_document_from_kbs(kb, None, doc.id) + + assert doc.id in seen diff --git a/tests/test_langgraph_confirmation.py b/tests/test_langgraph_confirmation.py deleted file mode 100644 index 747cacd..0000000 --- a/tests/test_langgraph_confirmation.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Tests for LangGraph confirmation wrapper.""" - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch - -from agentic_cli.workflow.langgraph.graph_builder import LangGraphBuilder - - -def _make_sync_tool(name: str = "my_tool"): - """Create a simple sync tool function.""" - def tool_fn(x: str) -> dict: - """A test tool.""" - return {"success": True, "result": x} - tool_fn.__name__ = name - return tool_fn - - -def _make_async_tool(name: str = "my_async_tool"): - """Create a simple async tool function.""" - async def tool_fn(x: str) -> dict: - """An async test tool.""" - return {"success": True, "result": x} - tool_fn.__name__ = name - return tool_fn - - -# Patch targets — both imported into graph_builder from plugins -_IS_DANGEROUS = "agentic_cli.workflow.langgraph.graph_builder.is_dangerous" -_CONFIRM = "agentic_cli.workflow.langgraph.graph_builder.request_tool_confirmation" - - -class TestWrapForConfirmation: - """Tests for LangGraphBuilder._wrap_for_confirmation.""" - - def test_safe_tool_passes_through(self): - """Safe tools should be returned unwrapped.""" - tool = _make_sync_tool("safe_tool") - with patch(_IS_DANGEROUS, return_value=False): - result = LangGraphBuilder._wrap_for_confirmation(tool) - assert result 
is tool - - def test_dangerous_tool_is_wrapped(self): - """DANGEROUS tools should be wrapped (different function object).""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - result = LangGraphBuilder._wrap_for_confirmation(tool) - assert result is not tool - assert result.__name__ == "dangerous_tool" - - async def test_dangerous_sync_tool_approved(self): - """Approved DANGEROUS sync tool should execute and return result.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=True): - result = await wrapped(x="hello") - assert result == {"success": True, "result": "hello"} - - async def test_dangerous_async_tool_approved(self): - """Approved DANGEROUS async tool should execute and return result.""" - tool = _make_async_tool("dangerous_async") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=True): - result = await wrapped(x="world") - assert result == {"success": True, "result": "world"} - - async def test_dangerous_tool_denied(self): - """Denied DANGEROUS tool should return error dict without executing.""" - call_count = 0 - def tool_fn(x: str) -> dict: - """A test tool.""" - nonlocal call_count - call_count += 1 - return {"success": True} - tool_fn.__name__ = "dangerous_tool" - - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool_fn) - - with patch(_CONFIRM, return_value=False): - result = await wrapped(x="test") - - assert result["success"] is False - assert "denied" in result["error"].lower() - assert call_count == 0 - - async def test_no_workflow_allows_execution(self): - """When request_tool_confirmation returns None, tool executes.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = 
LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=None): - result = await wrapped(x="test") - assert result == {"success": True, "result": "test"} - - async def test_no_callback_allows_execution(self): - """When request_tool_confirmation returns None (no callback), tool executes.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=None): - result = await wrapped(x="test") - assert result == {"success": True, "result": "test"} - - def test_preserves_docstring(self): - """Wrapped function should preserve the docstring.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - assert wrapped.__doc__ == tool.__doc__ - - def test_preserves_annotations(self): - """Wrapped function should preserve type annotations.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - assert wrapped.__annotations__ == tool.__annotations__ - - async def test_confirmation_called_with_tool_args(self): - """request_tool_confirmation should receive tool name and kwargs.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - mock_confirm = AsyncMock(return_value=True) - with patch(_CONFIRM, mock_confirm): - await wrapped(x="hello") - - mock_confirm.assert_called_once_with("dangerous_tool", {"x": "hello"}) diff --git a/tests/test_memory.py b/tests/test_memory.py index 22902e1..657568c 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -377,6 +377,89 @@ def test_save_memory_with_importance(self, memory_store_ctx): assert result["success"] is True +class TestMemoryToolParity: + """The @register_tool wrappers in 
memory_tools.py and the closures + returned by make_memory_tools() must produce identical results for + identical inputs — both delegate to the same _with_store helpers, + and this test locks that invariant in place. + """ + + def _ctx(self, mock_context): + """Build both entry-point variants on top of one fresh MemoryStore.""" + from agentic_cli.tools.memory_tools import MemoryStore + from agentic_cli.tools.factories import make_memory_tools + from agentic_cli.tools import memory_tools as mt + from agentic_cli.workflow.service_registry import set_service_registry + + store = MemoryStore(mock_context.settings) + factory_tools = make_memory_tools(store) + token = set_service_registry({"memory_store": store}) + return store, factory_tools, mt, token + + def test_save_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + registry_out = mt.save_memory(content="fact A", tags=["x"], importance=7) + factory_out = factory_tools[0](content="fact A", tags=["x"], importance=7) + finally: + token.var.reset(token) + # IDs differ (fresh UUIDs), so compare the contract-bearing keys only. + assert registry_out.keys() == factory_out.keys() + for k in ("success", "message"): + assert registry_out[k] == factory_out[k] + + def test_search_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + mt.save_memory(content="alpha beta", tags=["t"], importance=6) + mt.save_memory(content="gamma delta") + registry_out = mt.search_memory(query="alpha", limit=5) + factory_out = factory_tools[1](query="alpha", limit=5) + finally: + token.var.reset(token) + assert registry_out == factory_out + + def test_update_output_parity_tags_unchanged(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r = mt.save_memory(content="orig", tags=["keep"]) + item_id = r["item_id"] + # Neither caller passes tags — both must leave them alone. 
+ registry_out = mt.update_memory(item_id=item_id, content="new1") + assert store._items[item_id].tags == ["keep"] + factory_out = factory_tools[2](item_id=item_id, content="new2") + assert store._items[item_id].tags == ["keep"] + finally: + token.var.reset(token) + assert registry_out == factory_out == {"success": True, "updated": True} + + def test_update_output_parity_tags_cleared_with_none(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r = mt.save_memory(content="orig", tags=["clear-me"]) + item_id = r["item_id"] + # Both entry points must clear tags when explicitly passed None. + mt.update_memory(item_id=item_id, tags=None) + assert store._items[item_id].tags is None + + r2 = mt.save_memory(content="orig2", tags=["clear-me-2"]) + factory_tools[2](item_id=r2["item_id"], tags=None) + assert store._items[r2["item_id"]].tags is None + finally: + token.var.reset(token) + + def test_delete_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r1 = mt.save_memory(content="one") + r2 = mt.save_memory(content="two") + registry_out = mt.delete_memory(item_id=r1["item_id"]) + factory_out = factory_tools[3](item_id=r2["item_id"]) + finally: + token.var.reset(token) + assert registry_out == factory_out == {"success": True, "deleted": True} + + class TestMemorySemanticSearch: """Tests for semantic search in MemoryStore.""" diff --git a/tests/test_tools.py b/tests/test_tools.py index 4362b10..2b387ee 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -426,10 +426,13 @@ def test_definition_basic(self): def my_tool(query: str) -> dict: return {"result": query} + from agentic_cli.workflow.permissions import EXEMPT + definition = ToolDefinition( name="my_tool", description="A test tool", func=my_tool, + capabilities=EXEMPT, ) assert definition.name == "my_tool" @@ -440,6 +443,7 @@ def my_tool(query: str) -> dict: def test_definition_async_detection(self): """Test async 
function detection.""" + from agentic_cli.workflow.permissions import EXEMPT async def async_tool(query: str) -> dict: return {"result": query} @@ -448,12 +452,14 @@ async def async_tool(query: str) -> dict: name="async_tool", description="An async tool", func=async_tool, + capabilities=EXEMPT, ) assert definition.is_async is True def test_definition_with_category(self): """Test definition with category.""" + from agentic_cli.workflow.permissions import EXEMPT def search(query: str) -> dict: return {} @@ -462,6 +468,7 @@ def search(query: str) -> dict: name="search", description="Search tool", func=search, + capabilities=EXEMPT, category=ToolCategory.NETWORK, ) @@ -481,13 +488,15 @@ def test_registry_empty(self): def test_registry_register_direct(self): """Test direct registration.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() def my_tool(x: int) -> int: """Multiply by two.""" return x * 2 - registry.register(my_tool, category=ToolCategory.KNOWLEDGE) + registry.register(my_tool, category=ToolCategory.KNOWLEDGE, capabilities=EXEMPT) assert len(registry) == 1 assert "my_tool" in registry @@ -498,9 +507,11 @@ def my_tool(x: int) -> int: def test_registry_register_decorator(self): """Test decorator registration.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search_tool(query: str) -> dict: """Search for things.""" return {"results": []} @@ -511,9 +522,11 @@ def search_tool(query: str) -> dict: def test_registry_custom_name(self): """Test registration with custom name.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(name="custom_search", description="Custom search") + @registry.register(name="custom_search", description="Custom search", capabilities=EXEMPT) def internal_search(q: str) -> dict: return {} @@ 
-523,6 +536,8 @@ def internal_search(q: str) -> dict: def test_registry_get_functions(self): """Test getting all functions.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() def tool_a(): @@ -531,8 +546,8 @@ def tool_a(): def tool_b(): pass - registry.register(tool_a) - registry.register(tool_b) + registry.register(tool_a, capabilities=EXEMPT) + registry.register(tool_b, capabilities=EXEMPT) functions = registry.get_functions() assert len(functions) == 2 @@ -541,17 +556,19 @@ def tool_b(): def test_registry_list_by_category(self): """Test listing tools by category.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search1(): pass - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search2(): pass - @registry.register(category=ToolCategory.EXECUTION) + @registry.register(category=ToolCategory.EXECUTION, capabilities=EXEMPT) def execute(): pass @@ -1294,19 +1311,6 @@ def test_all_registered_tools_have_category(self): assert tool.category is not None, f"Tool '{tool.name}' has no category" assert tool.category.value is not None, f"Tool '{tool.name}' has invalid category" - def test_all_registered_tools_have_permission_level(self): - """Test that all registered tools have a permission level defined.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - tools = registry.list_tools() - - for tool in tools: - assert tool.permission_level is not None, f"Tool '{tool.name}' has no permission_level" - assert isinstance(tool.permission_level, PermissionLevel), ( - f"Tool '{tool.name}' has invalid permission_level type" - ) - def test_expected_tools_are_registered(self): """Test that all expected tools are in the registry.""" from agentic_cli.tools import 
get_registry @@ -1323,7 +1327,6 @@ def test_expected_tools_are_registered(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1368,58 +1371,6 @@ def test_expected_tools_are_registered(self): f"Registered tools: {sorted(registered_names)}" ) - def test_dangerous_tools_have_correct_permission(self): - """Test that dangerous tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - dangerous_tools = ["shell_executor", "execute_python"] - for tool_name in dangerous_tools: - tool = registry.get(tool_name) - if tool: - assert tool.permission_level == PermissionLevel.DANGEROUS, ( - f"{tool_name} should be DANGEROUS, got {tool.permission_level}" - ) - - def test_caution_tools_have_correct_permission(self): - """Test that caution-level tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - caution_tools = ["write_file", "edit_file", "kb_ingest"] - - for tool_name in caution_tools: - tool = registry.get(tool_name) - if tool: - assert tool.permission_level == PermissionLevel.CAUTION, ( - f"{tool_name} should be CAUTION, got {tool.permission_level}" - ) - - def test_safe_tools_have_correct_permission(self): - """Test that safe tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - safe_tools = [ - "read_file", "diff_compare", "grep", "glob", "list_dir", - "web_search", "web_fetch", "kb_search", - "kb_read", "kb_list", - "search_arxiv", "fetch_arxiv_paper", - "ask_clarification", - "save_memory", "search_memory", - "save_plan", "get_plan", - ] - - for tool_name in safe_tools: - tool = registry.get(tool_name) - if tool: - assert 
tool.permission_level == PermissionLevel.SAFE, ( - f"{tool_name} should be SAFE, got {tool.permission_level}" - ) - def test_tools_by_category(self): """Test that tools are properly categorized.""" from agentic_cli.tools import get_registry, ToolCategory @@ -1436,7 +1387,6 @@ def test_tools_by_category(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1488,7 +1438,6 @@ def test_registry_tool_count(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1507,10 +1456,10 @@ def test_registry_tool_count(self): class TestDangerousToolDirectExecution: - """Tests that DANGEROUS tools execute directly (no registry-level wrapping). + """Tests that tools execute directly (no registry-level wrapping). - Confirmation is now handled at the framework level by ADK ConfirmationPlugin - and LangGraph's _wrap_for_confirmation wrapper. + Permission checks are now handled at the framework level by the + capability-based permission engine. 
""" def test_dangerous_tool_not_wrapped(self): @@ -1544,11 +1493,4 @@ def test_dangerous_tool_executes_directly(self): tok.var.reset(tok) mgr.cleanup() - def test_dangerous_tool_has_correct_permission_level(self): - """DANGEROUS tools should still be registered with DANGEROUS permission.""" - from agentic_cli.tools import get_registry, PermissionLevel - registry = get_registry() - tool = registry.get("sandbox_execute") - if tool: - assert tool.permission_level == PermissionLevel.DANGEROUS diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py new file mode 100644 index 0000000..440a959 --- /dev/null +++ b/tests/tools/test_registry.py @@ -0,0 +1,66 @@ +"""Tests for ToolDefinition.capabilities field + register_tool capabilities= kwarg. + +capabilities= is a required kwarg on @register_tool / ToolRegistry.register. +Omitting it raises TypeError. +""" + +import pytest + +from agentic_cli.tools.registry import ( + ToolCategory, + ToolRegistry, +) +from agentic_cli.workflow.permissions import Capability, EXEMPT +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt + + +class TestCapabilitiesKwarg: + def test_capabilities_kwarg_is_required(self): + """Omitting capabilities= should raise TypeError.""" + reg = ToolRegistry() + with pytest.raises(TypeError): + @reg.register(name="no_caps") + def no_caps() -> dict: + return {} + + def test_exempt_stored(self): + reg = ToolRegistry() + + @reg.register(name="exempt", capabilities=EXEMPT) + def exempt() -> dict: + return {} + + defn = reg.get("exempt") + assert defn.capabilities is EXEMPT + + def test_list_stored(self): + reg = ToolRegistry() + caps = [Capability("filesystem.read", target_arg="path")] + + @reg.register(name="reader", capabilities=caps) + def reader(path: str) -> dict: + return {} + + defn = reg.get("reader") + assert defn.capabilities == caps + + def test_empty_list_rejected(self): + reg = ToolRegistry() + with pytest.raises(ValueError, match="EXEMPT"): + 
@reg.register(name="bad", capabilities=[]) + def bad() -> dict: + return {} + + def test_non_list_non_exempt_rejected(self): + reg = ToolRegistry() + with pytest.raises(TypeError): + @reg.register(name="bad2", capabilities="invalid") # type: ignore[arg-type] + def bad2() -> dict: + return {} + + def test_list_with_non_capability_rejected(self): + reg = ToolRegistry() + with pytest.raises(TypeError): + @reg.register(name="bad3", capabilities=["not a Capability"]) # type: ignore[list-item] + def bad3() -> dict: + return {} diff --git a/tests/workflow/test_backend_isolation.py b/tests/workflow/test_backend_isolation.py new file mode 100644 index 0000000..9820e7b --- /dev/null +++ b/tests/workflow/test_backend_isolation.py @@ -0,0 +1,56 @@ +"""Import-hygiene tests enforcing backend isolation. + +ADK and LangGraph are pluggable orchestrators behind BaseWorkflowManager. +Neither backend should depend on the other — shared logic lives in +workflow.* (e.g. workflow.events, workflow.service_registry, workflow.permissions). +If one backend imports from the other, adding a third orchestrator would +drag the dependency along. 
+""" + +from __future__ import annotations + +import re +from pathlib import Path + +import agentic_cli.workflow.langgraph +import agentic_cli.workflow.adk + + +_ADK_IMPORT = re.compile( + r"^\s*(from\s+agentic_cli\.workflow\.adk|import\s+agentic_cli\.workflow\.adk)", + re.MULTILINE, +) +_LG_IMPORT = re.compile( + r"^\s*(from\s+agentic_cli\.workflow\.langgraph|import\s+agentic_cli\.workflow\.langgraph)", + re.MULTILINE, +) + + +def _python_files(pkg) -> list[Path]: + root = Path(pkg.__file__).parent + return sorted(p for p in root.rglob("*.py") if "__pycache__" not in p.parts) + + +def test_langgraph_does_not_import_from_adk(): + offenders: list[str] = [] + for pyfile in _python_files(agentic_cli.workflow.langgraph): + text = pyfile.read_text() + if _ADK_IMPORT.search(text): + offenders.append(str(pyfile)) + assert not offenders, ( + "LangGraph backend must not import from workflow.adk.*. " + f"Offending files: {offenders}. " + "Move shared logic to workflow/.py (e.g. workflow/confirmation.py)." + ) + + +def test_adk_does_not_import_from_langgraph(): + offenders: list[str] = [] + for pyfile in _python_files(agentic_cli.workflow.adk): + text = pyfile.read_text() + if _LG_IMPORT.search(text): + offenders.append(str(pyfile)) + assert not offenders, ( + "ADK backend must not import from workflow.langgraph.*. " + f"Offending files: {offenders}." 
+ ) diff --git a/tests/workflow/test_base_manager_permissions.py b/tests/workflow/test_base_manager_permissions.py new file mode 100644 index 0000000..13efd8b --- /dev/null +++ b/tests/workflow/test_base_manager_permissions.py @@ -0,0 +1,27 @@ +"""Test that BaseWorkflowManager constructs and publishes PermissionEngine.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions import PermissionEngine +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE + + +class TestEngineInServiceRegistry: + def test_base_manager_registers_permission_engine(self, tmp_path, monkeypatch): + """After _ensure_managers_initialized, the engine is published to the registry dict.""" + from agentic_cli.config import BaseSettings + from agentic_cli.workflow.adk.manager import GoogleADKWorkflowManager + from agentic_cli.workflow.config import AgentConfig + + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("HOME", str(tmp_path)) + + settings = BaseSettings(google_api_key="test") + configs = [AgentConfig(name="root", prompt="hi", tools=[])] + manager = GoogleADKWorkflowManager( + agent_configs=configs, settings=settings, model="gemini-2.5-flash", + ) + manager._ensure_managers_initialized() + assert isinstance(manager.services[PERMISSION_ENGINE], PermissionEngine) diff --git a/tests/workflow/test_settings_permissions.py b/tests/workflow/test_settings_permissions.py new file mode 100644 index 0000000..25e1b59 --- /dev/null +++ b/tests/workflow/test_settings_permissions.py @@ -0,0 +1,37 @@ +"""Tests for permissions settings.""" + +from agentic_cli.config import BaseSettings + + +class TestPermissionsSettings: + def test_permissions_enabled_default_true(self): + s = BaseSettings() + assert s.permissions_enabled is True + + def test_permissions_default_empty(self): + s = BaseSettings() + assert s.permissions.allow == [] + assert s.permissions.deny == [] + + def test_hitl_enabled_removed(self): + """hitl_enabled should no longer exist.""" + s = 
BaseSettings() + assert not hasattr(s, "hitl_enabled") + + def test_permissions_accepts_allow_and_deny(self): + from agentic_cli.workflow.settings import ( + PermissionRuleConfig, + PermissionsConfig, + ) + + cfg = PermissionsConfig( + allow=[PermissionRuleConfig(capability="filesystem.read", target="/x/**")], + deny=[PermissionRuleConfig(capability="filesystem.write", target="/etc/**")], + ) + assert cfg.allow[0].capability == "filesystem.read" + assert cfg.deny[0].target == "/etc/**" + + def test_base_settings_has_permissions_field(self): + s = BaseSettings() + from agentic_cli.workflow.settings import PermissionsConfig + assert isinstance(s.permissions, PermissionsConfig)