From e8dfc748492c802f80172c660405fe554f138686 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:53:42 -0400 Subject: [PATCH 01/41] feat(kb): implement real BM25 backends (bm25s, rank_bm25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `create_bm25_index()` previously advertised a 3-tier fallback (bm25s → rank_bm25 → mock) but the backend module didn't exist, so every deployment silently got the mock TF-IDF stub and hybrid retrieval's keyword arm was degraded. Adds `_bm25_backends.py` with `BM25sIndex` (NumPy-accelerated, preferred) and `RankBM25Index` (pure Python, uses BM25Plus to avoid Okapi's zero/negative IDF on common terms). Both implement the same add/remove/search/save/load interface as `MockBM25Index` so the factory can swap them transparently. Adds parametrized contract tests over both backends plus a test that the factory now returns the real backend when bm25s is installed. --- pyproject.toml | 2 + .../knowledge_base/_bm25_backends.py | 137 ++++++++++++++++++ tests/test_bm25_index.py | 121 ++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 src/agentic_cli/knowledge_base/_bm25_backends.py diff --git a/pyproject.toml b/pyproject.toml index 152c1f2..d91b37f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,8 @@ kb = [ "torch>=2.2.0", "sentence-transformers>=2.0.0,<3.0.0", "faiss-cpu>=1.7.0", + "bm25s>=0.2.0", + "rank-bm25>=0.2.2", ] langgraph = [ "langgraph>=0.2.0", diff --git a/src/agentic_cli/knowledge_base/_bm25_backends.py b/src/agentic_cli/knowledge_base/_bm25_backends.py new file mode 100644 index 0000000..08625cf --- /dev/null +++ b/src/agentic_cli/knowledge_base/_bm25_backends.py @@ -0,0 +1,137 @@ +"""Real BM25 index backends (bm25s, rank_bm25). + +Both implement the same interface as MockBM25Index so create_bm25_index() +can return any of them interchangeably. Tokenization is lowercase whitespace +split to match MockBM25Index; the scoring model is the library's own BM25. + +Neither underlying library supports true incremental add/remove, so we keep +tokenized docs + chunk_ids in memory and rebuild the model lazily on search. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from agentic_cli.file_utils import atomic_write_json + + +def _tokenize(text: str) -> list[str]: + return text.lower().split() + + +class _BM25BackendBase: + _INDEX_FILE: str = "" + + def __init__(self): + self._chunk_ids: list[str] = [] + self._tokenized: list[list[str]] = [] + self._model = None + + @property + def size(self) -> int: + return len(self._chunk_ids) + + def add_documents(self, chunk_ids: list[str], texts: list[str]) -> None: + for cid, text in zip(chunk_ids, texts): + self._chunk_ids.append(cid) + self._tokenized.append(_tokenize(text)) + self._model = None + + def remove_documents(self, chunk_ids: list[str]) -> None: + remove_set = set(chunk_ids) + keep = [ + (cid, toks) + for cid, toks in zip(self._chunk_ids, self._tokenized) + if cid not in remove_set + ] + if keep: + self._chunk_ids, self._tokenized = map(list, zip(*keep)) + else: + self._chunk_ids, self._tokenized = [], [] + self._model = None + + def rebuild(self, chunk_ids: list[str], texts: list[str]) -> None: + self._chunk_ids = [] + self._tokenized = [] + self._model = None + self.add_documents(chunk_ids, texts) + + def save(self, path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + atomic_write_json( + path / self._INDEX_FILE, + {"chunk_ids": self._chunk_ids, "tokenized": self._tokenized}, + ) + + def load(self, path: Path) -> None: + index_path = path / self._INDEX_FILE + if not index_path.exists(): + return + data = json.loads(index_path.read_text()) + self._chunk_ids = data["chunk_ids"] + self._tokenized = data["tokenized"] + self._model = None + + +class RankBM25Index(_BM25BackendBase): + """BM25 backed by rank_bm25.BM25Plus (pure Python). + + BM25Plus rather than BM25Okapi because Okapi's IDF can go zero or + negative when a term appears in most of the corpus; BM25Plus adds a + delta offset that guarantees positive contributions on real matches. + """ + + _INDEX_FILE = "bm25_rank.json" + + def search(self, query: str, top_k: int = 10) -> list[tuple[str, float]]: + if not self._chunk_ids: + return [] + query_tokens = _tokenize(query) + if not query_tokens: + return [] + query_set = set(query_tokens) + if self._model is None: + from rank_bm25 import BM25Plus + + self._model = BM25Plus(self._tokenized) + scores = self._model.get_scores(query_tokens) + scored: list[tuple[str, float]] = [] + for cid, doc_tokens, score in zip( + self._chunk_ids, self._tokenized, scores + ): + if query_set.intersection(doc_tokens): + scored.append((cid, float(score))) + scored.sort(key=lambda x: x[1], reverse=True) + return scored[:top_k] + + +class BM25sIndex(_BM25BackendBase): + """BM25 backed by bm25s (NumPy/C-accelerated).""" + + _INDEX_FILE = "bm25s_sidecar.json" + + def _build_model(self) -> None: + import bm25s + + model = bm25s.BM25() + model.index(self._tokenized, show_progress=False) + self._model = model + + def search(self, query: str, top_k: int = 10) -> list[tuple[str, float]]: + if not self._chunk_ids: + return [] + query_tokens = _tokenize(query) + if not query_tokens: + return [] + if self._model is None: + self._build_model() + k = min(top_k, len(self._chunk_ids)) + docs, scores = self._model.retrieve( + [query_tokens], k=k, show_progress=False + ) + results: list[tuple[str, float]] = [] + for idx, score in zip(docs[0], scores[0]): + if score > 0: + results.append((self._chunk_ids[int(idx)], float(score))) + return results diff --git a/tests/test_bm25_index.py b/tests/test_bm25_index.py index b53e5a6..35e4655 100644 --- a/tests/test_bm25_index.py +++ b/tests/test_bm25_index.py @@ -65,3 +65,124 @@ def test_rebuild(self, tmp_path): assert index.size == 2 results = index.search("new doc one", top_k=5) assert results[0][0] == "c1" + + +def _rank_bm25_cls(): + pytest.importorskip("rank_bm25") + from agentic_cli.knowledge_base._bm25_backends import RankBM25Index + return RankBM25Index + + +def _bm25s_cls(): + pytest.importorskip("bm25s") + from agentic_cli.knowledge_base._bm25_backends import BM25sIndex + return BM25sIndex + + +@pytest.fixture(params=["rank_bm25", "bm25s"]) +def bm25_cls(request): + if request.param == "rank_bm25": + return _rank_bm25_cls() + return _bm25s_cls() + + +class TestBM25BackendContract: + """Contract tests applied to every real BM25 backend. + + Same behavior as MockBM25Index — any backend that wants to plug into + create_bm25_index() must satisfy all of these. + """ + + def test_add_and_search(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents( + ["chunk1", "chunk2", "chunk3"], + [ + "python programming language", + "java programming language", + "the weather is sunny", + ], + ) + results = index.search("python programming", top_k=2) + assert len(results) <= 2 + assert results[0][0] == "chunk1" + assert results[0][1] > 0 + + def test_search_returns_chunk_ids_and_scores(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["a", "b"], ["alpha beta", "gamma delta"]) + results = index.search("alpha", top_k=5) + assert results, "expected at least one result for matching query" + for chunk_id, score in results: + assert isinstance(chunk_id, str) + assert isinstance(score, float) + + def test_remove_documents(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["c1", "c2"], ["hello world", "goodbye world"]) + index.remove_documents(["c1"]) + assert index.size == 1 + results = index.search("hello", top_k=10) + chunk_ids = [cid for cid, _ in results] + assert "c1" not in chunk_ids + + def test_save_and_load(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents( + ["c1", "c2"], + ["test document content", "unrelated fruit basket"], + ) + index.save(tmp_path) + index2 = bm25_cls() + index2.load(tmp_path) + assert index2.size == 2 + results = index2.search("test document", top_k=5) + assert results + assert results[0][0] == "c1" + + def test_empty_search(self, tmp_path, bm25_cls): + index = bm25_cls() + results = index.search("anything", top_k=5) + assert results == [] + + def test_size_property(self, tmp_path, bm25_cls): + index = bm25_cls() + assert index.size == 0 + index.add_documents(["c1", "c2"], ["doc one", "doc two"]) + assert index.size == 2 + + def test_rebuild(self, tmp_path, bm25_cls): + index = bm25_cls() + index.add_documents(["old"], ["stale content"]) + index.rebuild(["c1", "c2"], ["new doc one", "new doc two"]) + assert index.size == 2 + results = index.search("new doc one", top_k=5) + assert results[0][0] == "c1" + + def test_load_missing_is_noop(self, tmp_path, bm25_cls): + index = bm25_cls() + # Loading from an empty dir should not raise; just leaves index empty. + index.load(tmp_path) + assert index.size == 0 + + +class TestCreateBM25IndexPicksRealBackend: + """When a real BM25 library is installed, create_bm25_index() must return it, + not the mock.""" + + def test_prefers_bm25s_when_available(self): + pytest.importorskip("bm25s") + from agentic_cli.knowledge_base.bm25_index import create_bm25_index + from agentic_cli.knowledge_base._bm25_backends import BM25sIndex + + index = create_bm25_index() + assert isinstance(index, BM25sIndex), ( + f"expected BM25sIndex, got {type(index).__name__}" + ) + + def test_use_mock_flag_returns_mock(self): + from agentic_cli.knowledge_base.bm25_index import create_bm25_index + from agentic_cli.knowledge_base._mock_bm25 import MockBM25Index + + index = create_bm25_index(use_mock=True) + assert isinstance(index, MockBM25Index) From de3e9a9dc1edc03b36b1ae13c27e7a234c5ffc74 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:53:47 -0400 Subject: [PATCH 02/41] docs(claude): note that docs/ is gitignored and must not be committed --- CLAUDE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 44dd3ef..3f9c4dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,9 @@ Workflow: 2. For features: create `feature/` (or `fix/` or `refactor/`) from `develop`, work there, merge back to `develop` 3. When ready to release: merge `develop` → `main` and tag the release +### What NOT to commit +- `docs/` is gitignored on purpose (see `.gitignore`). It is a scratchpad for review notes, plans, and internal analysis. **Never `git add docs/…` or suggest committing anything under `docs/`.** If a document belongs in the repo, it lives elsewhere (README, CHANGELOG, top-level `*.md`). + ## Development Principles ### Code Style From 38366223bc152811f334664aefa0884202fb5e27 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:06:48 -0400 Subject: [PATCH 03/41] refactor(workflow): extract HITL confirmation to backend-neutral module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LangGraph's graph_builder previously imported `is_dangerous` and `request_tool_confirmation` from `workflow.adk.plugins`, making the LangGraph backend carry a dependency on the ADK backend. Both functions are framework-neutral (they only touch the tool registry, service registry, and event model), so moving them to `workflow.confirmation` restores the intended "pluggable orchestrators behind one protocol" shape — a third orchestrator would no longer inherit an ADK dep via shared HITL logic. `ConfirmationPlugin` (the ADK Plugin wrapper) stays in `workflow.adk.plugins` and now imports the helpers from the neutral module. Added `tests/workflow/test_backend_isolation.py` to lock in the constraint: no file under `workflow/langgraph/` may import from `workflow/adk/*`, and vice versa. --- src/agentic_cli/workflow/adk/plugins.py | 58 ++--------------- src/agentic_cli/workflow/confirmation.py | 62 +++++++++++++++++++ .../workflow/langgraph/graph_builder.py | 2 +- tests/test_adk_plugins.py | 18 +++--- tests/workflow/test_backend_isolation.py | 56 +++++++++++++++++ 5 files changed, 133 insertions(+), 63 deletions(-) create mode 100644 src/agentic_cli/workflow/confirmation.py create mode 100644 tests/workflow/test_backend_isolation.py diff --git a/src/agentic_cli/workflow/adk/plugins.py b/src/agentic_cli/workflow/adk/plugins.py index afec1e5..0e2a5dc 100644 --- a/src/agentic_cli/workflow/adk/plugins.py +++ b/src/agentic_cli/workflow/adk/plugins.py @@ -9,7 +9,6 @@ import json import time -import uuid from collections import deque from datetime import datetime, timezone from pathlib import Path @@ -17,9 +16,11 @@ from google.adk.plugins.base_plugin import BasePlugin -from agentic_cli.tools.registry import get_registry, PermissionLevel -from agentic_cli.workflow.service_registry import get_service, WORKFLOW -from agentic_cli.workflow.events import WorkflowEvent, UserInputRequest, InputType +from agentic_cli.workflow.confirmation import ( + is_dangerous, + request_tool_confirmation, +) +from agentic_cli.workflow.events import WorkflowEvent from agentic_cli.logging import Loggers if TYPE_CHECKING: @@ -30,55 +31,6 @@ logger = Loggers.workflow() -_APPROVED_RESPONSES = ("yes", "y", "approve", "true") - - -def is_dangerous(tool_name: str) -> bool: - """Check if a tool is registered as DANGEROUS permission level. - - Uses the ToolRegistry's O(1) lookup directly — no caching needed. - """ - defn = get_registry().get(tool_name) - if defn is None: - return False - return defn.permission_level == PermissionLevel.DANGEROUS - - -async def request_tool_confirmation( - tool_name: str, tool_args: dict[str, Any] -) -> bool | None: - """Prompt user for confirmation of a dangerous tool call. - - Shared by ConfirmationPlugin (ADK) and _wrap_for_confirmation (LangGraph). - - Returns: - True if approved, False if denied, None if no workflow/callback available. - """ - workflow = get_service(WORKFLOW) - if workflow is None: - return None - - arg_summary = ", ".join( - f"{k}={repr(v)[:50]}" for k, v in list(tool_args.items())[:3] - ) - request = UserInputRequest( - request_id=str(uuid.uuid4())[:8], - tool_name=tool_name, - prompt=( - f"Tool requires approval: {tool_name}({arg_summary})\n\n" - f"Allow this operation? (yes/no)" - ), - input_type=InputType.CONFIRM, - default="no", - ) - - try: - response = await workflow.request_user_input(request) - except RuntimeError: - return None - - return response.strip().lower() in _APPROVED_RESPONSES - class ConfirmationPlugin(BasePlugin): """ADK Plugin that requires user confirmation for DANGEROUS tools. diff --git a/src/agentic_cli/workflow/confirmation.py b/src/agentic_cli/workflow/confirmation.py new file mode 100644 index 0000000..21c7266 --- /dev/null +++ b/src/agentic_cli/workflow/confirmation.py @@ -0,0 +1,62 @@ +"""Backend-neutral HITL confirmation for DANGEROUS tools. + +Both the ADK ConfirmationPlugin and the LangGraph tool wrapper call into +this module to decide whether a tool call needs user approval and, if so, +to request it through the active workflow manager. Keeping these helpers +here (rather than under workflow/adk/) prevents LangGraph from having to +import ADK internals. +""" + +from __future__ import annotations + +import uuid +from typing import Any + +from agentic_cli.tools.registry import PermissionLevel, get_registry +from agentic_cli.workflow.events import InputType, UserInputRequest +from agentic_cli.workflow.service_registry import WORKFLOW, get_service + +_APPROVED_RESPONSES = ("yes", "y", "approve", "true") + + +def is_dangerous(tool_name: str) -> bool: + """Return True if the named tool is registered as DANGEROUS.""" + defn = get_registry().get(tool_name) + if defn is None: + return False + return defn.permission_level == PermissionLevel.DANGEROUS + + +async def request_tool_confirmation( + tool_name: str, tool_args: dict[str, Any] +) -> bool | None: + """Prompt the user for confirmation of a dangerous tool call. + + Returns: + True if approved, False if denied, None if no workflow/callback + is available to prompt the user. + """ + workflow = get_service(WORKFLOW) + if workflow is None: + return None + + arg_summary = ", ".join( + f"{k}={repr(v)[:50]}" for k, v in list(tool_args.items())[:3] + ) + request = UserInputRequest( + request_id=str(uuid.uuid4())[:8], + tool_name=tool_name, + prompt=( + f"Tool requires approval: {tool_name}({arg_summary})\n\n" + f"Allow this operation? (yes/no)" + ), + input_type=InputType.CONFIRM, + default="no", + ) + + try: + response = await workflow.request_user_input(request) + except RuntimeError: + return None + + return response.strip().lower() in _APPROVED_RESPONSES diff --git a/src/agentic_cli/workflow/langgraph/graph_builder.py b/src/agentic_cli/workflow/langgraph/graph_builder.py index 58b6044..679cfef 100644 --- a/src/agentic_cli/workflow/langgraph/graph_builder.py +++ b/src/agentic_cli/workflow/langgraph/graph_builder.py @@ -12,7 +12,7 @@ from typing import Any, Callable, TYPE_CHECKING from agentic_cli.workflow.config import AgentConfig -from agentic_cli.workflow.adk.plugins import is_dangerous, request_tool_confirmation +from agentic_cli.workflow.confirmation import is_dangerous, request_tool_confirmation from agentic_cli.logging import Loggers if TYPE_CHECKING: diff --git a/tests/test_adk_plugins.py b/tests/test_adk_plugins.py index 3bf03ff..5c03f16 100644 --- a/tests/test_adk_plugins.py +++ b/tests/test_adk_plugins.py @@ -36,7 +36,7 @@ def test_dangerous_tool_detected(self): permission_level=PermissionLevel.DANGEROUS, ) - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): + with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): result = is_dangerous("risky_tool") assert result is True @@ -51,7 +51,7 @@ def test_safe_tool_not_dangerous(self): permission_level=PermissionLevel.SAFE, ) - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): + with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): result = is_dangerous("safe_tool") assert result is False @@ -61,7 +61,7 @@ def test_unknown_tool_not_dangerous(self): mock_registry = ToolRegistry() - with patch("agentic_cli.workflow.adk.plugins.get_registry", return_value=mock_registry): + with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): result = is_dangerous("unknown_tool") assert result is False @@ -97,7 +97,7 @@ async def test_dangerous_tool_approved(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), + patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), ): result = await plugin.before_tool_callback( tool=tool, @@ -123,7 +123,7 @@ async def test_dangerous_tool_denied(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), + patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), ): result = await plugin.before_tool_callback( tool=tool, @@ -143,7 +143,7 @@ async def test_no_workflow_passes_through(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=None), + patch("agentic_cli.workflow.confirmation.get_service", return_value=None), ): result = await plugin.before_tool_callback( tool=tool, @@ -165,7 +165,7 @@ async def test_no_callback_passes_through(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), + patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), ): result = await plugin.before_tool_callback( tool=tool, @@ -186,7 +186,7 @@ async def test_various_approval_responses(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), + patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), ): result = await plugin.before_tool_callback( tool=tool, @@ -213,7 +213,7 @@ async def test_arg_summary_truncation(self, plugin): with ( patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.adk.plugins.get_service", return_value=mock_workflow), + patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), ): result = await plugin.before_tool_callback( tool=tool, diff --git a/tests/workflow/test_backend_isolation.py b/tests/workflow/test_backend_isolation.py new file mode 100644 index 0000000..2714778 --- /dev/null +++ b/tests/workflow/test_backend_isolation.py @@ -0,0 +1,56 @@ +"""Import-hygiene tests enforcing backend isolation. + +ADK and LangGraph are pluggable orchestrators behind BaseWorkflowManager. +Neither backend should depend on the other — shared logic lives in +workflow.* (e.g. workflow.confirmation, workflow.events, workflow.service_registry). +If one backend imports from the other, adding a third orchestrator would +drag the dependency along. +""" + +from __future__ import annotations + +import re +from pathlib import Path + +import agentic_cli.workflow.langgraph +import agentic_cli.workflow.adk + + +_ADK_IMPORT = re.compile( + r"^\s*(from\s+agentic_cli\.workflow\.adk|import\s+agentic_cli\.workflow\.adk)", + re.MULTILINE, +) +_LG_IMPORT = re.compile( + r"^\s*(from\s+agentic_cli\.workflow\.langgraph|import\s+agentic_cli\.workflow\.langgraph)", + re.MULTILINE, +) + + +def _python_files(pkg) -> list[Path]: + root = Path(pkg.__file__).parent + return sorted(p for p in root.rglob("*.py") if "__pycache__" not in p.parts) + + +def test_langgraph_does_not_import_from_adk(): + offenders: list[str] = [] + for pyfile in _python_files(agentic_cli.workflow.langgraph): + text = pyfile.read_text() + if _ADK_IMPORT.search(text): + offenders.append(str(pyfile)) + assert not offenders, ( + "LangGraph backend must not import from workflow.adk.*. " + f"Offending files: {offenders}. " + "Move shared logic to workflow/.py (e.g. workflow/confirmation.py)." + ) + + +def test_adk_does_not_import_from_langgraph(): + offenders: list[str] = [] + for pyfile in _python_files(agentic_cli.workflow.adk): + text = pyfile.read_text() + if _LG_IMPORT.search(text): + offenders.append(str(pyfile)) + assert not offenders, ( + "ADK backend must not import from workflow.langgraph.*. " + f"Offending files: {offenders}." + ) From 2ac3a09a957adb027a65addbafb3540f9e387a66 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:32:31 -0400 Subject: [PATCH 04/41] refactor(tools/memory): dedupe registry-bound and factory-bound entry points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The @register_tool wrappers in memory_tools.py and the closures returned by factories.make_memory_tools() previously contained duplicated function bodies. This had already caused silent drift: search_memory returned different keys from each entry point (the factory version included `importance`, the registry version did not), and update_memory handled tags=None differently (factory treated it as "don't touch", registry as "clear"), so the same tool name exhibited different behavior depending on how the workflow wired it. Following the pattern already used for KB and ArXiv tools, extract four _with_store helpers in memory_tools.py that own the business logic. Both the @register_tool wrappers (service-registry bound) and the make_memory_tools() closures (explicit store bound) now delegate to the helpers, so they cannot drift. Unified contract: - search_memory always includes `importance` in result items and accepts include_archived. - update_memory uses the _SENTINEL default in both entry points: omit tags to leave them unchanged, pass None to clear, pass a list to replace. Added TestMemoryToolParity (5 tests) that feed identical inputs to both variants and assert identical output — locks the no-drift invariant in place. --- src/agentic_cli/tools/factories.py | 102 ++++++--------------- src/agentic_cli/tools/memory_tools.py | 125 +++++++++++++++++--------- tests/test_memory.py | 83 +++++++++++++++++ 3 files changed, 192 insertions(+), 118 deletions(-) diff --git a/src/agentic_cli/tools/factories.py b/src/agentic_cli/tools/factories.py index cf9b354..a437554 100644 --- a/src/agentic_cli/tools/factories.py +++ b/src/agentic_cli/tools/factories.py @@ -25,6 +25,11 @@ def make_memory_tools(memory_store, embedding_service=None) -> list[Callable]: """Create memory tools bound to a MemoryStore. + Thin closure-bound wrappers around the shared helpers in + ``tools.memory_tools``. The helpers take the store as an explicit arg + so the closure-bound and module-level (registry-bound) tool versions + stay in lockstep. + Args: memory_store: A MemoryStore instance. embedding_service: Optional EmbeddingService (unused, kept for interface compat). @@ -32,108 +37,55 @@ def make_memory_tools(memory_store, embedding_service=None) -> list[Callable]: Returns: [save_memory, search_memory, update_memory, delete_memory] """ + from agentic_cli.tools.memory_tools import ( + _SENTINEL, + _delete_memory_with_store, + _save_memory_with_store, + _search_memory_with_store, + _update_memory_with_store, + save_memory as _orig_save, + search_memory as _orig_search, + update_memory as _orig_update, + delete_memory as _orig_delete, + ) def save_memory( content: str, tags: list[str] | None = None, importance: int = 5, ) -> dict[str, Any]: - """Save information to persistent memory. - - Use this to remember important facts, preferences, or learnings - that should persist across sessions. - - Args: - content: The content to store. - tags: Optional tags for categorization. - importance: Importance rating 1-10 (default 5). Higher = more important. - - Returns: - A dict with the stored item ID and any similar existing memories. - """ - result = memory_store.store_with_similarity_check(content, tags=tags, importance=importance) - return { - "success": True, - "item_id": result["item_id"], - "message": "Saved to persistent memory", - "similar_existing": result["similar_existing"], - } + return _save_memory_with_store(memory_store, content, tags, importance) def search_memory( query: str, limit: int = 10, include_archived: bool = False, ) -> dict[str, Any]: - """Search persistent memory for stored information. - - Args: - query: The search query. - limit: Maximum number of results to return. - include_archived: If True, include archived (soft-deleted) memories. - - Returns: - A dict with matching memory items. - """ - results = memory_store.search(query, limit=limit, include_archived=include_archived) - items = [ - { - "id": item.id, - "content": item.content, - "tags": item.tags, - "importance": item.importance, - } - for item in results - ] - return { - "success": True, - "query": query, - "items": items, - "count": len(items), - } + return _search_memory_with_store( + memory_store, query, limit, include_archived + ) def update_memory( item_id: str, content: str | None = None, - tags: list[str] | None = None, + tags: list[str] | None = _SENTINEL, ) -> dict[str, Any]: - """Update an existing memory item. - - Args: - item_id: ID of the memory to update. - content: New content (optional). - tags: New tags (optional). Pass explicitly to update; omit (None) to leave unchanged. - - Returns: - A dict indicating success. - """ - # Don't forward tags to store.update() unless explicitly provided, - # since the store's sentinel default means "leave unchanged" but None means "clear tags". - if tags is None: - updated = memory_store.update(item_id, content=content) - else: - updated = memory_store.update(item_id, content=content, tags=tags) - return {"success": True, "updated": updated} + return _update_memory_with_store(memory_store, item_id, content, tags) def delete_memory( item_id: str, purge: bool = False, ) -> dict[str, Any]: - """Delete a memory item (soft-delete by default). - - Args: - item_id: ID of the memory to delete. - purge: If True, permanently remove. If False, archive. - - Returns: - A dict indicating success. - """ - deleted = memory_store.delete(item_id, purge=purge) - return {"success": True, "deleted": deleted} + return _delete_memory_with_store(memory_store, item_id, purge) save_memory.__name__ = "save_memory" + save_memory.__doc__ = _orig_save.__doc__ search_memory.__name__ = "search_memory" + search_memory.__doc__ = _orig_search.__doc__ update_memory.__name__ = "update_memory" + update_memory.__doc__ = _orig_update.__doc__ delete_memory.__name__ = "delete_memory" + delete_memory.__doc__ = _orig_delete.__doc__ return [save_memory, search_memory, update_memory, delete_memory] diff --git a/src/agentic_cli/tools/memory_tools.py b/src/agentic_cli/tools/memory_tools.py index 6e3ea5f..920ce29 100644 --- a/src/agentic_cli/tools/memory_tools.py +++ b/src/agentic_cli/tools/memory_tools.py @@ -428,6 +428,81 @@ def apply_forgetting(self, policy: ForgettingPolicy) -> dict[str, Any]: } +# --------------------------------------------------------------------------- +# Shared helpers — the single source of truth for tool behavior. +# +# Both the @register_tool wrappers below (service-registry bound) and the +# make_memory_tools() closures in tools/factories.py delegate to these +# helpers, so the two entry points cannot drift. +# --------------------------------------------------------------------------- + + +def _save_memory_with_store( + store: "MemoryStore", + content: str, + tags: list[str] | None, + importance: int, +) -> dict[str, Any]: + result = store.store_with_similarity_check( + content, tags=tags, importance=importance + ) + return { + "success": True, + "item_id": result["item_id"], + "message": "Saved to persistent memory", + "similar_existing": result["similar_existing"], + } + + +def _search_memory_with_store( + store: "MemoryStore", + query: str, + limit: int, + include_archived: bool, +) -> dict[str, Any]: + results = store.search(query, limit=limit, include_archived=include_archived) + items = [ + { + "id": item.id, + "content": item.content, + "tags": item.tags, + "importance": item.importance, + } + for item in results + ] + return { + "success": True, + "query": query, + "items": items, + "count": len(items), + } + + +def _update_memory_with_store( + store: "MemoryStore", + item_id: str, + content: str | None, + tags: list[str] | None | object, +) -> dict[str, Any]: + updated = store.update(item_id, content=content, tags=tags) + return {"success": True, "updated": updated} + + +def _delete_memory_with_store( + store: "MemoryStore", + item_id: str, + purge: bool, +) -> dict[str, Any]: + deleted = store.delete(item_id, purge=purge) + return {"success": True, "deleted": deleted} + + +# --------------------------------------------------------------------------- +# Registry-bound tool functions (@register_tool) — thin wrappers that resolve +# the store through the service registry and delegate to the helpers above. +# --------------------------------------------------------------------------- + + @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.SAFE, @@ -440,27 +515,15 @@ def save_memory( ) -> dict[str, Any]: """Save information to persistent memory. - Use this to remember important facts, preferences, or learnings - that should persist across sessions. - Args: content: The content to store. tags: Optional tags for categorization. importance: Importance rating 1-10 (default 5). - - Returns: - A dict with the stored item ID. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - result = store.store_with_similarity_check(content, tags=tags, importance=importance) - return { - "success": True, - "item_id": result["item_id"], - "message": "Saved to persistent memory", - "similar_existing": result["similar_existing"], - } + return _save_memory_with_store(store, content, tags, importance) @register_tool( @@ -471,35 +534,19 @@ def save_memory( def search_memory( query: str, limit: int = 10, + include_archived: bool = False, ) -> dict[str, Any]: """Search persistent memory for stored information. Args: query: The search query (substring match, case-insensitive). limit: Maximum number of results to return. - - Returns: - A dict with matching memory items. + include_archived: If True, include archived (soft-deleted) memories. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - results = store.search(query, limit=limit) - items = [ - { - "id": item.id, - "content": item.content, - "tags": item.tags, - } - for item in results - ] - - return { - "success": True, - "query": query, - "items": items, - "count": len(items), - } + return _search_memory_with_store(store, query, limit, include_archived) @register_tool( @@ -517,16 +564,12 @@ def update_memory( Args: item_id: ID of the memory to update. content: New content (optional). - tags: New tags (optional). Pass explicitly to update; omit to leave unchanged. - - Returns: - A dict indicating success. + tags: New tags. Omit to leave unchanged; pass None to clear. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - updated = store.update(item_id, content=content, tags=tags) - return {"success": True, "updated": updated} + return _update_memory_with_store(store, item_id, content, tags) @register_tool( @@ -543,12 +586,8 @@ def delete_memory( Args: item_id: ID of the memory to delete. purge: If True, permanently remove. If False, archive. - - Returns: - A dict indicating success. """ store = require_service(MEMORY_STORE) if isinstance(store, dict): return store - deleted = store.delete(item_id, purge=purge) - return {"success": True, "deleted": deleted} + return _delete_memory_with_store(store, item_id, purge) diff --git a/tests/test_memory.py b/tests/test_memory.py index 22902e1..657568c 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -377,6 +377,89 @@ def test_save_memory_with_importance(self, memory_store_ctx): assert result["success"] is True +class TestMemoryToolParity: + """The @register_tool wrappers in memory_tools.py and the closures + returned by make_memory_tools() must produce identical results for + identical inputs — both delegate to the same _with_store helpers, + and this test locks that invariant in place. + """ + + def _ctx(self, mock_context): + """Build both entry-point variants on top of one fresh MemoryStore.""" + from agentic_cli.tools.memory_tools import MemoryStore + from agentic_cli.tools.factories import make_memory_tools + from agentic_cli.tools import memory_tools as mt + from agentic_cli.workflow.service_registry import set_service_registry + + store = MemoryStore(mock_context.settings) + factory_tools = make_memory_tools(store) + token = set_service_registry({"memory_store": store}) + return store, factory_tools, mt, token + + def test_save_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + registry_out = mt.save_memory(content="fact A", tags=["x"], importance=7) + factory_out = factory_tools[0](content="fact A", tags=["x"], importance=7) + finally: + token.var.reset(token) + # IDs differ (fresh UUIDs), so compare the contract-bearing keys only. + assert registry_out.keys() == factory_out.keys() + for k in ("success", "message"): + assert registry_out[k] == factory_out[k] + + def test_search_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + mt.save_memory(content="alpha beta", tags=["t"], importance=6) + mt.save_memory(content="gamma delta") + registry_out = mt.search_memory(query="alpha", limit=5) + factory_out = factory_tools[1](query="alpha", limit=5) + finally: + token.var.reset(token) + assert registry_out == factory_out + + def test_update_output_parity_tags_unchanged(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r = mt.save_memory(content="orig", tags=["keep"]) + item_id = r["item_id"] + # Neither caller passes tags — both must leave them alone. + registry_out = mt.update_memory(item_id=item_id, content="new1") + assert store._items[item_id].tags == ["keep"] + factory_out = factory_tools[2](item_id=item_id, content="new2") + assert store._items[item_id].tags == ["keep"] + finally: + token.var.reset(token) + assert registry_out == factory_out == {"success": True, "updated": True} + + def test_update_output_parity_tags_cleared_with_none(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r = mt.save_memory(content="orig", tags=["clear-me"]) + item_id = r["item_id"] + # Both entry points must clear tags when explicitly passed None. + mt.update_memory(item_id=item_id, tags=None) + assert store._items[item_id].tags is None + + r2 = mt.save_memory(content="orig2", tags=["clear-me-2"]) + factory_tools[2](item_id=r2["item_id"], tags=None) + assert store._items[r2["item_id"]].tags is None + finally: + token.var.reset(token) + + def test_delete_output_parity(self, mock_context): + store, factory_tools, mt, token = self._ctx(mock_context) + try: + r1 = mt.save_memory(content="one") + r2 = mt.save_memory(content="two") + registry_out = mt.delete_memory(item_id=r1["item_id"]) + factory_out = factory_tools[3](item_id=r2["item_id"]) + finally: + token.var.reset(token) + assert registry_out == factory_out == {"success": True, "deleted": True} + + class TestMemorySemanticSearch: """Tests for semantic search in MemoryStore.""" From caa57d1220febc758f06e90f00fc25df8966eb1d Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:46:54 -0400 Subject: [PATCH 05/41] fix(kb): tighten concurrency contract in KnowledgeBaseManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three narrow fixes plus documentation of what was previously implicit: 1. `_backfill_running` check-and-set is now inside `with self._lock:`, closing a cross-thread TOCTOU window where two threads could both pass the `if self._backfill_running:` guard before either set the flag. Within a single asyncio event loop this was already safe (no `await` between check and set), but two threads — each with its own event loop, or a thread pool call plus a direct await — could race. 2. `backfill_sidecars` now snapshots `self._documents` and performs its per-iteration existence check under `_lock`, so a concurrent sync `delete_document` call from another thread cannot interleave with backfill iteration. 3. `tools/knowledge_tools.py` used to reach into `source_kb._sidecar_locks.setdefault(...)` to create per-doc async locks. Replaced with a new public accessor `KnowledgeBaseManager.get_or_create_sidecar_lock(doc_id)` that performs the insert under `_lock` (so two threads seeding the dict for the same new doc cannot produce distinct `asyncio.Lock` instances). The lazy `kb_read` fallback and `backfill_sidecars` now both go through this accessor. Also documented the contract in the class docstring: sync mutation API is thread-safe; async sidecar API is single-event-loop. Sharing a manager across multiple event loops is not supported — per-doc `asyncio.Lock` instances are bound to the loop they were created on. Added TestGetOrCreateSidecarLock with 4 tests covering accessor identity, usability, and that the lazy kb_read path actually routes through the accessor. --- src/agentic_cli/knowledge_base/manager.py | 79 +++++++++++++++++------ src/agentic_cli/tools/knowledge_tools.py | 3 +- tests/test_kb_sidecar.py | 71 ++++++++++++++++++++ 3 files changed, 133 insertions(+), 20 deletions(-) diff --git a/src/agentic_cli/knowledge_base/manager.py b/src/agentic_cli/knowledge_base/manager.py index 25ab021..5964871 100644 --- a/src/agentic_cli/knowledge_base/manager.py +++ b/src/agentic_cli/knowledge_base/manager.py @@ -96,6 +96,26 @@ class KnowledgeBaseManager: """Main interface for knowledge base operations. Provides document ingestion, semantic search, and document management. + + Concurrency model + ----------------- + Two lock surfaces with different semantics: + + * ``_lock`` (``threading.Lock``) — guards every mutation to the + in-memory document/chunk/vector/BM25 state and the check-and-set + of ``_backfill_running``. The sync mutation API + (``ingest_document``, ``search``, ``delete_document``, ``clear``) + is safe to call from any thread. + * ``_sidecar_locks`` (``dict[str, asyncio.Lock]``) — per-doc locks + that serialize concurrent LLM-backed sidecar generation for the + same document. Always obtain these through + :meth:`get_or_create_sidecar_lock`; never index the dict directly. + + The async sidecar path (``backfill_sidecars`` and the lazy + ``kb_read`` fallback in ``tools/knowledge_tools``) is safe within a + single asyncio event loop. Sharing a manager across multiple event + loops is NOT supported — the per-doc ``asyncio.Lock`` is bound to + the loop it was created on. """ def __init__( @@ -302,6 +322,23 @@ def _delete_sidecar(self, doc_id: str) -> None: if path.exists(): path.unlink() + def get_or_create_sidecar_lock(self, doc_id: str) -> asyncio.Lock: + """Return the per-doc async lock used to serialize sidecar writes. + + Same lock is returned for the same ``doc_id`` across repeat calls, + so ``backfill_sidecars`` and the lazy ``kb_read`` fallback cannot + double-generate a sidecar for the same document. + + The dict insert is guarded by ``_lock`` so two threads racing to + create the lock for a new doc will not produce two distinct + ``asyncio.Lock`` instances. + """ + existing = self._sidecar_locks.get(doc_id) + if existing is not None: + return existing + with self._lock: + return self._sidecar_locks.setdefault(doc_id, asyncio.Lock()) + def _index_md_path(self) -> Path: return self.kb_dir / "index.md" @@ -1028,35 +1065,40 @@ async def backfill_sidecars( Returns: Number of sidecars written this call. """ - if self._backfill_running: - raise BackfillAlreadyRunning( - "backfill_sidecars is already running on this KB" - ) - self._backfill_running = True - try: - written = 0 - # Snapshot the docs that need backfilling up front so the - # progress denominator is stable even if ingest/delete runs - # concurrently. + # Check-and-set the running flag and snapshot the work list under + # one lock acquisition so a second thread cannot slip past the + # guard between the read and the write. + with self._lock: + if self._backfill_running: + raise BackfillAlreadyRunning( + "backfill_sidecars is already running on this KB" + ) + self._backfill_running = True todo = [ doc - for doc in list(self._documents.values()) + for doc in self._documents.values() if not self._sidecar_path(doc.id).exists() ] + try: + written = 0 total = len(todo) for i, doc in enumerate(todo): # Guard against delete-during-backfill — doc may have - # been removed since the snapshot was taken. - if doc.id not in self._documents: - continue - lock = self._sidecar_locks.setdefault(doc.id, asyncio.Lock()) + # been removed since the snapshot was taken. Checked + # under the lock so a concurrent delete_document cannot + # interleave between the check and use. + with self._lock: + if doc.id not in self._documents: + continue + lock = self.get_or_create_sidecar_lock(doc.id) async with lock: # Double-check inside the lock — another task may # have written it (e.g. lazy kb_read fallback). if self._sidecar_path(doc.id).exists(): continue - if doc.id not in self._documents: - continue + with self._lock: + if doc.id not in self._documents: + continue if progress_cb is not None: progress_cb(i, total, doc) payload = await self.generate_sidecar_payload( @@ -1066,7 +1108,8 @@ async def backfill_sidecars( written += 1 return written finally: - self._backfill_running = False + with self._lock: + self._backfill_running = False class BackfillAlreadyRunning(RuntimeError): diff --git a/src/agentic_cli/tools/knowledge_tools.py b/src/agentic_cli/tools/knowledge_tools.py index c58f428..c6319a1 100644 --- a/src/agentic_cli/tools/knowledge_tools.py +++ b/src/agentic_cli/tools/knowledge_tools.py @@ -413,8 +413,7 @@ async def _read_document_from_kbs( # Sidecar mode (default). Lazily generate if missing. sidecar_path = source_kb._sidecar_path(doc.id) if not sidecar_path.exists(): - import asyncio as _asyncio - lock = source_kb._sidecar_locks.setdefault(doc.id, _asyncio.Lock()) + lock = source_kb.get_or_create_sidecar_lock(doc.id) async with lock: if not sidecar_path.exists(): # Resolve content the same way full=True does — extract from diff --git a/tests/test_kb_sidecar.py b/tests/test_kb_sidecar.py index 82b863d..174a51e 100644 --- a/tests/test_kb_sidecar.py +++ b/tests/test_kb_sidecar.py @@ -602,3 +602,74 @@ async def test_delete_removes_lock_entry(self, kb): kb.delete_document(d.id) assert d.id not in kb._sidecar_locks + + +class TestGetOrCreateSidecarLock: + """The public accessor replaces direct access to _sidecar_locks and is + the single path through which per-doc async locks are created, so + concurrent callers (backfill + lazy kb_read) always see the same lock. + """ + + @pytest.fixture + def kb(self, tmp_path): + return _make_kb(tmp_path) + + def test_returns_same_lock_on_repeat_calls(self, kb): + import asyncio + + lock1 = kb.get_or_create_sidecar_lock("doc-a") + lock2 = kb.get_or_create_sidecar_lock("doc-a") + assert lock1 is lock2 + assert isinstance(lock1, asyncio.Lock) + + def test_returns_distinct_locks_for_different_docs(self, kb): + lock_a = kb.get_or_create_sidecar_lock("doc-a") + lock_b = kb.get_or_create_sidecar_lock("doc-b") + assert lock_a is not lock_b + + async def test_lock_is_usable_for_serialization(self, kb): + """The returned lock actually serializes — two tasks that both + `async with` it run sequentially, not interleaved. + """ + import asyncio + + lock = kb.get_or_create_sidecar_lock("doc-a") + order: list[str] = [] + + async def worker(name: str, hold_ms: int): + async with lock: + order.append(f"{name}:start") + await asyncio.sleep(hold_ms / 1000) + order.append(f"{name}:end") + + await asyncio.gather(worker("A", 20), worker("B", 5)) + # A must fully finish before B starts (or vice versa) + assert order in ( + ["A:start", "A:end", "B:start", "B:end"], + ["B:start", "B:end", "A:start", "A:end"], + ) + + async def test_kb_read_lazy_path_uses_accessor(self, kb): + """The lazy sidecar generation path in knowledge_tools must go + through the public accessor, not reach into `_sidecar_locks` + directly. + """ + from agentic_cli.tools.knowledge_tools import _read_document_from_kbs + from unittest.mock import patch + + doc = kb.ingest_document( + content="body", title="X", source_type=SourceType.USER, + ) + kb._sidecar_path(doc.id).unlink() + + original = kb.get_or_create_sidecar_lock + seen: list[str] = [] + + def spy(doc_id: str): + seen.append(doc_id) + return original(doc_id) + + with patch.object(kb, "get_or_create_sidecar_lock", side_effect=spy): + await _read_document_from_kbs(kb, None, doc.id) + + assert doc.id in seen From 132ccaad64d850aa55dee28562e74196b0c88cf9 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:52:42 -0400 Subject: [PATCH 06/41] feat(permissions): scaffold permissions package --- .../workflow/permissions/__init__.py | 37 +++++++++++++++++++ tests/permissions/__init__.py | 0 2 files changed, 37 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/__init__.py create mode 100644 tests/permissions/__init__.py diff --git a/src/agentic_cli/workflow/permissions/__init__.py b/src/agentic_cli/workflow/permissions/__init__.py new file mode 100644 index 0000000..d148e1a --- /dev/null +++ b/src/agentic_cli/workflow/permissions/__init__.py @@ -0,0 +1,37 @@ +# src/agentic_cli/workflow/permissions/__init__.py +"""Centralised permission system. + +See docs/superpowers/specs/2026-04-18-permissions-system-design.md for the +full design. Public API re-exported here; framework-bindings live under +``workflow/adk/permission_plugin.py`` and ``workflow/langgraph/permission_wrap.py``. +""" + +from agentic_cli.workflow.permissions.capabilities import ( + Capability, + CapabilitiesSpec, + EXEMPT, + ResolvedCapability, +) +from agentic_cli.workflow.permissions.engine import PermissionEngine +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) +from agentic_cli.workflow.permissions.store import PermissionContext + +__all__ = [ + "AskScope", + "Capability", + "CapabilitiesSpec", + "CheckResult", + "EXEMPT", + "Effect", + "PermissionContext", + "PermissionEngine", + "ResolvedCapability", + "Rule", + "RuleSource", +] diff --git a/tests/permissions/__init__.py b/tests/permissions/__init__.py new file mode 100644 index 0000000..e69de29 From 6ef9dacef271343e3ae95ead1f5c71f39d7791eb Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:55:09 -0400 Subject: [PATCH 07/41] feat(permissions): Capability, ResolvedCapability, EXEMPT sentinel Guard sibling imports in __init__.py with try/except so each task's tests can run in isolation before all modules are implemented. --- .../workflow/permissions/__init__.py | 30 ++++++++---- .../workflow/permissions/capabilities.py | 44 +++++++++++++++++ tests/permissions/test_capabilities.py | 47 +++++++++++++++++++ 3 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 src/agentic_cli/workflow/permissions/capabilities.py create mode 100644 tests/permissions/test_capabilities.py diff --git a/src/agentic_cli/workflow/permissions/__init__.py b/src/agentic_cli/workflow/permissions/__init__.py index d148e1a..310209d 100644 --- a/src/agentic_cli/workflow/permissions/__init__.py +++ b/src/agentic_cli/workflow/permissions/__init__.py @@ -12,15 +12,27 @@ EXEMPT, ResolvedCapability, ) -from agentic_cli.workflow.permissions.engine import PermissionEngine -from agentic_cli.workflow.permissions.rules import ( - AskScope, - CheckResult, - Effect, - Rule, - RuleSource, -) -from agentic_cli.workflow.permissions.store import PermissionContext + +try: + from agentic_cli.workflow.permissions.engine import PermissionEngine +except ImportError: # not yet implemented + PermissionEngine = None # type: ignore[assignment,misc] + +try: + from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, + ) +except ImportError: # not yet implemented + AskScope = CheckResult = Effect = Rule = RuleSource = None # type: ignore[assignment,misc] + +try: + from agentic_cli.workflow.permissions.store import PermissionContext +except ImportError: # not yet implemented + PermissionContext = None # type: ignore[assignment,misc] __all__ = [ "AskScope", diff --git a/src/agentic_cli/workflow/permissions/capabilities.py b/src/agentic_cli/workflow/permissions/capabilities.py new file mode 100644 index 0000000..0cb1de8 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/capabilities.py @@ -0,0 +1,44 @@ +"""Capability declarations for tools. + +A ``Capability`` describes one side effect a tool may perform, as a +``(namespace.action, target_source)`` pair. ``ResolvedCapability`` is the +concrete form emitted at call time, with the target extracted from the +tool's arguments and canonicalized by the namespace's matcher. + +``EXEMPT`` is a sentinel used in ``@register_tool(capabilities=EXEMPT)`` +to mark tools that explicitly require no permission check. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final + + +@dataclass(frozen=True) +class Capability: + """A capability a tool needs. Resolved against args at call time.""" + + name: str # e.g. "filesystem.read" + target_arg: str | None = None # arg name holding the target; None → target "*" + + +@dataclass(frozen=True) +class ResolvedCapability: + """Capability + concrete, canonicalized target.""" + + name: str + target: str + + +@dataclass(frozen=True) +class _CapabilityExempt: + """Sentinel type; see ``EXEMPT`` for the singleton value.""" + + def __bool__(self) -> bool: # truthy so `not caps` ≠ "missing" + return True + + +EXEMPT: Final[_CapabilityExempt] = _CapabilityExempt() + +CapabilitiesSpec = list[Capability] | _CapabilityExempt diff --git a/tests/permissions/test_capabilities.py b/tests/permissions/test_capabilities.py new file mode 100644 index 0000000..8a5eb36 --- /dev/null +++ b/tests/permissions/test_capabilities.py @@ -0,0 +1,47 @@ +"""Tests for capability declarations.""" + +import pytest + +from agentic_cli.workflow.permissions.capabilities import ( + EXEMPT, + Capability, + ResolvedCapability, + _CapabilityExempt, +) + + +class TestCapability: + def test_targeted_capability(self): + cap = Capability("filesystem.read", target_arg="path") + assert cap.name == "filesystem.read" + assert cap.target_arg == "path" + + def test_targetless_capability(self): + cap = Capability("python.exec") + assert cap.name == "python.exec" + assert cap.target_arg is None + + def test_capability_is_hashable(self): + a = Capability("filesystem.read", target_arg="path") + b = Capability("filesystem.read", target_arg="path") + assert a == b + assert hash(a) == hash(b) + + +class TestResolvedCapability: + def test_carries_name_and_target(self): + r = ResolvedCapability(name="filesystem.read", target="/abs/path") + assert r.name == "filesystem.read" + assert r.target == "/abs/path" + + +class TestExempt: + def test_exempt_is_singleton_of_sentinel_type(self): + assert isinstance(EXEMPT, _CapabilityExempt) + # Same instance referenced everywhere + from agentic_cli.workflow.permissions.capabilities import EXEMPT as also_exempt + assert EXEMPT is also_exempt + + def test_exempt_is_truthy(self): + # So `not caps` in adapters doesn't misclassify EXEMPT as "missing". + assert bool(EXEMPT) From ae2b5e9ef68176f018bc70674907dc6c53a6de92 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:57:04 -0400 Subject: [PATCH 08/41] feat(permissions): Rule, Effect, RuleSource, AskScope, CheckResult --- src/agentic_cli/workflow/permissions/rules.py | 39 +++++++++++++++ tests/permissions/test_rules.py | 47 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/rules.py create mode 100644 tests/permissions/test_rules.py diff --git a/src/agentic_cli/workflow/permissions/rules.py b/src/agentic_cli/workflow/permissions/rules.py new file mode 100644 index 0000000..1636b57 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/rules.py @@ -0,0 +1,39 @@ +"""Rule, decision, and scope types for the permission engine.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + + +class Effect(str, Enum): + ALLOW = "allow" + DENY = "deny" + + +class RuleSource(str, Enum): + BUILTIN = "builtin" + USER = "user" + PROJECT = "project" + SESSION = "session" + + +class AskScope(str, Enum): + ONCE = "once" + SESSION = "session" + PROJECT = "project" + DENY = "deny" + + +@dataclass(frozen=True) +class Rule: + capability: str + target: str + effect: Effect + source: RuleSource + + +@dataclass +class CheckResult: + allowed: bool + reason: str diff --git a/tests/permissions/test_rules.py b/tests/permissions/test_rules.py new file mode 100644 index 0000000..929ce57 --- /dev/null +++ b/tests/permissions/test_rules.py @@ -0,0 +1,47 @@ +"""Tests for rule/decision data types.""" + +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) + + +class TestEnums: + def test_effect_values(self): + assert Effect.ALLOW.value == "allow" + assert Effect.DENY.value == "deny" + + def test_rule_source_values(self): + assert {s.value for s in RuleSource} == {"builtin", "user", "project", "session"} + + def test_ask_scope_values(self): + assert {s.value for s in AskScope} == {"once", "session", "project", "deny"} + + +class TestRule: + def test_fields(self): + r = Rule("filesystem.read", "/abs/**", Effect.ALLOW, RuleSource.BUILTIN) + assert r.capability == "filesystem.read" + assert r.target == "/abs/**" + assert r.effect is Effect.ALLOW + assert r.source is RuleSource.BUILTIN + + def test_rule_is_hashable(self): + a = Rule("filesystem.read", "/x", Effect.ALLOW, RuleSource.BUILTIN) + b = Rule("filesystem.read", "/x", Effect.ALLOW, RuleSource.BUILTIN) + assert a == b + assert hash(a) == hash(b) + + +class TestCheckResult: + def test_allowed(self): + res = CheckResult(True, "rule: builtin/allow filesystem.read") + assert res.allowed is True + assert "allow" in res.reason + + def test_denied(self): + res = CheckResult(False, "user denied") + assert res.allowed is False From 163b3eb1cb022aadb710e10531ec807d65e506d6 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:58:48 -0400 Subject: [PATCH 09/41] feat(permissions): PermissionContext + variable substitution --- src/agentic_cli/workflow/permissions/store.py | 31 ++++++++++++++++++ tests/permissions/test_store.py | 32 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/store.py create mode 100644 tests/permissions/test_store.py diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py new file mode 100644 index 0000000..1c37f00 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/store.py @@ -0,0 +1,31 @@ +# src/agentic_cli/workflow/permissions/store.py +"""Context, JSON persistence, and builtin rules for the permission engine. + +Only PermissionContext is implemented at this point — load/save helpers +and BUILTIN_RULES land in Tasks 10–12. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class PermissionContext: + """Static per-run context exposed to matchers and substitution. + + Attributes: + workdir: Absolute current working directory. + home: Absolute home directory. + """ + + workdir: Path + home: Path + + def substitute(self, s: str) -> str: + """Expand ${workdir} and ${home} in a pattern string.""" + return ( + s.replace("${workdir}", str(self.workdir)) + .replace("${home}", str(self.home)) + ) diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py new file mode 100644 index 0000000..b6e2a65 --- /dev/null +++ b/tests/permissions/test_store.py @@ -0,0 +1,32 @@ +"""Tests for PermissionContext + JSON store helpers.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions.store import PermissionContext + + +class TestPermissionContext: + def test_fields(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.workdir == tmp_path + assert ctx.home == Path("/fake/home") + + def test_substitute_workdir(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + out = ctx.substitute("${workdir}/src/**") + assert out == f"{tmp_path}/src/**" + + def test_substitute_home(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.substitute("${home}/.cache") == "/fake/home/.cache" + + def test_substitute_multiple(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + out = ctx.substitute("${workdir}:${home}") + assert out == f"{tmp_path}:/fake/home" + + def test_substitute_unknown_variable_passes_through(self, tmp_path: Path): + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert ctx.substitute("${unknown}/x") == "${unknown}/x" From 569b7518161cfa9472fb9273aa754e0f2f8d35d3 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:00:18 -0400 Subject: [PATCH 10/41] feat(permissions): Matcher protocol + StringGlobMatcher --- .../workflow/permissions/matchers.py | 33 ++++++++++++++++ tests/permissions/test_matchers.py | 38 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/matchers.py create mode 100644 tests/permissions/test_matchers.py diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py new file mode 100644 index 0000000..5a10917 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -0,0 +1,33 @@ +# src/agentic_cli/workflow/permissions/matchers.py +"""Matchers for capability targets. + +Each capability namespace (``filesystem.*``, ``http.*``, ``shell.*``) has a +Matcher that knows how to canonicalize targets + patterns and compare them. +Unknown namespaces fall back to ``StringGlobMatcher``. +""" + +from __future__ import annotations + +import fnmatch +from typing import Protocol, runtime_checkable + +from agentic_cli.workflow.permissions.store import PermissionContext + + +@runtime_checkable +class Matcher(Protocol): + """Canonicalise and match targets for a capability namespace.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: ... + + def matches(self, pattern: str, target: str) -> bool: ... + + +class StringGlobMatcher: + """Default fallback: ``${...}`` substitution + ``fnmatch``.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + return ctx.substitute(s).strip() + + def matches(self, pattern: str, target: str) -> bool: + return fnmatch.fnmatchcase(target, pattern) diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py new file mode 100644 index 0000000..bd9d356 --- /dev/null +++ b/tests/permissions/test_matchers.py @@ -0,0 +1,38 @@ +# tests/permissions/test_matchers.py +"""Tests for permission matchers + cap-name glob.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions.matchers import ( + Matcher, + StringGlobMatcher, +) +from agentic_cli.workflow.permissions.store import PermissionContext + + +@pytest.fixture +def ctx(tmp_path: Path) -> PermissionContext: + return PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + + +class TestStringGlobMatcher: + def test_canonicalize_strips_and_substitutes(self, ctx): + m = StringGlobMatcher() + assert m.canonicalize(" ${workdir}/foo ", ctx) == f"{ctx.workdir}/foo" + + def test_matches_exact(self, ctx): + m = StringGlobMatcher() + assert m.matches("python.exec", "python.exec") + + def test_matches_glob_star(self, ctx): + m = StringGlobMatcher() + assert m.matches("*", "anything") + assert m.matches("foo*", "foobar") + assert not m.matches("foo*", "barbaz") + + +class TestMatcherProtocol: + def test_string_glob_matcher_satisfies_protocol(self): + assert isinstance(StringGlobMatcher(), Matcher) From 78114e5d2639bb4cbc0992e7b9fa915f7d9bbc06 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:02:55 -0400 Subject: [PATCH 11/41] feat(permissions): PathMatcher with ** glob support Add PathMatcher class and _glob_to_regex helper to matchers.py. --- .../workflow/permissions/matchers.py | 72 +++++++++++++++++++ tests/permissions/test_matchers.py | 52 ++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py index 5a10917..5cdc069 100644 --- a/src/agentic_cli/workflow/permissions/matchers.py +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -9,6 +9,9 @@ from __future__ import annotations import fnmatch +import re +from functools import lru_cache +from pathlib import Path from typing import Protocol, runtime_checkable from agentic_cli.workflow.permissions.store import PermissionContext @@ -31,3 +34,72 @@ def canonicalize(self, s: str, ctx: PermissionContext) -> str: def matches(self, pattern: str, target: str) -> bool: return fnmatch.fnmatchcase(target, pattern) + + +@lru_cache(maxsize=512) +def _glob_to_regex(pattern: str) -> re.Pattern[str]: + """Translate a ``**``-aware glob into a compiled regex. + + * ``*`` matches one path segment (no ``/``). + * ``**`` matches zero or more complete path segments (including their + separating ``/`` characters). + * ``?`` matches one non-``/`` character within a segment. + """ + segments = pattern.split("/") + # Build per-segment regex pieces. Each piece is the regex for that + # segment *without* surrounding slashes; we join with "/" afterwards, + # but "**" segments need special junction treatment. + pieces: list[str] = [] + for seg in segments: + if seg == "**": + pieces.append("**") # placeholder; resolved during join below + continue + seg_re = "" + for ch in seg: + if ch == "*": + seg_re += "[^/]*" + elif ch == "?": + seg_re += "[^/]" + else: + seg_re += re.escape(ch) + pieces.append(seg_re) + + # Join pieces, collapsing "**" placeholders. + # "**" as a segment means "zero or more /" sequences. + # We absorb the "/" on the right side of "**" into its regex so that + # a/z still matches a/**/z (zero repetitions absorbs the "/" between + # "a" and "z" is just the normal "/" join when ** is absent). + # + # Strategy: iterate and build the body string, handling ** specially. + body = "" + for i, piece in enumerate(pieces): + if piece == "**": + # Replace the preceding "/" (already appended) with nothing, + # then emit the zero-or-more pattern that *includes* a trailing "/". + # We do this by emitting `(?:[^/]+/)*` and NOT prepending a "/". + # But we already appended "/" before this piece if i > 0. + # So strip the trailing "/" from body and emit the junction regex. + if i > 0 and body.endswith("/"): + body = body[:-1] + # Zero or more / sequences + body += "(?:/[^/]+)*" + else: + if i > 0: + body += "/" + body += piece + + return re.compile(f"^{body}$") + + +class PathMatcher: + """Matcher for ``filesystem.*`` capabilities.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + s = ctx.substitute(s) + p = Path(s).expanduser() + if not p.is_absolute(): + p = ctx.workdir / p + return str(Path(p).resolve(strict=False)) + + def matches(self, pattern: str, target: str) -> bool: + return bool(_glob_to_regex(pattern).match(target)) diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py index bd9d356..3c7cd4c 100644 --- a/tests/permissions/test_matchers.py +++ b/tests/permissions/test_matchers.py @@ -36,3 +36,55 @@ def test_matches_glob_star(self, ctx): class TestMatcherProtocol: def test_string_glob_matcher_satisfies_protocol(self): assert isinstance(StringGlobMatcher(), Matcher) + + +class TestPathMatcher: + def test_canonicalize_substitutes_workdir(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.canonicalize("${workdir}/src/**", ctx) == f"{ctx.workdir}/src/**" + + def test_canonicalize_expands_home(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + # ctx.home is a fake absolute path; expanduser() won't change it, + # but an input like "~/x" should be expanded via the OS user's home + # only when there isn't a ${home} substitution — we use ${home}. + assert m.canonicalize("${home}/.cache", ctx) == "/fake/home/.cache" + + def test_canonicalize_anchors_relative_to_workdir(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.canonicalize("foo/bar", ctx) == f"{ctx.workdir}/foo/bar" + + def test_canonicalize_resolves_dotdot(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + # ${workdir} is tmp_path; ../x resolves out of it. + out = m.canonicalize("${workdir}/a/../b", ctx) + assert out == f"{ctx.workdir}/b" + + def test_match_single_star_is_single_segment(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/*/c", "/a/b/c") + assert not m.matches("/a/*/c", "/a/b/x/c") + + def test_match_double_star_crosses_segments(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/**/z", "/a/z") + assert m.matches("/a/**/z", "/a/b/z") + assert m.matches("/a/**/z", "/a/b/c/z") + assert not m.matches("/a/**/z", "/x/b/z") + + def test_match_question_mark_single_char(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert m.matches("/a/?/c", "/a/b/c") + assert not m.matches("/a/?/c", "/a/bb/c") + + def test_match_case_sensitive(self, ctx): + from agentic_cli.workflow.permissions.matchers import PathMatcher + m = PathMatcher() + assert not m.matches("/A/b", "/a/b") From ffccb771e5ac705e5ea71ea9f778552f4eb04b47 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:05:14 -0400 Subject: [PATCH 12/41] feat(permissions): URLMatcher for http.* capabilities --- .../workflow/permissions/matchers.py | 39 +++++++++++++++++++ tests/permissions/test_matchers.py | 35 +++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py index 5cdc069..a98842a 100644 --- a/src/agentic_cli/workflow/permissions/matchers.py +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -13,6 +13,7 @@ from functools import lru_cache from pathlib import Path from typing import Protocol, runtime_checkable +from urllib.parse import urlsplit, urlunsplit from agentic_cli.workflow.permissions.store import PermissionContext @@ -103,3 +104,41 @@ def canonicalize(self, s: str, ctx: PermissionContext) -> str: def matches(self, pattern: str, target: str) -> bool: return bool(_glob_to_regex(pattern).match(target)) + + +class URLMatcher: + """Matcher for ``http.*`` capabilities.""" + + _DEFAULT_PORTS = {"http": 80, "https": 443} + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + s = ctx.substitute(s) + parts = urlsplit(s if "://" in s else f"https://{s}") + scheme = parts.scheme.lower() or "https" + host = (parts.hostname or "").lower() + port = parts.port + if port is not None and port == self._DEFAULT_PORTS.get(scheme): + netloc = host + elif port is not None: + netloc = f"{host}:{port}" + else: + netloc = host + path = parts.path + query = parts.query + # Drop fragment; reassemble + return urlunsplit((scheme, netloc, path, query, "")) + + def matches(self, pattern: str, target: str) -> bool: + pp = urlsplit(pattern if "://" in pattern else f"https://{pattern}") + tt = urlsplit(target) + if pp.scheme.lower() != tt.scheme.lower(): + return False + if (pp.hostname or "").lower() != (tt.hostname or "").lower(): + return False + if pp.port and pp.port != tt.port: + return False + if not _glob_to_regex(pp.path or "/").match(tt.path or "/"): + return False + if pp.query and not fnmatch.fnmatchcase(tt.query, pp.query): + return False + return True diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py index 3c7cd4c..b1a31df 100644 --- a/tests/permissions/test_matchers.py +++ b/tests/permissions/test_matchers.py @@ -88,3 +88,38 @@ def test_match_case_sensitive(self, ctx): from agentic_cli.workflow.permissions.matchers import PathMatcher m = PathMatcher() assert not m.matches("/A/b", "/a/b") + + +class TestURLMatcher: + def test_canonicalize_lowercases_scheme_and_host(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("HTTPS://API.GitHub.com/repos", ctx) == "https://api.github.com/repos" + + def test_canonicalize_strips_default_port(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("https://example.com:443/x", ctx) == "https://example.com/x" + assert m.canonicalize("http://example.com:80/x", ctx) == "http://example.com/x" + + def test_canonicalize_defaults_to_https(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("api.github.com/x", ctx) == "https://api.github.com/x" + + def test_canonicalize_drops_fragment(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.canonicalize("https://example.com/x#frag", ctx) == "https://example.com/x" + + def test_matches_host_exact(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.matches("https://api.github.com/**", "https://api.github.com/repos/foo") + assert not m.matches("https://api.github.com/**", "https://pypi.org/x") + + def test_matches_path_glob(self, ctx): + from agentic_cli.workflow.permissions.matchers import URLMatcher + m = URLMatcher() + assert m.matches("https://api.github.com/repos/**", "https://api.github.com/repos/owner/repo") + assert not m.matches("https://api.github.com/repos/**", "https://api.github.com/gists/1") From 8d2ad3d03676ab8680361f72e971c6fb6a7cd952 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:06:40 -0400 Subject: [PATCH 13/41] feat(permissions): ShellMatcher for shell.* capabilities --- .../workflow/permissions/matchers.py | 10 ++++++++++ tests/permissions/test_matchers.py | 20 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py index a98842a..d81e4ab 100644 --- a/src/agentic_cli/workflow/permissions/matchers.py +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -142,3 +142,13 @@ def matches(self, pattern: str, target: str) -> bool: if pp.query and not fnmatch.fnmatchcase(tt.query, pp.query): return False return True + + +class ShellMatcher: + """Matcher for ``shell.*`` capabilities.""" + + def canonicalize(self, s: str, ctx: PermissionContext) -> str: + return ctx.substitute(s).strip() + + def matches(self, pattern: str, target: str) -> bool: + return fnmatch.fnmatchcase(target, pattern) diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py index b1a31df..f8dd823 100644 --- a/tests/permissions/test_matchers.py +++ b/tests/permissions/test_matchers.py @@ -123,3 +123,23 @@ def test_matches_path_glob(self, ctx): m = URLMatcher() assert m.matches("https://api.github.com/repos/**", "https://api.github.com/repos/owner/repo") assert not m.matches("https://api.github.com/repos/**", "https://api.github.com/gists/1") + + +class TestShellMatcher: + def test_canonicalize_trims_and_substitutes(self, ctx): + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.canonicalize(" ls ${workdir} ", ctx) == f"ls {ctx.workdir}" + + def test_matches_whole_command_glob(self, ctx): + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.matches("git *", "git status") + assert m.matches("git commit*", "git commit -m 'x'") + assert not m.matches("git *", "svn up") + + def test_matches_does_not_tokenize(self, ctx): + """Shell matcher is intentionally naive; risk assessor handles semantics.""" + from agentic_cli.workflow.permissions.matchers import ShellMatcher + m = ShellMatcher() + assert m.matches("git *", "git rm -rf /") # permission matcher allows From 67ae7618fd1d6193b39617ccd8a617d959612f5f Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:08:02 -0400 Subject: [PATCH 14/41] feat(permissions): matcher registry + capability-name glob --- .../workflow/permissions/matchers.py | 23 ++++++++++++++ tests/permissions/test_matchers.py | 30 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py index d81e4ab..97548cf 100644 --- a/src/agentic_cli/workflow/permissions/matchers.py +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -152,3 +152,26 @@ def canonicalize(self, s: str, ctx: PermissionContext) -> str: def matches(self, pattern: str, target: str) -> bool: return fnmatch.fnmatchcase(target, pattern) + + +_MATCHERS: dict[str, Matcher] = { + "filesystem": PathMatcher(), + "http": URLMatcher(), + "shell": ShellMatcher(), +} +_FALLBACK: Matcher = StringGlobMatcher() + + +def get_matcher(capability: str) -> Matcher: + """Return the matcher for a capability name, or the string-glob fallback.""" + ns = capability.split(".", 1)[0] + return _MATCHERS.get(ns, _FALLBACK) + + +def _cap_matches(rule_cap: str, target_cap: str) -> bool: + """Capability-name glob: exact / ``ns.*`` / ``*``.""" + if rule_cap == "*" or rule_cap == target_cap: + return True + if rule_cap.endswith(".*"): + return target_cap.startswith(rule_cap[:-1]) + return False diff --git a/tests/permissions/test_matchers.py b/tests/permissions/test_matchers.py index f8dd823..0291023 100644 --- a/tests/permissions/test_matchers.py +++ b/tests/permissions/test_matchers.py @@ -143,3 +143,33 @@ def test_matches_does_not_tokenize(self, ctx): from agentic_cli.workflow.permissions.matchers import ShellMatcher m = ShellMatcher() assert m.matches("git *", "git rm -rf /") # permission matcher allows + + +class TestMatcherRegistry: + def test_get_matcher_dispatches_by_namespace(self): + from agentic_cli.workflow.permissions.matchers import ( + PathMatcher, URLMatcher, ShellMatcher, StringGlobMatcher, get_matcher, + ) + assert isinstance(get_matcher("filesystem.read"), PathMatcher) + assert isinstance(get_matcher("http.read"), URLMatcher) + assert isinstance(get_matcher("shell.exec"), ShellMatcher) + assert isinstance(get_matcher("python.exec"), StringGlobMatcher) # fallback + assert isinstance(get_matcher("kb.read"), StringGlobMatcher) + + +class TestCapMatches: + def test_exact(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("filesystem.read", "filesystem.read") + assert not _cap_matches("filesystem.read", "filesystem.write") + + def test_namespace_glob(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("filesystem.*", "filesystem.read") + assert _cap_matches("filesystem.*", "filesystem.write") + assert not _cap_matches("filesystem.*", "http.read") + + def test_star_matches_everything(self): + from agentic_cli.workflow.permissions.matchers import _cap_matches + assert _cap_matches("*", "filesystem.read") + assert _cap_matches("*", "anything") From a4b2a3ab65238a34e43e6e89021098c940f15f50 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:09:41 -0400 Subject: [PATCH 15/41] feat(permissions): BUILTIN_RULES default policy --- src/agentic_cli/workflow/permissions/store.py | 21 ++++++++++++++++ tests/permissions/test_store.py | 25 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py index 1c37f00..591b74b 100644 --- a/src/agentic_cli/workflow/permissions/store.py +++ b/src/agentic_cli/workflow/permissions/store.py @@ -10,6 +10,8 @@ from dataclasses import dataclass from pathlib import Path +from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + @dataclass(frozen=True) class PermissionContext: @@ -29,3 +31,22 @@ def substitute(self, s: str) -> str: s.replace("${workdir}", str(self.workdir)) .replace("${home}", str(self.home)) ) + + +BUILTIN_RULES: list[Rule] = [ + # Routine reads inside workdir: allowed without prompt. + Rule("filesystem.read", "${workdir}/**", Effect.ALLOW, RuleSource.BUILTIN), + + # System locations — writes always denied. + Rule("filesystem.write", "/etc/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/usr/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/bin/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/sbin/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/boot/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "/System/**", Effect.DENY, RuleSource.BUILTIN), # macOS + + # Credential directories. + Rule("filesystem.write", "${home}/.ssh/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "${home}/.aws/**", Effect.DENY, RuleSource.BUILTIN), + Rule("filesystem.write", "${home}/.gnupg/**", Effect.DENY, RuleSource.BUILTIN), +] diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py index b6e2a65..1439fb1 100644 --- a/tests/permissions/test_store.py +++ b/tests/permissions/test_store.py @@ -30,3 +30,28 @@ def test_substitute_multiple(self, tmp_path: Path): def test_substitute_unknown_variable_passes_through(self, tmp_path: Path): ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) assert ctx.substitute("${unknown}/x") == "${unknown}/x" + + +class TestBuiltinRules: + def test_has_expected_entries(self): + from agentic_cli.workflow.permissions.rules import Effect, RuleSource + from agentic_cli.workflow.permissions.store import BUILTIN_RULES + + allows = [r for r in BUILTIN_RULES if r.effect is Effect.ALLOW] + denies = [r for r in BUILTIN_RULES if r.effect is Effect.DENY] + + # Reads within workdir allowed: + assert any(r.capability == "filesystem.read" and "${workdir}" in r.target for r in allows) + # System dirs denied: + system_targets = {r.target for r in denies if r.capability == "filesystem.write"} + for path in ("/etc/**", "/usr/**", "/bin/**", "/sbin/**", "/boot/**", "/System/**"): + assert path in system_targets + # Credential dirs denied: + for path in ("${home}/.ssh/**", "${home}/.aws/**", "${home}/.gnupg/**"): + assert path in system_targets + + def test_all_builtin_have_builtin_source(self): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import BUILTIN_RULES + for rule in BUILTIN_RULES: + assert rule.source is RuleSource.BUILTIN From ecf8d9612095bf6cfab394e59c9fd4dba849326f Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:12:13 -0400 Subject: [PATCH 16/41] feat(permissions): load_rules from settings.json permissions section Implements load_rules() in store.py that parses the permissions.allow/deny arrays from a settings.json file, canonicalizes targets via get_matcher(), and returns typed Rule objects. Uses a local import of get_matcher to avoid a circular dependency with matchers.py. Four new tests cover: missing file, missing section, allow+deny parsing with substitution, and malformed JSON. --- src/agentic_cli/workflow/permissions/store.py | 28 ++++++++++ tests/permissions/test_store.py | 55 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py index 591b74b..0bd16e2 100644 --- a/src/agentic_cli/workflow/permissions/store.py +++ b/src/agentic_cli/workflow/permissions/store.py @@ -7,6 +7,7 @@ from __future__ import annotations +import json from dataclasses import dataclass from pathlib import Path @@ -50,3 +51,30 @@ def substitute(self, s: str) -> str: Rule("filesystem.write", "${home}/.aws/**", Effect.DENY, RuleSource.BUILTIN), Rule("filesystem.write", "${home}/.gnupg/**", Effect.DENY, RuleSource.BUILTIN), ] + + +def load_rules(path: Path, source: RuleSource, ctx: PermissionContext) -> list[Rule]: + """Load rules from a settings.json file's ``permissions`` section. + + Returns an empty list when the file is absent or has no ``permissions`` + key. Raises ``ValueError`` if the file is not valid JSON. + """ + # Local import to avoid circular dependency: matchers.py imports PermissionContext from here. + from agentic_cli.workflow.permissions.matchers import get_matcher # noqa: PLC0415 + + if not path.exists(): + return [] + try: + data = json.loads(path.read_text()) + except json.JSONDecodeError as exc: + raise ValueError(f"Malformed JSON in {path}: {exc}") from exc + + section = data.get("permissions") or {} + rules: list[Rule] = [] + for effect_name, effect in (("allow", Effect.ALLOW), ("deny", Effect.DENY)): + for entry in section.get(effect_name) or []: + cap = entry["capability"] + target_raw = entry["target"] + target = get_matcher(cap).canonicalize(target_raw, ctx) + rules.append(Rule(cap, target, effect, source)) + return rules diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py index 1439fb1..d43a54c 100644 --- a/tests/permissions/test_store.py +++ b/tests/permissions/test_store.py @@ -55,3 +55,58 @@ def test_all_builtin_have_builtin_source(self): from agentic_cli.workflow.permissions.store import BUILTIN_RULES for rule in BUILTIN_RULES: assert rule.source is RuleSource.BUILTIN + + +class TestLoadRules: + def test_missing_file_returns_empty(self, tmp_path: Path): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + rules = load_rules(tmp_path / "missing.json", RuleSource.USER, ctx) + assert rules == [] + + def test_missing_permissions_section_returns_empty(self, tmp_path: Path): + import json + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text(json.dumps({"default_model": "gpt-4"})) + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + assert load_rules(path, RuleSource.USER, ctx) == [] + + def test_parses_allow_and_deny(self, tmp_path: Path): + import json + from agentic_cli.workflow.permissions.rules import Effect, RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text(json.dumps({ + "permissions": { + "allow": [{"capability": "filesystem.read", "target": "${workdir}/**"}], + "deny": [{"capability": "filesystem.write", "target": "/etc/**"}], + } + })) + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + rules = load_rules(path, RuleSource.PROJECT, ctx) + + assert len(rules) == 2 + allow = next(r for r in rules if r.effect is Effect.ALLOW) + deny = next(r for r in rules if r.effect is Effect.DENY) + assert allow.capability == "filesystem.read" + # PathMatcher canonicalises ${workdir} to the absolute path + /** + assert str(tmp_path) in allow.target and allow.target.endswith("/**") + assert allow.source is RuleSource.PROJECT + # PathMatcher resolves the path (resolves symlinks on macOS /etc -> /private/etc) + assert deny.target == str(Path("/etc/**").resolve(strict=False)) + + def test_malformed_json_raises(self, tmp_path: Path): + from agentic_cli.workflow.permissions.rules import RuleSource + from agentic_cli.workflow.permissions.store import PermissionContext, load_rules + + path = tmp_path / "settings.json" + path.write_text("{not json") + ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + with pytest.raises(ValueError): + load_rules(path, RuleSource.USER, ctx) From cb83c611d391b645da62061ca48ba88a602511f1 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:14:31 -0400 Subject: [PATCH 17/41] feat(permissions): append_project_rule with atomic JSON merge --- src/agentic_cli/workflow/permissions/store.py | 29 +++++++++ tests/permissions/test_store.py | 64 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py index 0bd16e2..22f248e 100644 --- a/src/agentic_cli/workflow/permissions/store.py +++ b/src/agentic_cli/workflow/permissions/store.py @@ -11,6 +11,8 @@ from dataclasses import dataclass from pathlib import Path +from agentic_cli.file_utils import atomic_write_text +from agentic_cli.settings_persistence import get_project_config_path from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource @@ -78,3 +80,30 @@ def load_rules(path: Path, source: RuleSource, ctx: PermissionContext) -> list[R target = get_matcher(cap).canonicalize(target_raw, ctx) rules.append(Rule(cap, target, effect, source)) return rules + + +def append_project_rule(app_name: str, rule: Rule) -> None: + """Append ``rule`` to the project ``settings.json`` permissions section. + + Creates the file (and its parent directory) if absent; preserves every + other settings key; dedupes by exact ``(capability, target)``. Atomic + rewrite via ``file_utils.atomic_write_text``. + + Only ``Rule`` instances with ``source == RuleSource.PROJECT`` should be + passed here — this helper doesn't validate (engine enforces the + invariant). + """ + path = get_project_config_path(app_name) + try: + data = json.loads(path.read_text()) if path.exists() else {} + except json.JSONDecodeError as exc: + raise ValueError(f"Malformed JSON in {path}: {exc}") from exc + + key = "allow" if rule.effect is Effect.ALLOW else "deny" + section = data.setdefault("permissions", {}).setdefault(key, []) + entry = {"capability": rule.capability, "target": rule.target} + if entry not in section: + section.append(entry) + + path.parent.mkdir(parents=True, exist_ok=True) + atomic_write_text(path, json.dumps(data, indent=2)) diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py index d43a54c..7ded8f5 100644 --- a/tests/permissions/test_store.py +++ b/tests/permissions/test_store.py @@ -110,3 +110,67 @@ def test_malformed_json_raises(self, tmp_path: Path): ctx = PermissionContext(workdir=tmp_path, home=Path("/fake/home")) with pytest.raises(ValueError): load_rules(path, RuleSource.USER, ctx) + + +class TestAppendProjectRule: + def test_creates_file_when_absent(self, tmp_path, monkeypatch): + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, RuleSource.PROJECT) + append_project_rule("agentic", rule) + + import json + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["permissions"]["allow"] == [ + {"capability": "filesystem.write", "target": "/abs/foo"} + ] + + def test_preserves_other_settings_keys(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + (tmp_path / ".agentic").mkdir() + (tmp_path / ".agentic/settings.json").write_text(json.dumps({ + "default_model": "claude-sonnet-4", + "thinking_effort": "medium", + })) + + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, RuleSource.PROJECT) + append_project_rule("agentic", rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["default_model"] == "claude-sonnet-4" + assert data["thinking_effort"] == "medium" + assert data["permissions"]["allow"][0]["capability"] == "filesystem.write" + + def test_deduplicates_identical_rules(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/abs/foo", Effect.ALLOW, RuleSource.PROJECT) + append_project_rule("agentic", rule) + append_project_rule("agentic", rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert len(data["permissions"]["allow"]) == 1 + + def test_writes_deny_section_for_deny_effect(self, tmp_path, monkeypatch): + import json + from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource + from agentic_cli.workflow.permissions.store import append_project_rule + + monkeypatch.chdir(tmp_path) + rule = Rule("filesystem.write", "/etc/foo", Effect.DENY, RuleSource.PROJECT) + append_project_rule("agentic", rule) + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + assert data["permissions"]["deny"] == [ + {"capability": "filesystem.write", "target": "/etc/foo"} + ] + assert "allow" not in data["permissions"] or data["permissions"]["allow"] == [] From 6c2886d30ce55d8c7570b35d0157c1af4558dd8c Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:16:23 -0400 Subject: [PATCH 18/41] feat(permissions): build_request + parse_response for ask dialog --- .../workflow/permissions/prompt.py | 52 +++++++++++++++ tests/permissions/test_prompt.py | 64 +++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/prompt.py create mode 100644 tests/permissions/test_prompt.py diff --git a/src/agentic_cli/workflow/permissions/prompt.py b/src/agentic_cli/workflow/permissions/prompt.py new file mode 100644 index 0000000..4cf26f9 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/prompt.py @@ -0,0 +1,52 @@ +"""UserInputRequest construction + response parsing for the permission engine.""" + +from __future__ import annotations + +import uuid + +from agentic_cli.workflow.events import InputType, UserInputRequest +from agentic_cli.workflow.permissions.capabilities import ResolvedCapability +from agentic_cli.workflow.permissions.rules import AskScope + +# Strings kept module-level so the UI and parser stay in sync. +ALLOW_ONCE_CHOICE = "Allow once" +ALLOW_SESSION_CHOICE = "Allow for this session" +ALLOW_ALWAYS_CHOICE = "Allow always (save to project)" +DENY_CHOICE = "Deny" + +_CHOICE_TO_SCOPE = { + ALLOW_ONCE_CHOICE: AskScope.ONCE, + ALLOW_SESSION_CHOICE: AskScope.SESSION, + ALLOW_ALWAYS_CHOICE: AskScope.PROJECT, + DENY_CHOICE: AskScope.DENY, +} + + +def build_request(tool_name: str, capabilities: list[ResolvedCapability]) -> UserInputRequest: + """Construct a ``UserInputRequest`` (CHOICE) describing the pending grant.""" + lines = [f"Tool `{tool_name}` wants:"] + for cap in capabilities: + target = cap.target if cap.target else "*" + lines.append(f" • {cap.name} → {target}") + lines.append("") + lines.append("Allow?") + prompt = "\n".join(lines) + + return UserInputRequest( + request_id=f"perm-{uuid.uuid4().hex[:8]}", + tool_name=tool_name, + prompt=prompt, + input_type=InputType.CHOICE, + choices=[ + ALLOW_ONCE_CHOICE, + ALLOW_SESSION_CHOICE, + ALLOW_ALWAYS_CHOICE, + DENY_CHOICE, + ], + default=DENY_CHOICE, + ) + + +def parse_response(text: str) -> AskScope: + """Parse a choice string into an ``AskScope``. Unknown values deny.""" + return _CHOICE_TO_SCOPE.get((text or "").strip(), AskScope.DENY) diff --git a/tests/permissions/test_prompt.py b/tests/permissions/test_prompt.py new file mode 100644 index 0000000..f0e5d27 --- /dev/null +++ b/tests/permissions/test_prompt.py @@ -0,0 +1,64 @@ +"""Tests for prompt build + response parse.""" + +import pytest + +from agentic_cli.workflow.events import InputType, UserInputRequest +from agentic_cli.workflow.permissions.capabilities import ResolvedCapability +from agentic_cli.workflow.permissions.prompt import ( + ALLOW_ALWAYS_CHOICE, + ALLOW_ONCE_CHOICE, + ALLOW_SESSION_CHOICE, + DENY_CHOICE, + build_request, + parse_response, +) +from agentic_cli.workflow.permissions.rules import AskScope + + +class TestBuildRequest: + def test_uses_choice_input_type(self): + req = build_request("write_file", [ResolvedCapability("filesystem.write", "/foo")]) + assert req.input_type is InputType.CHOICE + assert req.default == DENY_CHOICE + + def test_choices_in_fixed_order(self): + req = build_request("write_file", [ResolvedCapability("filesystem.write", "/foo")]) + assert req.choices == [ + ALLOW_ONCE_CHOICE, ALLOW_SESSION_CHOICE, ALLOW_ALWAYS_CHOICE, DENY_CHOICE + ] + + def test_prompt_mentions_tool_and_capabilities(self): + req = build_request( + "copy_file", + [ + ResolvedCapability("filesystem.read", "/a"), + ResolvedCapability("filesystem.write", "/b"), + ], + ) + assert "copy_file" in req.prompt + assert "filesystem.read" in req.prompt and "/a" in req.prompt + assert "filesystem.write" in req.prompt and "/b" in req.prompt + + def test_request_id_has_perm_prefix(self): + req = build_request("x", []) + assert req.request_id.startswith("perm-") + + +class TestParseResponse: + @pytest.mark.parametrize("text, scope", [ + (ALLOW_ONCE_CHOICE, AskScope.ONCE), + (ALLOW_SESSION_CHOICE, AskScope.SESSION), + (ALLOW_ALWAYS_CHOICE, AskScope.PROJECT), + (DENY_CHOICE, AskScope.DENY), + ]) + def test_round_trip(self, text, scope): + assert parse_response(text) is scope + + def test_unknown_defaults_to_deny(self): + assert parse_response("whatever") is AskScope.DENY + + def test_empty_defaults_to_deny(self): + assert parse_response("") is AskScope.DENY + + def test_whitespace_tolerant(self): + assert parse_response(f" {ALLOW_ONCE_CHOICE} ") is AskScope.ONCE From dab21911dcf46fce79a68179983a5461e4111c9e Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:18:44 -0400 Subject: [PATCH 19/41] feat(permissions): PermissionEngine init + rule loading + disabled short-circuit Implements Task 14: PermissionEngine.__init__, rule loading from builtin/user/ project sources, and the permissions_enabled=False early-return path. Uses Rule constructor copies (not object.__setattr__) to avoid mutating the module-level BUILTIN_RULES frozen dataclasses. --- .../workflow/permissions/engine.py | 104 ++++++++++++++++++ tests/permissions/test_engine.py | 74 +++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 src/agentic_cli/workflow/permissions/engine.py create mode 100644 tests/permissions/test_engine.py diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py new file mode 100644 index 0000000..548b0a0 --- /dev/null +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -0,0 +1,104 @@ +# src/agentic_cli/workflow/permissions/engine.py +"""Permission engine. + +See docs/superpowers/specs/2026-04-18-permissions-system-design.md §4 for +the full decision flow. This file implements: + +1. Rule loading from builtin + user + project JSON + in-memory session. +2. ``permissions_enabled=False`` short-circuit. +3. Full check() flow (Tasks 15–17). +""" + +from __future__ import annotations + +import asyncio +from typing import TYPE_CHECKING + +from agentic_cli.logging import Loggers +from agentic_cli.settings_persistence import ( + get_project_config_path, + get_user_config_path, +) +from agentic_cli.workflow.permissions.capabilities import Capability, ResolvedCapability +from agentic_cli.workflow.permissions.matchers import get_matcher +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) +from agentic_cli.workflow.permissions.store import ( + BUILTIN_RULES, + PermissionContext, + load_rules, +) + +if TYPE_CHECKING: + from agentic_cli.config import BaseSettings + from agentic_cli.workflow.base_manager import BaseWorkflowManager + +logger = Loggers.workflow() + + +class PermissionEngine: + """Evaluate tool invocations against rules from four sources. + + Concurrency: one ``asyncio.Lock`` around the ask prompt only (rule + matching is pure). See spec §4.4. + """ + + def __init__( + self, + settings: "BaseSettings", + workflow: "BaseWorkflowManager", + ctx: PermissionContext, + ) -> None: + self._settings = settings + self._workflow = workflow + self._ctx = ctx + self._session_rules: list[Rule] = [] + self._ask_lock = asyncio.Lock() + self._base_rules: list[Rule] = self._load_all_rules() + + # ------------------------------------------------------------------ + # Rule loading + # ------------------------------------------------------------------ + + def _load_all_rules(self) -> list[Rule]: + rules: list[Rule] = [] + for r in BUILTIN_RULES: + # Canonicalise each builtin rule's target (which may use ${workdir}/${home}). + rules.append( + Rule( + capability=r.capability, + target=get_matcher(r.capability).canonicalize(r.target, self._ctx), + effect=r.effect, + source=r.source, + ) + ) + app = self._settings.app_name + rules += load_rules(get_user_config_path(app), RuleSource.USER, self._ctx) + rules += load_rules(get_project_config_path(app), RuleSource.PROJECT, self._ctx) + return rules + + @property + def rules(self) -> list[Rule]: + """All currently-active rules, in source order (builtin→user→project→session).""" + return list(self._base_rules) + list(self._session_rules) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def check( + self, + tool_name: str, + capabilities: list[Capability], + args: dict, + ) -> CheckResult: + """Evaluate whether ``tool_name`` may run with ``args``.""" + if not self._settings.permissions_enabled: + return CheckResult(True, "permissions disabled") + # Full flow lands in Tasks 15–17. + raise NotImplementedError diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py new file mode 100644 index 0000000..297d5fc --- /dev/null +++ b/tests/permissions/test_engine.py @@ -0,0 +1,74 @@ +"""Tests for PermissionEngine.""" + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions.capabilities import Capability +from agentic_cli.workflow.permissions.engine import PermissionEngine +from agentic_cli.workflow.permissions.rules import Effect, RuleSource +from agentic_cli.workflow.permissions.store import PermissionContext + + +def _stub_settings(*, enabled: bool = True, app_name: str = "agentic") -> MagicMock: + s = MagicMock() + s.permissions_enabled = enabled + s.app_name = app_name + return s + + +def _stub_workflow() -> MagicMock: + w = MagicMock() + w.request_user_input = AsyncMock(return_value="Deny") + return w + + +@pytest.fixture +def ctx(tmp_path: Path) -> PermissionContext: + return PermissionContext(workdir=tmp_path, home=Path("/fake/home")) + + +class TestEngineInit: + def test_loads_builtin_rules(self, ctx): + engine = PermissionEngine( + settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx, + ) + builtin = [r for r in engine.rules if r.source is RuleSource.BUILTIN] + assert any(r.capability == "filesystem.read" for r in builtin) + + def test_loads_user_and_project_from_files(self, ctx, tmp_path, monkeypatch): + import json + # Prepare user + project files at their canonical locations. + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("HOME", str(tmp_path)) + (tmp_path / ".agentic").mkdir() + (tmp_path / ".agentic/settings.json").write_text(json.dumps({ + "permissions": {"allow": [{"capability": "http.read", "target": "https://x.test/**"}]}, + })) + # User file lives at ~/.agentic/settings.json (which is tmp_path/.agentic/...) + # — same path since HOME == tmp_path; test merging via overlap + + engine = PermissionEngine( + settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx, + ) + # Exactly one http.read allow should be loaded + http = [r for r in engine.rules if r.capability == "http.read"] + assert len(http) >= 1 + + +class TestEngineDisabled: + @pytest.mark.asyncio + async def test_short_circuits_when_disabled(self, ctx): + engine = PermissionEngine( + settings=_stub_settings(enabled=False), + workflow=_stub_workflow(), + ctx=ctx, + ) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/any/where"}, + ) + assert result.allowed is True + assert "disabled" in result.reason.lower() From d0ae502dec21d853f43b7d2c6c9db4b41a5450c6 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:20:53 -0400 Subject: [PATCH 20/41] feat(permissions): engine rule-based allow/deny path Implement check(), _resolve(), _evaluate(), _fmt_rule_reason(), and _ask_and_apply() stub in PermissionEngine. DENY wins across both capabilities and sources; targetless capabilities match '*'. --- .../workflow/permissions/engine.py | 66 +++++++++++++++- tests/permissions/test_engine.py | 76 ++++++++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py index 548b0a0..06f973e 100644 --- a/src/agentic_cli/workflow/permissions/engine.py +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -100,5 +100,69 @@ async def check( """Evaluate whether ``tool_name`` may run with ``args``.""" if not self._settings.permissions_enabled: return CheckResult(True, "permissions disabled") - # Full flow lands in Tasks 15–17. + + resolved = self._resolve(capabilities, args) + outcomes = self._evaluate(resolved) + + # DENY wins. + deny_hits = [(c, r) for c, r in outcomes if r is not None and r.effect is Effect.DENY] + if deny_hits: + c, r = deny_hits[0] + return CheckResult(False, self._fmt_rule_reason(r, c)) + + # All allowed? + if all(r is not None and r.effect is Effect.ALLOW for _, r in outcomes): + any_c, any_r = outcomes[0] + return CheckResult(True, self._fmt_rule_reason(any_r, any_c)) + + # Ask flow lands in Task 16. + return await self._ask_and_apply(tool_name, resolved, outcomes) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _resolve( + self, capabilities: list[Capability], args: dict + ) -> list[ResolvedCapability]: + resolved: list[ResolvedCapability] = [] + for cap in capabilities: + raw = "*" if cap.target_arg is None else str(args.get(cap.target_arg, "")) + target = "*" if cap.target_arg is None else get_matcher(cap.name).canonicalize(raw, self._ctx) + resolved.append(ResolvedCapability(cap.name, target)) + return resolved + + def _evaluate( + self, resolved: list[ResolvedCapability] + ) -> list[tuple[ResolvedCapability, Rule | None]]: + """For each resolved capability, return the strongest matched rule or None.""" + from agentic_cli.workflow.permissions.matchers import _cap_matches + all_rules = self.rules + out: list[tuple[ResolvedCapability, Rule | None]] = [] + for cap in resolved: + matcher = get_matcher(cap.name) + matched: list[Rule] = [ + r for r in all_rules + if _cap_matches(r.capability, cap.name) + and matcher.matches(r.target, cap.target) + ] + if not matched: + out.append((cap, None)) + continue + # DENY wins per capability; otherwise any ALLOW. + deny = next((r for r in matched if r.effect is Effect.DENY), None) + out.append((cap, deny or matched[0])) + return out + + @staticmethod + def _fmt_rule_reason(rule: Rule, cap: ResolvedCapability) -> str: + return f"rule: {rule.source.value}/{rule.effect.value} {cap.name} {rule.target}" + + async def _ask_and_apply( + self, + tool_name: str, + resolved: list[ResolvedCapability], + outcomes: list[tuple[ResolvedCapability, Rule | None]], + ) -> CheckResult: + """Placeholder for Task 16.""" raise NotImplementedError diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index 297d5fc..97cd196 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -7,7 +7,7 @@ from agentic_cli.workflow.permissions.capabilities import Capability from agentic_cli.workflow.permissions.engine import PermissionEngine -from agentic_cli.workflow.permissions.rules import Effect, RuleSource +from agentic_cli.workflow.permissions.rules import Effect, Rule, RuleSource from agentic_cli.workflow.permissions.store import PermissionContext @@ -72,3 +72,77 @@ async def test_short_circuits_when_disabled(self, ctx): ) assert result.allowed is True assert "disabled" in result.reason.lower() + + +class TestEngineRuleBased: + @pytest.mark.asyncio + async def test_builtin_allow_reads_in_workdir(self, ctx, tmp_path): + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "read_file", + [Capability("filesystem.read", target_arg="path")], + {"path": str(tmp_path / "foo.py")}, + ) + assert result.allowed is True + assert "builtin" in result.reason and "allow" in result.reason + + @pytest.mark.asyncio + async def test_builtin_deny_writes_to_etc(self, ctx): + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/etc/passwd"}, + ) + assert result.allowed is False + assert "deny" in result.reason.lower() + + @pytest.mark.asyncio + async def test_deny_wins_across_capabilities(self, ctx, tmp_path): + """copy_file-style: one cap allowed, one denied → overall deny.""" + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + result = await engine.check( + "copy_file", + [ + Capability("filesystem.read", target_arg="src"), # inside workdir: allow + Capability("filesystem.write", target_arg="dest"), # /etc: deny + ], + {"src": str(tmp_path / "a"), "dest": "/etc/out"}, + ) + assert result.allowed is False + + @pytest.mark.asyncio + async def test_deny_wins_across_sources(self, ctx, tmp_path, monkeypatch): + """Add a session allow for filesystem.write → still blocked by builtin deny.""" + monkeypatch.chdir(tmp_path) + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + # Inject a session ALLOW for /etc/** — canonicalised so it matches on macOS + from agentic_cli.workflow.permissions.matchers import get_matcher + engine._session_rules.append( + Rule( + "filesystem.write", + get_matcher("filesystem.write").canonicalize("/etc/**", ctx), + Effect.ALLOW, + RuleSource.SESSION, + ) + ) + result = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": "/etc/x"}, + ) + assert result.allowed is False # DENY wins + + @pytest.mark.asyncio + async def test_targetless_capability_uses_star(self, ctx): + """A rule with target='*' matches any invocation of a targetless capability.""" + engine = PermissionEngine(settings=_stub_settings(), workflow=_stub_workflow(), ctx=ctx) + engine._session_rules.append( + Rule("python.exec", "*", Effect.ALLOW, RuleSource.SESSION) + ) + result = await engine.check( + "execute_python", + [Capability("python.exec")], # no target_arg + {"code": "print('hi')"}, + ) + assert result.allowed is True From d243545c7333dfd365b329b733b39dc6e659d843 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:22:46 -0400 Subject: [PATCH 21/41] feat(permissions): engine ask flow (once/session/always/deny) --- .../workflow/permissions/engine.py | 31 +++++- tests/permissions/test_engine.py | 98 +++++++++++++++++++ 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py index 06f973e..9d13492 100644 --- a/src/agentic_cli/workflow/permissions/engine.py +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -164,5 +164,32 @@ async def _ask_and_apply( resolved: list[ResolvedCapability], outcomes: list[tuple[ResolvedCapability, Rule | None]], ) -> CheckResult: - """Placeholder for Task 16.""" - raise NotImplementedError + from agentic_cli.workflow.permissions.prompt import build_request, parse_response + from agentic_cli.workflow.permissions.store import append_project_rule + + unmatched = [cap for cap, r in outcomes if r is None] + async with self._ask_lock: + request = build_request(tool_name, resolved) + response = await self._workflow.request_user_input(request) + scope = parse_response(response) + + if scope is AskScope.DENY: + logger.info( + "permission_denied_by_user", + tool=tool_name, + capabilities=[(c.name, c.target) for c in resolved], + ) + return CheckResult(False, "no rule + user denied") + + if scope is AskScope.ONCE: + return CheckResult(True, "no rule + user allowed (once)") + + source = RuleSource.SESSION if scope is AskScope.SESSION else RuleSource.PROJECT + for cap in unmatched: + rule = Rule(cap.name, cap.target, Effect.ALLOW, source) + self._session_rules.append(rule) + if source is RuleSource.PROJECT: + append_project_rule(self._settings.app_name, rule) + + label = "session" if source is RuleSource.SESSION else "always, saved to project" + return CheckResult(True, f"no rule + user allowed ({label})") diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index 97cd196..8a68eab 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -146,3 +146,101 @@ async def test_targetless_capability_uses_star(self, ctx): {"code": "print('hi')"}, ) assert result.allowed is True + + +class TestEngineAskFlow: + @pytest.mark.asyncio + async def test_user_allow_once_no_rule_installed(self, ctx, tmp_path): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow once") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/"}, + ) + assert result.allowed is True + assert "once" in result.reason + assert not any(r.source is RuleSource.SESSION for r in engine.rules) + + @pytest.mark.asyncio + async def test_user_allow_session_installs_session_rule(self, ctx, tmp_path): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result.allowed is True + session = [r for r in engine.rules if r.source is RuleSource.SESSION] + assert any(r.capability == "http.read" and "example.com" in r.target for r in session) + + # Second call for the same target → no new prompt. + w.request_user_input.reset_mock() + result2 = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result2.allowed is True + w.request_user_input.assert_not_called() + + @pytest.mark.asyncio + async def test_user_allow_always_writes_project_file(self, ctx, tmp_path, monkeypatch): + import json + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result.allowed is True + + data = json.loads((tmp_path / ".agentic/settings.json").read_text()) + allow = data["permissions"]["allow"] + assert len(allow) == 1 + assert allow[0]["capability"] == "http.read" + assert "example.com" in allow[0]["target"] + + @pytest.mark.asyncio + async def test_user_denies(self, ctx): + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Deny") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/x"}, + ) + assert result.allowed is False + assert "denied" in result.reason.lower() + + @pytest.mark.asyncio + async def test_only_unmatched_capabilities_become_session_rules(self, ctx, tmp_path): + """copy_file with src inside workdir (allowed by builtin) and dest outside. + Only the write rule should be synthesised.""" + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + result = await engine.check( + "copy_file", + [ + Capability("filesystem.read", target_arg="src"), + Capability("filesystem.write", target_arg="dest"), + ], + {"src": str(tmp_path / "a"), "dest": str(tmp_path / "out")}, + ) + assert result.allowed is True + session = [r for r in engine.rules if r.source is RuleSource.SESSION] + assert len(session) == 1 + assert session[0].capability == "filesystem.write" From 96a9af4804206917f25fa98fe79662419746f510 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:23:52 -0400 Subject: [PATCH 22/41] test(permissions): serialise concurrent ask prompts --- tests/permissions/test_engine.py | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index 8a68eab..45267b8 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -244,3 +244,38 @@ async def test_only_unmatched_capabilities_become_session_rules(self, ctx, tmp_p session = [r for r in engine.rules if r.source is RuleSource.SESSION] assert len(session) == 1 assert session[0].capability == "filesystem.write" + + +class TestEngineConcurrency: + @pytest.mark.asyncio + async def test_ask_is_serialised(self, ctx): + """Two simultaneous calls that both need to ask → one prompt at a time.""" + import asyncio + + ask_active = 0 + ask_peak = 0 + + async def fake_input(request): + nonlocal ask_active, ask_peak + ask_active += 1 + ask_peak = max(ask_peak, ask_active) + # Simulate think time + await asyncio.sleep(0.01) + ask_active -= 1 + return "Allow once" + + w = MagicMock() + w.request_user_input = fake_input + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + await asyncio.gather( + engine.check( + "web_fetch", [Capability("http.read", target_arg="url")], + {"url": "https://a.test/"}, + ), + engine.check( + "web_fetch", [Capability("http.read", target_arg="url")], + {"url": "https://b.test/"}, + ), + ) + assert ask_peak == 1 # never two asks in flight simultaneously From 9a6f1bf21ba40ad3bb9deb597d5bcac599604bd0 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:25:29 -0400 Subject: [PATCH 23/41] refactor(permissions): drop try/except scaffolding from __init__.py now that all modules exist --- .../workflow/permissions/__init__.py | 30 ++++++------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/src/agentic_cli/workflow/permissions/__init__.py b/src/agentic_cli/workflow/permissions/__init__.py index 310209d..d148e1a 100644 --- a/src/agentic_cli/workflow/permissions/__init__.py +++ b/src/agentic_cli/workflow/permissions/__init__.py @@ -12,27 +12,15 @@ EXEMPT, ResolvedCapability, ) - -try: - from agentic_cli.workflow.permissions.engine import PermissionEngine -except ImportError: # not yet implemented - PermissionEngine = None # type: ignore[assignment,misc] - -try: - from agentic_cli.workflow.permissions.rules import ( - AskScope, - CheckResult, - Effect, - Rule, - RuleSource, - ) -except ImportError: # not yet implemented - AskScope = CheckResult = Effect = Rule = RuleSource = None # type: ignore[assignment,misc] - -try: - from agentic_cli.workflow.permissions.store import PermissionContext -except ImportError: # not yet implemented - PermissionContext = None # type: ignore[assignment,misc] +from agentic_cli.workflow.permissions.engine import PermissionEngine +from agentic_cli.workflow.permissions.rules import ( + AskScope, + CheckResult, + Effect, + Rule, + RuleSource, +) +from agentic_cli.workflow.permissions.store import PermissionContext __all__ = [ "AskScope", From 230c59b1e0d516a49d51b9139318b1a599a855b8 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:38:56 -0400 Subject: [PATCH 24/41] feat(tools): add ToolDefinition.capabilities field + register_tool kwarg (staged) Optional kwarg with default=EXEMPT; existing permission_level kwarg untouched for staged migration. Tools will be updated to declare capabilities in Task 19; permission_level + PermissionLevel enum get removed in the final cleanup. --- src/agentic_cli/tools/registry.py | 44 ++++++++++++- tests/tools/test_registry.py | 106 ++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tests/tools/test_registry.py diff --git a/src/agentic_cli/tools/registry.py b/src/agentic_cli/tools/registry.py index 8a09e4e..4018c33 100644 --- a/src/agentic_cli/tools/registry.py +++ b/src/agentic_cli/tools/registry.py @@ -5,11 +5,18 @@ - ToolRegistry: Registry for tool discovery and management """ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum from typing import Any, Callable import inspect +from agentic_cli.workflow.permissions.capabilities import ( + Capability, + CapabilitiesSpec, + EXEMPT, +) +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt + class ToolCategory(Enum): """Categories for organizing tools. @@ -77,6 +84,7 @@ class ToolDefinition: name: str description: str func: Callable[..., Any] + capabilities: CapabilitiesSpec = field(default_factory=lambda: EXEMPT) category: ToolCategory = ToolCategory.OTHER permission_level: PermissionLevel = PermissionLevel.SAFE is_async: bool = False @@ -87,6 +95,35 @@ def __post_init__(self): self.is_async = True +def _validate_capabilities(caps: Any, tool_name: str) -> CapabilitiesSpec: + """Validate and return a capabilities value for a tool registration. + + - ``EXEMPT`` (or any ``_CapabilityExempt`` instance) passes through unchanged. + - A non-empty ``list`` of ``Capability`` instances is returned as-is. + - An empty list raises ``ValueError`` (use ``EXEMPT`` to opt out explicitly). + - Any other type raises ``TypeError``. + """ + if isinstance(caps, _CapabilityExempt): + return caps + if isinstance(caps, list): + if not caps: + raise ValueError( + f"Tool {tool_name!r}: capabilities=[] is not allowed. " + "Use capabilities=EXEMPT to opt out explicitly." + ) + for item in caps: + if not isinstance(item, Capability): + raise TypeError( + f"Tool {tool_name!r}: capabilities list items must be " + f"Capability instances, got {type(item)!r}." + ) + return caps + raise TypeError( + f"Tool {tool_name!r}: capabilities must be EXEMPT or a list of Capability " + f"instances, got {type(caps)!r}." + ) + + class ToolRegistry: """Registry for managing and discovering tools. @@ -107,6 +144,7 @@ def register( description: str | None = None, category: ToolCategory = ToolCategory.OTHER, permission_level: PermissionLevel = PermissionLevel.SAFE, + capabilities: CapabilitiesSpec = EXEMPT, ) -> Callable[..., Any]: """Register a tool function. @@ -122,11 +160,13 @@ def my_tool(query: str) -> dict: def decorator(f: Callable[..., Any]) -> Callable[..., Any]: tool_name = name or f.__name__ tool_desc = description or (f.__doc__ or "").split("\n")[0].strip() + validated_caps = _validate_capabilities(capabilities, tool_name) definition = ToolDefinition( name=tool_name, description=tool_desc, func=f, + capabilities=validated_caps, category=category, permission_level=permission_level, ) @@ -177,6 +217,7 @@ def register_tool( description: str | None = None, category: ToolCategory = ToolCategory.OTHER, permission_level: PermissionLevel = PermissionLevel.SAFE, + capabilities: CapabilitiesSpec = EXEMPT, ) -> Callable[..., Any]: """Register a tool with the default registry. @@ -192,6 +233,7 @@ def _outer(f: Callable[..., Any]) -> Callable[..., Any]: description=description, category=category, permission_level=permission_level, + capabilities=capabilities, ) if func is not None: diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py new file mode 100644 index 0000000..719902b --- /dev/null +++ b/tests/tools/test_registry.py @@ -0,0 +1,106 @@ +"""Tests for ToolDefinition.capabilities field + register_tool capabilities= kwarg. + +This task adds `capabilities` as an optional kwarg (default EXEMPT) alongside +the existing `permission_level` kwarg so tools can be migrated incrementally. +Both fields coexist on `ToolDefinition` throughout Phase 2–5; the old field +is removed in the final cleanup task. +""" + +import pytest + +from agentic_cli.tools.registry import ( + PermissionLevel, + ToolCategory, + ToolRegistry, +) +from agentic_cli.workflow.permissions import Capability, EXEMPT +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt + + +class TestCapabilitiesKwarg: + def test_default_is_exempt(self): + reg = ToolRegistry() + + @reg.register(name="no_caps") + def no_caps() -> dict: + return {} + + defn = reg.get("no_caps") + assert isinstance(defn.capabilities, _CapabilityExempt) + + def test_exempt_stored(self): + reg = ToolRegistry() + + @reg.register(name="exempt", capabilities=EXEMPT) + def exempt() -> dict: + return {} + + defn = reg.get("exempt") + assert defn.capabilities is EXEMPT + + def test_list_stored(self): + reg = ToolRegistry() + caps = [Capability("filesystem.read", target_arg="path")] + + @reg.register(name="reader", capabilities=caps) + def reader(path: str) -> dict: + return {} + + defn = reg.get("reader") + assert defn.capabilities == caps + + def test_empty_list_rejected(self): + reg = ToolRegistry() + with pytest.raises(ValueError, match="EXEMPT"): + @reg.register(name="bad", capabilities=[]) + def bad() -> dict: + return {} + + def test_non_list_non_exempt_rejected(self): + reg = ToolRegistry() + with pytest.raises(TypeError): + @reg.register(name="bad2", capabilities="invalid") # type: ignore[arg-type] + def bad2() -> dict: + return {} + + def test_list_with_non_capability_rejected(self): + reg = ToolRegistry() + with pytest.raises(TypeError): + @reg.register(name="bad3", capabilities=["not a Capability"]) # type: ignore[list-item] + def bad3() -> dict: + return {} + + def test_permission_level_still_works(self): + """Old kwarg continues to be accepted during migration.""" + reg = ToolRegistry() + + @reg.register( + name="legacy", + permission_level=PermissionLevel.SAFE, + category=ToolCategory.READ, + ) + def legacy() -> dict: + return {} + + defn = reg.get("legacy") + assert defn.permission_level == PermissionLevel.SAFE + # capabilities defaults to EXEMPT when not specified: + assert isinstance(defn.capabilities, _CapabilityExempt) + + def test_both_kwargs_can_coexist(self): + """Tools in the middle of migration have both fields set.""" + reg = ToolRegistry() + caps = [Capability("filesystem.read", target_arg="path")] + + @reg.register( + name="migrating", + permission_level=PermissionLevel.SAFE, + capabilities=caps, + category=ToolCategory.READ, + ) + def migrating(path: str) -> dict: + return {} + + defn = reg.get("migrating") + assert defn.permission_level == PermissionLevel.SAFE + assert defn.capabilities == caps From 2628cf7f520aa58085d95f2c9e3d44f0aecdebdc Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:45:27 -0400 Subject: [PATCH 25/41] feat(tools): declare capabilities on every registered tool Maps each tool's side effects to (capability, target) tuples. Keeps permission_level= intact for staged migration; old field is removed in the final cleanup task. --- src/agentic_cli/tools/arxiv_tools.py | 4 ++++ src/agentic_cli/tools/execution_tools.py | 2 ++ src/agentic_cli/tools/file_read.py | 3 +++ src/agentic_cli/tools/file_write.py | 3 +++ src/agentic_cli/tools/glob_tool.py | 3 +++ src/agentic_cli/tools/grep_tool.py | 2 ++ src/agentic_cli/tools/interaction_tools.py | 2 ++ src/agentic_cli/tools/knowledge_tools.py | 7 +++++++ src/agentic_cli/tools/memory_tools.py | 5 +++++ src/agentic_cli/tools/reflection_tools.py | 2 ++ src/agentic_cli/tools/sandbox/__init__.py | 2 ++ src/agentic_cli/tools/search.py | 2 ++ src/agentic_cli/tools/shell/executor.py | 2 ++ src/agentic_cli/tools/webfetch_tool.py | 2 ++ 14 files changed, 41 insertions(+) diff --git a/src/agentic_cli/tools/arxiv_tools.py b/src/agentic_cli/tools/arxiv_tools.py index f10b937..436b628 100644 --- a/src/agentic_cli/tools/arxiv_tools.py +++ b/src/agentic_cli/tools/arxiv_tools.py @@ -22,6 +22,7 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( ARXIV_SOURCE, KB_MANAGER, @@ -123,6 +124,7 @@ async def _fetch_arxiv_paper_with_source(source, arxiv_id: str) -> dict[str, Any @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Search arXiv for academic papers by query, category, or date range. Use this to find research papers on a topic.", ) def search_arxiv( @@ -171,6 +173,7 @@ def search_arxiv( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Fetch metadata for a specific arXiv paper by ID or URL. Returns title, authors, abstract, categories, and PDF URL.", ) async def fetch_arxiv_paper( @@ -297,6 +300,7 @@ async def _ingest_arxiv_paper_with_services( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read"), Capability("kb.write")], description="Download an arXiv paper's PDF, extract text, and ingest it into the knowledge base. Use this to add a specific arXiv paper to long-term storage so it can be searched later.", ) async def ingest_arxiv_paper( diff --git a/src/agentic_cli/tools/execution_tools.py b/src/agentic_cli/tools/execution_tools.py index ccc8160..e24ca2d 100644 --- a/src/agentic_cli/tools/execution_tools.py +++ b/src/agentic_cli/tools/execution_tools.py @@ -11,11 +11,13 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("python.exec")], description="Stateless Python scratchpad for quick calculations, formula checks, and mathematical reasoning. Each call starts fresh — no state persists. Only whitelisted modules (math, numpy, pandas, json, etc.) are available. Use sandbox_execute instead for stateful work.", ) def execute_python( diff --git a/src/agentic_cli/tools/file_read.py b/src/agentic_cli/tools/file_read.py index 6746064..05939d0 100644 --- a/src/agentic_cli/tools/file_read.py +++ b/src/agentic_cli/tools/file_read.py @@ -16,11 +16,13 @@ PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Read the contents of a file at the given path. Use this to examine source code, config files, or any text file. For finding files by name pattern use glob instead; for searching file contents use grep.", ) def read_file( @@ -111,6 +113,7 @@ def read_file( @register_tool( category=ToolCategory.READ, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="source_a"), Capability("filesystem.read", target_arg="source_b")], description="Compare two text sources (files or strings) and show differences. Use this to see what changed between two versions of content.", ) def diff_compare( diff --git a/src/agentic_cli/tools/file_write.py b/src/agentic_cli/tools/file_write.py index 3c01a01..e3d8b49 100644 --- a/src/agentic_cli/tools/file_write.py +++ b/src/agentic_cli/tools/file_write.py @@ -18,11 +18,13 @@ PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.WRITE, permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("filesystem.write", target_arg="path")], description="Write content to a file (creates or overwrites). Use this to create new files or replace entire file contents. For partial modifications use edit_file instead.", ) def write_file( @@ -88,6 +90,7 @@ def write_file( @register_tool( category=ToolCategory.WRITE, permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("filesystem.read", target_arg="path"), Capability("filesystem.write", target_arg="path")], description="Replace specific text in an existing file. Use this for targeted edits (find-and-replace). For creating or fully rewriting files use write_file instead.", ) def edit_file( diff --git a/src/agentic_cli/tools/glob_tool.py b/src/agentic_cli/tools/glob_tool.py index 980ca4a..998b4b3 100644 --- a/src/agentic_cli/tools/glob_tool.py +++ b/src/agentic_cli/tools/glob_tool.py @@ -16,11 +16,13 @@ PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Find files by name pattern (e.g. '**/*.py'). Use this to discover files in a directory tree. For searching inside file contents, use grep instead.", ) def glob( @@ -138,6 +140,7 @@ def glob( @register_tool( category=ToolCategory.READ, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="List directory contents organized by type (directories first, then files). Use this when you need a structured overview of a directory; for pattern-based file search use glob.", ) def list_dir( diff --git a/src/agentic_cli/tools/grep_tool.py b/src/agentic_cli/tools/grep_tool.py index 58fea08..03c9d4a 100644 --- a/src/agentic_cli/tools/grep_tool.py +++ b/src/agentic_cli/tools/grep_tool.py @@ -17,11 +17,13 @@ PermissionLevel, register_tool, ) +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.READ, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("filesystem.read", target_arg="path")], description="Search for text patterns inside file contents (regex or literal). Use this to find code references, function definitions, or any text across files. For finding files by name, use glob instead.", ) def grep( diff --git a/src/agentic_cli/tools/interaction_tools.py b/src/agentic_cli/tools/interaction_tools.py index e910c9c..eb46d0f 100644 --- a/src/agentic_cli/tools/interaction_tools.py +++ b/src/agentic_cli/tools/interaction_tools.py @@ -10,11 +10,13 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import EXEMPT @register_tool( category=ToolCategory.INTERACTION, permission_level=PermissionLevel.SAFE, + capabilities=EXEMPT, description="Ask the user a clarifying question and wait for their response. Use this when requirements are ambiguous or you need user input to proceed.", ) async def ask_clarification( diff --git a/src/agentic_cli/tools/knowledge_tools.py b/src/agentic_cli/tools/knowledge_tools.py index c6319a1..69d0df2 100644 --- a/src/agentic_cli/tools/knowledge_tools.py +++ b/src/agentic_cli/tools/knowledge_tools.py @@ -31,6 +31,7 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( get_service, require_service, @@ -599,6 +600,7 @@ def _find_document_in_kbs(doc_id_or_title: str) -> tuple: @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description="Search the local knowledge base for relevant documents using semantic similarity. Use this when you need to find previously ingested papers, notes, or documents.", ) def kb_search( @@ -626,6 +628,7 @@ def kb_search( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("kb.write")], description=( "Ingest a document into the knowledge base. " "REQUIRED: provide either 'content' (text) or 'url_or_path' (file path or URL). " @@ -685,6 +688,7 @@ async def kb_ingest( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description=( "Read a stored document by ID or title. Returns the per-document " "sidecar (summary, key claims, entities) by default. Pass full=True " @@ -713,6 +717,7 @@ async def kb_read( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description="List documents in the knowledge base with summaries. Filter by query or source type. Returns summaries, not full content.", ) def kb_list( @@ -740,6 +745,7 @@ def kb_list( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("kb.write")], description=( "Save an agent-curated concept page summarizing what the KB " "knows about a topic. Pages live at concepts/{slug}.md and are " @@ -778,6 +784,7 @@ async def kb_write_concept( @register_tool( category=ToolCategory.KNOWLEDGE, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("kb.read")], description=( "Search concept pages (agent-curated synthesis notes). " "Case-insensitive substring match; title hits rank above body " diff --git a/src/agentic_cli/tools/memory_tools.py b/src/agentic_cli/tools/memory_tools.py index 920ce29..b864366 100644 --- a/src/agentic_cli/tools/memory_tools.py +++ b/src/agentic_cli/tools/memory_tools.py @@ -33,6 +33,7 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import require_service, MEMORY_STORE @@ -506,6 +507,7 @@ def _delete_memory_with_store( @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.write")], description="Save information to persistent memory that survives across sessions. Use this to remember user preferences, important facts, or learnings for future conversations.", ) def save_memory( @@ -529,6 +531,7 @@ def save_memory( @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.read")], description="Search persistent memory by keyword/substring. Use this to recall previously saved facts, preferences, or learnings.", ) def search_memory( @@ -552,6 +555,7 @@ def search_memory( @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("memory.write")], description="Update an existing memory item", ) def update_memory( @@ -575,6 +579,7 @@ def update_memory( @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.CAUTION, + capabilities=[Capability("memory.write")], description="Delete a memory item", ) def delete_memory( diff --git a/src/agentic_cli/tools/reflection_tools.py b/src/agentic_cli/tools/reflection_tools.py index a099e5e..e6a934f 100644 --- a/src/agentic_cli/tools/reflection_tools.py +++ b/src/agentic_cli/tools/reflection_tools.py @@ -15,6 +15,7 @@ from agentic_cli.file_utils import atomic_write_json from agentic_cli.logging import get_logger from agentic_cli.tools.registry import PermissionLevel, ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT from agentic_cli.workflow.service_registry import require_service if TYPE_CHECKING: @@ -117,6 +118,7 @@ def format_for_prompt(self, tool_name: str) -> str: @register_tool( category=ToolCategory.MEMORY, permission_level=PermissionLevel.SAFE, + capabilities=EXEMPT, description="Save a learned heuristic from a tool failure", ) def save_reflection( diff --git a/src/agentic_cli/tools/sandbox/__init__.py b/src/agentic_cli/tools/sandbox/__init__.py index 40b0e50..1a8b889 100644 --- a/src/agentic_cli/tools/sandbox/__init__.py +++ b/src/agentic_cli/tools/sandbox/__init__.py @@ -8,11 +8,13 @@ from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel from agentic_cli.workflow.service_registry import require_service, SANDBOX_MANAGER +from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("python.exec")], description=( "Execute Python code in a stateful sandbox session. " "State (variables, imports) persists across calls within the same session. " diff --git a/src/agentic_cli/tools/search.py b/src/agentic_cli/tools/search.py index 8460cb4..1c47b4a 100644 --- a/src/agentic_cli/tools/search.py +++ b/src/agentic_cli/tools/search.py @@ -28,6 +28,7 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability @dataclass @@ -199,6 +200,7 @@ def _get_backend(settings: Any) -> SearchBackend: @register_tool( category=ToolCategory.NETWORK, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Search the web for current information using the configured backend (Tavily or Brave). Use this for questions about recent events, documentation, or facts not in your training data.", ) async def web_search( diff --git a/src/agentic_cli/tools/shell/executor.py b/src/agentic_cli/tools/shell/executor.py index f18ddcd..8447a09 100644 --- a/src/agentic_cli/tools/shell/executor.py +++ b/src/agentic_cli/tools/shell/executor.py @@ -24,6 +24,7 @@ ToolCategory, PermissionLevel, ) +from agentic_cli.workflow.permissions import Capability # ============================================================================= # SHELL TOOL DISABLED @@ -203,6 +204,7 @@ def _execute_command( @register_tool( category=ToolCategory.EXECUTION, permission_level=PermissionLevel.DANGEROUS, + capabilities=[Capability("shell.exec", target_arg="command")], description="Execute a shell command with layered security and HITL approval", ) def shell_executor( diff --git a/src/agentic_cli/tools/webfetch_tool.py b/src/agentic_cli/tools/webfetch_tool.py index 17c5553..91887c0 100644 --- a/src/agentic_cli/tools/webfetch_tool.py +++ b/src/agentic_cli/tools/webfetch_tool.py @@ -10,6 +10,7 @@ from agentic_cli.config import get_settings from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel +from agentic_cli.workflow.permissions import Capability from agentic_cli.tools.webfetch import ( ContentFetcher, URLValidator, @@ -76,6 +77,7 @@ def get_or_create_fetcher(settings=None) -> ContentFetcher: @register_tool( category=ToolCategory.NETWORK, permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read", target_arg="url")], description="Fetch a web page, convert it to markdown, and summarize it using an LLM based on your prompt. Use this to extract specific information from a URL (e.g., documentation, articles).", ) async def web_fetch(url: str, prompt: str, timeout: int = 30) -> dict[str, Any]: From a720c042b918dd56407b1f3fc18a94b703213625 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:48:31 -0400 Subject: [PATCH 26/41] feat(permissions): ADK PermissionPlugin --- .../workflow/adk/permission_plugin.py | 69 +++++++++ tests/integration/test_permission_adk.py | 140 ++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 src/agentic_cli/workflow/adk/permission_plugin.py create mode 100644 tests/integration/test_permission_adk.py diff --git a/src/agentic_cli/workflow/adk/permission_plugin.py b/src/agentic_cli/workflow/adk/permission_plugin.py new file mode 100644 index 0000000..0f7c67e --- /dev/null +++ b/src/agentic_cli/workflow/adk/permission_plugin.py @@ -0,0 +1,69 @@ +"""ADK plugin that gates tool calls via PermissionEngine. + +Replaces the old ConfirmationPlugin once Task 24 swaps the registration in +GoogleADKWorkflowManager. For now the old plugin continues to run alongside +this one until we're ready to cut over. + +Adapter check order (mirrors LangGraph wrapper for consistency): +1. EXEMPT tool → allow, no engine call. +2. Tool has no capability declaration → deny (author error, loud). +3. Engine absent from service registry → allow (test/dev fallback). +4. Otherwise call engine.check() and return None on allow, error dict on deny. +""" + +from __future__ import annotations + +from typing import Any, TYPE_CHECKING + +from google.adk.plugins.base_plugin import BasePlugin + +from agentic_cli.logging import Loggers +from agentic_cli.tools.registry import get_registry +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt +from agentic_cli.workflow.service_registry import get_service + +# NOTE: the ``PERMISSION_ENGINE`` constant is added to service_registry in +# Task 22. For now we look the engine up by the string literal matching the +# constant we'll define there. +_PERMISSION_ENGINE_KEY = "permission_engine" + +if TYPE_CHECKING: + from google.adk.tools import BaseTool + from google.adk.tools.tool_context import ToolContext + +logger = Loggers.workflow() + + +class PermissionPlugin(BasePlugin): + """Replaces the old ConfirmationPlugin with capability-based checks.""" + + def __init__(self) -> None: + super().__init__(name="permission") + + async def before_tool_callback( + self, + *, + tool: "BaseTool", + tool_args: dict[str, Any], + tool_context: "ToolContext | None", + ) -> dict | None: + defn = get_registry().get(tool.name) + caps = defn.capabilities if defn else None + + if isinstance(caps, _CapabilityExempt): + return None + if not caps: + logger.warning("permission_undeclared", tool=tool.name) + return { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + engine = get_service(_PERMISSION_ENGINE_KEY) + if engine is None: + return None # test/dev fallback + + result = await engine.check(tool.name, caps, tool_args) + if result.allowed: + return None + return {"success": False, "error": f"Permission denied: {result.reason}"} diff --git a/tests/integration/test_permission_adk.py b/tests/integration/test_permission_adk.py new file mode 100644 index 0000000..0628c6c --- /dev/null +++ b/tests/integration/test_permission_adk.py @@ -0,0 +1,140 @@ +# tests/integration/test_permission_adk.py (unit-level for Task 20) +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions import Capability, EXEMPT + + +@pytest.fixture +def stub_engine(): + from agentic_cli.workflow.permissions.rules import CheckResult + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(True, "rule: test/allow")) + return engine + + +class TestPermissionPluginUnit: + @pytest.mark.asyncio + async def test_exempt_tool_passes_through(self, monkeypatch, stub_engine): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == "permission_engine" else None, + ) + reg = get_registry() + + @reg.register(name="exempt_x", capabilities=EXEMPT) + def exempt_x(): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="exempt_x"), tool_args={}, tool_context=None, + ) + assert result is None + stub_engine.check.assert_not_called() + + @pytest.mark.asyncio + async def test_missing_declaration_denies(self, monkeypatch, stub_engine): + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == "permission_engine" else None, + ) + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="never_registered"), + tool_args={}, + tool_context=None, + ) + assert result == { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + @pytest.mark.asyncio + async def test_allow_calls_engine_and_passes(self, monkeypatch, stub_engine): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: stub_engine if k == "permission_engine" else None, + ) + reg = get_registry() + + @reg.register( + name="reader_x", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def reader_x(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="reader_x"), + tool_args={"path": "/tmp/x"}, + tool_context=None, + ) + assert result is None + stub_engine.check.assert_awaited_once() + + @pytest.mark.asyncio + async def test_deny_returns_error_dict(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + from agentic_cli.workflow.permissions.rules import CheckResult + + denying_engine = MagicMock() + denying_engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: denying_engine if k == "permission_engine" else None, + ) + reg = get_registry() + + @reg.register( + name="writer_x", + capabilities=[Capability("filesystem.write", target_arg="path")], + ) + def writer_x(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="writer_x"), + tool_args={"path": "/etc/x"}, + tool_context=None, + ) + assert result == {"success": False, "error": "Permission denied: rule: builtin/deny"} + + @pytest.mark.asyncio + async def test_engine_absent_allows(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin + + monkeypatch.setattr( + "agentic_cli.workflow.adk.permission_plugin.get_service", + lambda k: None, + ) + reg = get_registry() + + @reg.register( + name="reader_y", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def reader_y(path: str): + return {} + + plugin = PermissionPlugin() + result = await plugin.before_tool_callback( + tool=SimpleNamespace(name="reader_y"), + tool_args={"path": "/tmp/x"}, + tool_context=None, + ) + assert result is None # fallback allow From de905c0c18f664e6df4a4acc36fefe45d4ee33b6 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:51:19 -0400 Subject: [PATCH 27/41] feat(permissions): LangGraph wrap_tool_for_permission --- .../workflow/langgraph/permission_wrap.py | 63 ++++++++++ .../integration/test_permission_langgraph.py | 109 ++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 src/agentic_cli/workflow/langgraph/permission_wrap.py create mode 100644 tests/integration/test_permission_langgraph.py diff --git a/src/agentic_cli/workflow/langgraph/permission_wrap.py b/src/agentic_cli/workflow/langgraph/permission_wrap.py new file mode 100644 index 0000000..a830ca3 --- /dev/null +++ b/src/agentic_cli/workflow/langgraph/permission_wrap.py @@ -0,0 +1,63 @@ +# src/agentic_cli/workflow/langgraph/permission_wrap.py +"""LangGraph tool wrapper that gates calls via PermissionEngine. + +Adapter check order matches ADK's PermissionPlugin: +1. EXEMPT tool → returned unwrapped (no engine call ever). +2. Tool has no capability declaration → wrapper returns deny dict at call time. +3. Engine absent from service registry → wrapper runs the original tool (fallback). +4. Otherwise call engine.check(); return on allow, deny dict on deny. + +Replaces LangGraphBuilder._wrap_for_confirmation once Task 25 swaps the +call site. +""" + +from __future__ import annotations + +import asyncio +import functools +from typing import Any, Callable + +from agentic_cli.logging import Loggers +from agentic_cli.tools.registry import get_registry +from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt +from agentic_cli.workflow.service_registry import get_service + +_PERMISSION_ENGINE_KEY = "permission_engine" + +logger = Loggers.workflow() + + +def wrap_tool_for_permission(tool: Callable[..., Any]) -> Callable[..., Any]: + """Wrap ``tool`` so every call goes through ``PermissionEngine.check``. + + Exempt tools (``capabilities=EXEMPT``) are returned unmodified. + Tools that never declared capabilities (``defn is None``) get a wrapper + that returns a deny error dict, mirroring ADK. + """ + name = getattr(tool, "__name__", str(tool)) + defn = get_registry().get(name) + caps = defn.capabilities if defn else None + + if isinstance(caps, _CapabilityExempt): + return tool + + is_async = asyncio.iscoroutinefunction(tool) + + @functools.wraps(tool) + async def _guarded(*args: Any, **kwargs: Any) -> Any: + if not caps: + logger.warning("permission_undeclared", tool=name) + return { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + engine = get_service(_PERMISSION_ENGINE_KEY) + if engine is not None: + result = await engine.check(name, caps, kwargs) + if not result.allowed: + return {"success": False, "error": f"Permission denied: {result.reason}"} + if is_async: + return await tool(*args, **kwargs) + return tool(*args, **kwargs) + + return _guarded diff --git a/tests/integration/test_permission_langgraph.py b/tests/integration/test_permission_langgraph.py new file mode 100644 index 0000000..eb412c3 --- /dev/null +++ b/tests/integration/test_permission_langgraph.py @@ -0,0 +1,109 @@ +# tests/integration/test_permission_langgraph.py +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agentic_cli.workflow.permissions import Capability, EXEMPT + + +class TestWrapToolForPermissionUnit: + def test_exempt_tool_returned_unmodified(self): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + reg = get_registry() + + @reg.register(name="ex_lg", capabilities=EXEMPT) + def ex_lg(): + return {"success": True} + + wrapped = wrap_tool_for_permission(ex_lg) + assert wrapped is ex_lg + + @pytest.mark.asyncio + async def test_missing_declaration_denies(self, monkeypatch): + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + def unregistered(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(unregistered) + result = await wrapped(path="/x") + assert result == { + "success": False, + "error": "Permission denied: tool has no capability declaration", + } + + @pytest.mark.asyncio + async def test_engine_allow_runs_tool(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + from agentic_cli.workflow.permissions.rules import CheckResult + + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(True, "rule: test/allow")) + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: engine if k == "permission_engine" else None, + ) + + reg = get_registry() + + @reg.register( + name="read_lg", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def read_lg(path: str): + return {"content": f"read:{path}"} + + wrapped = wrap_tool_for_permission(read_lg) + result = await wrapped(path="/abs/x") + assert result == {"content": "read:/abs/x"} + + @pytest.mark.asyncio + async def test_engine_deny_returns_error_dict(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + from agentic_cli.workflow.permissions.rules import CheckResult + + engine = MagicMock() + engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: engine if k == "permission_engine" else None, + ) + + reg = get_registry() + + @reg.register( + name="write_lg", + capabilities=[Capability("filesystem.write", target_arg="path")], + ) + def write_lg(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(write_lg) + result = await wrapped(path="/etc/x") + assert result == {"success": False, "error": "Permission denied: rule: builtin/deny"} + + @pytest.mark.asyncio + async def test_engine_absent_allows(self, monkeypatch): + from agentic_cli.tools.registry import get_registry + from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission + + monkeypatch.setattr( + "agentic_cli.workflow.langgraph.permission_wrap.get_service", + lambda k: None, + ) + reg = get_registry() + + @reg.register( + name="read_lg2", + capabilities=[Capability("filesystem.read", target_arg="path")], + ) + def read_lg2(path: str): + return {"ok": True} + + wrapped = wrap_tool_for_permission(read_lg2) + result = await wrapped(path="/x") + assert result == {"ok": True} From 110ae22842e1a1e959b61136bd4e37a02e7df2c8 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:55:10 -0400 Subject: [PATCH 28/41] feat(permissions): construct PermissionEngine in BaseWorkflowManager + PERMISSION_ENGINE key - Add PERMISSION_ENGINE = "permission_engine" constant to service_registry.py (alphabetical order) - Construct PermissionEngine unconditionally in _ensure_managers_initialized (before WORKFLOW) - Update permission_plugin.py and permission_wrap.py to import and use the real constant - Update existing permission integration tests to use the imported PERMISSION_ENGINE constant - Add tests/workflow/test_base_manager_permissions.py verifying engine is published to services dict --- .../workflow/adk/permission_plugin.py | 9 ++----- src/agentic_cli/workflow/base_manager.py | 16 ++++++++--- .../workflow/langgraph/permission_wrap.py | 6 ++--- src/agentic_cli/workflow/service_registry.py | 7 ++--- tests/integration/test_permission_adk.py | 9 ++++--- .../integration/test_permission_langgraph.py | 5 ++-- .../workflow/test_base_manager_permissions.py | 27 +++++++++++++++++++ 7 files changed, 56 insertions(+), 23 deletions(-) create mode 100644 tests/workflow/test_base_manager_permissions.py diff --git a/src/agentic_cli/workflow/adk/permission_plugin.py b/src/agentic_cli/workflow/adk/permission_plugin.py index 0f7c67e..7668ba6 100644 --- a/src/agentic_cli/workflow/adk/permission_plugin.py +++ b/src/agentic_cli/workflow/adk/permission_plugin.py @@ -20,12 +20,7 @@ from agentic_cli.logging import Loggers from agentic_cli.tools.registry import get_registry from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt -from agentic_cli.workflow.service_registry import get_service - -# NOTE: the ``PERMISSION_ENGINE`` constant is added to service_registry in -# Task 22. For now we look the engine up by the string literal matching the -# constant we'll define there. -_PERMISSION_ENGINE_KEY = "permission_engine" +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE, get_service if TYPE_CHECKING: from google.adk.tools import BaseTool @@ -59,7 +54,7 @@ async def before_tool_callback( "error": "Permission denied: tool has no capability declaration", } - engine = get_service(_PERMISSION_ENGINE_KEY) + engine = get_service(PERMISSION_ENGINE) if engine is None: return None # test/dev fallback diff --git a/src/agentic_cli/workflow/base_manager.py b/src/agentic_cli/workflow/base_manager.py index 3c79bb0..8a3e005 100644 --- a/src/agentic_cli/workflow/base_manager.py +++ b/src/agentic_cli/workflow/base_manager.py @@ -22,13 +22,14 @@ from agentic_cli.workflow.models import ModelRegistry from agentic_cli.workflow.service_registry import ( set_service_registry, + ARXIV_SOURCE, KB_MANAGER, - USER_KB_MANAGER, - SANDBOX_MANAGER, LLM_SUMMARIZER, MEMORY_STORE, + PERMISSION_ENGINE, REFLECTION_STORE, - ARXIV_SOURCE, + SANDBOX_MANAGER, + USER_KB_MANAGER, WORKFLOW, ) from agentic_cli.logging import Loggers @@ -364,6 +365,15 @@ def _ensure_managers_initialized(self) -> None: from agentic_cli.tools.arxiv_source import ArxivSearchSource s[ARXIV_SOURCE] = ArxivSearchSource() + # Always construct the PermissionEngine (all agents may need it) + if PERMISSION_ENGINE not in s: + from pathlib import Path + from agentic_cli.workflow.permissions import PermissionContext, PermissionEngine + ctx = PermissionContext(workdir=Path.cwd(), home=Path.home()) + s[PERMISSION_ENGINE] = PermissionEngine( + settings=self._settings, workflow=self, ctx=ctx, + ) + # Always ensure workflow reference is available s[WORKFLOW] = self diff --git a/src/agentic_cli/workflow/langgraph/permission_wrap.py b/src/agentic_cli/workflow/langgraph/permission_wrap.py index a830ca3..0a4c411 100644 --- a/src/agentic_cli/workflow/langgraph/permission_wrap.py +++ b/src/agentic_cli/workflow/langgraph/permission_wrap.py @@ -20,9 +20,7 @@ from agentic_cli.logging import Loggers from agentic_cli.tools.registry import get_registry from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt -from agentic_cli.workflow.service_registry import get_service - -_PERMISSION_ENGINE_KEY = "permission_engine" +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE, get_service logger = Loggers.workflow() @@ -51,7 +49,7 @@ async def _guarded(*args: Any, **kwargs: Any) -> Any: "success": False, "error": "Permission denied: tool has no capability declaration", } - engine = get_service(_PERMISSION_ENGINE_KEY) + engine = get_service(PERMISSION_ENGINE) if engine is not None: result = await engine.check(name, caps, kwargs) if not result.allowed: diff --git a/src/agentic_cli/workflow/service_registry.py b/src/agentic_cli/workflow/service_registry.py index 3040089..cbad534 100644 --- a/src/agentic_cli/workflow/service_registry.py +++ b/src/agentic_cli/workflow/service_registry.py @@ -13,13 +13,14 @@ # ---- Well-known registry keys ---- +ARXIV_SOURCE = "arxiv_source" KB_MANAGER = "kb_manager" -USER_KB_MANAGER = "user_kb_manager" -SANDBOX_MANAGER = "sandbox_manager" LLM_SUMMARIZER = "llm_summarizer" MEMORY_STORE = "memory_store" +PERMISSION_ENGINE = "permission_engine" REFLECTION_STORE = "reflection_store" -ARXIV_SOURCE = "arxiv_source" +SANDBOX_MANAGER = "sandbox_manager" +USER_KB_MANAGER = "user_kb_manager" WORKFLOW = "workflow" diff --git a/tests/integration/test_permission_adk.py b/tests/integration/test_permission_adk.py index 0628c6c..5d36ade 100644 --- a/tests/integration/test_permission_adk.py +++ b/tests/integration/test_permission_adk.py @@ -5,6 +5,7 @@ import pytest from agentic_cli.workflow.permissions import Capability, EXEMPT +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE @pytest.fixture @@ -23,7 +24,7 @@ async def test_exempt_tool_passes_through(self, monkeypatch, stub_engine): monkeypatch.setattr( "agentic_cli.workflow.adk.permission_plugin.get_service", - lambda k: stub_engine if k == "permission_engine" else None, + lambda k: stub_engine if k == PERMISSION_ENGINE else None, ) reg = get_registry() @@ -44,7 +45,7 @@ async def test_missing_declaration_denies(self, monkeypatch, stub_engine): monkeypatch.setattr( "agentic_cli.workflow.adk.permission_plugin.get_service", - lambda k: stub_engine if k == "permission_engine" else None, + lambda k: stub_engine if k == PERMISSION_ENGINE else None, ) plugin = PermissionPlugin() result = await plugin.before_tool_callback( @@ -64,7 +65,7 @@ async def test_allow_calls_engine_and_passes(self, monkeypatch, stub_engine): monkeypatch.setattr( "agentic_cli.workflow.adk.permission_plugin.get_service", - lambda k: stub_engine if k == "permission_engine" else None, + lambda k: stub_engine if k == PERMISSION_ENGINE else None, ) reg = get_registry() @@ -94,7 +95,7 @@ async def test_deny_returns_error_dict(self, monkeypatch): denying_engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) monkeypatch.setattr( "agentic_cli.workflow.adk.permission_plugin.get_service", - lambda k: denying_engine if k == "permission_engine" else None, + lambda k: denying_engine if k == PERMISSION_ENGINE else None, ) reg = get_registry() diff --git a/tests/integration/test_permission_langgraph.py b/tests/integration/test_permission_langgraph.py index eb412c3..a9c4e45 100644 --- a/tests/integration/test_permission_langgraph.py +++ b/tests/integration/test_permission_langgraph.py @@ -4,6 +4,7 @@ import pytest from agentic_cli.workflow.permissions import Capability, EXEMPT +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE class TestWrapToolForPermissionUnit: @@ -44,7 +45,7 @@ async def test_engine_allow_runs_tool(self, monkeypatch): engine.check = AsyncMock(return_value=CheckResult(True, "rule: test/allow")) monkeypatch.setattr( "agentic_cli.workflow.langgraph.permission_wrap.get_service", - lambda k: engine if k == "permission_engine" else None, + lambda k: engine if k == PERMISSION_ENGINE else None, ) reg = get_registry() @@ -70,7 +71,7 @@ async def test_engine_deny_returns_error_dict(self, monkeypatch): engine.check = AsyncMock(return_value=CheckResult(False, "rule: builtin/deny")) monkeypatch.setattr( "agentic_cli.workflow.langgraph.permission_wrap.get_service", - lambda k: engine if k == "permission_engine" else None, + lambda k: engine if k == PERMISSION_ENGINE else None, ) reg = get_registry() diff --git a/tests/workflow/test_base_manager_permissions.py b/tests/workflow/test_base_manager_permissions.py new file mode 100644 index 0000000..13efd8b --- /dev/null +++ b/tests/workflow/test_base_manager_permissions.py @@ -0,0 +1,27 @@ +"""Test that BaseWorkflowManager constructs and publishes PermissionEngine.""" + +from pathlib import Path + +import pytest + +from agentic_cli.workflow.permissions import PermissionEngine +from agentic_cli.workflow.service_registry import PERMISSION_ENGINE + + +class TestEngineInServiceRegistry: + def test_base_manager_registers_permission_engine(self, tmp_path, monkeypatch): + """After _ensure_managers_initialized, the engine is published to the registry dict.""" + from agentic_cli.config import BaseSettings + from agentic_cli.workflow.adk.manager import GoogleADKWorkflowManager + from agentic_cli.workflow.config import AgentConfig + + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("HOME", str(tmp_path)) + + settings = BaseSettings(google_api_key="test") + configs = [AgentConfig(name="root", prompt="hi", tools=[])] + manager = GoogleADKWorkflowManager( + agent_configs=configs, settings=settings, model="gemini-2.5-flash", + ) + manager._ensure_managers_initialized() + assert isinstance(manager.services[PERMISSION_ENGINE], PermissionEngine) From 2de1afc6eb043cc1e98af60a950fafce9947fb94 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:57:41 -0400 Subject: [PATCH 29/41] feat(permissions): add permissions + permissions_enabled settings (staged) Adds PermissionRuleConfig, PermissionsConfig models and two new fields (permissions, permissions_enabled) to WorkflowSettingsMixin. hitl_enabled is preserved for migration compatibility. --- src/agentic_cli/workflow/settings.py | 30 ++++++++++++++- tests/workflow/test_settings_permissions.py | 41 +++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 tests/workflow/test_settings_permissions.py diff --git a/src/agentic_cli/workflow/settings.py b/src/agentic_cli/workflow/settings.py index 3839624..812fe40 100644 --- a/src/agentic_cli/workflow/settings.py +++ b/src/agentic_cli/workflow/settings.py @@ -10,7 +10,7 @@ from enum import Enum from typing import Literal, TYPE_CHECKING -from pydantic import Field +from pydantic import BaseModel, Field from agentic_cli.workflow.models import ModelFamily, ModelRegistry @@ -21,6 +21,20 @@ THINKING_EFFORT_LEVELS = ModelRegistry.THINKING_EFFORT_LEVELS +class PermissionRuleConfig(BaseModel): + """A single permission rule — serialised to settings.json as JSON.""" + + capability: str + target: str + + +class PermissionsConfig(BaseModel): + """Nested settings section holding user-editable permission rules.""" + + allow: list[PermissionRuleConfig] = [] + deny: list[PermissionRuleConfig] = [] + + class OrchestratorType(str, Enum): """Types of workflow orchestrators available.""" @@ -293,6 +307,20 @@ class WorkflowSettingsMixin: json_schema_extra={"ui_order": 135}, ) + # Permissions (new in phase 2) + permissions: PermissionsConfig = Field( + default_factory=PermissionsConfig, + title="Permissions", + description="Declarative allow/deny rules for tool capabilities.", + json_schema_extra={"ui_order": 135}, + ) + permissions_enabled: bool = Field( + default=True, + title="Permissions Enabled", + description="Master switch; when False, all tool calls are allowed.", + json_schema_extra={"ui_order": 136}, + ) + # Persistence settings (LangGraph) postgres_uri: str | None = Field( default=None, diff --git a/tests/workflow/test_settings_permissions.py b/tests/workflow/test_settings_permissions.py new file mode 100644 index 0000000..82b5bd9 --- /dev/null +++ b/tests/workflow/test_settings_permissions.py @@ -0,0 +1,41 @@ +"""Tests for permissions settings additions. + +Adds ``permissions`` (nested) and ``permissions_enabled`` to settings; +``hitl_enabled`` remains for now (removed in final cleanup task). +""" + +from agentic_cli.config import BaseSettings + + +class TestPermissionsSettings: + def test_permissions_enabled_default_true(self): + s = BaseSettings() + assert s.permissions_enabled is True + + def test_permissions_default_empty(self): + s = BaseSettings() + assert s.permissions.allow == [] + assert s.permissions.deny == [] + + def test_hitl_enabled_still_present(self): + """Old kwarg still accessible during the migration window.""" + s = BaseSettings() + assert hasattr(s, "hitl_enabled") + + def test_permissions_accepts_allow_and_deny(self): + from agentic_cli.workflow.settings import ( + PermissionRuleConfig, + PermissionsConfig, + ) + + cfg = PermissionsConfig( + allow=[PermissionRuleConfig(capability="filesystem.read", target="/x/**")], + deny=[PermissionRuleConfig(capability="filesystem.write", target="/etc/**")], + ) + assert cfg.allow[0].capability == "filesystem.read" + assert cfg.deny[0].target == "/etc/**" + + def test_base_settings_has_permissions_field(self): + s = BaseSettings() + from agentic_cli.workflow.settings import PermissionsConfig + assert isinstance(s.permissions, PermissionsConfig) From 2bf7950f81229a8232890dd90dd72b9d94cf6379 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:59:18 -0400 Subject: [PATCH 30/41] feat(permissions): ADK manager uses PermissionPlugin --- src/agentic_cli/workflow/adk/manager.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/agentic_cli/workflow/adk/manager.py b/src/agentic_cli/workflow/adk/manager.py index 7e851f7..763c355 100644 --- a/src/agentic_cli/workflow/adk/manager.py +++ b/src/agentic_cli/workflow/adk/manager.py @@ -24,7 +24,8 @@ from agentic_cli.workflow.events import WorkflowEvent, EventType from agentic_cli.workflow.config import AgentConfig from agentic_cli.workflow.adk.event_processor import ADKEventProcessor -from agentic_cli.workflow.adk.plugins import ConfirmationPlugin, LLMLoggingPlugin +from agentic_cli.workflow.adk.permission_plugin import PermissionPlugin +from agentic_cli.workflow.adk.plugins import LLMLoggingPlugin from agentic_cli.config import ( BaseSettings, @@ -400,7 +401,7 @@ def _init_plugins(self) -> list: """ from agentic_cli.workflow.adk.task_progress_plugin import TaskProgressPlugin - plugins: list = [ConfirmationPlugin()] + plugins: list = [PermissionPlugin()] # Task progress tracking via ToolContext.state self._task_progress_plugin = TaskProgressPlugin() From 353edc40850f82f7e78f99c72657879b9926f2ac Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 11:01:10 -0400 Subject: [PATCH 31/41] feat(permissions): LangGraph builder uses wrap_tool_for_permission --- src/agentic_cli/workflow/langgraph/graph_builder.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/agentic_cli/workflow/langgraph/graph_builder.py b/src/agentic_cli/workflow/langgraph/graph_builder.py index 679cfef..bc295d1 100644 --- a/src/agentic_cli/workflow/langgraph/graph_builder.py +++ b/src/agentic_cli/workflow/langgraph/graph_builder.py @@ -13,6 +13,7 @@ from agentic_cli.workflow.config import AgentConfig from agentic_cli.workflow.confirmation import is_dangerous, request_tool_confirmation +from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission from agentic_cli.logging import Loggers if TYPE_CHECKING: @@ -122,9 +123,7 @@ def _get_tools(config: AgentConfig) -> list: if tools: tool_node_name = f"{config.name}_tools" tool_map[config.name] = tool_node_name - wrapped_tools = [ - self._wrap_for_confirmation(t) for t in tools - ] + wrapped_tools = [wrap_tool_for_permission(t) for t in tools] tool_node = ToolNode( wrapped_tools, name=tool_node_name, handle_tool_errors=True, ) From 5b2965b200fe3f3713b600a4b1c23107ca7c5709 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 11:22:19 -0400 Subject: [PATCH 32/41] refactor(permissions): delete retired code (PermissionLevel, ConfirmationPlugin, _wrap_for_confirmation, hitl_tools, hitl_enabled) - Drop permission_level= from every tool; capabilities= is now required - Remove PermissionLevel enum + permission_level field from ToolDefinition - Delete workflow/confirmation.py and tools/hitl_tools.py - Strip ConfirmationPlugin from workflow/adk/plugins.py (LLMLoggingPlugin preserved) - Remove LangGraphBuilder._wrap_for_confirmation + its confirmation imports - Remove hitl_enabled setting - Update tests that referenced the retired symbols - Update README + CLAUDE.md snippets to the new API --- CLAUDE.md | 4 +- README.md | 10 +- src/agentic_cli/tools/__init__.py | 5 - src/agentic_cli/tools/arxiv_tools.py | 7 +- src/agentic_cli/tools/execution_tools.py | 2 - src/agentic_cli/tools/file_read.py | 5 - src/agentic_cli/tools/file_write.py | 4 - src/agentic_cli/tools/glob_tool.py | 5 - src/agentic_cli/tools/grep_tool.py | 4 - src/agentic_cli/tools/hitl_tools.py | 81 ------ src/agentic_cli/tools/interaction_tools.py | 2 - src/agentic_cli/tools/knowledge_tools.py | 7 - src/agentic_cli/tools/memory_tools.py | 5 - src/agentic_cli/tools/reflection_tools.py | 3 +- src/agentic_cli/tools/registry.py | 36 +-- src/agentic_cli/tools/sandbox/__init__.py | 3 +- src/agentic_cli/tools/search.py | 2 - src/agentic_cli/tools/shell/executor.py | 4 +- src/agentic_cli/tools/webfetch_tool.py | 3 +- src/agentic_cli/workflow/adk/__init__.py | 2 +- src/agentic_cli/workflow/adk/plugins.py | 48 ---- src/agentic_cli/workflow/confirmation.py | 62 ----- .../workflow/langgraph/graph_builder.py | 42 ---- src/agentic_cli/workflow/settings.py | 10 +- tests/test_adk_plugins.py | 232 +----------------- tests/test_config.py | 4 - tests/test_hitl.py | 61 ----- tests/test_langgraph_confirmation.py | 134 ---------- tests/test_tools.py | 114 +++------ tests/tools/test_registry.py | 56 +---- tests/workflow/test_backend_isolation.py | 2 +- tests/workflow/test_settings_permissions.py | 12 +- 32 files changed, 70 insertions(+), 901 deletions(-) delete mode 100644 src/agentic_cli/tools/hitl_tools.py delete mode 100644 src/agentic_cli/workflow/confirmation.py delete mode 100644 tests/test_hitl.py delete mode 100644 tests/test_langgraph_confirmation.py diff --git a/CLAUDE.md b/CLAUDE.md index 3f9c4dc..6ce79dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,7 +47,7 @@ agentic-cli/ │ │ ├── persistence/ # Checkpointers, stores │ │ └── tools/ # LangChain-compatible wrappers │ ├── tools/ -│ │ ├── registry.py # ToolRegistry, @register_tool, ToolCategory, PermissionLevel +│ │ ├── registry.py # ToolRegistry, @register_tool, ToolCategory │ │ ├── executor.py # SafePythonExecutor │ │ ├── knowledge_tools.py # kb_search, kb_ingest, kb_list, kb_read │ │ ├── arxiv_tools.py # search_arxiv, fetch_arxiv_paper, analyze_arxiv_paper @@ -62,7 +62,7 @@ agentic-cli/ │ │ ├── memory_tools.py # save_memory, search_memory + MemoryStore │ │ ├── planning_tools.py # save_plan, get_plan + PlanStore │ │ ├── task_tools.py # save_tasks, get_tasks + TaskStore -│ │ ├── hitl_tools.py # request_approval + ApprovalManager, HITLConfig +│ │ ├── reflection_tools.py # save_reflection + ToolReflectionStore │ │ ├── shell/ # 8-layer shell security │ │ └── webfetch/ # Fetcher, converter, validator, robots │ ├── knowledge_base/ diff --git a/README.md b/README.md index 7ee0ce5..05d61da 100644 --- a/README.md +++ b/README.md @@ -320,11 +320,12 @@ configs = [coordinator, researcher, analyst] Tools are regular Python functions with type hints and docstrings. **All tools return `{"success": bool, ...}` dicts** — never raise exceptions. ```python -from agentic_cli.tools import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools import register_tool, ToolCategory +from agentic_cli.workflow.permissions import Capability, EXEMPT @register_tool( - category=ToolCategory.DATA, - permission_level=PermissionLevel.SAFE, + category=ToolCategory.NETWORK, + capabilities=[Capability("http.read")], description="Search the database for matching records.", ) def search_database(query: str, limit: int = 10) -> dict: @@ -726,7 +727,7 @@ agentic-cli/ │ │ ├── state.py │ │ └── persistence/ # Checkpointers and stores │ ├── tools/ -│ │ ├── registry.py # ToolRegistry, ToolCategory, PermissionLevel +│ │ ├── registry.py # ToolRegistry, ToolCategory, register_tool │ │ ├── factories.py # Backend-aware tool factories │ │ ├── executor.py # SafePythonExecutor │ │ ├── arxiv_tools.py # search_arxiv, fetch_arxiv_paper, ingest_arxiv_paper @@ -744,7 +745,6 @@ agentic-cli/ │ │ ├── pdf_utils.py # PDF text extraction helpers │ │ ├── memory_tools.py # save/search/update/delete + MemoryStore │ │ ├── reflection_tools.py # save_reflection + ToolReflectionStore -│ │ ├── hitl_tools.py # request_approval │ │ ├── _core/ # Shared planning/task logic │ │ │ ├── planning.py │ │ │ └── tasks.py diff --git a/src/agentic_cli/tools/__init__.py b/src/agentic_cli/tools/__init__.py index 443094f..43cd318 100644 --- a/src/agentic_cli/tools/__init__.py +++ b/src/agentic_cli/tools/__init__.py @@ -10,7 +10,6 @@ Framework Tools: - memory_tools: Working and long-term memory tools - - hitl_tools: Human-in-the-loop approval tools - web_search: Web search with pluggable backends (Tavily, Brave) For resilience patterns, use tenacity, pybreaker, aiolimiter directly. @@ -48,7 +47,6 @@ from agentic_cli.tools.webfetch_tool import web_fetch from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, ToolDefinition, ToolRegistry, get_registry, @@ -61,7 +59,6 @@ __all__ = [ # Registry classes "ToolCategory", - "PermissionLevel", "ToolDefinition", "ToolRegistry", "get_registry", @@ -103,7 +100,6 @@ "ask_clarification", # Framework tool modules (lazy loaded) "memory_tools", - "hitl_tools", "sandbox_tools", "reflection_tools", ] @@ -112,7 +108,6 @@ # Lazy loading for framework tool modules _lazy_tool_modules = { "memory_tools": "agentic_cli.tools.memory_tools", - "hitl_tools": "agentic_cli.tools.hitl_tools", "sandbox_tools": "agentic_cli.tools.sandbox", "reflection_tools": "agentic_cli.tools.reflection_tools", } diff --git a/src/agentic_cli/tools/arxiv_tools.py b/src/agentic_cli/tools/arxiv_tools.py index 436b628..2b195e6 100644 --- a/src/agentic_cli/tools/arxiv_tools.py +++ b/src/agentic_cli/tools/arxiv_tools.py @@ -20,7 +20,6 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( @@ -123,7 +122,7 @@ async def _fetch_arxiv_paper_with_source(source, arxiv_id: str) -> dict[str, Any @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Search arXiv for academic papers by query, category, or date range. Use this to find research papers on a topic.", ) @@ -172,7 +171,7 @@ def search_arxiv( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read")], description="Fetch metadata for a specific arXiv paper by ID or URL. Returns title, authors, abstract, categories, and PDF URL.", ) @@ -299,7 +298,7 @@ async def _ingest_arxiv_paper_with_services( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, + capabilities=[Capability("http.read"), Capability("kb.write")], description="Download an arXiv paper's PDF, extract text, and ingest it into the knowledge base. Use this to add a specific arXiv paper to long-term storage so it can be searched later.", ) diff --git a/src/agentic_cli/tools/execution_tools.py b/src/agentic_cli/tools/execution_tools.py index e24ca2d..7d723f1 100644 --- a/src/agentic_cli/tools/execution_tools.py +++ b/src/agentic_cli/tools/execution_tools.py @@ -9,14 +9,12 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, capabilities=[Capability("python.exec")], description="Stateless Python scratchpad for quick calculations, formula checks, and mathematical reasoning. Each call starts fresh — no state persists. Only whitelisted modules (math, numpy, pandas, json, etc.) are available. Use sandbox_execute instead for stateful work.", ) diff --git a/src/agentic_cli/tools/file_read.py b/src/agentic_cli/tools/file_read.py index 05939d0..7d88f8c 100644 --- a/src/agentic_cli/tools/file_read.py +++ b/src/agentic_cli/tools/file_read.py @@ -3,8 +3,6 @@ Provides safe, read-only tools for file system access: - read_file: Read file contents with optional offset/limit - diff_compare: Compare two text sources - -All tools in this module have PermissionLevel.SAFE. """ import difflib @@ -13,7 +11,6 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) from agentic_cli.workflow.permissions import Capability @@ -21,7 +18,6 @@ @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("filesystem.read", target_arg="path")], description="Read the contents of a file at the given path. Use this to examine source code, config files, or any text file. For finding files by name pattern use glob instead; for searching file contents use grep.", ) @@ -112,7 +108,6 @@ def read_file( @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("filesystem.read", target_arg="source_a"), Capability("filesystem.read", target_arg="source_b")], description="Compare two text sources (files or strings) and show differences. Use this to see what changed between two versions of content.", ) diff --git a/src/agentic_cli/tools/file_write.py b/src/agentic_cli/tools/file_write.py index e3d8b49..4a0a4a5 100644 --- a/src/agentic_cli/tools/file_write.py +++ b/src/agentic_cli/tools/file_write.py @@ -4,7 +4,6 @@ - write_file: Write content to a file (creates or overwrites) - edit_file: Replace text in a file (sed-like operation) -All tools in this module have PermissionLevel.CAUTION. Delete, move, and copy operations should use the shell tool. """ @@ -15,7 +14,6 @@ from agentic_cli.file_utils import atomic_write_text from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) from agentic_cli.workflow.permissions import Capability @@ -23,7 +21,6 @@ @register_tool( category=ToolCategory.WRITE, - permission_level=PermissionLevel.CAUTION, capabilities=[Capability("filesystem.write", target_arg="path")], description="Write content to a file (creates or overwrites). Use this to create new files or replace entire file contents. For partial modifications use edit_file instead.", ) @@ -89,7 +86,6 @@ def write_file( @register_tool( category=ToolCategory.WRITE, - permission_level=PermissionLevel.CAUTION, capabilities=[Capability("filesystem.read", target_arg="path"), Capability("filesystem.write", target_arg="path")], description="Replace specific text in an existing file. Use this for targeted edits (find-and-replace). For creating or fully rewriting files use write_file instead.", ) diff --git a/src/agentic_cli/tools/glob_tool.py b/src/agentic_cli/tools/glob_tool.py index 998b4b3..057e03e 100644 --- a/src/agentic_cli/tools/glob_tool.py +++ b/src/agentic_cli/tools/glob_tool.py @@ -2,8 +2,6 @@ Provides file discovery using glob patterns: - glob: Find files matching a pattern (also serves as directory listing) - -This is a read-only tool with PermissionLevel.SAFE. """ import os @@ -13,7 +11,6 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) from agentic_cli.workflow.permissions import Capability @@ -21,7 +18,6 @@ @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("filesystem.read", target_arg="path")], description="Find files by name pattern (e.g. '**/*.py'). Use this to discover files in a directory tree. For searching inside file contents, use grep instead.", ) @@ -139,7 +135,6 @@ def glob( @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("filesystem.read", target_arg="path")], description="List directory contents organized by type (directories first, then files). Use this when you need a structured overview of a directory; for pattern-based file search use glob.", ) diff --git a/src/agentic_cli/tools/grep_tool.py b/src/agentic_cli/tools/grep_tool.py index 03c9d4a..98c4384 100644 --- a/src/agentic_cli/tools/grep_tool.py +++ b/src/agentic_cli/tools/grep_tool.py @@ -2,8 +2,6 @@ Provides pattern-based content search across files: - grep: Search for patterns in files (ripgrep-like interface) - -This is a read-only tool with PermissionLevel.SAFE. """ import functools @@ -14,7 +12,6 @@ from agentic_cli.tools.registry import ( ToolCategory, - PermissionLevel, register_tool, ) from agentic_cli.workflow.permissions import Capability @@ -22,7 +19,6 @@ @register_tool( category=ToolCategory.READ, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("filesystem.read", target_arg="path")], description="Search for text patterns inside file contents (regex or literal). Use this to find code references, function definitions, or any text across files. For finding files by name, use glob instead.", ) diff --git a/src/agentic_cli/tools/hitl_tools.py b/src/agentic_cli/tools/hitl_tools.py deleted file mode 100644 index 962a907..0000000 --- a/src/agentic_cli/tools/hitl_tools.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Human-in-the-loop tools for agentic workflows. - -Provides ApprovalManager for audit history of HITL confirmation decisions. -Actual confirmation is handled at the framework level by ADK ConfirmationPlugin -and LangGraph's _wrap_for_confirmation wrapper. - -Example: - from agentic_cli.tools.hitl_tools import ApprovalManager, ApprovalResult - - manager = ApprovalManager() - result = manager.record("req-1", approved=True) -""" - -from dataclasses import dataclass, field -from datetime import datetime - - -# --------------------------------------------------------------------------- -# Data classes -# --------------------------------------------------------------------------- - - -@dataclass -class ApprovalResult: - """Result of an approval decision.""" - - request_id: str - approved: bool - reason: str | None = None - decided_at: datetime = field(default_factory=datetime.now) - - -# --------------------------------------------------------------------------- -# ApprovalManager -# --------------------------------------------------------------------------- - - -class ApprovalManager: - """Manages approval request records. - - The actual blocking is handled by framework-level confirmation hooks - (ADK ConfirmationPlugin / LangGraph _wrap_for_confirmation). This manager - records decisions for audit/history purposes. - - Example: - manager = ApprovalManager() - result = manager.record(request_id, approved=True) - """ - - def __init__(self) -> None: - """Initialize approval manager.""" - self._history: list[ApprovalResult] = [] - - def record( - self, - request_id: str, - approved: bool, - reason: str | None = None, - ) -> ApprovalResult: - """Record an approval decision. - - Args: - request_id: The request ID. - approved: Whether the request was approved. - reason: Optional reason for the decision. - - Returns: - ApprovalResult with the recorded decision. - """ - result = ApprovalResult( - request_id=request_id, - approved=approved, - reason=reason, - ) - self._history.append(result) - return result - - @property - def history(self) -> list[ApprovalResult]: - """Get all recorded approval decisions.""" - return list(self._history) diff --git a/src/agentic_cli/tools/interaction_tools.py b/src/agentic_cli/tools/interaction_tools.py index eb46d0f..1b8e6c6 100644 --- a/src/agentic_cli/tools/interaction_tools.py +++ b/src/agentic_cli/tools/interaction_tools.py @@ -8,14 +8,12 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import EXEMPT @register_tool( category=ToolCategory.INTERACTION, - permission_level=PermissionLevel.SAFE, capabilities=EXEMPT, description="Ask the user a clarifying question and wait for their response. Use this when requirements are ambiguous or you need user input to proceed.", ) diff --git a/src/agentic_cli/tools/knowledge_tools.py b/src/agentic_cli/tools/knowledge_tools.py index 69d0df2..dfa76e1 100644 --- a/src/agentic_cli/tools/knowledge_tools.py +++ b/src/agentic_cli/tools/knowledge_tools.py @@ -29,7 +29,6 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import ( @@ -599,7 +598,6 @@ def _find_document_in_kbs(doc_id_or_title: str) -> tuple: @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("kb.read")], description="Search the local knowledge base for relevant documents using semantic similarity. Use this when you need to find previously ingested papers, notes, or documents.", ) @@ -627,7 +625,6 @@ def kb_search( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.CAUTION, capabilities=[Capability("kb.write")], description=( "Ingest a document into the knowledge base. " @@ -687,7 +684,6 @@ async def kb_ingest( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("kb.read")], description=( "Read a stored document by ID or title. Returns the per-document " @@ -716,7 +712,6 @@ async def kb_read( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("kb.read")], description="List documents in the knowledge base with summaries. Filter by query or source type. Returns summaries, not full content.", ) @@ -744,7 +739,6 @@ def kb_list( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.CAUTION, capabilities=[Capability("kb.write")], description=( "Save an agent-curated concept page summarizing what the KB " @@ -783,7 +777,6 @@ async def kb_write_concept( @register_tool( category=ToolCategory.KNOWLEDGE, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("kb.read")], description=( "Search concept pages (agent-curated synthesis notes). " diff --git a/src/agentic_cli/tools/memory_tools.py b/src/agentic_cli/tools/memory_tools.py index b864366..2bc0a74 100644 --- a/src/agentic_cli/tools/memory_tools.py +++ b/src/agentic_cli/tools/memory_tools.py @@ -31,7 +31,6 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability from agentic_cli.workflow.service_registry import require_service, MEMORY_STORE @@ -506,7 +505,6 @@ def _delete_memory_with_store( @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("memory.write")], description="Save information to persistent memory that survives across sessions. Use this to remember user preferences, important facts, or learnings for future conversations.", ) @@ -530,7 +528,6 @@ def save_memory( @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("memory.read")], description="Search persistent memory by keyword/substring. Use this to recall previously saved facts, preferences, or learnings.", ) @@ -554,7 +551,6 @@ def search_memory( @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("memory.write")], description="Update an existing memory item", ) @@ -578,7 +574,6 @@ def update_memory( @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.CAUTION, capabilities=[Capability("memory.write")], description="Delete a memory item", ) diff --git a/src/agentic_cli/tools/reflection_tools.py b/src/agentic_cli/tools/reflection_tools.py index e6a934f..2909802 100644 --- a/src/agentic_cli/tools/reflection_tools.py +++ b/src/agentic_cli/tools/reflection_tools.py @@ -14,7 +14,7 @@ from agentic_cli.file_utils import atomic_write_json from agentic_cli.logging import get_logger -from agentic_cli.tools.registry import PermissionLevel, ToolCategory, register_tool +from agentic_cli.tools.registry import ToolCategory, register_tool from agentic_cli.workflow.permissions import EXEMPT from agentic_cli.workflow.service_registry import require_service @@ -117,7 +117,6 @@ def format_for_prompt(self, tool_name: str) -> str: @register_tool( category=ToolCategory.MEMORY, - permission_level=PermissionLevel.SAFE, capabilities=EXEMPT, description="Save a learned heuristic from a tool failure", ) diff --git a/src/agentic_cli/tools/registry.py b/src/agentic_cli/tools/registry.py index 4018c33..9803200 100644 --- a/src/agentic_cli/tools/registry.py +++ b/src/agentic_cli/tools/registry.py @@ -57,17 +57,6 @@ class ToolCategory(Enum): OTHER = "other" -class PermissionLevel(Enum): - """Permission levels for tool safety classification. - - Used to determine whether user confirmation is needed before tool execution. - """ - - SAFE = "safe" # No confirmation needed (read operations) - CAUTION = "caution" # May need allowlisting (write operations) - DANGEROUS = "dangerous" # Always requires confirmation (delete, shell) - - @dataclass class ToolDefinition: """Metadata-rich tool definition. @@ -76,7 +65,7 @@ class ToolDefinition: name: Tool name (defaults to function name) description: Human-readable description category: Tool category for organization (READ, WRITE, NETWORK, etc.) - permission_level: Safety classification (SAFE, CAUTION, DANGEROUS) + capabilities: Capability declarations for the permission engine is_async: Whether the tool is async func: The actual tool function """ @@ -84,9 +73,8 @@ class ToolDefinition: name: str description: str func: Callable[..., Any] - capabilities: CapabilitiesSpec = field(default_factory=lambda: EXEMPT) + capabilities: CapabilitiesSpec category: ToolCategory = ToolCategory.OTHER - permission_level: PermissionLevel = PermissionLevel.SAFE is_async: bool = False def __post_init__(self): @@ -143,18 +131,17 @@ def register( name: str | None = None, description: str | None = None, category: ToolCategory = ToolCategory.OTHER, - permission_level: PermissionLevel = PermissionLevel.SAFE, - capabilities: CapabilitiesSpec = EXEMPT, + capabilities: CapabilitiesSpec, ) -> Callable[..., Any]: """Register a tool function. Can be used as a decorator: - @registry.register(category=ToolCategory.READ, permission_level=PermissionLevel.SAFE) + @registry.register(category=ToolCategory.READ, capabilities=[Capability(...)]) def my_tool(query: str) -> dict: ... Or called directly: - registry.register(my_tool, category=ToolCategory.READ) + registry.register(my_tool, category=ToolCategory.READ, capabilities=EXEMPT) """ def decorator(f: Callable[..., Any]) -> Callable[..., Any]: @@ -168,7 +155,6 @@ def decorator(f: Callable[..., Any]) -> Callable[..., Any]: func=f, capabilities=validated_caps, category=category, - permission_level=permission_level, ) self._tools[tool_name] = definition @@ -216,14 +202,13 @@ def register_tool( name: str | None = None, description: str | None = None, category: ToolCategory = ToolCategory.OTHER, - permission_level: PermissionLevel = PermissionLevel.SAFE, - capabilities: CapabilitiesSpec = EXEMPT, + capabilities: CapabilitiesSpec, ) -> Callable[..., Any]: """Register a tool with the default registry. - DANGEROUS tools are no longer wrapped at registration time. Instead, - confirmation is handled at the framework level by ADK ConfirmationPlugin - and LangGraph's _wrap_for_confirmation wrapper. + ``capabilities`` is a required keyword argument. Pass ``EXEMPT`` to opt out + of the permission engine, or a list of ``Capability`` instances to declare + the resources this tool accesses. """ def _outer(f: Callable[..., Any]) -> Callable[..., Any]: @@ -232,12 +217,9 @@ def _outer(f: Callable[..., Any]) -> Callable[..., Any]: name=name, description=description, category=category, - permission_level=permission_level, capabilities=capabilities, ) if func is not None: return _outer(func) return _outer - - diff --git a/src/agentic_cli/tools/sandbox/__init__.py b/src/agentic_cli/tools/sandbox/__init__.py index 1a8b889..994ba14 100644 --- a/src/agentic_cli/tools/sandbox/__init__.py +++ b/src/agentic_cli/tools/sandbox/__init__.py @@ -6,14 +6,13 @@ from typing import Any -from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools.registry import register_tool, ToolCategory from agentic_cli.workflow.service_registry import require_service, SANDBOX_MANAGER from agentic_cli.workflow.permissions import Capability @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, capabilities=[Capability("python.exec")], description=( "Execute Python code in a stateful sandbox session. " diff --git a/src/agentic_cli/tools/search.py b/src/agentic_cli/tools/search.py index 1c47b4a..700022f 100644 --- a/src/agentic_cli/tools/search.py +++ b/src/agentic_cli/tools/search.py @@ -26,7 +26,6 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability @@ -199,7 +198,6 @@ def _get_backend(settings: Any) -> SearchBackend: @register_tool( category=ToolCategory.NETWORK, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("http.read")], description="Search the web for current information using the configured backend (Tavily or Brave). Use this for questions about recent events, documentation, or facts not in your training data.", ) diff --git a/src/agentic_cli/tools/shell/executor.py b/src/agentic_cli/tools/shell/executor.py index 8447a09..fa85176 100644 --- a/src/agentic_cli/tools/shell/executor.py +++ b/src/agentic_cli/tools/shell/executor.py @@ -22,7 +22,6 @@ from agentic_cli.tools.registry import ( register_tool, ToolCategory, - PermissionLevel, ) from agentic_cli.workflow.permissions import Capability @@ -203,7 +202,6 @@ def _execute_command( @register_tool( category=ToolCategory.EXECUTION, - permission_level=PermissionLevel.DANGEROUS, capabilities=[Capability("shell.exec", target_arg="command")], description="Execute a shell command with layered security and HITL approval", ) @@ -392,7 +390,7 @@ def shell_executor( "command": command, "risk_level": risk.overall_risk.value, "risk_factors": risk.risk_factors, - "message": "Command requires approval but no ApprovalManager configured", + "message": "Command requires approval but no approval manager configured", "error": None, } diff --git a/src/agentic_cli/tools/webfetch_tool.py b/src/agentic_cli/tools/webfetch_tool.py index 91887c0..b37fa07 100644 --- a/src/agentic_cli/tools/webfetch_tool.py +++ b/src/agentic_cli/tools/webfetch_tool.py @@ -9,7 +9,7 @@ from typing import Any from agentic_cli.config import get_settings -from agentic_cli.tools.registry import register_tool, ToolCategory, PermissionLevel +from agentic_cli.tools.registry import register_tool, ToolCategory from agentic_cli.workflow.permissions import Capability from agentic_cli.tools.webfetch import ( ContentFetcher, @@ -76,7 +76,6 @@ def get_or_create_fetcher(settings=None) -> ContentFetcher: @register_tool( category=ToolCategory.NETWORK, - permission_level=PermissionLevel.SAFE, capabilities=[Capability("http.read", target_arg="url")], description="Fetch a web page, convert it to markdown, and summarize it using an LLM based on your prompt. Use this to extract specific information from a URL (e.g., documentation, articles).", ) diff --git a/src/agentic_cli/workflow/adk/__init__.py b/src/agentic_cli/workflow/adk/__init__.py index 2afa6fc..6513ad0 100644 --- a/src/agentic_cli/workflow/adk/__init__.py +++ b/src/agentic_cli/workflow/adk/__init__.py @@ -3,7 +3,7 @@ This submodule contains ADK-specific components: - manager.py: GoogleADKWorkflowManager - event_processor.py: ADKEventProcessor -- plugins.py: ADK plugins (ConfirmationPlugin, LLMLoggingPlugin) +- plugins.py: ADK plugins (LLMLoggingPlugin) Note: GoogleADKWorkflowManager is NOT re-exported here to avoid circular imports. Import it directly from agentic_cli.workflow.adk.manager. diff --git a/src/agentic_cli/workflow/adk/plugins.py b/src/agentic_cli/workflow/adk/plugins.py index 0e2a5dc..b3069b8 100644 --- a/src/agentic_cli/workflow/adk/plugins.py +++ b/src/agentic_cli/workflow/adk/plugins.py @@ -1,7 +1,6 @@ """ADK Plugins for agentic-cli. Provides framework-level cross-cutting concerns as ADK Plugins: -- ConfirmationPlugin: HITL confirmation for DANGEROUS tools - LLMLoggingPlugin: Raw LLM traffic logging for debugging """ @@ -16,63 +15,16 @@ from google.adk.plugins.base_plugin import BasePlugin -from agentic_cli.workflow.confirmation import ( - is_dangerous, - request_tool_confirmation, -) from agentic_cli.workflow.events import WorkflowEvent from agentic_cli.logging import Loggers if TYPE_CHECKING: from google.adk.agents.callback_context import CallbackContext from google.adk.models import LlmRequest, LlmResponse - from google.adk.tools import BaseTool - from google.adk.tools.tool_context import ToolContext logger = Loggers.workflow() -class ConfirmationPlugin(BasePlugin): - """ADK Plugin that requires user confirmation for DANGEROUS tools. - - Uses the workflow manager's request_user_input callback to prompt - the user before executing any tool with PermissionLevel.DANGEROUS. - - Replaces the old _wrap_dangerous decorator pattern with a single - framework-level hook that applies to all agents globally. - """ - - def __init__(self) -> None: - super().__init__(name="confirmation") - - async def before_tool_callback( - self, - *, - tool: "BaseTool", - tool_args: dict[str, Any], - tool_context: "ToolContext", - ) -> dict | None: - """Intercept DANGEROUS tool calls and request user confirmation.""" - if not is_dangerous(tool.name): - return None - - approved = await request_tool_confirmation(tool.name, tool_args) - - if approved is None: - logger.warning("confirmation_plugin.no_workflow_or_callback", tool=tool.name) - return None - - if approved: - logger.debug("confirmation_plugin.approved", tool=tool.name) - return None - - logger.info("confirmation_plugin.denied", tool=tool.name) - return { - "success": False, - "error": f"User denied approval for {tool.name}", - } - - class LLMLoggingPlugin(BasePlugin): """ADK Plugin that captures raw LLM request/response traffic for debugging. diff --git a/src/agentic_cli/workflow/confirmation.py b/src/agentic_cli/workflow/confirmation.py deleted file mode 100644 index 21c7266..0000000 --- a/src/agentic_cli/workflow/confirmation.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Backend-neutral HITL confirmation for DANGEROUS tools. - -Both the ADK ConfirmationPlugin and the LangGraph tool wrapper call into -this module to decide whether a tool call needs user approval and, if so, -to request it through the active workflow manager. Keeping these helpers -here (rather than under workflow/adk/) prevents LangGraph from having to -import ADK internals. -""" - -from __future__ import annotations - -import uuid -from typing import Any - -from agentic_cli.tools.registry import PermissionLevel, get_registry -from agentic_cli.workflow.events import InputType, UserInputRequest -from agentic_cli.workflow.service_registry import WORKFLOW, get_service - -_APPROVED_RESPONSES = ("yes", "y", "approve", "true") - - -def is_dangerous(tool_name: str) -> bool: - """Return True if the named tool is registered as DANGEROUS.""" - defn = get_registry().get(tool_name) - if defn is None: - return False - return defn.permission_level == PermissionLevel.DANGEROUS - - -async def request_tool_confirmation( - tool_name: str, tool_args: dict[str, Any] -) -> bool | None: - """Prompt the user for confirmation of a dangerous tool call. - - Returns: - True if approved, False if denied, None if no workflow/callback - is available to prompt the user. - """ - workflow = get_service(WORKFLOW) - if workflow is None: - return None - - arg_summary = ", ".join( - f"{k}={repr(v)[:50]}" for k, v in list(tool_args.items())[:3] - ) - request = UserInputRequest( - request_id=str(uuid.uuid4())[:8], - tool_name=tool_name, - prompt=( - f"Tool requires approval: {tool_name}({arg_summary})\n\n" - f"Allow this operation? (yes/no)" - ), - input_type=InputType.CONFIRM, - default="no", - ) - - try: - response = await workflow.request_user_input(request) - except RuntimeError: - return None - - return response.strip().lower() in _APPROVED_RESPONSES diff --git a/src/agentic_cli/workflow/langgraph/graph_builder.py b/src/agentic_cli/workflow/langgraph/graph_builder.py index bc295d1..95787a8 100644 --- a/src/agentic_cli/workflow/langgraph/graph_builder.py +++ b/src/agentic_cli/workflow/langgraph/graph_builder.py @@ -6,13 +6,10 @@ from __future__ import annotations -import asyncio -import functools from collections import deque from typing import Any, Callable, TYPE_CHECKING from agentic_cli.workflow.config import AgentConfig -from agentic_cli.workflow.confirmation import is_dangerous, request_tool_confirmation from agentic_cli.workflow.langgraph.permission_wrap import wrap_tool_for_permission from agentic_cli.logging import Loggers @@ -303,45 +300,6 @@ def get_retry_policy(self): backoff_factor=self._settings.retry_backoff_factor, ) - @staticmethod - def _wrap_for_confirmation(tool: Callable) -> Callable: - """Wrap a DANGEROUS tool with user confirmation logic. - - Safe tools are returned as-is. DANGEROUS tools get an async wrapper - that uses the workflow manager's request_user_input callback to prompt - the user before executing. - - The wrapper is always async regardless of the original tool, because - the confirmation prompt requires awaiting request_user_input(). This - is safe because LangGraph's ToolNode calls ainvoke() for all tools. - - Args: - tool: The tool function to potentially wrap. - - Returns: - The original function (if safe) or a confirmation-wrapped version. - """ - tool_name = getattr(tool, "__name__", str(tool)) - if not is_dangerous(tool_name): - return tool - - is_async = asyncio.iscoroutinefunction(tool) - - @functools.wraps(tool) - async def _confirmed(*args: Any, **kwargs: Any) -> Any: - approved = await request_tool_confirmation(tool_name, kwargs) - if approved is False: - return { - "success": False, - "error": f"User denied approval for {tool_name}", - } - # approved is True or None (no workflow/callback) → proceed - if is_async: - return await tool(*args, **kwargs) - return tool(*args, **kwargs) - - return _confirmed - def _create_agent_node( self, config: AgentConfig, default_model: str, tools: list | None = None, ) -> Callable: diff --git a/src/agentic_cli/workflow/settings.py b/src/agentic_cli/workflow/settings.py index 812fe40..1246380 100644 --- a/src/agentic_cli/workflow/settings.py +++ b/src/agentic_cli/workflow/settings.py @@ -299,15 +299,7 @@ class WorkflowSettingsMixin: json_schema_extra={"ui_order": 132}, ) - # HITL (Human-in-the-Loop) settings - hitl_enabled: bool = Field( - default=True, - title="HITL Enabled", - description="Enable human-in-the-loop features (approvals)", - json_schema_extra={"ui_order": 135}, - ) - - # Permissions (new in phase 2) + # Permissions permissions: PermissionsConfig = Field( default_factory=PermissionsConfig, title="Permissions", diff --git a/tests/test_adk_plugins.py b/tests/test_adk_plugins.py index 5c03f16..8530e83 100644 --- a/tests/test_adk_plugins.py +++ b/tests/test_adk_plugins.py @@ -1,235 +1,11 @@ -"""Tests for ADK Plugins (ConfirmationPlugin, LLMLoggingPlugin).""" +"""Tests for ADK Plugins (LLMLoggingPlugin).""" import json import pytest -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock -from agentic_cli.workflow.adk.plugins import ( - ConfirmationPlugin, LLMLoggingPlugin, is_dangerous, -) -from agentic_cli.workflow.events import EventType, UserInputRequest, WorkflowEvent - - -def _make_mock_tool(name: str) -> MagicMock: - """Create a mock ADK BaseTool with a given name.""" - tool = MagicMock() - tool.name = name - return tool - - -def _make_mock_tool_context() -> MagicMock: - """Create a mock ToolContext.""" - return MagicMock() - - -class TestIsDangerous: - """Tests for the is_dangerous helper.""" - - def test_dangerous_tool_detected(self): - """A tool with PermissionLevel.DANGEROUS should be detected.""" - from agentic_cli.tools.registry import ToolDefinition, PermissionLevel, ToolRegistry - - mock_registry = ToolRegistry() - mock_registry.register( - lambda: None, - name="risky_tool", - permission_level=PermissionLevel.DANGEROUS, - ) - - with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): - result = is_dangerous("risky_tool") - assert result is True - - def test_safe_tool_not_dangerous(self): - """A tool with PermissionLevel.SAFE should not be dangerous.""" - from agentic_cli.tools.registry import ToolDefinition, PermissionLevel, ToolRegistry - - mock_registry = ToolRegistry() - mock_registry.register( - lambda: None, - name="safe_tool", - permission_level=PermissionLevel.SAFE, - ) - - with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): - result = is_dangerous("safe_tool") - assert result is False - - def test_unknown_tool_not_dangerous(self): - """An unregistered tool should not be dangerous.""" - from agentic_cli.tools.registry import ToolRegistry - - mock_registry = ToolRegistry() - - with patch("agentic_cli.workflow.confirmation.get_registry", return_value=mock_registry): - result = is_dangerous("unknown_tool") - assert result is False - - -class TestConfirmationPlugin: - """Tests for the ConfirmationPlugin.""" - - @pytest.fixture - def plugin(self): - return ConfirmationPlugin() - - async def test_safe_tool_passes_through(self, plugin): - """Safe tools should not be intercepted (returns None).""" - tool = _make_mock_tool("safe_tool") - tool_context = _make_mock_tool_context() - - with patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=False): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"query": "hello"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_dangerous_tool_approved(self, plugin): - """DANGEROUS tool should pass through when user approves.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="yes") - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"path": "/tmp/file"}, - tool_context=tool_context, - ) - - assert result is None - mock_workflow.request_user_input.assert_called_once() - # Verify the request was a CONFIRM type - request = mock_workflow.request_user_input.call_args[0][0] - assert isinstance(request, UserInputRequest) - assert request.tool_name == "dangerous_tool" - assert request.input_type.value == "confirm" - - async def test_dangerous_tool_denied(self, plugin): - """DANGEROUS tool should return error dict when user denies.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="no") - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"cmd": "rm -rf /"}, - tool_context=tool_context, - ) - - assert result is not None - assert result["success"] is False - assert "denied" in result["error"].lower() - assert "dangerous_tool" in result["error"] - - async def test_no_workflow_passes_through(self, plugin): - """When no workflow is available, tool should proceed (returns None).""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=None), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"cmd": "ls"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_no_callback_passes_through(self, plugin): - """When workflow has no callback, tool should proceed (returns None).""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock( - side_effect=RuntimeError("No callback registered") - ) - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={"cmd": "ls"}, - tool_context=tool_context, - ) - - assert result is None - - async def test_various_approval_responses(self, plugin): - """Multiple response strings should be treated as approval.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - for response in ["yes", "y", "YES", " Yes ", "approve", "true"]: - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value=response) - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args={}, - tool_context=tool_context, - ) - - assert result is None, f"Expected approval for response {response!r}" - - async def test_arg_summary_truncation(self, plugin): - """Tool args should be summarized and truncated in the prompt.""" - tool = _make_mock_tool("dangerous_tool") - tool_context = _make_mock_tool_context() - - mock_workflow = AsyncMock() - mock_workflow.request_user_input = AsyncMock(return_value="yes") - - long_args = { - "arg1": "x" * 100, - "arg2": "short", - "arg3": "included", - "arg4": "excluded_beyond_3", - } - - with ( - patch("agentic_cli.workflow.adk.plugins.is_dangerous", return_value=True), - patch("agentic_cli.workflow.confirmation.get_service", return_value=mock_workflow), - ): - result = await plugin.before_tool_callback( - tool=tool, - tool_args=long_args, - tool_context=tool_context, - ) - - assert result is None - request = mock_workflow.request_user_input.call_args[0][0] - # arg4 should not appear (only first 3 args included) - assert "arg4" not in request.prompt - assert "arg1" in request.prompt - - def test_plugin_name(self, plugin): - """Plugin should have the correct name.""" - assert plugin.name == "confirmation" +from agentic_cli.workflow.adk.plugins import LLMLoggingPlugin +from agentic_cli.workflow.events import EventType, WorkflowEvent # ------------------------------------------------------------------------- diff --git a/tests/test_config.py b/tests/test_config.py index 8a439a3..054733e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -417,14 +417,10 @@ def test_package_exports_new_modules(): # Memory from agentic_cli.tools.memory_tools import MemoryStore, MemoryItem - # HITL - from agentic_cli.tools.hitl_tools import ApprovalManager - # Tools from agentic_cli.tools import shell_executor, read_file, write_file, diff_compare # All imports should succeed assert MemoryStore is not None assert MemoryItem is not None - assert ApprovalManager is not None assert shell_executor is not None diff --git a/tests/test_hitl.py b/tests/test_hitl.py deleted file mode 100644 index 9db85be..0000000 --- a/tests/test_hitl.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Tests for Human-in-the-Loop module.""" - -import pytest - - -class TestApprovalManager: - """Tests for simplified ApprovalManager class.""" - - def test_empty_history(self): - """Test manager starts with empty history.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - assert manager.history == [] - - def test_record_approval(self): - """Test recording an approval.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - result = manager.record("req-1", approved=True) - - assert result.request_id == "req-1" - assert result.approved is True - assert result.reason is None - assert len(manager.history) == 1 - - def test_record_rejection(self): - """Test recording a rejection with reason.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - result = manager.record("req-1", approved=False, reason="Too risky") - - assert result.approved is False - assert result.reason == "Too risky" - - def test_history_accumulates(self): - """Test that history accumulates multiple records.""" - from agentic_cli.tools.hitl_tools import ApprovalManager - - manager = ApprovalManager() - manager.record("req-1", approved=True) - manager.record("req-2", approved=False, reason="No") - manager.record("req-3", approved=True) - - assert len(manager.history) == 3 - - -class TestHITLImports: - """Tests for hitl_tools module exports.""" - - def test_import_core_classes(self): - """Test core exports are available.""" - from agentic_cli.tools.hitl_tools import ( - ApprovalManager, - ApprovalResult, - ) - - assert ApprovalManager is not None - assert ApprovalResult is not None diff --git a/tests/test_langgraph_confirmation.py b/tests/test_langgraph_confirmation.py deleted file mode 100644 index 747cacd..0000000 --- a/tests/test_langgraph_confirmation.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Tests for LangGraph confirmation wrapper.""" - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch - -from agentic_cli.workflow.langgraph.graph_builder import LangGraphBuilder - - -def _make_sync_tool(name: str = "my_tool"): - """Create a simple sync tool function.""" - def tool_fn(x: str) -> dict: - """A test tool.""" - return {"success": True, "result": x} - tool_fn.__name__ = name - return tool_fn - - -def _make_async_tool(name: str = "my_async_tool"): - """Create a simple async tool function.""" - async def tool_fn(x: str) -> dict: - """An async test tool.""" - return {"success": True, "result": x} - tool_fn.__name__ = name - return tool_fn - - -# Patch targets — both imported into graph_builder from plugins -_IS_DANGEROUS = "agentic_cli.workflow.langgraph.graph_builder.is_dangerous" -_CONFIRM = "agentic_cli.workflow.langgraph.graph_builder.request_tool_confirmation" - - -class TestWrapForConfirmation: - """Tests for LangGraphBuilder._wrap_for_confirmation.""" - - def test_safe_tool_passes_through(self): - """Safe tools should be returned unwrapped.""" - tool = _make_sync_tool("safe_tool") - with patch(_IS_DANGEROUS, return_value=False): - result = LangGraphBuilder._wrap_for_confirmation(tool) - assert result is tool - - def test_dangerous_tool_is_wrapped(self): - """DANGEROUS tools should be wrapped (different function object).""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - result = LangGraphBuilder._wrap_for_confirmation(tool) - assert result is not tool - assert result.__name__ == "dangerous_tool" - - async def test_dangerous_sync_tool_approved(self): - """Approved DANGEROUS sync tool should execute and return result.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=True): - result = await wrapped(x="hello") - assert result == {"success": True, "result": "hello"} - - async def test_dangerous_async_tool_approved(self): - """Approved DANGEROUS async tool should execute and return result.""" - tool = _make_async_tool("dangerous_async") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=True): - result = await wrapped(x="world") - assert result == {"success": True, "result": "world"} - - async def test_dangerous_tool_denied(self): - """Denied DANGEROUS tool should return error dict without executing.""" - call_count = 0 - def tool_fn(x: str) -> dict: - """A test tool.""" - nonlocal call_count - call_count += 1 - return {"success": True} - tool_fn.__name__ = "dangerous_tool" - - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool_fn) - - with patch(_CONFIRM, return_value=False): - result = await wrapped(x="test") - - assert result["success"] is False - assert "denied" in result["error"].lower() - assert call_count == 0 - - async def test_no_workflow_allows_execution(self): - """When request_tool_confirmation returns None, tool executes.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=None): - result = await wrapped(x="test") - assert result == {"success": True, "result": "test"} - - async def test_no_callback_allows_execution(self): - """When request_tool_confirmation returns None (no callback), tool executes.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - with patch(_CONFIRM, return_value=None): - result = await wrapped(x="test") - assert result == {"success": True, "result": "test"} - - def test_preserves_docstring(self): - """Wrapped function should preserve the docstring.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - assert wrapped.__doc__ == tool.__doc__ - - def test_preserves_annotations(self): - """Wrapped function should preserve type annotations.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - assert wrapped.__annotations__ == tool.__annotations__ - - async def test_confirmation_called_with_tool_args(self): - """request_tool_confirmation should receive tool name and kwargs.""" - tool = _make_sync_tool("dangerous_tool") - with patch(_IS_DANGEROUS, return_value=True): - wrapped = LangGraphBuilder._wrap_for_confirmation(tool) - - mock_confirm = AsyncMock(return_value=True) - with patch(_CONFIRM, mock_confirm): - await wrapped(x="hello") - - mock_confirm.assert_called_once_with("dangerous_tool", {"x": "hello"}) diff --git a/tests/test_tools.py b/tests/test_tools.py index 4362b10..2b387ee 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -426,10 +426,13 @@ def test_definition_basic(self): def my_tool(query: str) -> dict: return {"result": query} + from agentic_cli.workflow.permissions import EXEMPT + definition = ToolDefinition( name="my_tool", description="A test tool", func=my_tool, + capabilities=EXEMPT, ) assert definition.name == "my_tool" @@ -440,6 +443,7 @@ def my_tool(query: str) -> dict: def test_definition_async_detection(self): """Test async function detection.""" + from agentic_cli.workflow.permissions import EXEMPT async def async_tool(query: str) -> dict: return {"result": query} @@ -448,12 +452,14 @@ async def async_tool(query: str) -> dict: name="async_tool", description="An async tool", func=async_tool, + capabilities=EXEMPT, ) assert definition.is_async is True def test_definition_with_category(self): """Test definition with category.""" + from agentic_cli.workflow.permissions import EXEMPT def search(query: str) -> dict: return {} @@ -462,6 +468,7 @@ def search(query: str) -> dict: name="search", description="Search tool", func=search, + capabilities=EXEMPT, category=ToolCategory.NETWORK, ) @@ -481,13 +488,15 @@ def test_registry_empty(self): def test_registry_register_direct(self): """Test direct registration.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() def my_tool(x: int) -> int: """Multiply by two.""" return x * 2 - registry.register(my_tool, category=ToolCategory.KNOWLEDGE) + registry.register(my_tool, category=ToolCategory.KNOWLEDGE, capabilities=EXEMPT) assert len(registry) == 1 assert "my_tool" in registry @@ -498,9 +507,11 @@ def my_tool(x: int) -> int: def test_registry_register_decorator(self): """Test decorator registration.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search_tool(query: str) -> dict: """Search for things.""" return {"results": []} @@ -511,9 +522,11 @@ def search_tool(query: str) -> dict: def test_registry_custom_name(self): """Test registration with custom name.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(name="custom_search", description="Custom search") + @registry.register(name="custom_search", description="Custom search", capabilities=EXEMPT) def internal_search(q: str) -> dict: return {} @@ -523,6 +536,8 @@ def internal_search(q: str) -> dict: def test_registry_get_functions(self): """Test getting all functions.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() def tool_a(): @@ -531,8 +546,8 @@ def tool_a(): def tool_b(): pass - registry.register(tool_a) - registry.register(tool_b) + registry.register(tool_a, capabilities=EXEMPT) + registry.register(tool_b, capabilities=EXEMPT) functions = registry.get_functions() assert len(functions) == 2 @@ -541,17 +556,19 @@ def tool_b(): def test_registry_list_by_category(self): """Test listing tools by category.""" + from agentic_cli.workflow.permissions import EXEMPT + registry = ToolRegistry() - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search1(): pass - @registry.register(category=ToolCategory.NETWORK) + @registry.register(category=ToolCategory.NETWORK, capabilities=EXEMPT) def search2(): pass - @registry.register(category=ToolCategory.EXECUTION) + @registry.register(category=ToolCategory.EXECUTION, capabilities=EXEMPT) def execute(): pass @@ -1294,19 +1311,6 @@ def test_all_registered_tools_have_category(self): assert tool.category is not None, f"Tool '{tool.name}' has no category" assert tool.category.value is not None, f"Tool '{tool.name}' has invalid category" - def test_all_registered_tools_have_permission_level(self): - """Test that all registered tools have a permission level defined.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - tools = registry.list_tools() - - for tool in tools: - assert tool.permission_level is not None, f"Tool '{tool.name}' has no permission_level" - assert isinstance(tool.permission_level, PermissionLevel), ( - f"Tool '{tool.name}' has invalid permission_level type" - ) - def test_expected_tools_are_registered(self): """Test that all expected tools are in the registry.""" from agentic_cli.tools import get_registry @@ -1323,7 +1327,6 @@ def test_expected_tools_are_registered(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1368,58 +1371,6 @@ def test_expected_tools_are_registered(self): f"Registered tools: {sorted(registered_names)}" ) - def test_dangerous_tools_have_correct_permission(self): - """Test that dangerous tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - dangerous_tools = ["shell_executor", "execute_python"] - for tool_name in dangerous_tools: - tool = registry.get(tool_name) - if tool: - assert tool.permission_level == PermissionLevel.DANGEROUS, ( - f"{tool_name} should be DANGEROUS, got {tool.permission_level}" - ) - - def test_caution_tools_have_correct_permission(self): - """Test that caution-level tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - caution_tools = ["write_file", "edit_file", "kb_ingest"] - - for tool_name in caution_tools: - tool = registry.get(tool_name) - if tool: - assert tool.permission_level == PermissionLevel.CAUTION, ( - f"{tool_name} should be CAUTION, got {tool.permission_level}" - ) - - def test_safe_tools_have_correct_permission(self): - """Test that safe tools are properly marked.""" - from agentic_cli.tools import get_registry, PermissionLevel - - registry = get_registry() - - safe_tools = [ - "read_file", "diff_compare", "grep", "glob", "list_dir", - "web_search", "web_fetch", "kb_search", - "kb_read", "kb_list", - "search_arxiv", "fetch_arxiv_paper", - "ask_clarification", - "save_memory", "search_memory", - "save_plan", "get_plan", - ] - - for tool_name in safe_tools: - tool = registry.get(tool_name) - if tool: - assert tool.permission_level == PermissionLevel.SAFE, ( - f"{tool_name} should be SAFE, got {tool.permission_level}" - ) - def test_tools_by_category(self): """Test that tools are properly categorized.""" from agentic_cli.tools import get_registry, ToolCategory @@ -1436,7 +1387,6 @@ def test_tools_by_category(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1488,7 +1438,6 @@ def test_registry_tool_count(self): import agentic_cli.tools.interaction_tools # noqa: F401 import agentic_cli.tools.webfetch_tool # noqa: F401 import agentic_cli.tools.memory_tools # noqa: F401 - import agentic_cli.tools.hitl_tools # noqa: F401 import agentic_cli.tools.shell.executor # noqa: F401 registry = get_registry() @@ -1507,10 +1456,10 @@ def test_registry_tool_count(self): class TestDangerousToolDirectExecution: - """Tests that DANGEROUS tools execute directly (no registry-level wrapping). + """Tests that tools execute directly (no registry-level wrapping). - Confirmation is now handled at the framework level by ADK ConfirmationPlugin - and LangGraph's _wrap_for_confirmation wrapper. + Permission checks are now handled at the framework level by the + capability-based permission engine. """ def test_dangerous_tool_not_wrapped(self): @@ -1544,11 +1493,4 @@ def test_dangerous_tool_executes_directly(self): tok.var.reset(tok) mgr.cleanup() - def test_dangerous_tool_has_correct_permission_level(self): - """DANGEROUS tools should still be registered with DANGEROUS permission.""" - from agentic_cli.tools import get_registry, PermissionLevel - registry = get_registry() - tool = registry.get("sandbox_execute") - if tool: - assert tool.permission_level == PermissionLevel.DANGEROUS diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py index 719902b..440a959 100644 --- a/tests/tools/test_registry.py +++ b/tests/tools/test_registry.py @@ -1,15 +1,12 @@ """Tests for ToolDefinition.capabilities field + register_tool capabilities= kwarg. -This task adds `capabilities` as an optional kwarg (default EXEMPT) alongside -the existing `permission_level` kwarg so tools can be migrated incrementally. -Both fields coexist on `ToolDefinition` throughout Phase 2–5; the old field -is removed in the final cleanup task. +capabilities= is a required kwarg on @register_tool / ToolRegistry.register. +Omitting it raises TypeError. """ import pytest from agentic_cli.tools.registry import ( - PermissionLevel, ToolCategory, ToolRegistry, ) @@ -18,15 +15,13 @@ class TestCapabilitiesKwarg: - def test_default_is_exempt(self): + def test_capabilities_kwarg_is_required(self): + """Omitting capabilities= should raise TypeError.""" reg = ToolRegistry() - - @reg.register(name="no_caps") - def no_caps() -> dict: - return {} - - defn = reg.get("no_caps") - assert isinstance(defn.capabilities, _CapabilityExempt) + with pytest.raises(TypeError): + @reg.register(name="no_caps") + def no_caps() -> dict: + return {} def test_exempt_stored(self): reg = ToolRegistry() @@ -69,38 +64,3 @@ def test_list_with_non_capability_rejected(self): @reg.register(name="bad3", capabilities=["not a Capability"]) # type: ignore[list-item] def bad3() -> dict: return {} - - def test_permission_level_still_works(self): - """Old kwarg continues to be accepted during migration.""" - reg = ToolRegistry() - - @reg.register( - name="legacy", - permission_level=PermissionLevel.SAFE, - category=ToolCategory.READ, - ) - def legacy() -> dict: - return {} - - defn = reg.get("legacy") - assert defn.permission_level == PermissionLevel.SAFE - # capabilities defaults to EXEMPT when not specified: - assert isinstance(defn.capabilities, _CapabilityExempt) - - def test_both_kwargs_can_coexist(self): - """Tools in the middle of migration have both fields set.""" - reg = ToolRegistry() - caps = [Capability("filesystem.read", target_arg="path")] - - @reg.register( - name="migrating", - permission_level=PermissionLevel.SAFE, - capabilities=caps, - category=ToolCategory.READ, - ) - def migrating(path: str) -> dict: - return {} - - defn = reg.get("migrating") - assert defn.permission_level == PermissionLevel.SAFE - assert defn.capabilities == caps diff --git a/tests/workflow/test_backend_isolation.py b/tests/workflow/test_backend_isolation.py index 2714778..9820e7b 100644 --- a/tests/workflow/test_backend_isolation.py +++ b/tests/workflow/test_backend_isolation.py @@ -2,7 +2,7 @@ ADK and LangGraph are pluggable orchestrators behind BaseWorkflowManager. Neither backend should depend on the other — shared logic lives in -workflow.* (e.g. workflow.confirmation, workflow.events, workflow.service_registry). +workflow.* (e.g. workflow.events, workflow.service_registry, workflow.permissions). If one backend imports from the other, adding a third orchestrator would drag the dependency along. """ diff --git a/tests/workflow/test_settings_permissions.py b/tests/workflow/test_settings_permissions.py index 82b5bd9..25e1b59 100644 --- a/tests/workflow/test_settings_permissions.py +++ b/tests/workflow/test_settings_permissions.py @@ -1,8 +1,4 @@ -"""Tests for permissions settings additions. - -Adds ``permissions`` (nested) and ``permissions_enabled`` to settings; -``hitl_enabled`` remains for now (removed in final cleanup task). -""" +"""Tests for permissions settings.""" from agentic_cli.config import BaseSettings @@ -17,10 +13,10 @@ def test_permissions_default_empty(self): assert s.permissions.allow == [] assert s.permissions.deny == [] - def test_hitl_enabled_still_present(self): - """Old kwarg still accessible during the migration window.""" + def test_hitl_enabled_removed(self): + """hitl_enabled should no longer exist.""" s = BaseSettings() - assert hasattr(s, "hitl_enabled") + assert not hasattr(s, "hitl_enabled") def test_permissions_accepts_allow_and_deny(self): from agentic_cli.workflow.settings import ( From 7c84fc6b4933b40fd6a183adefcd1e357c3189e8 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 11:24:04 -0400 Subject: [PATCH 33/41] docs(permissions): drop transient 'once Task N' phrasing from adapter docstrings --- src/agentic_cli/workflow/adk/permission_plugin.py | 6 +----- src/agentic_cli/workflow/langgraph/permission_wrap.py | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/agentic_cli/workflow/adk/permission_plugin.py b/src/agentic_cli/workflow/adk/permission_plugin.py index 7668ba6..f39def9 100644 --- a/src/agentic_cli/workflow/adk/permission_plugin.py +++ b/src/agentic_cli/workflow/adk/permission_plugin.py @@ -1,9 +1,5 @@ """ADK plugin that gates tool calls via PermissionEngine. -Replaces the old ConfirmationPlugin once Task 24 swaps the registration in -GoogleADKWorkflowManager. For now the old plugin continues to run alongside -this one until we're ready to cut over. - Adapter check order (mirrors LangGraph wrapper for consistency): 1. EXEMPT tool → allow, no engine call. 2. Tool has no capability declaration → deny (author error, loud). @@ -30,7 +26,7 @@ class PermissionPlugin(BasePlugin): - """Replaces the old ConfirmationPlugin with capability-based checks.""" + """ADK plugin: gates every tool call through :class:`PermissionEngine`.""" def __init__(self) -> None: super().__init__(name="permission") diff --git a/src/agentic_cli/workflow/langgraph/permission_wrap.py b/src/agentic_cli/workflow/langgraph/permission_wrap.py index 0a4c411..08600fa 100644 --- a/src/agentic_cli/workflow/langgraph/permission_wrap.py +++ b/src/agentic_cli/workflow/langgraph/permission_wrap.py @@ -6,9 +6,6 @@ 2. Tool has no capability declaration → wrapper returns deny dict at call time. 3. Engine absent from service registry → wrapper runs the original tool (fallback). 4. Otherwise call engine.check(); return on allow, deny dict on deny. - -Replaces LangGraphBuilder._wrap_for_confirmation once Task 25 swaps the -call site. """ from __future__ import annotations From af1c55192a9693076326a09169c46ef4ebf6ce37 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 11:26:46 -0400 Subject: [PATCH 34/41] docs(permissions): update CLAUDE.md + README.md to describe the new permission engine --- CLAUDE.md | 3 ++- README.md | 39 +++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6ce79dc..ddece9f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -133,7 +133,8 @@ Workflow: ### Key Design Patterns - **Tool error handling**: All tools return `{"success": bool, ...}` dicts. Never raise `ToolError`. -- **Tool registration**: Use `@register_tool(category=..., permission_level=..., description=...)` decorator. Tools are auto-discovered via the global `ToolRegistry`. +- **Tool registration**: Use `@register_tool(category=..., capabilities=..., description=...)` decorator. `capabilities=` is required — pass `EXEMPT` for tools that need no permission check or a list of `Capability(name, target_arg=...)` tuples the engine matches against rules. Tools are auto-discovered via the global `ToolRegistry`. +- **Permissions**: `workflow/permissions/` holds a framework-independent engine that evaluates declared capabilities against rules from four sources (builtin, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, in-memory session). ADK + LangGraph gate tool calls via `workflow/adk/permission_plugin.py::PermissionPlugin` and `workflow/langgraph/permission_wrap.py::wrap_tool_for_permission`. See `docs/superpowers/specs/2026-04-18-permissions-system-design.md`. - **Service registry**: Tools access services and shared state via `get_service(key)` from `workflow.service_registry`. A single ContextVar holds a `dict[str, Any]` set by the workflow manager during processing. Complex services (KBManager, SandboxManager, MemoryStore) are lazily created; simple state (plan string, task list) lives directly in the registry dict. - **Manager detection**: Tools decorated with `@requires("kb_manager")` etc. are scanned by `BaseWorkflowManager._detect_required_managers()` which lazily creates only the needed services. - **Atomic writes**: Use `atomic_write_json`/`atomic_write_text` from `persistence/_utils.py` for file persistence. diff --git a/README.md b/README.md index 05d61da..ac53a24 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Agentic CLI provides the core infrastructure for building interactive CLI applic ├─────────────────────────────┬───────────────────────────────────────┤ │ GoogleADKWorkflowManager │ LangGraphWorkflowManager │ │ (default) │ (optional: langgraph extra) │ -│ + ConfirmationPlugin │ + confirmation tool wrapper │ +│ + PermissionPlugin │ + wrap_tool_for_permission │ │ + LLMLoggingPlugin │ + native ToolNode │ │ + TaskProgressPlugin │ │ └─────────────────────────────┴───────────────────────────────────────┘ @@ -145,7 +145,7 @@ manager = GoogleADKWorkflowManager( ``` ADK integrations: -- **ConfirmationPlugin** — intercepts `DANGEROUS` tools and prompts the user for approval +- **PermissionPlugin** — evaluates each tool's declared capabilities against the permission engine and prompts the user when no rule matches - **LLMLoggingPlugin** — structured logging of LLM requests/responses - **TaskProgressPlugin** — streams the task checklist into its own thinking box @@ -171,7 +171,7 @@ Features: - **Explicit provider support**: Uses `langchain-google-genai` for Gemini (not VertexAI) - **Thinking mode**: Native support for Claude and Gemini thinking/reasoning - **Retry policies**: Automatic retry with exponential backoff -- **Confirmation wrapper**: Wraps `DANGEROUS` tools to request HITL approval +- **Permission wrapper**: Wraps each tool to gate execution through the permission engine - **Event streaming**: Real-time workflow events via `WorkflowEvent` Requires: `pip install agentic-cli[langgraph]` @@ -186,7 +186,7 @@ Requires: `pip install agentic-cli[langgraph]` | State persistence | In-memory | Memory, PostgreSQL, or SQLite | | Thinking support | Native (Gemini) | Native (Claude & Gemini) | | Retry handling | Built-in | Built-in with backoff | -| HITL confirmation | ConfirmationPlugin | Tool wrapper | +| Permission gate | PermissionPlugin | wrap_tool_for_permission | | Context trimming | Native | Native | ### Auto-selection via Settings @@ -535,15 +535,9 @@ Each tool keeps at most N reflections (FIFO eviction). Reflections can be inject #### HITL (Human-in-the-Loop) -Two mechanisms: +Tool calls are gated by the **permission engine** (`workflow/permissions/`). Each tool declares a list of capabilities (e.g. `filesystem.write(path=...)`); the engine evaluates them against rules from four sources (builtin defaults, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, in-memory session). When no rule matches, the user is prompted with `Allow once / Allow for session / Allow always (save to project) / Deny`. Always-grants persist into the project settings file so the next run picks them up automatically. -1. **Automatic confirmation for `DANGEROUS` tools** — handled by ADK's `ConfirmationPlugin` and LangGraph's tool wrapper. No code changes needed; just mark the tool `DANGEROUS`. -2. **Explicit `request_approval` tool** — for domain-level checkpoints: - - ```python - from agentic_cli.tools import hitl_tools - # request_approval(message, options) -> {"success": True, "choice": "..."} - ``` +See `docs/superpowers/specs/2026-04-18-permissions-system-design.md` for the full design. ## CLI Commands @@ -719,13 +713,22 @@ agentic-cli/ │ │ ├── adk/ │ │ │ ├── manager.py # GoogleADKWorkflowManager │ │ │ ├── event_processor.py -│ │ │ ├── plugins.py # ConfirmationPlugin, LLMLoggingPlugin +│ │ │ ├── plugins.py # LLMLoggingPlugin +│ │ │ ├── permission_plugin.py # PermissionPlugin (capability gating) │ │ │ └── task_progress_plugin.py -│ │ └── langgraph/ -│ │ ├── manager.py # LangGraphWorkflowManager -│ │ ├── graph_builder.py -│ │ ├── state.py -│ │ └── persistence/ # Checkpointers and stores +│ │ ├── langgraph/ +│ │ │ ├── manager.py # LangGraphWorkflowManager +│ │ │ ├── graph_builder.py +│ │ │ ├── permission_wrap.py # wrap_tool_for_permission +│ │ │ ├── state.py +│ │ │ └── persistence/ # Checkpointers and stores +│ │ └── permissions/ # Framework-independent permission engine +│ │ ├── capabilities.py # Capability, ResolvedCapability, EXEMPT +│ │ ├── rules.py # Rule, Effect, RuleSource, CheckResult, AskScope +│ │ ├── matchers.py # Path/URL/Shell/StringGlob matchers +│ │ ├── store.py # PermissionContext, BUILTIN_RULES, JSON load/save +│ │ ├── prompt.py # build_request + parse_response +│ │ └── engine.py # PermissionEngine │ ├── tools/ │ │ ├── registry.py # ToolRegistry, ToolCategory, register_tool │ │ ├── factories.py # Backend-aware tool factories From 4e929bb076f241ba3e2c80244697f257a78b4e7d Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:16:33 -0400 Subject: [PATCH 35/41] fix(permissions): preserve '*' wildcard in matcher canonicalize + matches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targetless capabilities (Capability with target_arg=None, used by web_search, save_memory, etc.) resolve to target='*'. When a user picked 'Allow always', URLMatcher.canonicalize was mangling '*' into 'https://*' on JSON reload, and URLMatcher.matches rejected the bare '*' on schema-comparison, so the just-granted rule didn't match subsequent calls — the prompt reappeared. Fix: every matcher short-circuits the '*' sentinel in both canonicalize (pass-through) and matches (wildcard pattern matches anything). Narrow patterns still correctly deny targetless ('*') calls — only pattern=='*' is treated as the universal wildcard. Adds TestTargetlessAllowAlwaysRegression covering: - Allow-always on web_search (http.read, targetless) → next two calls are allowed without re-prompting - Cross-tool sharing: search_arxiv with same capability is also covered - After project JSON reload, the persisted wildcard rule still matches --- .../workflow/permissions/matchers.py | 16 +++++ tests/permissions/test_engine.py | 70 +++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/agentic_cli/workflow/permissions/matchers.py b/src/agentic_cli/workflow/permissions/matchers.py index 97548cf..b4b1c28 100644 --- a/src/agentic_cli/workflow/permissions/matchers.py +++ b/src/agentic_cli/workflow/permissions/matchers.py @@ -31,9 +31,13 @@ class StringGlobMatcher: """Default fallback: ``${...}`` substitution + ``fnmatch``.""" def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" return ctx.substitute(s).strip() def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True return fnmatch.fnmatchcase(target, pattern) @@ -96,6 +100,8 @@ class PathMatcher: """Matcher for ``filesystem.*`` capabilities.""" def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" s = ctx.substitute(s) p = Path(s).expanduser() if not p.is_absolute(): @@ -103,6 +109,8 @@ def canonicalize(self, s: str, ctx: PermissionContext) -> str: return str(Path(p).resolve(strict=False)) def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True return bool(_glob_to_regex(pattern).match(target)) @@ -112,6 +120,8 @@ class URLMatcher: _DEFAULT_PORTS = {"http": 80, "https": 443} def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" s = ctx.substitute(s) parts = urlsplit(s if "://" in s else f"https://{s}") scheme = parts.scheme.lower() or "https" @@ -129,6 +139,8 @@ def canonicalize(self, s: str, ctx: PermissionContext) -> str: return urlunsplit((scheme, netloc, path, query, "")) def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True pp = urlsplit(pattern if "://" in pattern else f"https://{pattern}") tt = urlsplit(target) if pp.scheme.lower() != tt.scheme.lower(): @@ -148,9 +160,13 @@ class ShellMatcher: """Matcher for ``shell.*`` capabilities.""" def canonicalize(self, s: str, ctx: PermissionContext) -> str: + if s == "*": + return "*" return ctx.substitute(s).strip() def matches(self, pattern: str, target: str) -> bool: + if pattern == "*": + return True return fnmatch.fnmatchcase(target, pattern) diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index 45267b8..957342f 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -279,3 +279,73 @@ async def fake_input(request): ), ) assert ask_peak == 1 # never two asks in flight simultaneously + + +class TestTargetlessAllowAlwaysRegression: + """Regression: after 'Allow always' on a targetless capability (target_arg=None), + subsequent calls must not re-prompt. + + Bug: matchers that canonicalise targets (URLMatcher, PathMatcher) were mangling + the ``"*"`` sentinel, so the just-installed rule didn't match the next + invocation's resolved target (also ``"*"``). Fix short-circuits ``"*"`` in + ``canonicalize`` and in ``matches``. + """ + + @pytest.mark.asyncio + async def test_http_read_allow_always_matches_next_call(self, ctx, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + # First call: no rule → ask → allow always (saves session + project rule) + result1 = await engine.check( + "web_search", + [Capability("http.read")], # targetless + {"query": "test"}, + ) + assert result1.allowed is True + assert w.request_user_input.await_count == 1 + + # Second call must match the session rule and NOT re-prompt. + result2 = await engine.check( + "web_search", + [Capability("http.read")], + {"query": "different query"}, + ) + assert result2.allowed is True + assert w.request_user_input.await_count == 1 + + # Third call with a DIFFERENT tool that declares the same capability + # is also covered — same cap, still target=='*'. + result3 = await engine.check( + "search_arxiv", + [Capability("http.read")], + {"query": "papers"}, + ) + assert result3.allowed is True + assert w.request_user_input.await_count == 1 + + @pytest.mark.asyncio + async def test_reloaded_wildcard_rule_still_matches(self, ctx, tmp_path, monkeypatch): + """After the project JSON is reloaded (simulating next process run), + a rule stored with target='*' must still match a targetless capability.""" + monkeypatch.chdir(tmp_path) + + # Round 1: grant "allow always" so the rule is persisted. + w1 = _stub_workflow() + w1.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine1 = PermissionEngine(settings=_stub_settings(), workflow=w1, ctx=ctx) + await engine1.check("web_search", [Capability("http.read")], {"query": "x"}) + + # Round 2: fresh engine reads rules from disk. + w2 = _stub_workflow() + w2.request_user_input = AsyncMock(return_value="Deny") # would deny if re-asked + engine2 = PermissionEngine(settings=_stub_settings(), workflow=w2, ctx=ctx) + result = await engine2.check( + "web_search", + [Capability("http.read")], + {"query": "y"}, + ) + assert result.allowed is True + w2.request_user_input.assert_not_called() From fc74e9a4a7f72743d9addd4d5e87fddc79bb9e25 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:26:06 -0400 Subject: [PATCH 36/41] fix(permissions): register backend state tools as EXEMPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit save_plan / get_plan / save_tasks / get_tasks in tools/{adk,langgraph}/state_tools.py are injected into agent tool lists by BaseWorkflowManager._get_state_tools() and were never decorated with @register_tool. The adapter therefore saw defn is None and denied every call with 'tool has no capability declaration'. These tools only touch internal workflow state (plan string, task list) — no file, network, or shell side effects — so EXEMPT is the right classification. --- src/agentic_cli/tools/adk/state_tools.py | 6 ++++++ src/agentic_cli/tools/langgraph/state_tools.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/src/agentic_cli/tools/adk/state_tools.py b/src/agentic_cli/tools/adk/state_tools.py index 212423b..00f3b42 100644 --- a/src/agentic_cli/tools/adk/state_tools.py +++ b/src/agentic_cli/tools/adk/state_tools.py @@ -13,8 +13,11 @@ from agentic_cli.tools._core.planning import summarize_checkboxes from agentic_cli.tools._core.tasks import validate_tasks, normalize_tasks, filter_tasks +from agentic_cli.tools.registry import ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_plan(content: str, tool_context: ToolContext) -> dict[str, Any]: """Save or update the execution plan as markdown with checkboxes. @@ -33,6 +36,7 @@ def save_plan(content: str, tool_context: ToolContext) -> dict[str, Any]: return {"success": True, "message": message} +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_plan(tool_context: ToolContext) -> dict[str, Any]: """Retrieve the current execution plan. @@ -47,6 +51,7 @@ def get_plan(tool_context: ToolContext) -> dict[str, Any]: return {"success": True, "content": plan} +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_tasks( tasks: list[dict[str, Any]], tool_context: ToolContext ) -> dict[str, Any]: @@ -86,6 +91,7 @@ def save_tasks( } +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_tasks( status: str = "", priority: str = "", diff --git a/src/agentic_cli/tools/langgraph/state_tools.py b/src/agentic_cli/tools/langgraph/state_tools.py index cef95db..9c049f6 100644 --- a/src/agentic_cli/tools/langgraph/state_tools.py +++ b/src/agentic_cli/tools/langgraph/state_tools.py @@ -17,8 +17,11 @@ from agentic_cli.tools._core.planning import summarize_checkboxes from agentic_cli.tools._core.tasks import validate_tasks, normalize_tasks, filter_tasks +from agentic_cli.tools.registry import ToolCategory, register_tool +from agentic_cli.workflow.permissions import EXEMPT +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_plan( content: str, tool_call_id: Annotated[str, InjectedToolCallId], @@ -40,6 +43,7 @@ def save_plan( }) +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_plan( state: Annotated[dict, InjectedState], tool_call_id: Annotated[str, InjectedToolCallId], @@ -58,6 +62,7 @@ def get_plan( }) +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def save_tasks( tasks: list[dict[str, Any]], tool_call_id: Annotated[str, InjectedToolCallId], @@ -103,6 +108,7 @@ def save_tasks( }) +@register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING) def get_tasks( status: str = "", priority: str = "", From 0227b2db221a45a1a6b779824eb022b48ccc5ade Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:25:12 -0400 Subject: [PATCH 37/41] feat(permissions): allow memory.* and kb.* by default; register ADK transfer_to_agent as EXEMPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - BUILTIN_RULES now includes 'memory.* → *' and 'kb.* → *' ALLOW entries. Memory + KB are agent-internal stores the user already opted into by giving the tool. Prompting for every save_memory / kb_ingest is noisy. - ADK auto-injects transfer_to_agent into coordinator agents with sub_agents; it's an internal routing primitive, not an external side effect. Register it with EXEMPT at permission_plugin.py import time so the plugin lets it through. - Tests for both new behaviours. --- src/agentic_cli/workflow/adk/permission_plugin.py | 14 +++++++++++++- src/agentic_cli/workflow/permissions/store.py | 5 +++++ tests/permissions/test_engine.py | 15 +++++++++++++++ tests/permissions/test_store.py | 3 +++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/agentic_cli/workflow/adk/permission_plugin.py b/src/agentic_cli/workflow/adk/permission_plugin.py index f39def9..6524895 100644 --- a/src/agentic_cli/workflow/adk/permission_plugin.py +++ b/src/agentic_cli/workflow/adk/permission_plugin.py @@ -14,7 +14,8 @@ from google.adk.plugins.base_plugin import BasePlugin from agentic_cli.logging import Loggers -from agentic_cli.tools.registry import get_registry +from agentic_cli.tools.registry import ToolCategory, get_registry, register_tool +from agentic_cli.workflow.permissions import EXEMPT from agentic_cli.workflow.permissions.capabilities import _CapabilityExempt from agentic_cli.workflow.service_registry import PERMISSION_ENGINE, get_service @@ -25,6 +26,17 @@ logger = Loggers.workflow() +# ADK auto-injects ``transfer_to_agent`` into coordinator agents that declare +# ``sub_agents``. It's an internal routing primitive, not an external side +# effect, so register it with EXEMPT so the plugin lets it through. +try: + from google.adk.tools.transfer_to_agent_tool import transfer_to_agent + + register_tool(capabilities=EXEMPT, category=ToolCategory.PLANNING)(transfer_to_agent) +except ImportError: + pass + + class PermissionPlugin(BasePlugin): """ADK plugin: gates every tool call through :class:`PermissionEngine`.""" diff --git a/src/agentic_cli/workflow/permissions/store.py b/src/agentic_cli/workflow/permissions/store.py index 22f248e..2667aa1 100644 --- a/src/agentic_cli/workflow/permissions/store.py +++ b/src/agentic_cli/workflow/permissions/store.py @@ -40,6 +40,11 @@ def substitute(self, s: str) -> str: # Routine reads inside workdir: allowed without prompt. Rule("filesystem.read", "${workdir}/**", Effect.ALLOW, RuleSource.BUILTIN), + # Agent-internal stores — reads and writes to the user's own knowledge base + # and memory are routine workflow activity, not external side effects. + Rule("memory.*", "*", Effect.ALLOW, RuleSource.BUILTIN), + Rule("kb.*", "*", Effect.ALLOW, RuleSource.BUILTIN), + # System locations — writes always denied. Rule("filesystem.write", "/etc/**", Effect.DENY, RuleSource.BUILTIN), Rule("filesystem.write", "/usr/**", Effect.DENY, RuleSource.BUILTIN), diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index 957342f..e48dd28 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -326,6 +326,21 @@ async def test_http_read_allow_always_matches_next_call(self, ctx, tmp_path, mon assert result3.allowed is True assert w.request_user_input.await_count == 1 + @pytest.mark.asyncio + async def test_memory_and_kb_allowed_by_builtin(self, ctx, tmp_path): + """Memory and KB capabilities should be allowed without prompting.""" + w = _stub_workflow() + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + for cap_name in ("memory.read", "memory.write", "kb.read", "kb.write"): + result = await engine.check( + f"tool_using_{cap_name}", + [Capability(cap_name)], + {}, + ) + assert result.allowed is True, f"{cap_name} should be allowed by builtin" + w.request_user_input.assert_not_called() + @pytest.mark.asyncio async def test_reloaded_wildcard_rule_still_matches(self, ctx, tmp_path, monkeypatch): """After the project JSON is reloaded (simulating next process run), diff --git a/tests/permissions/test_store.py b/tests/permissions/test_store.py index 7ded8f5..5f52707 100644 --- a/tests/permissions/test_store.py +++ b/tests/permissions/test_store.py @@ -42,6 +42,9 @@ def test_has_expected_entries(self): # Reads within workdir allowed: assert any(r.capability == "filesystem.read" and "${workdir}" in r.target for r in allows) + # Memory + KB: agent-internal stores allowed by default. + assert any(r.capability == "memory.*" and r.target == "*" for r in allows) + assert any(r.capability == "kb.*" and r.target == "*" for r in allows) # System dirs denied: system_targets = {r.target for r in denies if r.capability == "filesystem.write"} for path in ("/etc/**", "/usr/**", "/bin/**", "/sbin/**", "/boot/**", "/System/**"): From c1c9b77a4998e44a709d5c03c16356a737799a44 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:33:41 -0400 Subject: [PATCH 38/41] perf(workflow): offload _ensure_managers_initialized to a worker thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EmbeddingService construction loads a sentence-transformers model, which takes 1-3 seconds of pure sync CPU/IO. It ran on the main event loop inside initialize_services(), blocking prompt_toolkit's key handler during background init — the REPL appeared frozen for seconds at startup and keystrokes only rendered after init completed. Run _ensure_managers_initialized() via asyncio.to_thread so the event loop stays free. Prompt responsiveness is restored; the PermissionEngine construction (which also does a small amount of sync I/O) rides along. --- src/agentic_cli/workflow/base_manager.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/agentic_cli/workflow/base_manager.py b/src/agentic_cli/workflow/base_manager.py index 8a3e005..e4b2d4b 100644 --- a/src/agentic_cli/workflow/base_manager.py +++ b/src/agentic_cli/workflow/base_manager.py @@ -507,7 +507,13 @@ async def initialize_services(self, validate: bool = True) -> None: # Create services BEFORE backend init so _build_tools() can # produce factory-bound tools during agent/graph creation. - self._ensure_managers_initialized() + # Offloaded to a worker thread because constructors here may + # load heavy dependencies (e.g. the sentence-transformers model + # inside EmbeddingService) that would otherwise block the event + # loop — which keeps the prompt unresponsive at startup. + import asyncio as _asyncio + + await _asyncio.to_thread(self._ensure_managers_initialized) await self._do_initialize() self._initialized = True From 73ca202eed2685d42029c2663f6861a1e2fda00b Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:41:09 -0400 Subject: [PATCH 39/41] feat(permissions): broaden filesystem.* grants to the parent directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user picks 'Allow for session' or 'Allow always' on a filesystem.* capability, store a rule covering the parent directory (with /** glob) instead of the exact file. One grant then covers every file the agent writes or reads in that folder, which matches typical workflows — if an agent is writing one file, it's almost always going to write more. Non-filesystem namespaces (http.*, shell.*, etc.) keep exact-target grants. The wildcard sentinel '*' (used by targetless capabilities like web_search) also passes through unchanged. The ask prompt now prints '(Session/Always grants apply to the parent directory.)' when a filesystem capability is involved so the user knows what scope they're agreeing to. Tests cover the broadening, scope preservation for other namespaces, and nested-subdirectory matching. --- .../workflow/permissions/engine.py | 25 ++++++- .../workflow/permissions/prompt.py | 5 ++ tests/permissions/test_engine.py | 69 +++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py index 9d13492..face56e 100644 --- a/src/agentic_cli/workflow/permissions/engine.py +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -158,6 +158,28 @@ def _evaluate( def _fmt_rule_reason(rule: Rule, cap: ResolvedCapability) -> str: return f"rule: {rule.source.value}/{rule.effect.value} {cap.name} {rule.target}" + @staticmethod + def _broaden_target_for_grant(cap: ResolvedCapability) -> str: + """Widen a resolved target before synthesising a session/persistent rule. + + For ``filesystem.*`` capabilities we broaden to the parent directory + (glob) so one grant covers every file the agent writes/reads there — + otherwise each new file in the same directory would prompt again. + Other namespaces keep the exact resolved target (URL, command, etc.), + and the wildcard sentinel ``"*"`` passes through unchanged. + """ + if cap.target == "*": + return "*" + if cap.name.startswith("filesystem."): + from pathlib import Path + + p = Path(cap.target) + # Don't widen root or already-rootless targets. + if str(p) == str(p.parent): + return cap.target + return f"{p.parent}/**" + return cap.target + async def _ask_and_apply( self, tool_name: str, @@ -186,7 +208,8 @@ async def _ask_and_apply( source = RuleSource.SESSION if scope is AskScope.SESSION else RuleSource.PROJECT for cap in unmatched: - rule = Rule(cap.name, cap.target, Effect.ALLOW, source) + target = self._broaden_target_for_grant(cap) + rule = Rule(cap.name, target, Effect.ALLOW, source) self._session_rules.append(rule) if source is RuleSource.PROJECT: append_project_rule(self._settings.app_name, rule) diff --git a/src/agentic_cli/workflow/permissions/prompt.py b/src/agentic_cli/workflow/permissions/prompt.py index 4cf26f9..b60c7f8 100644 --- a/src/agentic_cli/workflow/permissions/prompt.py +++ b/src/agentic_cli/workflow/permissions/prompt.py @@ -25,10 +25,15 @@ def build_request(tool_name: str, capabilities: list[ResolvedCapability]) -> UserInputRequest: """Construct a ``UserInputRequest`` (CHOICE) describing the pending grant.""" lines = [f"Tool `{tool_name}` wants:"] + has_filesystem = False for cap in capabilities: target = cap.target if cap.target else "*" lines.append(f" • {cap.name} → {target}") + if cap.name.startswith("filesystem.") and target not in ("*", ""): + has_filesystem = True lines.append("") + if has_filesystem: + lines.append("(Session/Always grants apply to the parent directory.)") lines.append("Allow?") prompt = "\n".join(lines) diff --git a/tests/permissions/test_engine.py b/tests/permissions/test_engine.py index e48dd28..98fa4b4 100644 --- a/tests/permissions/test_engine.py +++ b/tests/permissions/test_engine.py @@ -326,6 +326,75 @@ async def test_http_read_allow_always_matches_next_call(self, ctx, tmp_path, mon assert result3.allowed is True assert w.request_user_input.await_count == 1 + @pytest.mark.asyncio + async def test_filesystem_grant_broadens_to_parent_directory( + self, ctx, tmp_path, monkeypatch + ): + """When user picks 'Allow always' for filesystem.write to /foo/bar.txt, + the rule covers /foo/** — subsequent writes to /foo/baz.txt must not re-prompt.""" + monkeypatch.chdir(tmp_path) + outside = tmp_path / "out" + outside.mkdir() + + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow always (save to project)") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + # First write → prompts → allow always. + result1 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(outside / "bar.txt")}, + ) + assert result1.allowed is True + assert w.request_user_input.await_count == 1 + + # The stored session rule should target the parent with ** glob. + session_write_rules = [ + r for r in engine.rules + if r.capability == "filesystem.write" and str(outside) in r.target + ] + assert any(r.target.endswith("/**") for r in session_write_rules) + + # Second write to a DIFFERENT file in the same directory → no prompt. + result2 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(outside / "baz.txt")}, + ) + assert result2.allowed is True + assert w.request_user_input.await_count == 1 + + # Write to a nested subdir also matches. + sub = outside / "sub" + sub.mkdir() + result3 = await engine.check( + "write_file", + [Capability("filesystem.write", target_arg="path")], + {"path": str(sub / "deep.txt")}, + ) + assert result3.allowed is True + assert w.request_user_input.await_count == 1 + + @pytest.mark.asyncio + async def test_filesystem_broaden_preserves_other_namespaces(self, ctx, tmp_path, monkeypatch): + """Non-filesystem capabilities keep their exact resolved target.""" + monkeypatch.chdir(tmp_path) + w = _stub_workflow() + w.request_user_input = AsyncMock(return_value="Allow for this session") + engine = PermissionEngine(settings=_stub_settings(), workflow=w, ctx=ctx) + + await engine.check( + "web_fetch", + [Capability("http.read", target_arg="url")], + {"url": "https://example.com/a"}, + ) + + http_session = [r for r in engine.rules if r.capability == "http.read" and r.source is RuleSource.SESSION] + assert http_session + # URL stays exact (host + path), NOT broadened. + assert all(r.target == "https://example.com/a" for r in http_session) + @pytest.mark.asyncio async def test_memory_and_kb_allowed_by_builtin(self, ctx, tmp_path): """Memory and KB capabilities should be allowed without prompting.""" From 74c0db1e029bb87856311ee7f6f8363f53b1cfd6 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:51:01 -0400 Subject: [PATCH 40/41] feat(permissions): show directory-scope target in ask prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UI was displaying the exact filename (e.g. 'filesystem.write → /foo/bar.txt'), but Session/Always grants store /foo/**. Show the broadened target in the prompt so the displayed scope matches the scope that will actually be granted. - Moved the widening helper to a module-level function (engine.broaden_target_for_grant) so both engine._ask_and_apply and prompt.build_request use the same logic. - prompt.build_request now displays broaden_target_for_grant(cap) for each capability line. A '(Grant scope widened to the parent directory.)' hint replaces the previous 'applies to parent directory' wording and only appears when any capability was actually broadened. - Handle the root-parent edge case: a file directly under / collapses to '/**' instead of '//**'. Tests cover: - Filesystem capabilities display the /parent/** scope - Exact filenames do NOT appear in the prompt - Non-filesystem (http.*) capabilities keep exact targets - Root-parent paths render as '/**' --- .../workflow/permissions/engine.py | 52 +++++++++++-------- .../workflow/permissions/prompt.py | 25 ++++++--- tests/permissions/test_prompt.py | 36 +++++++++++-- 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/src/agentic_cli/workflow/permissions/engine.py b/src/agentic_cli/workflow/permissions/engine.py index face56e..998410c 100644 --- a/src/agentic_cli/workflow/permissions/engine.py +++ b/src/agentic_cli/workflow/permissions/engine.py @@ -12,6 +12,7 @@ from __future__ import annotations import asyncio +from pathlib import Path from typing import TYPE_CHECKING from agentic_cli.logging import Loggers @@ -41,6 +42,34 @@ logger = Loggers.workflow() +def broaden_target_for_grant(cap: ResolvedCapability) -> str: + """Widen a resolved target before synthesising a session/persistent rule. + + For ``filesystem.*`` capabilities we broaden to the parent directory + (glob) so one grant covers every file the agent writes/reads there — + otherwise each new file in the same directory would prompt again. + Other namespaces keep the exact resolved target (URL, command, etc.), + and the wildcard sentinel ``"*"`` passes through unchanged. + + Used by both the engine (when installing a rule) and the prompt + builder (when describing the pending grant) so the displayed scope + always matches what will actually be stored. + """ + if cap.target == "*": + return "*" + if cap.name.startswith("filesystem."): + p = Path(cap.target) + parent = p.parent + # Already at the root: nothing to widen to. + if str(p) == str(parent): + return cap.target + # Avoid "//**" when the parent is the filesystem root. + if str(parent) == "/": + return "/**" + return f"{parent}/**" + return cap.target + + class PermissionEngine: """Evaluate tool invocations against rules from four sources. @@ -158,27 +187,6 @@ def _evaluate( def _fmt_rule_reason(rule: Rule, cap: ResolvedCapability) -> str: return f"rule: {rule.source.value}/{rule.effect.value} {cap.name} {rule.target}" - @staticmethod - def _broaden_target_for_grant(cap: ResolvedCapability) -> str: - """Widen a resolved target before synthesising a session/persistent rule. - - For ``filesystem.*`` capabilities we broaden to the parent directory - (glob) so one grant covers every file the agent writes/reads there — - otherwise each new file in the same directory would prompt again. - Other namespaces keep the exact resolved target (URL, command, etc.), - and the wildcard sentinel ``"*"`` passes through unchanged. - """ - if cap.target == "*": - return "*" - if cap.name.startswith("filesystem."): - from pathlib import Path - - p = Path(cap.target) - # Don't widen root or already-rootless targets. - if str(p) == str(p.parent): - return cap.target - return f"{p.parent}/**" - return cap.target async def _ask_and_apply( self, @@ -208,7 +216,7 @@ async def _ask_and_apply( source = RuleSource.SESSION if scope is AskScope.SESSION else RuleSource.PROJECT for cap in unmatched: - target = self._broaden_target_for_grant(cap) + target = broaden_target_for_grant(cap) rule = Rule(cap.name, target, Effect.ALLOW, source) self._session_rules.append(rule) if source is RuleSource.PROJECT: diff --git a/src/agentic_cli/workflow/permissions/prompt.py b/src/agentic_cli/workflow/permissions/prompt.py index b60c7f8..4bbdbe6 100644 --- a/src/agentic_cli/workflow/permissions/prompt.py +++ b/src/agentic_cli/workflow/permissions/prompt.py @@ -6,6 +6,7 @@ from agentic_cli.workflow.events import InputType, UserInputRequest from agentic_cli.workflow.permissions.capabilities import ResolvedCapability +from agentic_cli.workflow.permissions.engine import broaden_target_for_grant from agentic_cli.workflow.permissions.rules import AskScope # Strings kept module-level so the UI and parser stay in sync. @@ -23,17 +24,25 @@ def build_request(tool_name: str, capabilities: list[ResolvedCapability]) -> UserInputRequest: - """Construct a ``UserInputRequest`` (CHOICE) describing the pending grant.""" + """Construct a ``UserInputRequest`` (CHOICE) describing the pending grant. + + The displayed target is the **effective grant scope** — i.e. what will be + stored as a rule if the user picks Session or Always. For ``filesystem.*`` + that's the parent directory (``/foo/**``) rather than the exact file, so + one grant covers every sibling/nested file. + """ lines = [f"Tool `{tool_name}` wants:"] - has_filesystem = False + has_broadened_filesystem = False for cap in capabilities: - target = cap.target if cap.target else "*" - lines.append(f" • {cap.name} → {target}") - if cap.name.startswith("filesystem.") and target not in ("*", ""): - has_filesystem = True + display_target = broaden_target_for_grant(cap) + if not display_target: + display_target = "*" + lines.append(f" • {cap.name} → {display_target}") + if cap.name.startswith("filesystem.") and display_target != cap.target: + has_broadened_filesystem = True lines.append("") - if has_filesystem: - lines.append("(Session/Always grants apply to the parent directory.)") + if has_broadened_filesystem: + lines.append("(Grant scope widened to the parent directory.)") lines.append("Allow?") prompt = "\n".join(lines) diff --git a/tests/permissions/test_prompt.py b/tests/permissions/test_prompt.py index f0e5d27..98409e0 100644 --- a/tests/permissions/test_prompt.py +++ b/tests/permissions/test_prompt.py @@ -28,16 +28,44 @@ def test_choices_in_fixed_order(self): ] def test_prompt_mentions_tool_and_capabilities(self): + """For filesystem.* the prompt shows the broadened (parent-directory) + scope — matching what an Allow Session/Always would actually store.""" req = build_request( "copy_file", [ - ResolvedCapability("filesystem.read", "/a"), - ResolvedCapability("filesystem.write", "/b"), + ResolvedCapability("filesystem.read", "/src/file_a.txt"), + ResolvedCapability("filesystem.write", "/dst/file_b.txt"), ], ) assert "copy_file" in req.prompt - assert "filesystem.read" in req.prompt and "/a" in req.prompt - assert "filesystem.write" in req.prompt and "/b" in req.prompt + # Both entries show the parent-directory glob, not the exact file. + assert "filesystem.read → /src/**" in req.prompt + assert "filesystem.write → /dst/**" in req.prompt + # The exact filenames do NOT appear — the scope the user will actually + # grant is the directory. + assert "file_a.txt" not in req.prompt + assert "file_b.txt" not in req.prompt + # And a hint explaining the widening: + assert "parent directory" in req.prompt + + def test_prompt_preserves_exact_http_target(self): + """Non-filesystem capabilities keep their exact display target.""" + req = build_request( + "web_fetch", + [ResolvedCapability("http.read", "https://example.com/path")], + ) + assert "https://example.com/path" in req.prompt + # No broadening hint when no filesystem capability is involved. + assert "parent directory" not in req.prompt + + def test_prompt_handles_root_parent(self): + """A file directly under /: broadening collapses '//**' to '/**'.""" + req = build_request( + "write_file", + [ResolvedCapability("filesystem.write", "/hello.txt")], + ) + assert "filesystem.write → /**" in req.prompt + assert "//**" not in req.prompt def test_request_id_has_perm_prefix(self): req = build_request("x", []) From fffdee1853abf0fbbe9f45bcd250ae1fd0f1b002 Mon Sep 17 00:00:00 2001 From: Andrey Shiryaev <7615137+shoom1@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:29:39 -0400 Subject: [PATCH 41/41] Release v0.5.1 Bump version to 0.5.1 and document the capability-based permission engine that replaces the ConfirmationPlugin-based HITL. Refresh README to drop the stale PermissionLevel table in favor of a Capabilities subsection. --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ README.md | 15 +++++++-------- pyproject.toml | 2 +- src/agentic_cli/__init__.py | 2 +- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b358a63..cc4844c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.1] - 2026-04-18 + +### Added +- **Capability-based permission engine** (PR #72): Framework-independent engine that replaces the ConfirmationPlugin-based HITL. Tools declare capabilities (e.g. `filesystem.write(path=...)`, `http.read`, `shell.exec`) via `@register_tool(capabilities=...)`. Rules are evaluated from four sources — builtin defaults, user `~/.{app_name}/settings.json`, project `./.{app_name}/settings.json`, and in-memory session. When no rule matches, the user is prompted with `Allow once / for session / always (save to project) / Deny`; "always" grants persist into project settings. +- **Matchers**: `PathMatcher` (with `**` glob), `URLMatcher`, `ShellMatcher`, and `StringGlobMatcher`. +- **ADK `PermissionPlugin`**: capability gating for ADK agents. +- **LangGraph `wrap_tool_for_permission`**: per-tool permission wrapping for LangGraph `ToolNode`. +- **`permissions` and `permissions_enabled` settings** for runtime control. +- **`EXEMPT` sentinel**: opts a tool out of the permission engine (used for backend state tools and ADK `transfer_to_agent`). +- **Real BM25 backends**: `bm25s` (preferred) and `rank_bm25` fallback. + +### Changed +- **`@register_tool`** now requires the `capabilities=` kwarg. +- **Default grants broadened**: `filesystem.*` auto-extends to the parent directory; `memory.*` and `kb.*` are allowed by default. +- **Ask prompt** now shows the directory-scope target. +- **Workflow init**: `_ensure_managers_initialized` runs in a worker thread to avoid blocking the event loop. +- **HITL confirmation** extracted into a backend-neutral module during the permissions migration. +- **Memory tools**: deduped registry-bound and factory-bound entry points. + +### Fixed +- Matcher canonicalize/matches preserves the `*` wildcard. +- `KnowledgeBaseManager` concurrency contract tightened. + +### Removed +- **`PermissionLevel`** (SAFE / CAUTION / DANGEROUS), `ConfirmationPlugin`, `_wrap_for_confirmation`, `hitl_tools`, and the `hitl_enabled` setting — superseded by the permission engine. + ## [0.5.0] - 2026-04-17 ### Added diff --git a/README.md b/README.md index ac53a24..6f72800 100644 --- a/README.md +++ b/README.md @@ -342,15 +342,14 @@ def search_database(query: str, limit: int = 10) -> dict: return {"success": True, "results": results, "count": len(results)} ``` -Registering via `@register_tool` is optional — you can pass raw callables into `AgentConfig.tools`. Registering gives the tool metadata for the registry, permission-aware HITL wrapping, and tool-summary formatting. +Registering via `@register_tool` is optional — you can pass raw callables into `AgentConfig.tools`. Registering gives the tool metadata for the registry, capability-aware permission gating, and tool-summary formatting. -### Permission Levels +### Capabilities -| Level | Behavior | -|-------|----------| -| `SAFE` | Runs silently | -| `CAUTION` | Runs; result is surfaced prominently | -| `DANGEROUS` | Intercepted by HITL — user must approve before execution | +Tool access is gated by the **permission engine** (see the HITL section below). Each registered tool declares what it touches via `capabilities=`: + +- **`Capability("namespace.action", target_arg="...")`** — e.g. `Capability("filesystem.write", target_arg="path")`, `Capability("http.read", target_arg="url")`, `Capability("shell.exec", target_arg="command")`. The engine resolves `target_arg` against the actual tool-call arguments and matches against rules. +- **`EXEMPT`** — opts the tool out of the engine entirely. Reserved for backend-internal tools (e.g. ADK `transfer_to_agent`, backend state tools). ### Built-in Tools @@ -431,7 +430,7 @@ list_dir("src/", include_hidden=False) diff_compare(source_a="old.txt", source_b="new.txt") ``` -**WRITE tools (caution)** +**WRITE tools** ```python from agentic_cli.tools import write_file, edit_file diff --git a/pyproject.toml b/pyproject.toml index d91b37f..e2630e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agentic-cli" -version = "0.5.0" +version = "0.5.1" description = "A framework for building domain-specific agentic CLI applications" readme = "README.md" license = "MIT" diff --git a/src/agentic_cli/__init__.py b/src/agentic_cli/__init__.py index 849ec3c..0eee686 100644 --- a/src/agentic_cli/__init__.py +++ b/src/agentic_cli/__init__.py @@ -92,4 +92,4 @@ def __getattr__(name: str): "CLISettingsMixin", ] -__version__ = "0.5.0" +__version__ = "0.5.1"