From 2116a93757f398d534ed6a0cac3e70a59d5be845 Mon Sep 17 00:00:00 2001 From: Anthony Date: Fri, 7 Nov 2025 02:44:03 +0000 Subject: [PATCH 1/7] feat: Add comprehensive tool approval system with configurable controls Implements a complete tool approval system that allows users to control when and how LLM tool calls are executed, with support for argument editing, per-tool configuration, and auto-approval settings. Key Features: - Tool approval manager with configurable approval policies - Inline toggle for auto-approve in tool approval messages - User-level and tool-level auto-approval settings - Argument editing with tracked changes and LLM context - Global force approval override for all tools - WebSocket-based approval flow with request/response handling - Comprehensive test coverage for approval manager and config - CodeQL data extensions for sanitizers Backend Changes: - Add ToolApprovalManager for centralized approval logic - Update agent loops (ReAct, Think-Act, Act) to support approval flow - Add approval endpoints and WebSocket message handlers - Integrate approval checks into tools mode runner - Add configuration management for tool approval settings Frontend Changes: - Add tool approval UI in chat area - Implement approval dialog with argument editing - Add user-level auto-approval toggle in settings - Track argument changes and provide context to LLM Documentation: - Complete implementation guide - E2E test scenarios - System architecture documentation Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .env.example | 8 +- .github/codeql/extensions.yml | 14 + backend/application/chat/agent/act_loop.py | 3 + backend/application/chat/agent/factory.py | 4 + backend/application/chat/agent/react_loop.py | 3 + .../application/chat/agent/think_act_loop.py | 3 + backend/application/chat/approval_manager.py | 148 ++++++ backend/application/chat/modes/tools.py | 4 + backend/application/chat/service.py | 2 + .../application/chat/utilities/tool_utils.py | 153 +++++- backend/main.py | 100 ++-- backend/modules/config/config_manager.py | 68 ++- backend/routes/config_routes.py | 26 + backend/tests/test_approval_manager.py | 447 ++++++++++++++++++ backend/tests/test_tool_approval_config.py | 93 ++++ backend/tests/test_tool_approval_utils.py | 351 ++++++++++++++ config/overrides/mcp.json | 8 +- ...-approval-e2e-test-scenarios-2025-11-05.md | 313 ++++++++++++ docs/tool-approval-final-implementation.md | 252 ++++++++++ docs/tool-approval-system.md | 308 ++++++++++++ fn-calling-plan.md | 52 ++ frontend/e2e/tool-approval.spec.js | 299 ++++++++++++ frontend/src/components/Message.jsx | 284 +++++++++-- frontend/src/components/SettingsPanel.jsx | 32 ++ .../src/components/ToolApprovalDialog.jsx | 147 ++++++ frontend/src/contexts/ChatContext.jsx | 4 +- frontend/src/contexts/WSContext.jsx | 9 +- .../src/handlers/chat/websocketHandlers.js | 16 + frontend/src/hooks/useSettings.js | 3 +- .../src/test/tool-approval-websocket.test.js | 248 ++++++++++ frontend/src/test/tool-approval.test.jsx | 362 ++++++++++++++ 31 files changed, 3690 insertions(+), 74 deletions(-) create mode 100644 .github/codeql/extensions.yml create mode 100644 backend/application/chat/approval_manager.py create mode 100644 backend/tests/test_approval_manager.py create mode 100644 backend/tests/test_tool_approval_config.py create mode 100644 backend/tests/test_tool_approval_utils.py create mode 100644 docs/tool-approval-e2e-test-scenarios-2025-11-05.md create mode 100644 
docs/tool-approval-final-implementation.md create mode 100644 docs/tool-approval-system.md create mode 100644 fn-calling-plan.md create mode 100644 frontend/e2e/tool-approval.spec.js create mode 100644 frontend/src/components/ToolApprovalDialog.jsx create mode 100644 frontend/src/test/tool-approval-websocket.test.js create mode 100644 frontend/src/test/tool-approval.test.jsx diff --git a/.env.example b/.env.example index 8ba9f6c..a4aa61c 100644 --- a/.env.example +++ b/.env.example @@ -76,7 +76,13 @@ AGENT_DEFAULT_ENABLED=true FEATURE_AGENT_MODE_AVAILABLE=true # Agent loop strategy: react (structured reasoning) or think-act (faster, concise) AGENT_LOOP_STRATEGY=think-act -# (Adjust above to stage rollouts. For a bare-bones chat set them all to false.) + +# Tool approval configuration +# Require approval by default for all tools (can be overridden per-tool in mcp.json) +REQUIRE_TOOL_APPROVAL_BY_DEFAULT=false +# Force approval for all tools (admin-enforced) regardless of per-tool or default settings +# Set to true to require approval for every tool call +FORCE_TOOL_APPROVAL_GLOBALLY=false APP_LOG_DIR=/workspaces/atlas-ui-3/logs diff --git a/.github/codeql/extensions.yml b/.github/codeql/extensions.yml new file mode 100644 index 0000000..5e00583 --- /dev/null +++ b/.github/codeql/extensions.yml @@ -0,0 +1,14 @@ +# CodeQL data extension to model sanitizers in this repo +# Registers sanitize_for_logging as a sanitizer so taint is removed when data flows through it. +# Reference: CodeQL model packs (Python) + +extensions: + - addsTo: + pack: codeql/python-all + extensible: sanitizer + data: + # Format: [module, function, output, taintKinds] + # Mark the function's return value as sanitized for all taint kinds. + - ["core.utils", "sanitize_for_logging", "ReturnValue", "All"] + # Also include fully-qualified path with backend prefix in case imports resolve that way in analysis + - ["backend.core.utils", "sanitize_for_logging", "ReturnValue", "All"] diff --git a/backend/application/chat/agent/act_loop.py b/backend/application/chat/agent/act_loop.py index f74c818..1578093 100644 --- a/backend/application/chat/agent/act_loop.py +++ b/backend/application/chat/agent/act_loop.py @@ -30,11 +30,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager def _extract_finished_args(self, tool_calls: List[Dict[str, Any]]) -> Optional[str]: """Extract final_answer from finished tool call if present.""" @@ -151,6 +153,7 @@ async def run( }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) messages.append({ diff --git a/backend/application/chat/agent/factory.py b/backend/application/chat/agent/factory.py index c7538a5..b703871 100644 --- a/backend/application/chat/agent/factory.py +++ b/backend/application/chat/agent/factory.py @@ -30,6 +30,7 @@ def __init__( tool_manager: Optional[ToolManagerProtocol] = None, prompt_provider: Optional[PromptProvider] = None, connection: Optional[ChatConnectionProtocol] = None, + config_manager=None, ): """ Initialize factory with shared dependencies. 
@@ -39,11 +40,13 @@ def __init__( tool_manager: Optional tool manager prompt_provider: Optional prompt provider connection: Optional connection for sending updates + config_manager: Optional config manager for approval settings """ self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager # Registry of available strategies self._strategy_registry = { @@ -93,6 +96,7 @@ def create(self, strategy: str = "think-act") -> AgentLoopProtocol: tool_manager=self.tool_manager, prompt_provider=self.prompt_provider, connection=self.connection, + config_manager=self.config_manager, ) # Cache for future use diff --git a/backend/application/chat/agent/react_loop.py b/backend/application/chat/agent/react_loop.py index a69ff96..36a2a2e 100644 --- a/backend/application/chat/agent/react_loop.py +++ b/backend/application/chat/agent/react_loop.py @@ -31,11 +31,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager # ---- Internal helpers (mirroring service implementation) ---- def _latest_user_question(self, msgs: List[Dict[str, Any]]) -> str: @@ -243,6 +245,7 @@ async def run( }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) tool_results.append(result) messages.append({ diff --git a/backend/application/chat/agent/think_act_loop.py b/backend/application/chat/agent/think_act_loop.py index 7a9b672..b9df776 100644 --- a/backend/application/chat/agent/think_act_loop.py +++ b/backend/application/chat/agent/think_act_loop.py @@ -26,11 +26,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager async def run( self, @@ -140,6 +142,7 @@ async def emit_think(text: str, step: int) -> None: }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) messages.append({"role": "tool", "content": result.content, "tool_call_id": result.tool_call_id}) # Notify service to ingest artifacts diff --git a/backend/application/chat/approval_manager.py b/backend/application/chat/approval_manager.py new file mode 100644 index 0000000..71d84bd --- /dev/null +++ b/backend/application/chat/approval_manager.py @@ -0,0 +1,148 @@ +""" +Tool approval service for managing approval requests and responses. + +This module handles the approval workflow for tool calls, allowing users to +approve, reject, or edit tool arguments before execution. 
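+ +Each pending request is keyed by its tool_call_id and wraps an asyncio.Future that the WebSocket handler resolves when the user's approval response arrives.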
+""" + +import asyncio +import logging +from typing import Dict, Any, Optional +from uuid import UUID + +from core.utils import sanitize_for_logging + +logger = logging.getLogger(__name__) + + +class ToolApprovalRequest: + """Represents a pending tool approval request.""" + + def __init__( + self, + tool_call_id: str, + tool_name: str, + arguments: Dict[str, Any], + allow_edit: bool = True + ): + self.tool_call_id = tool_call_id + self.tool_name = tool_name + self.arguments = arguments + self.allow_edit = allow_edit + self.future: asyncio.Future = asyncio.Future() + + async def wait_for_response(self, timeout: float = 300.0) -> Dict[str, Any]: + """ + Wait for user response to this approval request. + + Args: + timeout: Maximum time to wait in seconds (default 5 minutes) + + Returns: + Dict with 'approved', 'arguments', and optional 'reason' + + Raises: + asyncio.TimeoutError: If timeout is reached + """ + try: + return await asyncio.wait_for(self.future, timeout=timeout) + except asyncio.TimeoutError: + logger.warning(f"Approval request timed out for tool {self.tool_name}") + raise + + def set_response(self, approved: bool, arguments: Optional[Dict[str, Any]] = None, reason: Optional[str] = None): + """Set the user's response to this approval request.""" + if not self.future.done(): + self.future.set_result({ + "approved": approved, + "arguments": arguments or self.arguments, + "reason": reason + }) + + +class ToolApprovalManager: + """Manages tool approval requests and responses.""" + + def __init__(self): + self._pending_requests: Dict[str, ToolApprovalRequest] = {} + + def create_approval_request( + self, + tool_call_id: str, + tool_name: str, + arguments: Dict[str, Any], + allow_edit: bool = True + ) -> ToolApprovalRequest: + """ + Create a new approval request. + + Args: + tool_call_id: Unique ID for this tool call + tool_name: Name of the tool being called + arguments: Tool arguments + allow_edit: Whether to allow editing of arguments + + Returns: + ToolApprovalRequest object + """ + request = ToolApprovalRequest(tool_call_id, tool_name, arguments, allow_edit) + self._pending_requests[tool_call_id] = request + logger.info(f"Created approval request for tool {sanitize_for_logging(tool_name)} (call_id: {sanitize_for_logging(tool_call_id)})") + return request + + def handle_approval_response( + self, + tool_call_id: str, + approved: bool, + arguments: Optional[Dict[str, Any]] = None, + reason: Optional[str] = None + ) -> bool: + """ + Handle a user's response to an approval request. 
+ + Args: + tool_call_id: ID of the tool call being responded to + approved: Whether the user approved the call + arguments: Potentially edited arguments (if allowed) + reason: Optional reason for rejection + + Returns: + True if request was found and handled, False otherwise + """ + logger.info(f"handle_approval_response called: tool_call_id={sanitize_for_logging(tool_call_id)}, approved={approved}") + logger.info(f"Pending requests: {list(self._pending_requests.keys())}") + + request = self._pending_requests.get(tool_call_id) + if request is None: + logger.warning(f"Received approval response for unknown tool call: {sanitize_for_logging(tool_call_id)}") + logger.warning(f"Available pending requests: {list(self._pending_requests.keys())}") + return False + + logger.info(f"Found pending request for {sanitize_for_logging(tool_call_id)}, setting response") + request.set_response(approved, arguments, reason) + # Keep the request in the dict for a bit to avoid race conditions + # It will be cleaned up later + logger.info(f"Approval response handled for tool {sanitize_for_logging(request.tool_name)}: approved={approved}") + return True + + def cleanup_request(self, tool_call_id: str): + """Remove a completed approval request.""" + if tool_call_id in self._pending_requests: + del self._pending_requests[tool_call_id] + logger.debug(f"Cleaned up approval request: {tool_call_id}") + + def get_pending_requests(self) -> Dict[str, ToolApprovalRequest]: + """Get all pending approval requests.""" + return dict(self._pending_requests) + + +# Global approval manager instance (one per application) +_approval_manager: Optional[ToolApprovalManager] = None + + +def get_approval_manager() -> ToolApprovalManager: + """Get the global tool approval manager instance.""" + global _approval_manager + if _approval_manager is None: + _approval_manager = ToolApprovalManager() + return _approval_manager diff --git a/backend/application/chat/modes/tools.py b/backend/application/chat/modes/tools.py index a2d9fc0..2f311cd 100644 --- a/backend/application/chat/modes/tools.py +++ b/backend/application/chat/modes/tools.py @@ -33,6 +33,7 @@ def __init__( event_publisher: EventPublisher, prompt_provider: Optional[PromptProvider] = None, artifact_processor: Optional[Callable[[Session, List[ToolResult], Optional[UpdateCallback]], Awaitable[None]]] = None, + config_manager=None, ): """ Initialize tools mode runner. 
@@ -43,12 +44,14 @@ def __init__( event_publisher: Event publisher for UI updates prompt_provider: Optional prompt provider artifact_processor: Optional callback for processing tool artifacts + config_manager: Optional config manager for approval settings """ self.llm = llm self.tool_manager = tool_manager self.event_publisher = event_publisher self.prompt_provider = prompt_provider self.artifact_processor = artifact_processor + self.config_manager = config_manager async def run( self, @@ -119,6 +122,7 @@ async def run( llm_caller=self.llm, prompt_provider=self.prompt_provider, update_callback=update_callback or self._get_send_json(), + config_manager=self.config_manager, ) # Process artifacts if handler provided diff --git a/backend/application/chat/service.py b/backend/application/chat/service.py index e1d28a5..f80463e 100644 --- a/backend/application/chat/service.py +++ b/backend/application/chat/service.py @@ -125,6 +125,7 @@ def __init__( event_publisher=self.event_publisher, prompt_provider=self.prompt_provider, artifact_processor=self._update_session_from_tool_results, + config_manager=self.config_manager, ) @@ -138,6 +139,7 @@ def __init__( tool_manager=self.tool_manager, prompt_provider=self.prompt_provider, connection=self.connection, + config_manager=self.config_manager, ) # Get default strategy from config diff --git a/backend/application/chat/utilities/tool_utils.py b/backend/application/chat/utilities/tool_utils.py index 519f0fe..c657383 100644 --- a/backend/application/chat/utilities/tool_utils.py +++ b/backend/application/chat/utilities/tool_utils.py @@ -5,6 +5,7 @@ argument processing, and synthesis decisions without maintaining any state. """ +import asyncio import json import logging from typing import Any, Dict, List, Optional, Callable, Awaitable @@ -13,6 +14,7 @@ from interfaces.llm import LLMResponse from core.capabilities import create_download_url from .notification_utils import _sanitize_filename_value # reuse same filename sanitizer for UI args +from ..approval_manager import get_approval_manager logger = logging.getLogger(__name__) @@ -28,7 +30,8 @@ async def execute_tools_workflow( tool_manager, llm_caller, prompt_provider, - update_callback: Optional[UpdateCallback] = None + update_callback: Optional[UpdateCallback] = None, + config_manager=None ) -> tuple[str, List[ToolResult]]: """ Execute the complete tools workflow: calls -> results -> synthesis. @@ -50,7 +53,8 @@ async def execute_tools_workflow( tool_call=tool_call, session_context=session_context, tool_manager=tool_manager, - update_callback=update_callback + update_callback=update_callback, + config_manager=config_manager ) tool_results.append(result) @@ -76,6 +80,54 @@ async def execute_tools_workflow( return final_response, tool_results +def requires_approval(tool_name: str, config_manager) -> tuple[bool, bool, bool]: + """ + Check if a tool requires approval before execution. 
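+ + Precedence (highest first): the FORCE_TOOL_APPROVAL_GLOBALLY override, an explicit per-tool require_approval entry built from mcp.json, then REQUIRE_TOOL_APPROVAL_BY_DEFAULT. With the force flag set, this returns (True, True, True) for every tool.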
+ + Args: + tool_name: Name of the tool to check + config_manager: ConfigManager instance (can be None) + + Returns: + Tuple of (requires_approval, allow_edit, admin_required) + - requires_approval: Whether approval is needed + - allow_edit: Whether arguments can be edited + - admin_required: Whether this is admin-mandated (True) or user-level (False) + """ + if config_manager is None: + return (True, True, False) # Default to requiring user-level approval + + try: + # Global override: force approval for all tools (admin-enforced) + app_settings = getattr(config_manager, "app_settings", None) + force_flag = False + if app_settings is not None: + raw_force = getattr(app_settings, "force_tool_approval_globally", False) + force_flag = (isinstance(raw_force, bool) and raw_force is True) + if force_flag: + return (True, True, True) + + approvals_config = config_manager.tool_approvals_config + + # Per-tool explicit requirement (admin-enforced) + if tool_name in approvals_config.tools: + tool_config = approvals_config.tools[tool_name] + # Only treat as admin-required if explicitly required + if getattr(tool_config, "require_approval", False): + return (True, True, True) + # Explicit false falls through to default behavior + + # Default requirement: admin-enforced if default=True; otherwise user-level + if approvals_config.require_approval_by_default: + return (True, True, True) + else: + return (True, True, False) + + except Exception as e: + logger.warning(f"Error checking approval requirements for {tool_name}: {e}") + return (True, True, False) # Default to user-level approval on error + + def tool_accepts_username(tool_name: str, tool_manager) -> bool: """ Check if a tool accepts a username parameter by examining its schema. @@ -109,7 +161,8 @@ async def execute_single_tool( tool_call, session_context: Dict[str, Any], tool_manager, - update_callback: Optional[UpdateCallback] = None + update_callback: Optional[UpdateCallback] = None, + config_manager=None ) -> ToolResult: """ Execute a single tool with argument preparation and error handling. @@ -129,6 +182,84 @@ async def execute_single_tool( # Sanitize arguments for UI (hide tokens in URLs, etc.) 
display_args = _sanitize_args_for_ui(dict(filtered_args)) + # Check if this tool requires approval + needs_approval = False + allow_edit = True + admin_required = False + if config_manager: + needs_approval, allow_edit, admin_required = requires_approval(tool_call.function.name, config_manager) + else: + # No config manager means user-level approval by default + needs_approval = True + allow_edit = True + admin_required = False + + # Track if arguments were edited (for LLM context) + arguments_were_edited = False + original_display_args = dict(display_args) if isinstance(display_args, dict) else display_args + + # If approval is required, request it from the user + if needs_approval: + logger.info(f"Tool {tool_call.function.name} requires approval (admin_required={admin_required})") + + # Send approval request to frontend + if update_callback: + await update_callback({ + "type": "tool_approval_request", + "tool_call_id": tool_call.id, + "tool_name": tool_call.function.name, + "arguments": display_args, + "allow_edit": allow_edit, + "admin_required": admin_required + }) + + # Wait for approval response + approval_manager = get_approval_manager() + request = approval_manager.create_approval_request( + tool_call.id, + tool_call.function.name, + filtered_args, + allow_edit + ) + + try: + response = await request.wait_for_response(timeout=300.0) + approval_manager.cleanup_request(tool_call.id) + + if not response["approved"]: + # Tool was rejected + reason = response.get("reason", "User rejected the tool call") + logger.info(f"Tool {tool_call.function.name} rejected by user: {reason}") + return ToolResult( + tool_call_id=tool_call.id, + content=f"Tool execution rejected by user: {reason}", + success=False, + error=reason + ) + + # Use potentially edited arguments + if allow_edit and response.get("arguments"): + edited_args = response["arguments"] + # Check if arguments actually changed by comparing with what we sent (display_args) + # Use json comparison to avoid false positives from dict ordering + if json.dumps(edited_args, sort_keys=True) != json.dumps(original_display_args, sort_keys=True): + arguments_were_edited = True + filtered_args = edited_args + logger.info(f"User edited arguments for tool {tool_call.function.name}") + else: + # No actual changes, but response included arguments - keep original filtered_args + logger.debug(f"Arguments returned unchanged for tool {tool_call.function.name}") + + except asyncio.TimeoutError: + approval_manager.cleanup_request(tool_call.id) + logger.warning(f"Approval timeout for tool {tool_call.function.name}") + return ToolResult( + tool_call_id=tool_call.id, + content="Tool execution timed out waiting for user approval", + success=False, + error="Approval timeout" + ) + # Send tool start notification with sanitized args await notification_utils.notify_tool_start(tool_call, display_args, update_callback) @@ -149,6 +280,20 @@ async def execute_single_tool( } ) + # If arguments were edited, prepend a note to the result for LLM context + if arguments_were_edited: + edit_note = ( + f"[IMPORTANT: The user manually edited the tool arguments before execution. " + f"Disregard your original arguments. The ACTUAL arguments executed were: {json.dumps(filtered_args)}. " + f"Your response must reflect these edited arguments as the user's true intent. 
" + # f"Do NOT reference the original arguments: {json.dumps(original_filtered_args)}]\n\n" + ) + if isinstance(result.content, str): + result.content = edit_note + result.content + else: + # If content is not a string, convert and prepend + result.content = edit_note + str(result.content) + # Send tool complete notification await notification_utils.notify_tool_complete(tool_call, result, parsed_args, update_callback) @@ -424,4 +569,4 @@ def build_files_manifest(session_context: Dict[str, Any]) -> Optional[Dict[str, "(You can ask to open or analyze any of these by name. " "Large contents are not fully in this prompt unless user or tools provided excerpts.)" ) - } \ No newline at end of file + } diff --git a/backend/main.py b/backend/main.py index 93ebdd3..6e8ee1c 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,6 +3,7 @@ Focuses on essential chat functionality only. """ +import asyncio import logging from contextlib import asynccontextmanager from pathlib import Path @@ -217,44 +218,50 @@ async def websocket_endpoint(websocket: WebSocket): chat_service = app_factory.create_chat_service(connection_adapter) logger.info(f"WebSocket connection established for session {sanitize_for_logging(str(session_id))}") - + try: while True: data = await websocket.receive_json() message_type = data.get("type") - + + # Debug: Log ALL incoming messages + logger.info(f"WS RECEIVED message_type={message_type}, data keys={list(data.keys())}") + if message_type == "chat": - # Handle chat message with streaming updates - try: - await chat_service.handle_chat_message( - session_id=session_id, - content=data.get("content", ""), - model=data.get("model", ""), - selected_tools=data.get("selected_tools"), - selected_prompts=data.get("selected_prompts"), - selected_data_sources=data.get("selected_data_sources"), - only_rag=data.get("only_rag", False), - tool_choice_required=data.get("tool_choice_required", False), - user_email=data.get("user"), - agent_mode=data.get("agent_mode", False), - agent_max_steps=data.get("agent_max_steps", 10), - temperature=data.get("temperature", 0.7), - agent_loop_strategy=data.get("agent_loop_strategy"), - update_callback=lambda message: websocket_update_callback(websocket, message), - files=data.get("files") - ) - # Final response is already sent via callbacks, but we keep this for backward compatibility - except ValidationError as e: - await websocket.send_json({ - "type": "error", - "message": str(e) - }) - except Exception as e: - logger.error(f"Error in chat handler: {e}", exc_info=True) - await websocket.send_json({ - "type": "error", - "message": "An unexpected error occurred" - }) + # Handle chat message in background so we can still receive approval responses + async def handle_chat(): + try: + await chat_service.handle_chat_message( + session_id=session_id, + content=data.get("content", ""), + model=data.get("model", ""), + selected_tools=data.get("selected_tools"), + selected_prompts=data.get("selected_prompts"), + selected_data_sources=data.get("selected_data_sources"), + only_rag=data.get("only_rag", False), + tool_choice_required=data.get("tool_choice_required", False), + user_email=data.get("user"), + agent_mode=data.get("agent_mode", False), + agent_max_steps=data.get("agent_max_steps", 10), + temperature=data.get("temperature", 0.7), + agent_loop_strategy=data.get("agent_loop_strategy"), + update_callback=lambda message: websocket_update_callback(websocket, message), + files=data.get("files") + ) + except ValidationError as e: + await websocket.send_json({ + 
"type": "error", + "message": str(e) + }) + except Exception as e: + logger.error(f"Error in chat handler: {e}", exc_info=True) + await websocket.send_json({ + "type": "error", + "message": "An unexpected error occurred" + }) + + # Start chat handling in background + asyncio.create_task(handle_chat()) elif message_type == "download_file": # Handle file download @@ -283,11 +290,34 @@ async def websocket_endpoint(websocket: WebSocket): ) await websocket.send_json(response) + elif message_type == "tool_approval_response": + # Handle tool approval response + logger.info(f"Received tool approval response: {sanitize_for_logging(str(data))}") + from application.chat.approval_manager import get_approval_manager + approval_manager = get_approval_manager() + + tool_call_id = data.get("tool_call_id") + approved = data.get("approved", False) + arguments = data.get("arguments") + reason = data.get("reason") + + logger.info(f"Processing approval: tool_call_id={sanitize_for_logging(tool_call_id)}, approved={approved}") + + result = approval_manager.handle_approval_response( + tool_call_id=tool_call_id, + approved=approved, + arguments=arguments, + reason=reason + ) + + logger.info(f"Approval response handled: result={result}") + # No response needed - the approval will unblock the waiting tool execution + else: - logger.warning(f"Unknown message type: {message_type}") + logger.warning(f"Unknown message type: {sanitize_for_logging(message_type)}") await websocket.send_json({ "type": "error", - "message": f"Unknown message type: {message_type}" + "message": f"Unknown message type: {sanitize_for_logging(message_type)}" }) except WebSocketDisconnect: diff --git a/backend/modules/config/config_manager.py b/backend/modules/config/config_manager.py index c8334fc..5bac0e3 100644 --- a/backend/modules/config/config_manager.py +++ b/backend/modules/config/config_manager.py @@ -64,6 +64,8 @@ class MCPServerConfig(BaseModel): type: str = "stdio" # Server type: "stdio" or "http" (deprecated, use transport) transport: Optional[str] = None # Explicit transport: "stdio", "http", "sse" - takes priority over auto-detection compliance_level: Optional[str] = None # Compliance/security level (e.g., "SOC2", "HIPAA", "Public") + require_approval: List[str] = Field(default_factory=list) # List of tool names (without server prefix) requiring approval + allow_edit: List[str] = Field(default_factory=list) # List of tool names (without server prefix) allowing argument editing class MCPConfig(BaseModel): @@ -80,6 +82,27 @@ def validate_servers(cls, v): return v +class ToolApprovalConfig(BaseModel): + """Configuration for a single tool's approval settings.""" + require_approval: bool = False + allow_edit: bool = True + + +class ToolApprovalsConfig(BaseModel): + """Configuration for tool approvals.""" + require_approval_by_default: bool = False + tools: Dict[str, ToolApprovalConfig] = Field(default_factory=dict) + + @field_validator('tools', mode='before') + @classmethod + def validate_tools(cls, v): + """Convert dict values to ToolApprovalConfig objects.""" + if isinstance(v, dict): + return {name: ToolApprovalConfig(**config) if isinstance(config, dict) else config + for name, config in v.items()} + return v + + class AppSettings(BaseSettings): """Main application settings loaded from environment variables.""" @@ -115,7 +138,12 @@ class AppSettings(BaseSettings): def agent_mode_available(self) -> bool: """Maintain backward compatibility for code still referencing agent_mode_available.""" return self.feature_agent_mode_available - + 
+ # Tool approval settings + require_tool_approval_by_default: bool = False + # When true, all tools require approval (admin-enforced), overriding per-tool and default settings + force_tool_approval_globally: bool = Field(default=False, validation_alias="FORCE_TOOL_APPROVAL_GLOBALLY") + # LLM Health Check settings llm_health_check_interval: int = 5 # minutes @@ -190,6 +218,7 @@ def agent_mode_available(self) -> bool: llm_config_file: str = Field(default="llmconfig.yml", validation_alias="LLM_CONFIG_FILE") help_config_file: str = Field(default="help-config.json", validation_alias="HELP_CONFIG_FILE") messages_config_file: str = Field(default="messages.txt", validation_alias="MESSAGES_CONFIG_FILE") + tool_approvals_config_file: str = Field(default="tool-approvals.json", validation_alias="TOOL_APPROVALS_CONFIG_FILE") # Config directory paths app_config_overrides: str = Field(default="config/overrides", validation_alias="APP_CONFIG_OVERRIDES") @@ -226,6 +255,7 @@ def __init__(self, backend_root: Optional[Path] = None): self._llm_config: Optional[LLMConfig] = None self._mcp_config: Optional[MCPConfig] = None self._rag_mcp_config: Optional[MCPConfig] = None + self._tool_approvals_config: Optional[ToolApprovalsConfig] = None def _search_paths(self, file_name: str) -> List[Path]: """Generate common search paths for a configuration file. @@ -435,6 +465,41 @@ def rag_mcp_config(self) -> MCPConfig: return self._rag_mcp_config + @property + def tool_approvals_config(self) -> ToolApprovalsConfig: + """Get tool approvals configuration built from mcp.json and env variables (cached).""" + if self._tool_approvals_config is None: + try: + # Get default from environment + default_require_approval = self.app_settings.require_tool_approval_by_default + + # Build tool-specific configs from MCP servers (Option B): + # Only include entries explicitly listed under require_approval. 
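+ # Example (illustrative server name "pdf"): "require_approval": ["word_frequency_pdf"] in mcp.json produces the key "pdf_word_frequency_pdf" via f"{server_name}_{tool_name}" below.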
+ tools_config: Dict[str, ToolApprovalConfig] = {} + + for server_name, server_config in self.mcp_config.servers.items(): + require_approval_list = server_config.require_approval or [] + + for tool_name in require_approval_list: + full_tool_name = f"{server_name}_{tool_name}" + # Mark as explicitly requiring approval; allow_edit is moot for requirement + tools_config[full_tool_name] = ToolApprovalConfig( + require_approval=True, + allow_edit=True # UI always allows edits; keep True for compatibility + ) + + self._tool_approvals_config = ToolApprovalsConfig( + require_approval_by_default=default_require_approval, + tools=tools_config + ) + logger.info(f"Built tool approvals config from mcp.json with {len(tools_config)} tool-specific settings (default: {default_require_approval})") + + except Exception as e: + logger.error(f"Failed to build tool approvals configuration: {e}", exc_info=True) + self._tool_approvals_config = ToolApprovalsConfig() + + return self._tool_approvals_config + def _validate_mcp_compliance_levels(self, config: MCPConfig, config_type: str): """Validate compliance levels for all MCP servers.""" try: @@ -459,6 +524,7 @@ def reload_configs(self) -> None: self._llm_config = None self._mcp_config = None self._rag_mcp_config = None + self._tool_approvals_config = None logger.info("Configuration cache cleared, will reload on next access") def validate_config(self) -> Dict[str, bool]: diff --git a/backend/routes/config_routes.py b/backend/routes/config_routes.py index 44664f4..7d7c3f7 100644 --- a/backend/routes/config_routes.py +++ b/backend/routes/config_routes.py @@ -259,6 +259,28 @@ async def get_config( model_info["compliance_level"] = model_config.compliance_level models_list.append(model_info) + # Build tool approval settings - only include tools from authorized servers + tool_approvals_config = config_manager.tool_approvals_config + filtered_tool_approvals = {} + + # Get all tool names from authorized servers + authorized_tool_names = set() + for tool_group in tools_info: + server_name = tool_group.get('server') + if server_name in authorized_servers: + # tools is a list of strings (tool names), not dicts + for tool_name in tool_group.get('tools', []): + if isinstance(tool_name, str): + authorized_tool_names.add(tool_name) + + # Only include approval settings for tools the user has access to + for tool_name, approval_config in tool_approvals_config.tools.items(): + if tool_name in authorized_tool_names: + filtered_tool_approvals[tool_name] = { + "require_approval": approval_config.require_approval, + "allow_edit": approval_config.allow_edit + } + return { "app_name": app_settings.app_name, "models": models_list, @@ -273,6 +295,10 @@ async def get_config( "agent_mode_available": app_settings.agent_mode_available, # Whether agent mode UI should be shown "banner_enabled": app_settings.banner_enabled, # Whether banner system is enabled "help_config": help_config, # Help page configuration from help-config.json + "tool_approvals": { + "require_approval_by_default": tool_approvals_config.require_approval_by_default, + "tools": filtered_tool_approvals + }, "features": { "workspaces": app_settings.feature_workspaces_enabled, "rag": app_settings.feature_rag_enabled, diff --git a/backend/tests/test_approval_manager.py b/backend/tests/test_approval_manager.py new file mode 100644 index 0000000..ff83aac --- /dev/null +++ b/backend/tests/test_approval_manager.py @@ -0,0 +1,447 @@ +"""Tests for the tool approval manager.""" + +import asyncio +import pytest +from 
application.chat.approval_manager import ( + ToolApprovalManager, + ToolApprovalRequest, + get_approval_manager +) + + +class TestToolApprovalRequest: + """Test ToolApprovalRequest class.""" + + def test_create_approval_request(self): + """Test creating an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=True + ) + assert request.tool_call_id == "test_123" + assert request.tool_name == "test_tool" + assert request.arguments == {"arg1": "value1"} + assert request.allow_edit is True + + @pytest.mark.asyncio + async def test_set_response(self): + """Test setting a response to an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set approved response + request.set_response(approved=True, arguments={"arg1": "edited_value"}) + + # Wait for the response (should be immediate since we already set it) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"] == {"arg1": "edited_value"} + + @pytest.mark.asyncio + async def test_rejection_response(self): + """Test rejecting an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set rejected response + request.set_response(approved=False, reason="User rejected") + + # Wait for the response + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is False + assert response["reason"] == "User rejected" + + @pytest.mark.asyncio + async def test_timeout(self): + """Test that timeout works correctly.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Should timeout since we don't set a response + with pytest.raises(asyncio.TimeoutError): + await request.wait_for_response(timeout=0.1) + + +class TestToolApprovalManager: + """Test ToolApprovalManager class.""" + + def test_create_approval_request(self): + """Test creating an approval request via manager.""" + manager = ToolApprovalManager() + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=True + ) + + assert request.tool_call_id == "test_123" + assert "test_123" in manager.get_pending_requests() + + def test_handle_approval_response(self): + """Test handling an approval response.""" + manager = ToolApprovalManager() + manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Handle approval response + result = manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"arg1": "edited_value"} + ) + + assert result is True + # Request should still be in pending (cleaned up manually later) + assert "test_123" in manager.get_pending_requests() + + def test_handle_unknown_request(self): + """Test handling response for unknown request.""" + manager = ToolApprovalManager() + + result = manager.handle_approval_response( + tool_call_id="unknown_123", + approved=True + ) + + assert result is False + + def test_cleanup_request(self): + """Test cleaning up a completed request.""" + manager = ToolApprovalManager() + manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + assert "test_123" in 
manager.get_pending_requests() + + manager.cleanup_request("test_123") + + assert "test_123" not in manager.get_pending_requests() + + def test_get_approval_manager_singleton(self): + """Test that get_approval_manager returns a singleton.""" + manager1 = get_approval_manager() + manager2 = get_approval_manager() + + assert manager1 is manager2 + + @pytest.mark.asyncio + async def test_full_approval_workflow(self): + """Test the complete approval workflow.""" + manager = ToolApprovalManager() + + # Create request + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"code": "print('test')"}, + allow_edit=True + ) + + # Simulate async approval (in a separate task) + async def approve_after_delay(): + await asyncio.sleep(0.1) + manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"code": "print('edited test')"} + ) + + # Start approval task + approval_task = asyncio.create_task(approve_after_delay()) + + # Wait for response + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"]["code"] == "print('edited test')" + + # Cleanup + manager.cleanup_request("test_123") + assert "test_123" not in manager.get_pending_requests() + + await approval_task + + @pytest.mark.asyncio + async def test_multiple_concurrent_approvals(self): + """Test handling multiple concurrent approval requests.""" + manager = ToolApprovalManager() + + # Create multiple requests + request1 = manager.create_approval_request( + tool_call_id="test_1", + tool_name="tool_a", + arguments={"arg": "value1"} + ) + request2 = manager.create_approval_request( + tool_call_id="test_2", + tool_name="tool_b", + arguments={"arg": "value2"} + ) + request3 = manager.create_approval_request( + tool_call_id="test_3", + tool_name="tool_c", + arguments={"arg": "value3"} + ) + + assert len(manager.get_pending_requests()) == 3 + + # Approve them in different order + async def approve_requests(): + await asyncio.sleep(0.05) + manager.handle_approval_response("test_2", approved=True) + await asyncio.sleep(0.05) + manager.handle_approval_response("test_1", approved=False, reason="Rejected") + await asyncio.sleep(0.05) + manager.handle_approval_response("test_3", approved=True) + + approval_task = asyncio.create_task(approve_requests()) + + # Wait for all responses + response1 = await request1.wait_for_response(timeout=1.0) + response2 = await request2.wait_for_response(timeout=1.0) + response3 = await request3.wait_for_response(timeout=1.0) + + assert response1["approved"] is False + assert response1["reason"] == "Rejected" + assert response2["approved"] is True + assert response3["approved"] is True + + await approval_task + + @pytest.mark.asyncio + async def test_approval_with_no_arguments_change(self): + """Test approval where arguments are returned but not changed.""" + manager = ToolApprovalManager() + + original_args = {"code": "print('hello')"} + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments=original_args, + allow_edit=True + ) + + # Approve with same arguments + async def approve(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"code": "print('hello')"} # Same as original + ) + + approval_task = asyncio.create_task(approve()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert 
response["arguments"] == original_args + + await approval_task + + @pytest.mark.asyncio + async def test_double_response_handling(self): + """Test that setting response twice doesn't cause issues.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set response first time + request.set_response(approved=True, arguments={"arg1": "first"}) + + # Try to set response second time (should be ignored) + request.set_response(approved=False, arguments={"arg1": "second"}) + + # Should get the first response + response = await request.wait_for_response(timeout=0.5) + assert response["approved"] is True + assert response["arguments"]["arg1"] == "first" + + @pytest.mark.asyncio + async def test_rejection_with_empty_reason(self): + """Test rejection with no reason provided.""" + manager = ToolApprovalManager() + + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + async def reject(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_123", + approved=False + # No reason provided + ) + + approval_task = asyncio.create_task(reject()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is False + assert response.get("reason") is None or response.get("reason") == "" + + await approval_task + + @pytest.mark.asyncio + async def test_allow_edit_false(self): + """Test approval request with editing disabled.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=False + ) + + assert request.allow_edit is False + + # Even if arguments are provided, they should be used + request.set_response(approved=True, arguments={"arg1": "edited_value"}) + response = await request.wait_for_response(timeout=0.5) + + # The response will contain the edited arguments, but the UI should + # respect allow_edit=False to prevent showing edit controls + assert response["arguments"] == {"arg1": "edited_value"} + + def test_cleanup_nonexistent_request(self): + """Test cleaning up a request that doesn't exist.""" + manager = ToolApprovalManager() + + # Should not raise an error + manager.cleanup_request("nonexistent_id") + + assert "nonexistent_id" not in manager.get_pending_requests() + + def test_multiple_managers_vs_singleton(self): + """Test that direct instantiation creates different instances but singleton returns same.""" + manager1 = ToolApprovalManager() + manager2 = ToolApprovalManager() + + # Direct instantiation creates different instances + assert manager1 is not manager2 + + # But singleton returns the same instance + singleton1 = get_approval_manager() + singleton2 = get_approval_manager() + assert singleton1 is singleton2 + + @pytest.mark.asyncio + async def test_approval_with_complex_arguments(self): + """Test approval with complex nested arguments.""" + manager = ToolApprovalManager() + + complex_args = { + "nested": { + "level1": { + "level2": ["item1", "item2", "item3"] + } + }, + "list_of_dicts": [ + {"key": "value1"}, + {"key": "value2"} + ], + "numbers": [1, 2, 3, 4, 5] + } + + request = manager.create_approval_request( + tool_call_id="test_complex", + tool_name="complex_tool", + arguments=complex_args, + allow_edit=True + ) + + # Modify nested structure + edited_args = { + "nested": { + "level1": { + "level2": ["item1", "modified_item", "item3"] + } + }, + "list_of_dicts": [ + {"key": "value1"}, + {"key": 
"new_value"} + ], + "numbers": [1, 2, 3, 4, 5, 6] + } + + async def approve(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_complex", + approved=True, + arguments=edited_args + ) + + approval_task = asyncio.create_task(approve()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"]["nested"]["level1"]["level2"][1] == "modified_item" + assert len(response["arguments"]["numbers"]) == 6 + + await approval_task + + @pytest.mark.asyncio + async def test_sequential_approvals(self): + """Test approving requests one after another in sequence.""" + manager = ToolApprovalManager() + + # First approval + request1 = manager.create_approval_request( + tool_call_id="seq_1", + tool_name="tool_1", + arguments={"step": 1} + ) + + async def approve1(): + await asyncio.sleep(0.05) + manager.handle_approval_response("seq_1", approved=True) + + task1 = asyncio.create_task(approve1()) + response1 = await request1.wait_for_response(timeout=1.0) + manager.cleanup_request("seq_1") + await task1 + + assert response1["approved"] is True + assert "seq_1" not in manager.get_pending_requests() + + # Second approval after first is complete + request2 = manager.create_approval_request( + tool_call_id="seq_2", + tool_name="tool_2", + arguments={"step": 2} + ) + + async def approve2(): + await asyncio.sleep(0.05) + manager.handle_approval_response("seq_2", approved=True) + + task2 = asyncio.create_task(approve2()) + response2 = await request2.wait_for_response(timeout=1.0) + manager.cleanup_request("seq_2") + await task2 + + assert response2["approved"] is True + assert "seq_2" not in manager.get_pending_requests() diff --git a/backend/tests/test_tool_approval_config.py b/backend/tests/test_tool_approval_config.py new file mode 100644 index 0000000..d01eaa6 --- /dev/null +++ b/backend/tests/test_tool_approval_config.py @@ -0,0 +1,93 @@ +"""Tests for tool approval configuration loading and management.""" + +from modules.config.config_manager import ConfigManager + + +class TestToolApprovalConfig: + """Test tool approval configuration loading.""" + + def test_tool_approvals_config_loads(self): + """Test that tool approvals config loads successfully.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + assert approval_config is not None + assert hasattr(approval_config, "require_approval_by_default") + assert hasattr(approval_config, "tools") + + def test_default_approval_config_structure(self): + """Test the structure of default approval config.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Default config should have require_approval_by_default (check it's boolean) + assert isinstance(approval_config.require_approval_by_default, bool) + # Default config should have tools dict (may or may not be empty) + assert isinstance(approval_config.tools, dict) + + def test_tool_specific_config(self): + """Test that tool-specific configurations can be loaded.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Test basic structure - config may have tool-specific configs from overrides + assert hasattr(approval_config, 'tools') + assert isinstance(approval_config.tools, dict) + + # If there are any tool configs, verify they have the right structure + for tool_name, tool_config in approval_config.tools.items(): + assert hasattr(tool_config, 'require_approval') + assert hasattr(tool_config, 'allow_edit') + assert 
isinstance(tool_config.require_approval, bool) + assert isinstance(tool_config.allow_edit, bool) + + def test_config_has_boolean_default(self): + """Test that require_approval_by_default is a boolean.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + assert isinstance(approval_config.require_approval_by_default, bool) + + def test_tools_config_structure(self): + """Test that tools in config have correct structure.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Each tool config should have require_approval and allow_edit + for tool_name, tool_config in approval_config.tools.items(): + assert hasattr(tool_config, 'require_approval') + assert hasattr(tool_config, 'allow_edit') + assert isinstance(tool_config.require_approval, bool) + assert isinstance(tool_config.allow_edit, bool) + + def test_config_manager_provides_approvals_config(self): + """Test that ConfigManager provides tool_approvals_config.""" + cm = ConfigManager() + + assert hasattr(cm, 'tool_approvals_config') + assert cm.tool_approvals_config is not None + + def test_multiple_config_manager_instances(self): + """Test that multiple ConfigManager instances can coexist.""" + cm1 = ConfigManager() + cm2 = ConfigManager() + + config1 = cm1.tool_approvals_config + config2 = cm2.tool_approvals_config + + # Both should have valid configs + assert config1 is not None + assert config2 is not None + + def test_config_contains_expected_fields(self): + """Test that approval config has all expected fields.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Should have these attributes + assert hasattr(approval_config, 'require_approval_by_default') + assert hasattr(approval_config, 'tools') + + # Types should be correct + assert isinstance(approval_config.require_approval_by_default, bool) + assert isinstance(approval_config.tools, dict) diff --git a/backend/tests/test_tool_approval_utils.py b/backend/tests/test_tool_approval_utils.py new file mode 100644 index 0000000..9b86a25 --- /dev/null +++ b/backend/tests/test_tool_approval_utils.py @@ -0,0 +1,351 @@ +"""Tests for tool approval utilities in tool_utils.py""" + +from unittest.mock import Mock +from application.chat.utilities.tool_utils import ( + requires_approval, + tool_accepts_username, + _sanitize_args_for_ui, + _filter_args_to_schema +) + + +class MockToolConfig: + """Mock tool configuration.""" + def __init__(self, require_approval, allow_edit): + self.require_approval = require_approval + self.allow_edit = allow_edit + + +class MockApprovalsConfig: + """Mock approvals configuration.""" + def __init__(self, require_by_default=True, tools=None): + self.require_approval_by_default = require_by_default + self.tools = tools or {} + + +class TestRequiresApproval: + """Test the requires_approval function.""" + + def test_requires_approval_no_config_manager(self): + """Test requires_approval with no config manager.""" + needs_approval, allow_edit, admin_required = requires_approval("test_tool", None) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_tool_specific_config(self): + """Test requires_approval with tool-specific configuration.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "dangerous_tool": MockToolConfig(require_approval=True, allow_edit=False) + } + ) + + needs_approval, allow_edit, admin_required = requires_approval("dangerous_tool", 
config_manager) + + assert needs_approval is True + # UI is always editable when approval is required + assert allow_edit is True + assert admin_required is True + + def test_requires_approval_default_true(self): + """Test requires_approval with default set to require approval.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=True, + tools={} + ) + + needs_approval, allow_edit, admin_required = requires_approval("any_tool", config_manager) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is True + + def test_requires_approval_default_false(self): + """Test requires_approval with default set to not require approval.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={} + ) + + needs_approval, allow_edit, admin_required = requires_approval("any_tool", config_manager) + + # Default is False but function returns True with user-level approval + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_exception_handling(self): + """Test requires_approval handles exceptions gracefully.""" + config_manager = Mock() + config_manager.tool_approvals_config = None + + # Should not raise, should return default + needs_approval, allow_edit, admin_required = requires_approval("test_tool", config_manager) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_multiple_tools(self): + """Test requires_approval with multiple tool configurations.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "tool_a": MockToolConfig(require_approval=True, allow_edit=True), + "tool_b": MockToolConfig(require_approval=True, allow_edit=False), + "tool_c": MockToolConfig(require_approval=False, allow_edit=True) + } + ) + + # Tool A + needs_approval, allow_edit, admin_required = requires_approval("tool_a", config_manager) + assert needs_approval is True + assert allow_edit is True + assert admin_required is True + + # Tool B (allow_edit False in config is ignored for UI gating) + needs_approval, allow_edit, admin_required = requires_approval("tool_b", config_manager) + assert needs_approval is True + assert allow_edit is True + assert admin_required is True + + # Tool C: with Option B, entries with require_approval=False are not + # considered explicit; fall back to default (which is False here), + # resulting in user-level approval required by design. 
+ config_manager2 = Mock() + config_manager2.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "tool_a": MockToolConfig(require_approval=True, allow_edit=True), + "tool_b": MockToolConfig(require_approval=True, allow_edit=False), + # tool_c omitted to simulate Option B config building + } + ) + needs_approval, allow_edit, admin_required = requires_approval("tool_c", config_manager2) + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + +class TestToolAcceptsUsername: + """Test the tool_accepts_username function.""" + + def test_tool_accepts_username_true(self): + """Test tool that accepts username parameter.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "username": {"type": "string"}, + "other_param": {"type": "string"} + } + } + } + } + ] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is True + + def test_tool_accepts_username_false(self): + """Test tool that does not accept username parameter.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "other_param": {"type": "string"} + } + } + } + } + ] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + def test_tool_accepts_username_no_tool_manager(self): + """Test with no tool manager.""" + result = tool_accepts_username("test_tool", None) + assert result is False + + def test_tool_accepts_username_no_schema(self): + """Test when tool schema is not found.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + def test_tool_accepts_username_exception(self): + """Test exception handling.""" + tool_manager = Mock() + tool_manager.get_tools_schema.side_effect = Exception("Schema error") + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + +class TestSanitizeArgsForUI: + """Test the _sanitize_args_for_ui function.""" + + def test_sanitize_simple_args(self): + """Test sanitizing simple arguments.""" + args = {"param1": "value1", "param2": "value2"} + result = _sanitize_args_for_ui(args) + + assert result == args + + def test_sanitize_filename(self): + """Test sanitizing filename with URL.""" + args = {"filename": "http://example.com/path/file.txt?token=secret"} + result = _sanitize_args_for_ui(args) + + # Should extract just the filename + assert "token" not in result["filename"] + assert "file.txt" in result["filename"] + + def test_sanitize_file_names_list(self): + """Test sanitizing list of filenames.""" + args = { + "file_names": [ + "http://example.com/file1.txt?token=abc", + "http://example.com/file2.txt?token=def" + ] + } + result = _sanitize_args_for_ui(args) + + assert len(result["file_names"]) == 2 + for filename in result["file_names"]: + assert "token" not in filename + + def test_sanitize_file_url(self): + """Test sanitizing file_url field.""" + args = {"file_url": "http://example.com/path/file.txt?token=secret"} + result = _sanitize_args_for_ui(args) + + assert "token" not in result["file_url"] + + def test_sanitize_file_urls_list(self): + """Test sanitizing file_urls list.""" + args = { + "file_urls": [ + "http://example.com/file1.txt?token=abc", + "http://example.com/file2.txt?token=def" + ] + } + result = 
_sanitize_args_for_ui(args) + + assert len(result["file_urls"]) == 2 + for url in result["file_urls"]: + assert "token" not in url + + def test_sanitize_mixed_args(self): + """Test sanitizing mixed arguments.""" + args = { + "filename": "http://example.com/file.txt?token=secret", + "other_param": "normal_value", + "file_names": ["file1.txt", "file2.txt"] + } + result = _sanitize_args_for_ui(args) + + assert "token" not in result["filename"] + assert result["other_param"] == "normal_value" + assert len(result["file_names"]) == 2 + + +class TestFilterArgsToSchema: + """Test the _filter_args_to_schema function.""" + + def test_filter_with_schema(self): + """Test filtering arguments with available schema.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "allowed_param": {"type": "string"}, + "another_param": {"type": "number"} + } + } + } + } + ] + + args = { + "allowed_param": "value", + "another_param": 42, + "original_filename": "old.txt", + "file_url": "http://example.com/file.txt", + "extra_param": "should_be_removed" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + assert "allowed_param" in result + assert "another_param" in result + assert "original_filename" not in result + assert "file_url" not in result + assert "extra_param" not in result + + def test_filter_without_schema(self): + """Test filtering when schema is unavailable.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [] + + args = { + "param": "value", + "original_filename": "old.txt", + "file_url": "http://example.com/file.txt" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + # Should keep param but drop original_* and file_url(s) + assert "param" in result + assert "original_filename" not in result + assert "file_url" not in result + + def test_filter_no_tool_manager(self): + """Test filtering with no tool manager.""" + args = { + "param": "value", + "original_something": "should_be_removed", + "file_urls": ["url1", "url2"] + } + + result = _filter_args_to_schema(args, "test_tool", None) + + assert "param" in result + assert "original_something" not in result + assert "file_urls" not in result + + def test_filter_exception_handling(self): + """Test filtering handles exceptions gracefully.""" + tool_manager = Mock() + tool_manager.get_tools_schema.side_effect = Exception("Schema error") + + args = { + "param": "value", + "original_param": "remove_me" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + # Should fall back to conservative filtering + assert "param" in result + assert "original_param" not in result diff --git a/config/overrides/mcp.json b/config/overrides/mcp.json index 26a0c6b..da3f9b3 100644 --- a/config/overrides/mcp.json +++ b/config/overrides/mcp.json @@ -10,7 +10,9 @@ "author": "Chat UI Team", "short_description": "Mathematical calculator", "help_email": "support@chatui.example.com", - "compliance_level": "Public" + "compliance_level": "Public", + "require_approval": [], + "allow_edit": ["evaluate"] }, "pptx_generator": { @@ -33,7 +35,9 @@ "author": "Chat UI Team", "short_description": "PDF text extraction and analysis", "help_email": "support@chatui.example.com", - "compliance_level": "SOC2" + "compliance_level": "SOC2", + "require_approval": ["generate_report_about_pdf", "word_frequency_pdf"], + "allow_edit": ["generate_report_about_pdf", "word_frequency_pdf"] }, "file_size_test": { "command": 
["python", "mcp/file_size_test/main.py"], diff --git a/docs/tool-approval-e2e-test-scenarios-2025-11-05.md b/docs/tool-approval-e2e-test-scenarios-2025-11-05.md new file mode 100644 index 0000000..1ce62e1 --- /dev/null +++ b/docs/tool-approval-e2e-test-scenarios-2025-11-05.md @@ -0,0 +1,313 @@ +# Tool Approval E2E Test Scenarios + +**Last updated: 2025-11-05** + +This document provides manual E2E test scenarios for the tool approval and auto-approval features in Atlas UI 3. These tests should be performed in the UI to verify the complete approval workflow. + +## Test Environment Setup + +Before testing, ensure: +- Backend is running with the latest changes +- Frontend is built and loaded +- Configuration file `config/overrides/tool_approvals.yml` exists +- At least one MCP tool is configured (e.g., calculator) + +--- + +## Basic Approval Tests + +### Test 1: Approve a Tool Call +**Objective:** Verify basic approval flow works end-to-end + +**Steps:** +1. Send message: " +use hte tool, what is 9879*tan(0.12354) +2. Wait for approval request modal to appear +3. Review the tool name (calculator_evaluate) and arguments +4. Click "Approve" button + +**Expected Result:** +- Tool executes successfully +- Result is displayed in chat +- Approval modal disappears + +--- + +### Test 2: Reject a Tool Call +**Objective:** Verify rejection prevents tool execution + +**Steps:** +1. Send message: "calculate 500 * 250" +2. Wait for approval request +3. Enter rejection reason: "Not needed right now" +4. Click "Reject" button + +**Expected Result:** +- Tool does not execute +- Chat shows rejection message with reason +- User can continue chatting + +--- + +### Test 3: Edit Arguments Before Approval +**Objective:** Verify argument editing functionality + +**Steps:** +1. Send message: "calculate 100 + 50" +2. Wait for approval request +3. Click "Edit Arguments" button +4. Modify expression to "100 + 500" +5. Click "Approve" button + +**Expected Result:** +- Tool executes with edited arguments (100 + 500 = 600) +- Result reflects the edited calculation + +--- + +### Test 4: Multiple Sequential Tool Calls +**Objective:** Verify approval works for multiple tools in sequence + +**Steps:** +1. Send message: "calculate 10 * 20, then calculate 5 + 5" +2. Approve first tool call (10 * 20) +3. Wait for second approval request +4. Approve second tool call (5 + 5) + +**Expected Result:** +- Both tools execute in order +- Both results appear in chat +- Approval requests appear one at a time + +--- + +## Auto-Approval Tests + +### Test 5: Enable User-Level Auto-Approval +**Objective:** Verify user can enable auto-approval for all tools + +**Steps:** +1. Locate auto-approval toggle in UI (user settings or approval modal) +2. Enable "Auto-approve all tools" +3. Send message: "calculate 777 * 3" + +**Expected Result:** +- Tool executes immediately without approval prompt +- Result appears in chat automatically + +--- + +### Test 6: Disable User-Level Auto-Approval +**Objective:** Verify auto-approval can be turned off + +**Steps:** +1. With auto-approval enabled, disable the toggle +2. Send message: "calculate 123 + 456" + +**Expected Result:** +- Approval modal appears as normal +- User must manually approve + +--- + +### Test 7: Function-Specific Auto-Approval +**Objective:** Verify auto-approval for specific functions only + +**Steps:** +1. Edit `config/overrides/tool_approvals.yml`: +```yaml +tools: + calculator_evaluate: + require_approval: false + allow_edit: true +``` +2. Restart backend +3. 
Send message: "calculate 999 / 3" + +**Expected Result:** +- Calculator tool executes without approval +- Other tools (if available) still require approval + +--- + +### Test 8: Mixed Auto-Approval and Manual Approval +**Objective:** Verify some tools auto-approve while others require approval + +**Steps:** +1. Configure calculator to NOT require approval +2. Configure another tool (e.g., PDF tool) to require approval +3. Send message that uses both tools + +**Expected Result:** +- Calculator executes immediately +- PDF tool shows approval modal +- Workflow continues smoothly + +--- + +## Edge Cases and Error Handling + +### Test 9: Approval Timeout +**Objective:** Verify system handles no response gracefully + +**Steps:** +1. Send message: "calculate 50 * 50" +2. Wait for approval modal +3. Do not click approve or reject +4. Wait 5+ minutes (timeout period) + +**Expected Result:** +- System times out gracefully +- Error message appears: "Tool execution timed out waiting for user approval" +- Chat remains functional + +--- + +### Test 10: Cancel Chat During Approval Wait +**Objective:** Verify cancellation doesn't break system + +**Steps:** +1. Send message: "calculate 88 * 11" +2. Wait for approval modal +3. Refresh page or reset session +4. Send new message + +**Expected Result:** +- Previous approval request is cleared +- New chat works normally +- No hanging approvals + +--- + +### Test 11: Rapid Approve Button Clicks +**Objective:** Verify no duplicate executions from double-clicking + +**Steps:** +1. Send message: "calculate 10 + 10" +2. Wait for approval modal +3. Rapidly click "Approve" button 5 times + +**Expected Result:** +- Tool executes only once +- No duplicate results +- No error messages + +--- + +### Test 12: Invalid Edited Arguments +**Objective:** Verify validation of edited arguments + +**Steps:** +1. Send message: "calculate 5 * 5" +2. Wait for approval modal +3. Edit arguments to invalid JSON: `{expression: "broken}` +4. Click "Approve" + +**Expected Result:** +- Either: validation error before approval +- Or: tool execution fails gracefully with error message +- User can retry or cancel + +--- + +## Admin Configuration Tests + +### Test 13: Admin-Mandated Approval (Cannot Override) +**Objective:** Verify admin settings override user preferences + +**Steps:** +1. Edit `config/overrides/tool_approvals.yml`: +```yaml +require_approval_by_default: true +tools: + calculator_evaluate: + require_approval: true + allow_edit: false +``` +2. Restart backend +3. Enable user-level auto-approval toggle +4. Send message: "calculate 100 * 2" + +**Expected Result:** +- Approval modal appears despite user auto-approval setting +- Admin requirement overrides user preference +- "Edit Arguments" button is disabled (allow_edit: false) + +--- + +### Test 14: Disable Argument Editing +**Objective:** Verify admin can prevent argument editing + +**Steps:** +1. Configure tool with `allow_edit: false` +2. Send message: "calculate 7 * 7" +3. Approval modal appears + +**Expected Result:** +- "Edit Arguments" button is hidden or disabled +- User can only approve or reject with original arguments + +--- + +## Agent Mode Tests + +### Test 15: Approvals in Agent Mode +**Objective:** Verify approval works with multi-step agent loops + +**Steps:** +1. Enable agent mode in UI +2. Send message: "calculate 10 * 10, then use that result to calculate another operation" +3. Approve first tool call +4. Wait for agent to process +5. 
Approve any subsequent tool calls + +**Expected Result:** +- Agent pauses at each approval request +- Agent continues after each approval +- Multi-step reasoning completes successfully +- All intermediate steps and results are visible + +--- + +## Testing Checklist + +Use this checklist to track test completion: + +- [ ] Test 1: Basic approval +- [ ] Test 2: Basic rejection +- [ ] Test 3: Argument editing +- [ ] Test 4: Sequential tools +- [ ] Test 5: Enable auto-approval +- [ ] Test 6: Disable auto-approval +- [ ] Test 7: Function-specific auto-approval +- [ ] Test 8: Mixed approval modes +- [ ] Test 9: Timeout handling +- [ ] Test 10: Session cancellation +- [ ] Test 11: Duplicate click prevention +- [ ] Test 12: Invalid argument validation +- [ ] Test 13: Admin override +- [ ] Test 14: Disabled editing +- [ ] Test 15: Agent mode approvals + +--- + +## Common Issues and Debugging + +**Approval modal doesn't appear:** +- Check `config/overrides/tool_approvals.yml` exists +- Verify `require_approval: true` for the tool +- Check browser console for WebSocket errors + +**Approve button does nothing:** +- Check backend logs for `tool_approval_response` message +- Verify WebSocket connection is open +- Look for approval manager logs + +**Auto-approval not working:** +- Verify configuration is loaded (check backend startup logs) +- Ensure backend was restarted after config changes +- Check user-level setting is enabled in UI + +**Tool executes twice:** +- This is a bug - report immediately +- Check for duplicate WebSocket messages in logs diff --git a/docs/tool-approval-final-implementation.md b/docs/tool-approval-final-implementation.md new file mode 100644 index 0000000..05aaeb5 --- /dev/null +++ b/docs/tool-approval-final-implementation.md @@ -0,0 +1,252 @@ +# Tool Approval System - Final Implementation + +## Overview + +The tool approval system provides a two-tier approval mechanism: +1. **Admin-level**: Administrators can mandate approval for specific tools +2. 
**User-level**: Users control auto-approval for non-admin-required tools + +## User Experience + +### Approval in Chat Area + +Approval requests now appear as inline system messages in the chat, similar to tool calls and agent messages: + +``` +┌─────────────────────────────────────────────────┐ +│ [S] System │ +│ ┌───────────────────────────────────────────┐ │ +│ │ [APPROVAL REQUIRED] code-executor_run_py…│ │ +│ │ │ │ +│ │ ▶ Tool Arguments (1 params) [Edit Args] │ │ +│ │ { │ │ +│ │ "code": "print('Hello, world!')" │ │ +│ │ } │ │ +│ │ │ │ +│ │ Rejection Reason (optional): │ │ +│ │ [________________________] │ │ +│ │ │ │ +│ │ [Reject] [Approve] │ │ +│ └───────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ +``` + +### Large Text Handling + +When editing large inputs (code, markdown, JSON): +- Edit mode shows each argument in a separate textarea +- Each textarea auto-sizes: min 3 rows, max 20 rows +- Container has max-height of 60vh (60% of viewport) +- Scrollable when content exceeds available space +- Full screen width for editing + +Example for large code input: +``` +┌─────────────────────────────────────────────────┐ +│ Edit Mode Active │ +│ ┌───────────────────────────────────────────┐ │ +│ │ code │ │ +│ │ ┌─────────────────────────────────────┐ │ │ +│ │ │ def analyze_data(df): │ │ ← Scrollable +│ │ │ # Process data │ │ area +│ │ │ results = [] │ │ (60vh) +│ │ │ for col in df.columns: │ │ +│ │ │ stats = calculate_stats(c…) │ │ +│ │ │ results.append(stats) │ │ +│ │ │ return results │ │ +│ │ └─────────────────────────────────────┘ │ │ +│ └───────────────────────────────────────────┘ │ +│ [Reject] [Approve (with edits)] │ +└─────────────────────────────────────────────────┘ +``` + +### User Settings + +New toggle in Settings panel: + +``` +┌─────────────────────────────────────────────────┐ +│ Auto-Approve Tool Calls [●────○] │ +│ │ +│ When enabled, tools that don't require admin │ +│ approval will execute automatically without │ +│ prompting. Tools that require admin approval │ +│ will still prompt for confirmation. │ +│ │ +│ ⚠ Currently: You will be prompted to approve │ +│ all tool calls unless admin has disabled │ +│ approval for specific tools. │ +└─────────────────────────────────────────────────┘ +``` + +When auto-approval is enabled: +``` +┌─────────────────────────────────────────────────┐ +│ [S] System │ +│ ┌───────────────────────────────────────────┐ │ +│ │ [APPROVAL REQUIRED] [AUTO-APPROVING...] 
│ │
+│ │ calculator_eval                           │ │
+│ │ ▶ Tool Arguments (1 params)               │ │
+│ └───────────────────────────────────────────┘ │
+└─────────────────────────────────────────────────┘
+```
+(Automatically approves after brief delay)
+
+## Technical Implementation
+
+### Backend Logic
+
+```python
+def requires_approval(tool_name, config_manager):
+    """
+    Returns: (needs_approval, allow_edit, admin_required)
+
+    admin_required = True: Admin mandates approval (always enforced)
+    admin_required = False: User-level approval (can auto-approve)
+    """
+    admin_config = config_manager.tool_approvals_config
+
+    if tool_name in admin_config.tools:
+        tool_config = admin_config.tools[tool_name]
+        if tool_config.require_approval:
+            return (True, tool_config.allow_edit, True)  # Admin-required
+        return (False, True, False)  # Admin disabled
+
+    if admin_config.require_approval_by_default:
+        return (True, True, True)  # Admin-required by default
+
+    return (True, True, False)  # User-level approval
+```
+
+### Frontend Auto-Approval Logic
+
+```javascript
+// In ToolApprovalMessage component
+useEffect(() => {
+  if (settings?.autoApproveTools &&
+      !message.admin_required &&
+      message.status === 'pending') {
+    // Auto-approve after brief delay to show message
+    const timer = setTimeout(() => {
+      sendApprovalResponse({
+        tool_call_id: message.tool_call_id,
+        approved: true,
+        arguments: message.arguments
+      })
+    }, 100)
+    return () => clearTimeout(timer)
+  }
+}, [settings, message])
+```
+
+## Configuration Examples
+
+### Example 1: Admin Requires Specific Tools
+
+**Backend config** (`config/overrides/tool-approvals.json`):
+```json
+{
+  "require_approval_by_default": false,
+  "tools": {
+    "code-executor_run_python": {
+      "require_approval": true,
+      "allow_edit": true
+    },
+    "code-executor_run_bash": {
+      "require_approval": true,
+      "allow_edit": true
+    }
+  }
+}
+```
+
+**User experience:**
+- Python/Bash execution: ALWAYS prompts (admin-required, no auto-approval)
+- Other tools: Prompt by default, can enable auto-approval in settings
+- User toggles auto-approval ON: Calculator/other tools auto-approve
+- User toggles auto-approval OFF: All tools prompt
+
+### Example 2: Strict Admin Mode
+
+**Backend config:**
+```json
+{
+  "require_approval_by_default": true,
+  "tools": {}
+}
+```
+
+**User experience:**
+- ALL tools: ALWAYS prompt (admin-required)
+- Auto-approval toggle has no effect (admin overrides)
+- Maximum security mode
+
+### Example 3: User-Controlled Mode
+
+**Backend config:**
+```json
+{
+  "require_approval_by_default": false,
+  "tools": {}
+}
+```
+
+**User experience:**
+- All tools: Prompt by default
+- User can enable auto-approval for all tools
+- Maximum user flexibility
+
+## Workflow Examples
+
+### Scenario 1: Code Execution with Editing
+
+1. User: "Write and run a Python script to analyze this data"
+2. LLM generates code
+3. System shows approval message in chat:
+   - Yellow badge: "APPROVAL REQUIRED"
+   - Tool name: "code-executor_run_python"
+   - Arguments collapsed by default
+4. User clicks expand arrow to view code
+5. User clicks "Edit Arguments"
+6. Large textarea appears (60vh height, scrollable)
+7. User modifies code
+8. User clicks "Approve (with edits)"
+9. Tool executes with edited code
+10. Result appears in chat
+
+### Scenario 2: Auto-Approval Enabled
+
+1. User enables "Auto-Approve Tool Calls" in Settings
+2. User: "Calculate 15% of 250"
+3. LLM calls calculator_eval
+4. System shows approval message with "AUTO-APPROVING..." badge
+5. After 100ms, automatically approves
+6. Tool executes
+7. Result appears in chat
+
+### Scenario 3: Admin-Required Tool
+
+1. User enables auto-approval in Settings
+2. 
User: "Run this bash command" +3. LLM calls code-executor_run_bash +4. System shows approval message (admin-required flag set) +5. Auto-approval does NOT activate (admin override) +6. User must manually approve or reject +7. User reviews command and approves +8. Tool executes + +## Benefits + +1. **Inline Experience**: Approval fits naturally in chat flow +2. **Contextual**: See approval request in context of conversation +3. **Flexible**: Large text editing works well for code/markdown +4. **Two-Tier Security**: + - Admins enforce critical tool approval + - Users control convenience vs. security for other tools +5. **Clear Indicators**: Visual feedback for auto-approval status +6. **Backward Compatible**: Works with existing agent modes + +## Migration Notes + +- Modal dialog code removed but not deleted (ToolApprovalDialog.jsx still exists for reference) +- Settings persist in localStorage +- Default setting: auto-approval OFF (safe default) +- Existing admin configs work unchanged +- New `admin_required` flag is backward compatible (defaults to false if missing) diff --git a/docs/tool-approval-system.md b/docs/tool-approval-system.md new file mode 100644 index 0000000..57386af --- /dev/null +++ b/docs/tool-approval-system.md @@ -0,0 +1,308 @@ +# Tool Approval System + +## Overview + +The tool approval system allows administrators to configure which tools require user approval before execution. This provides an additional security layer for potentially sensitive or dangerous operations. + +## Features + +- **Configurable Approval Requirements**: Specify which tools require approval on a per-tool basis +- **Global Default Setting**: Set a default approval requirement for all tools +- **Argument Editing**: Allow users to edit tool arguments before approving execution +- **Timeout Handling**: Automatically reject tool calls that don't receive approval within a configurable timeout period +- **Real-time UI**: Modal dialog shows pending approval requests with full argument details + +## Configuration + +### Configuration Files + +Tool approval settings are managed in JSON configuration files: + +- **Defaults**: `config/defaults/tool-approvals.json` +- **Overrides**: `config/overrides/tool-approvals.json` + +### Configuration Format + +```json +{ + "require_approval_by_default": false, + "tools": { + "server_toolname": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +### Configuration Fields + +- `require_approval_by_default` (boolean): If true, all tools require approval unless explicitly configured otherwise +- `tools` (object): Tool-specific approval settings + - Key: Tool name in the format `server_toolname` (e.g., `code-executor_run_python`) + - Value: Tool approval configuration object: + - `require_approval` (boolean): Whether this tool requires approval + - `allow_edit` (boolean): Whether users can edit the tool arguments before approval + +### Example Configuration + +```json +{ + "require_approval_by_default": false, + "tools": { + "code-executor_run_python": { + "require_approval": true, + "allow_edit": true + }, + "code-executor_run_bash": { + "require_approval": true, + "allow_edit": true + }, + "filesystem_write_file": { + "require_approval": true, + "allow_edit": false + } + } +} +``` + +In this example: +- Most tools don't require approval (default is false) +- Python and Bash code execution require approval with argument editing allowed +- File write operations require approval but don't allow argument editing + +## User Experience + +### Approval Dialog + 
+When a tool requiring approval is called, the user sees a modal dialog with: + +1. **Tool Information**: Tool name and server +2. **Arguments Display**: Full JSON view of the tool arguments +3. **Edit Mode** (if allowed): Ability to modify arguments before approval +4. **Actions**: + - **Approve**: Execute the tool with current (or edited) arguments + - **Reject**: Cancel the tool execution with optional reason + +### Timeout Behavior + +- If the user doesn't respond within 5 minutes (300 seconds), the tool call is automatically rejected +- The timeout error is displayed in the chat interface + +## Backend Architecture + +### Components + +1. **Configuration Manager** (`backend/modules/config/config_manager.py`) + - Loads and validates tool approval configuration + - Provides `ToolApprovalsConfig` model + +2. **Approval Manager** (`backend/application/chat/approval_manager.py`) + - Manages pending approval requests + - Handles approval/rejection responses + - Implements timeout logic using asyncio futures + +3. **Tool Execution** (`backend/application/chat/utilities/tool_utils.py`) + - Checks if tool requires approval before execution + - Sends approval request to frontend + - Waits for user response + - Executes tool with approved (potentially edited) arguments + +4. **WebSocket Handler** (`backend/main.py`) + - Handles `tool_approval_response` messages from frontend + - Routes responses to approval manager + +### Workflow + +``` +1. LLM decides to call a tool +2. Tool execution checks if approval is required +3. If approval required: + a. Send tool_approval_request to frontend + b. Create approval request in approval manager + c. Wait for response (with timeout) + d. Handle approval/rejection +4. Execute tool (if approved) +5. Return result to LLM +``` + +## Frontend Architecture + +### Components + +1. **ToolApprovalDialog** (`frontend/src/components/ToolApprovalDialog.jsx`) + - React component that displays the approval dialog + - Handles argument editing (if allowed) + - Sends approval/rejection response + +2. **WebSocket Handler** (`frontend/src/handlers/chat/websocketHandlers.js`) + - Handles `tool_approval_request` messages from backend + - Updates approval request state + +3. **ChatContext** (`frontend/src/contexts/ChatContext.jsx`) + - Manages approval request state + - Provides methods to send responses and clear requests + +4. **App** (`frontend/src/App.jsx`) + - Renders the approval dialog when request is present + - Handles approval response submission + +### Message Protocol + +#### Backend → Frontend: `tool_approval_request` + +```json +{ + "type": "tool_approval_request", + "tool_call_id": "unique-call-id", + "tool_name": "server_toolname", + "arguments": { + "param1": "value1", + "param2": "value2" + }, + "allow_edit": true +} +``` + +#### Frontend → Backend: `tool_approval_response` + +```json +{ + "type": "tool_approval_response", + "tool_call_id": "unique-call-id", + "approved": true, + "arguments": { + "param1": "edited_value1", + "param2": "value2" + }, + "reason": "Optional rejection reason" +} +``` + +## Testing + +### Running Tests + +```bash +# Test approval manager +python -m pytest backend/tests/test_approval_manager.py -v + +# Test configuration loading +python -m pytest backend/tests/test_config_manager.py -v +``` + +### Test Coverage + +The test suite includes: +- Approval request creation and lifecycle +- Approval/rejection handling +- Timeout behavior +- Manager singleton pattern +- Full approval workflow simulation + +## Security Considerations + +1. 
**Default Deny**: When in doubt, configure tools to require approval +2. **Least Privilege**: Only enable argument editing when necessary +3. **Audit Trail**: All approval decisions are logged +4. **Timeout Protection**: Prevents indefinite hanging on approval requests +5. **User Authentication**: Approval responses are tied to authenticated sessions + +## Future Enhancements + +Potential improvements: +- Role-based approval requirements (different settings per user group) +- Approval history and audit log +- Bulk approval for multiple tool calls +- Approval delegation (assign to another user) +- Custom timeout per tool +- Pre-approved argument patterns (e.g., allow specific file paths) + +## Troubleshooting + +### Tools Not Requiring Approval + +Check: +1. Configuration file is loaded correctly +2. Tool name matches format `server_toolname` +3. `require_approval` is set to `true` in config +4. Configuration cache is cleared if recently changed + +### Approval Dialog Not Appearing + +Check: +1. WebSocket connection is active +2. Frontend has loaded the latest build +3. Browser console for JavaScript errors +4. Backend logs for approval request sending + +### Timeouts + +If approval requests timeout frequently: +1. Check network connectivity +2. Verify user is actively monitoring the chat +3. Consider increasing timeout in `execute_single_tool` (currently 300 seconds) + +## Example Usage Scenarios + +### Scenario 1: Code Execution Review + +**Configuration**: +```json +{ + "tools": { + "code-executor_run_python": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +**User Experience**: +1. User asks: "Create a Python script to analyze this data" +2. LLM generates code and wants to execute it +3. User sees approval dialog with the Python code +4. User reviews code, optionally edits it +5. User approves or rejects execution + +### Scenario 2: File System Protection + +**Configuration**: +```json +{ + "tools": { + "filesystem_delete_file": { + "require_approval": true, + "allow_edit": false + }, + "filesystem_write_file": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +**User Experience**: +- File deletions always require approval (no editing to prevent accidents) +- File writes require approval and allow editing the content/path + +### Scenario 3: Strict Mode + +**Configuration**: +```json +{ + "require_approval_by_default": true, + "tools": { + "calculator_eval": { + "require_approval": false + } + } +} +``` + +**User Experience**: +- All tools require approval by default +- Only safe tools like calculator are auto-approved +- Maximum control and security diff --git a/fn-calling-plan.md b/fn-calling-plan.md new file mode 100644 index 0000000..62ed116 --- /dev/null +++ b/fn-calling-plan.md @@ -0,0 +1,52 @@ +# Tool Approval: Option B Implementation Plan + +This plan implements Option B (cleaner) and addresses all failing requirements: + +- Only add ToolApprovalConfig entries when a tool is explicitly listed under `require_approval` in `mcp.json`. +- Treat `allow_edit` as moot for approval requirement. UI will always allow editing when approval is requested. +- Add FORCE flag with correct semantics: `FORCE_TOOL_APPROVAL_GLOBALLY=true` forces approval for all tools (admin-required) and overrides everything else. +- Ensure the UI does not display “Thinking…” while waiting on user approval. 
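+
+As a reference while reading the changes below, here is a minimal sketch of the target decision logic (not the shipped implementation; the attribute names `app_settings.force_tool_approval_globally` and `tool_approvals_config` follow sections 1–3 of this plan):
+
+```python
+def requires_approval(tool_name: str, config_manager) -> tuple[bool, bool, bool]:
+    """Return (needs_approval, allow_edit, admin_required) under Option B."""
+    settings = config_manager.app_settings
+    approvals = config_manager.tool_approvals_config
+
+    # FORCE flag overrides everything: admin-enforced approval for all tools.
+    if settings.force_tool_approval_globally:
+        return (True, True, True)
+
+    # Per-tool entries exist only for tools explicitly listed under
+    # require_approval in mcp.json, so presence alone means admin-required.
+    if tool_name in approvals.tools:
+        return (True, True, True)
+
+    # Fall back to the default: admin-required if true, user-level otherwise.
+    if approvals.require_approval_by_default:
+        return (True, True, True)
+    return (True, True, False)
+```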
+ +## Backend changes + +1) AppSettings: FORCE flag +- Add `force_tool_approval_globally: bool = Field(default=False, validation_alias="FORCE_TOOL_APPROVAL_GLOBALLY")` to `AppSettings`. + +2) Approval config build (Option B) +- In `ConfigManager.tool_approvals_config`: + - Build `tools` map ONLY for tools that are explicitly in a server’s `require_approval` list (fully-qualified `server_tool`). + - Do NOT add entries for tools that appear only in `allow_edit` lists; `allow_edit` does not affect approval requirement. + +3) requires_approval(tool_name, config_manager) +- Short-circuit: if `config_manager.app_settings.force_tool_approval_globally` is true, return `(True, True, True)` — approval required, admin-enforced, always editable. +- Per-tool: if tool present in `tool_approvals_config.tools` (i.e., explicitly require_approval), return `(True, True, True)`. +- Default: fall back to `tool_approvals_config.require_approval_by_default`: + - If true: `(True, True, True)` (admin-required by default). + - If false: `(True, True, False)` (user-level approval; UI can auto-approve if user enables it). + +Notes: +- “Always editable” is enforced at the UI level; backend can keep sending `allow_edit` for compatibility but it no longer gates editing. + +## Frontend changes + +4) Always-allow editing in approval UI +- In `Message.jsx` ToolApprovalMessage, remove the `message.allow_edit` condition and always render the Edit controls. + +5) Hide Thinking during approval wait +- In `websocketHandlers.js`, when receiving `tool_approval_request`, call `setIsThinking(false)` before adding the approval message. + +## Docs and env + +6) .env example +- Add `FORCE_TOOL_APPROVAL_GLOBALLY=false` with comment: “true = force approval for all tools (admin-required).” + +## Tests (follow-up) + +- Backend unit tests: FORCE short-circuit, per-tool precedence, allow_edit-only doesn’t disable defaults, default true vs false behavior. +- Frontend tests: thinking hidden on approval request; edit controls visible regardless of `allow_edit`. + +## Acceptance criteria + +- With `REQUIRE_TOOL_APPROVAL_BY_DEFAULT=true` (and FORCE=false), any tool call triggers an approval request (admin-required). UI shows approval with editable args; no “Thinking…” while waiting. +- With `FORCE_TOOL_APPROVAL_GLOBALLY=true`, same approval behavior regardless of per-tool/default settings. +- With `REQUIRE_TOOL_APPROVAL_BY_DEFAULT=false` and user auto-approve ON, non-admin-required approvals auto-approve; admin-required approvals still prompt; inline toggle visible. 
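+
+A sketch of one of the follow-up backend unit tests listed above (the FORCE short-circuit), assuming `requires_approval` is importable the same way as in the existing `backend/tests/test_tool_approval_utils.py`:
+
+```python
+from unittest.mock import Mock
+
+from application.chat.utilities.tool_utils import requires_approval
+
+
+def test_force_flag_short_circuits_everything():
+    """FORCE_TOOL_APPROVAL_GLOBALLY=true wins over per-tool and default settings."""
+    config_manager = Mock()
+    config_manager.app_settings.force_tool_approval_globally = True
+
+    needs_approval, allow_edit, admin_required = requires_approval(
+        "any_tool", config_manager
+    )
+
+    # Approval required, args editable in UI, and admin-enforced (no auto-approve).
+    assert (needs_approval, allow_edit, admin_required) == (True, True, True)
+```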
diff --git a/frontend/e2e/tool-approval.spec.js b/frontend/e2e/tool-approval.spec.js new file mode 100644 index 0000000..23d51d5 --- /dev/null +++ b/frontend/e2e/tool-approval.spec.js @@ -0,0 +1,299 @@ +import { test, expect } from '@playwright/test' + +test.describe('Tool Approval E2E Tests', () => { + test.beforeEach(async ({ page }) => { + await page.goto('/') + await page.waitForLoadState('networkidle') + }) + + test('should display tool approval dialog when tool requires approval', async ({ page }) => { + // Send a message that triggers a tool requiring approval + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Execute a dangerous operation') + await messageInput.press('Enter') + + // Wait for approval dialog to appear + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Verify dialog elements + await expect(page.locator('text=Tool Approval Required')).toBeVisible() + await expect(page.locator('button:has-text("Approve")')).toBeVisible() + await expect(page.locator('button:has-text("Reject")')).toBeVisible() + } + }) + + test('should allow approving a tool call', async ({ page }) => { + // Trigger tool approval + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Run approved tool') + await messageInput.press('Enter') + + // Wait for approval dialog + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Click approve button + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Dialog should close + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + + // Tool should execute (look for success indicators) + await page.waitForTimeout(2000) + } + }) + + test('should allow rejecting a tool call', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Run rejected tool') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Click reject button + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + + // Dialog should close + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + + // Should show rejection message + await page.waitForTimeout(2000) + } + }) + + test('should allow editing tool arguments before approval', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Execute editable tool') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Check if edit button is present + const editButton = page.locator('button:has-text("Edit Mode")').first() + if (await editButton.isVisible()) { + await editButton.click() + + // Switch to edit mode + await expect(page.locator('button:has-text("View Mode")')).toBeVisible() + + // Find and edit an argument (this is generic - actual implementation may vary) + const textarea = page.locator('textarea').first() + if (await textarea.isVisible()) { + await 
textarea.fill('edited_value') + } + + // Approve with edits + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + } + } + }) + + test('should show rejection reason input', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Tool to reject with reason') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Find rejection reason input + const reasonInput = page.locator('input[placeholder*="reason"], input[placeholder*="Rejection"]').first() + if (await reasonInput.isVisible()) { + await reasonInput.fill('This is not safe') + + // Reject with reason + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + } + } + }) + + test('should handle multiple pending approvals', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + // Trigger multiple tools + await messageInput.fill('Execute multiple tools requiring approval') + await messageInput.press('Enter') + + // Wait for first approval dialog + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Approve first one + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Wait a bit + await page.waitForTimeout(1000) + + // Second approval might appear + const secondApproval = page.locator('text=Tool Approval Required') + if (await secondApproval.isVisible({ timeout: 3000 }).catch(() => false)) { + const approveButton2 = page.locator('button:has-text("Approve")').first() + await approveButton2.click() + } + } + }) + + test('should display tool name and arguments in approval dialog', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Show tool details') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Check for "Arguments" section + await expect(page.locator('text=Arguments')).toBeVisible() + + // Dialog should show some content (tool name or arguments) + const dialogContent = page.locator('[class*="dialog"], [class*="modal"]').first() + if (await dialogContent.isVisible()) { + const content = await dialogContent.textContent() + expect(content.length).toBeGreaterThan(20) + } + } + }) + + test('should close approval dialog after approval', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Quick approval test') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Verify dialog is visible + const dialog = page.locator('text=Tool Approval Required') + await expect(dialog).toBeVisible() + + // Approve + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Dialog should disappear + await 
expect(dialog).not.toBeVisible({ timeout: 5000 }) + } + }) + + test('should handle approval timeout gracefully', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Test timeout scenario') + await messageInput.press('Enter') + + // Wait for approval dialog + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Wait for a long time without approving (simulating timeout) + // In production, this would trigger timeout - here we just verify the UI stays responsive + await page.waitForTimeout(2000) + + // Dialog should still be visible and functional + await expect(page.locator('button:has-text("Approve")')).toBeVisible() + await expect(page.locator('button:has-text("Reject")')).toBeVisible() + } + } + }) + + test('should preserve message history after approval workflow', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + // Send initial message + await messageInput.fill('First message') + await messageInput.press('Enter') + await page.waitForTimeout(1000) + + // Send message that triggers approval + await messageInput.fill('Tool requiring approval') + await messageInput.press('Enter') + + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Approve + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + await page.waitForTimeout(1000) + + // Check that previous messages are still visible + const chatArea = page.locator('[class*="chat"], [class*="message"]').first() + if (await chatArea.isVisible()) { + const content = await chatArea.textContent() + // Should contain indication of message history + expect(content.length).toBeGreaterThan(0) + } + } + } + }) +}) + +test.describe('Tool Approval Configuration E2E Tests', () => { + test('should respect tool approval settings', async ({ page }) => { + await page.goto('/') + await page.waitForLoadState('networkidle') + + // Look for settings panel or configuration + const settingsButton = page.locator('[aria-label*="settings"], button:has-text("Settings")').first() + + if (await settingsButton.isVisible({ timeout: 3000 }).catch(() => false)) { + await settingsButton.click() + await page.waitForTimeout(500) + + // Look for approval-related settings + const approvalSettings = page.locator('text=/approval/i, text=/require.*approval/i') + if (await approvalSettings.isVisible({ timeout: 3000 }).catch(() => false)) { + // Settings panel exists - verify it's accessible + expect(await approvalSettings.isVisible()).toBeTruthy() + } + } + }) + + test('should display appropriate UI for admin-required approvals', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Admin-restricted tool') + await messageInput.press('Enter') + + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Check for admin-required indicator (this is implementation-specific) + const dialogText = await page.locator('text=Tool Approval Required').textContent() + 
expect(dialogText).toBeTruthy() + + // Close the dialog + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + } + } + }) +}) diff --git a/frontend/src/components/Message.jsx b/frontend/src/components/Message.jsx index 23044a2..f315419 100644 --- a/frontend/src/components/Message.jsx +++ b/frontend/src/components/Message.jsx @@ -496,6 +496,221 @@ const downloadReturnedFile = (filename, base64Data) => { } } +// Tool Approval Message Component +const ToolApprovalMessage = ({ message }) => { + const { sendApprovalResponse, settings, updateSettings } = useChat() + const [isEditing, setIsEditing] = useState(false) + const [editedArgs, setEditedArgs] = useState(message.arguments) + const [reason, setReason] = useState('') + const [isExpanded, setIsExpanded] = useState(true) + + // Auto-approve if user has the setting enabled and this is not an admin-required approval + useEffect(() => { + if (settings?.autoApproveTools && !message.admin_required && message.status === 'pending') { + // Auto-approve after a brief delay to show the message + const timer = setTimeout(() => { + sendApprovalResponse({ + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: true, + arguments: message.arguments, + }) + }, 100) + return () => clearTimeout(timer) + } + }, [settings?.autoApproveTools, message.admin_required, message.status, message.tool_call_id, message.arguments, sendApprovalResponse]) + + const handleApprove = () => { + console.log('[ToolApproval] Approve button clicked', { + tool_call_id: message.tool_call_id, + tool_name: message.tool_name, + isEditing, + arguments: isEditing ? editedArgs : message.arguments + }) + + const response = { + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: true, + arguments: isEditing ? editedArgs : message.arguments, + } + + console.log('[ToolApproval] Sending approval response:', JSON.stringify(response, null, 2)) + sendApprovalResponse(response) + console.log('[ToolApproval] Approval response sent') + } + + const handleReject = () => { + console.log('[ToolApproval] Reject button clicked', { + tool_call_id: message.tool_call_id, + tool_name: message.tool_name, + reason + }) + + const response = { + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: false, + reason: reason || 'User rejected the tool call', + } + + console.log('[ToolApproval] Sending rejection response:', JSON.stringify(response, null, 2)) + sendApprovalResponse(response) + console.log('[ToolApproval] Rejection response sent') + } + + const handleArgumentChange = (key, value) => { + setEditedArgs(prev => ({ + ...prev, + [key]: value + })) + } + + // Don't show if already responded + if (message.status === 'approved' || message.status === 'rejected') { + return ( +
+
+ + {message.status === 'approved' ? 'APPROVED' : 'REJECTED'} + + {message.tool_name} +
+ {message.status === 'rejected' && message.rejection_reason && ( +
Reason: {message.rejection_reason}
+ )} +
+ ) + } + + return ( +
+
+ + {settings?.autoApproveTools && !message.admin_required ? 'AUTO-APPROVED' : 'APPROVAL REQUIRED'} + + {message.tool_name} + {!message.admin_required && ( + + )} +
+ + {/* Arguments Section */} +
+
+ + + {isExpanded && ( + <> +
+ +
+ + {!isEditing ? ( +
+
+                    {JSON.stringify(message.arguments, null, 2)}
+                  
+
+ ) : ( +
+ {Object.entries(editedArgs).map(([key, value]) => ( +
+ +