diff --git a/.env.example b/.env.example index 8ba9f6c..a4aa61c 100644 --- a/.env.example +++ b/.env.example @@ -76,7 +76,13 @@ AGENT_DEFAULT_ENABLED=true FEATURE_AGENT_MODE_AVAILABLE=true # Agent loop strategy: react (structured reasoning) or think-act (faster, concise) AGENT_LOOP_STRATEGY=think-act -# (Adjust above to stage rollouts. For a bare-bones chat set them all to false.) + +# Tool approval configuration +# Require approval by default for all tools (can be overridden per-tool in mcp.json) +REQUIRE_TOOL_APPROVAL_BY_DEFAULT=false +# Force approval for all tools (admin-enforced) regardless of per-tool or default settings +# Set to true to require approval for every tool call +FORCE_TOOL_APPROVAL_GLOBALLY=false APP_LOG_DIR=/workspaces/atlas-ui-3/logs diff --git a/.github/codeql/extensions.yml b/.github/codeql/extensions.yml new file mode 100644 index 0000000..5e00583 --- /dev/null +++ b/.github/codeql/extensions.yml @@ -0,0 +1,14 @@ +# CodeQL data extension to model sanitizers in this repo +# Registers sanitize_for_logging as a sanitizer so taint is removed when data flows through it. +# Reference: CodeQL model packs (Python) + +extensions: + - addsTo: + pack: codeql/python-all + extensible: sanitizer + data: + # Format: [module, function, output, taintKinds] + # Mark the function's return value as sanitized for all taint kinds. + - ["core.utils", "sanitize_for_logging", "ReturnValue", "All"] + # Also include fully-qualified path with backend prefix in case imports resolve that way in analysis + - ["backend.core.utils", "sanitize_for_logging", "ReturnValue", "All"] diff --git a/backend/application/chat/agent/act_loop.py b/backend/application/chat/agent/act_loop.py index f74c818..1578093 100644 --- a/backend/application/chat/agent/act_loop.py +++ b/backend/application/chat/agent/act_loop.py @@ -30,11 +30,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager def _extract_finished_args(self, tool_calls: List[Dict[str, Any]]) -> Optional[str]: """Extract final_answer from finished tool call if present.""" @@ -151,6 +153,7 @@ async def run( }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) messages.append({ diff --git a/backend/application/chat/agent/factory.py b/backend/application/chat/agent/factory.py index c7538a5..b703871 100644 --- a/backend/application/chat/agent/factory.py +++ b/backend/application/chat/agent/factory.py @@ -30,6 +30,7 @@ def __init__( tool_manager: Optional[ToolManagerProtocol] = None, prompt_provider: Optional[PromptProvider] = None, connection: Optional[ChatConnectionProtocol] = None, + config_manager=None, ): """ Initialize factory with shared dependencies. 
@@ -39,11 +40,13 @@ def __init__( tool_manager: Optional tool manager prompt_provider: Optional prompt provider connection: Optional connection for sending updates + config_manager: Optional config manager for approval settings """ self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager # Registry of available strategies self._strategy_registry = { @@ -93,6 +96,7 @@ def create(self, strategy: str = "think-act") -> AgentLoopProtocol: tool_manager=self.tool_manager, prompt_provider=self.prompt_provider, connection=self.connection, + config_manager=self.config_manager, ) # Cache for future use diff --git a/backend/application/chat/agent/react_loop.py b/backend/application/chat/agent/react_loop.py index a69ff96..36a2a2e 100644 --- a/backend/application/chat/agent/react_loop.py +++ b/backend/application/chat/agent/react_loop.py @@ -31,11 +31,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager # ---- Internal helpers (mirroring service implementation) ---- def _latest_user_question(self, msgs: List[Dict[str, Any]]) -> str: @@ -243,6 +245,7 @@ async def run( }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) tool_results.append(result) messages.append({ diff --git a/backend/application/chat/agent/think_act_loop.py b/backend/application/chat/agent/think_act_loop.py index 7a9b672..b9df776 100644 --- a/backend/application/chat/agent/think_act_loop.py +++ b/backend/application/chat/agent/think_act_loop.py @@ -26,11 +26,13 @@ def __init__( tool_manager: Optional[ToolManagerProtocol], prompt_provider: Optional[PromptProvider], connection: Any = None, + config_manager=None, ) -> None: self.llm = llm self.tool_manager = tool_manager self.prompt_provider = prompt_provider self.connection = connection + self.config_manager = config_manager async def run( self, @@ -140,6 +142,7 @@ async def emit_think(text: str, step: int) -> None: }, tool_manager=self.tool_manager, update_callback=(self.connection.send_json if self.connection else None), + config_manager=self.config_manager, ) messages.append({"role": "tool", "content": result.content, "tool_call_id": result.tool_call_id}) # Notify service to ingest artifacts diff --git a/backend/application/chat/approval_manager.py b/backend/application/chat/approval_manager.py new file mode 100644 index 0000000..47c7c13 --- /dev/null +++ b/backend/application/chat/approval_manager.py @@ -0,0 +1,147 @@ +""" +Tool approval service for managing approval requests and responses. + +This module handles the approval workflow for tool calls, allowing users to +approve, reject, or edit tool arguments before execution. 
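+
+Typical flow, shown as an illustrative sketch (the real call sites are
+``execute_single_tool`` in ``tool_utils.py`` and the websocket handler in
+``main.py``; the two halves run in separate tasks, and the ids/values below
+are placeholders)::
+
+    manager = get_approval_manager()
+    request = manager.create_approval_request(
+        tool_call_id="call_1", tool_name="my_tool",
+        arguments={"arg": "value"}, allow_edit=True,
+    )
+    # Executor side: block until the user responds or the timeout expires.
+    response = await request.wait_for_response(timeout=300.0)
+    manager.cleanup_request("call_1")
+
+    # Websocket side (a separate task), invoked when the user's answer arrives:
+    manager.handle_approval_response(
+        "call_1", approved=True, arguments={"arg": "edited value"}
+    )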
+""" + +import asyncio +import logging +from typing import Dict, Any, Optional + +from core.utils import sanitize_for_logging + +logger = logging.getLogger(__name__) + + +class ToolApprovalRequest: + """Represents a pending tool approval request.""" + + def __init__( + self, + tool_call_id: str, + tool_name: str, + arguments: Dict[str, Any], + allow_edit: bool = True + ): + self.tool_call_id = tool_call_id + self.tool_name = tool_name + self.arguments = arguments + self.allow_edit = allow_edit + self.future: asyncio.Future = asyncio.Future() + + async def wait_for_response(self, timeout: float = 300.0) -> Dict[str, Any]: + """ + Wait for user response to this approval request. + + Args: + timeout: Maximum time to wait in seconds (default 5 minutes) + + Returns: + Dict with 'approved', 'arguments', and optional 'reason' + + Raises: + asyncio.TimeoutError: If timeout is reached + """ + try: + return await asyncio.wait_for(self.future, timeout=timeout) + except asyncio.TimeoutError: + logger.warning(f"Approval request timed out for tool {self.tool_name}") + raise + + def set_response(self, approved: bool, arguments: Optional[Dict[str, Any]] = None, reason: Optional[str] = None): + """Set the user's response to this approval request.""" + if not self.future.done(): + self.future.set_result({ + "approved": approved, + "arguments": arguments or self.arguments, + "reason": reason + }) + + +class ToolApprovalManager: + """Manages tool approval requests and responses.""" + + def __init__(self): + self._pending_requests: Dict[str, ToolApprovalRequest] = {} + + def create_approval_request( + self, + tool_call_id: str, + tool_name: str, + arguments: Dict[str, Any], + allow_edit: bool = True + ) -> ToolApprovalRequest: + """ + Create a new approval request. + + Args: + tool_call_id: Unique ID for this tool call + tool_name: Name of the tool being called + arguments: Tool arguments + allow_edit: Whether to allow editing of arguments + + Returns: + ToolApprovalRequest object + """ + request = ToolApprovalRequest(tool_call_id, tool_name, arguments, allow_edit) + self._pending_requests[tool_call_id] = request + logger.info(f"Created approval request for tool {sanitize_for_logging(tool_name)} (call_id: {sanitize_for_logging(tool_call_id)})") + return request + + def handle_approval_response( + self, + tool_call_id: str, + approved: bool, + arguments: Optional[Dict[str, Any]] = None, + reason: Optional[str] = None + ) -> bool: + """ + Handle a user's response to an approval request. 
+ + Args: + tool_call_id: ID of the tool call being responded to + approved: Whether the user approved the call + arguments: Potentially edited arguments (if allowed) + reason: Optional reason for rejection + + Returns: + True if request was found and handled, False otherwise + """ + logger.info(f"handle_approval_response called: tool_call_id={sanitize_for_logging(tool_call_id)}, approved={sanitize_for_logging(approved)}") + logger.info(f"Pending requests: {[sanitize_for_logging(key) for key in self._pending_requests.keys()]}") + + request = self._pending_requests.get(tool_call_id) + if request is None: + logger.warning(f"Received approval response for unknown tool call: {sanitize_for_logging(tool_call_id)}") + logger.warning(f"Available pending requests: {list(self._pending_requests.keys())}") + return False + + logger.info(f"Found pending request for {sanitize_for_logging(tool_call_id)}, setting response") + request.set_response(approved, arguments, reason) + # Keep the request in the dict for a bit to avoid race conditions + # It will be cleaned up later + logger.info(f"Approval response handled for tool {sanitize_for_logging(request.tool_name)}: approved={approved}") + return True + + def cleanup_request(self, tool_call_id: str): + """Remove a completed approval request.""" + if tool_call_id in self._pending_requests: + del self._pending_requests[tool_call_id] + logger.debug(f"Cleaned up approval request: {tool_call_id}") + + def get_pending_requests(self) -> Dict[str, ToolApprovalRequest]: + """Get all pending approval requests.""" + return dict(self._pending_requests) + + +# Global approval manager instance (one per application) +_approval_manager: Optional[ToolApprovalManager] = None + + +def get_approval_manager() -> ToolApprovalManager: + """Get the global tool approval manager instance.""" + global _approval_manager + if _approval_manager is None: + _approval_manager = ToolApprovalManager() + return _approval_manager diff --git a/backend/application/chat/modes/tools.py b/backend/application/chat/modes/tools.py index a2d9fc0..2f311cd 100644 --- a/backend/application/chat/modes/tools.py +++ b/backend/application/chat/modes/tools.py @@ -33,6 +33,7 @@ def __init__( event_publisher: EventPublisher, prompt_provider: Optional[PromptProvider] = None, artifact_processor: Optional[Callable[[Session, List[ToolResult], Optional[UpdateCallback]], Awaitable[None]]] = None, + config_manager=None, ): """ Initialize tools mode runner. 
@@ -43,12 +44,14 @@ def __init__( event_publisher: Event publisher for UI updates prompt_provider: Optional prompt provider artifact_processor: Optional callback for processing tool artifacts + config_manager: Optional config manager for approval settings """ self.llm = llm self.tool_manager = tool_manager self.event_publisher = event_publisher self.prompt_provider = prompt_provider self.artifact_processor = artifact_processor + self.config_manager = config_manager async def run( self, @@ -119,6 +122,7 @@ async def run( llm_caller=self.llm, prompt_provider=self.prompt_provider, update_callback=update_callback or self._get_send_json(), + config_manager=self.config_manager, ) # Process artifacts if handler provided diff --git a/backend/application/chat/service.py b/backend/application/chat/service.py index e1d28a5..a18d7e6 100644 --- a/backend/application/chat/service.py +++ b/backend/application/chat/service.py @@ -125,6 +125,7 @@ def __init__( event_publisher=self.event_publisher, prompt_provider=self.prompt_provider, artifact_processor=self._update_session_from_tool_results, + config_manager=self.config_manager, ) @@ -138,6 +139,7 @@ def __init__( tool_manager=self.tool_manager, prompt_provider=self.prompt_provider, connection=self.connection, + config_manager=self.config_manager, ) # Get default strategy from config @@ -221,13 +223,13 @@ async def handle_chat_message( # Log input arguments with content trimmed content_preview = content[:100] + "..." if len(content) > 100 else content sanitized_kwargs = error_utils.sanitize_kwargs_for_logging(kwargs) - + logger.info( f"handle_chat_message called - session_id: {session_id}, " - f"content: '{content_preview}', model: {model}, " + f"content: '{sanitize_for_logging(content_preview)}', model: {model}, " f"selected_tools: {selected_tools}, selected_prompts: {selected_prompts}, selected_data_sources: {selected_data_sources}, " f"only_rag: {only_rag}, tool_choice_required: {tool_choice_required}, " - f"user_email: {user_email}, agent_mode: {agent_mode}, " + f"user_email: {sanitize_for_logging(user_email)}, agent_mode: {agent_mode}, " f"kwargs: {sanitized_kwargs}" ) diff --git a/backend/application/chat/utilities/tool_utils.py b/backend/application/chat/utilities/tool_utils.py index 519f0fe..94e1586 100644 --- a/backend/application/chat/utilities/tool_utils.py +++ b/backend/application/chat/utilities/tool_utils.py @@ -5,6 +5,7 @@ argument processing, and synthesis decisions without maintaining any state. """ +import asyncio import json import logging from typing import Any, Dict, List, Optional, Callable, Awaitable @@ -13,6 +14,7 @@ from interfaces.llm import LLMResponse from core.capabilities import create_download_url from .notification_utils import _sanitize_filename_value # reuse same filename sanitizer for UI args +from ..approval_manager import get_approval_manager logger = logging.getLogger(__name__) @@ -28,7 +30,8 @@ async def execute_tools_workflow( tool_manager, llm_caller, prompt_provider, - update_callback: Optional[UpdateCallback] = None + update_callback: Optional[UpdateCallback] = None, + config_manager=None ) -> tuple[str, List[ToolResult]]: """ Execute the complete tools workflow: calls -> results -> synthesis. 
@@ -50,7 +53,8 @@ async def execute_tools_workflow( tool_call=tool_call, session_context=session_context, tool_manager=tool_manager, - update_callback=update_callback + update_callback=update_callback, + config_manager=config_manager ) tool_results.append(result) @@ -76,6 +80,59 @@ async def execute_tools_workflow( return final_response, tool_results +def requires_approval(tool_name: str, config_manager) -> tuple[bool, bool, bool]: + """ + Check if a tool requires approval before execution. + + Args: + tool_name: Name of the tool to check + config_manager: ConfigManager instance (can be None) + + Returns: + Tuple of (requires_approval, allow_edit, admin_required) + - requires_approval: Whether approval is needed (always True) + - allow_edit: Whether arguments can be edited (always True) + - admin_required: Whether this is admin-mandated (True) or user-level (False) + + Admin-required (True) means user CANNOT toggle auto-approve: + - FORCE_TOOL_APPROVAL_GLOBALLY=true + - Per-tool require_approval=true in mcp.json + + User-level (False) means user CAN toggle auto-approve via inline UI: + - All other cases (including REQUIRE_TOOL_APPROVAL_BY_DEFAULT) + """ + if config_manager is None: + return (True, True, False) # Default to requiring user-level approval + + try: + # Global override: force approval for all tools (admin-enforced) + app_settings = getattr(config_manager, "app_settings", None) + force_flag = False + if app_settings is not None: + raw_force = getattr(app_settings, "force_tool_approval_globally", False) + force_flag = (isinstance(raw_force, bool) and raw_force is True) + if force_flag: + return (True, True, True) + + approvals_config = config_manager.tool_approvals_config + + # Per-tool explicit requirement (admin-enforced) + if tool_name in approvals_config.tools: + tool_config = approvals_config.tools[tool_name] + # Only treat as admin-required if explicitly required + if getattr(tool_config, "require_approval", False): + return (True, True, True) + # Explicit false falls through to default behavior + + # Default requirement: user-level regardless of default setting + # Users can always toggle auto-approve via inline UI unless admin explicitly requires it + return (True, True, False) + + except Exception as e: + logger.warning(f"Error checking approval requirements for {tool_name}: {e}") + return (True, True, False) # Default to user-level approval on error + + def tool_accepts_username(tool_name: str, tool_manager) -> bool: """ Check if a tool accepts a username parameter by examining its schema. @@ -109,7 +166,8 @@ async def execute_single_tool( tool_call, session_context: Dict[str, Any], tool_manager, - update_callback: Optional[UpdateCallback] = None + update_callback: Optional[UpdateCallback] = None, + config_manager=None ) -> ToolResult: """ Execute a single tool with argument preparation and error handling. @@ -129,6 +187,84 @@ async def execute_single_tool( # Sanitize arguments for UI (hide tokens in URLs, etc.) 
display_args = _sanitize_args_for_ui(dict(filtered_args)) + # Check if this tool requires approval + needs_approval = False + allow_edit = True + admin_required = False + if config_manager: + needs_approval, allow_edit, admin_required = requires_approval(tool_call.function.name, config_manager) + else: + # No config manager means user-level approval by default + needs_approval = True + allow_edit = True + admin_required = False + + # Track if arguments were edited (for LLM context) + arguments_were_edited = False + original_display_args = dict(display_args) if isinstance(display_args, dict) else display_args + + # If approval is required, request it from the user + if needs_approval: + logger.info(f"Tool {tool_call.function.name} requires approval (admin_required={admin_required})") + + # Send approval request to frontend + if update_callback: + await update_callback({ + "type": "tool_approval_request", + "tool_call_id": tool_call.id, + "tool_name": tool_call.function.name, + "arguments": display_args, + "allow_edit": allow_edit, + "admin_required": admin_required + }) + + # Wait for approval response + approval_manager = get_approval_manager() + request = approval_manager.create_approval_request( + tool_call.id, + tool_call.function.name, + filtered_args, + allow_edit + ) + + try: + response = await request.wait_for_response(timeout=300.0) + approval_manager.cleanup_request(tool_call.id) + + if not response["approved"]: + # Tool was rejected + reason = response.get("reason", "User rejected the tool call") + logger.info(f"Tool {tool_call.function.name} rejected by user: {reason}") + return ToolResult( + tool_call_id=tool_call.id, + content=f"Tool execution rejected by user: {reason}", + success=False, + error=reason + ) + + # Use potentially edited arguments + if allow_edit and response.get("arguments"): + edited_args = response["arguments"] + # Check if arguments actually changed by comparing with what we sent (display_args) + # Use json comparison to avoid false positives from dict ordering + if json.dumps(edited_args, sort_keys=True) != json.dumps(original_display_args, sort_keys=True): + arguments_were_edited = True + filtered_args = edited_args + logger.info(f"User edited arguments for tool {tool_call.function.name}") + else: + # No actual changes, but response included arguments - keep original filtered_args + logger.debug(f"Arguments returned unchanged for tool {tool_call.function.name}") + + except asyncio.TimeoutError: + approval_manager.cleanup_request(tool_call.id) + logger.warning(f"Approval timeout for tool {tool_call.function.name}") + return ToolResult( + tool_call_id=tool_call.id, + content="Tool execution timed out waiting for user approval", + success=False, + error="Approval timeout" + ) + # Send tool start notification with sanitized args await notification_utils.notify_tool_start(tool_call, display_args, update_callback) @@ -149,6 +285,20 @@ async def execute_single_tool( } ) + # If arguments were edited, prepend a note to the result for LLM context + if arguments_were_edited: + edit_note = ( + f"[IMPORTANT: The user manually edited the tool arguments before execution. " + f"Disregard your original arguments. The ACTUAL arguments executed were: {json.dumps(filtered_args)}. " + f"Your response must reflect these edited arguments as the user's true intent. 
" + # f"Do NOT reference the original arguments: {json.dumps(original_filtered_args)}]\n\n" + ) + if isinstance(result.content, str): + result.content = edit_note + result.content + else: + # If content is not a string, convert and prepend + result.content = edit_note + str(result.content) + # Send tool complete notification await notification_utils.notify_tool_complete(tool_call, result, parsed_args, update_callback) @@ -424,4 +574,4 @@ def build_files_manifest(session_context: Dict[str, Any]) -> Optional[Dict[str, "(You can ask to open or analyze any of these by name. " "Large contents are not fully in this prompt unless user or tools provided excerpts.)" ) - } \ No newline at end of file + } diff --git a/backend/core/utils.py b/backend/core/utils.py index eee0e64..123cbc3 100644 --- a/backend/core/utils.py +++ b/backend/core/utils.py @@ -10,28 +10,33 @@ logger = logging.getLogger(__name__) _CONTROL_CHARS_RE = re.compile(r'[\x00-\x1f\x7f-\x9f]') +# Matches Unicode line separators (LINE SEPARATOR and PARAGRAPH SEPARATOR) +_UNICODE_NEWLINES_RE = re.compile(r'[\u2028\u2029]') def sanitize_for_logging(value: Any) -> str: """ - Sanitize a value for safe logging by removing control characters. + Sanitize a value for safe logging by removing control characters and Unicode newlines. - Removes ASCII control characters (C0 and C1 ranges) to prevent log injection - attacks and log corruption. This includes characters like newlines, tabs, - escape sequences, and other non-printable characters that could be used to - manipulate log output or inject fake log entries. + Removes ASCII control characters (C0 and C1 ranges) and Unicode line separators + to prevent log injection attacks and log corruption. This includes characters + like newlines, tabs, escape sequences, and other non-printable characters that + could be used to manipulate log output or inject fake log entries. Additionally, + removes Unicode line/paragraph separators such as U+2028 and U+2029. Args: value: Any value to sanitize. If not a string, it will be converted to string representation first. Returns: - str: Sanitized string with all control characters removed. + str: Sanitized string with all control and newline characters removed. Examples: >>> sanitize_for_logging("Hello\\nWorld") 'HelloWorld' >>> sanitize_for_logging("Test\\x1b[31mRed\\x1b[0m") 'TestRed' + >>> sanitize_for_logging("Fake\u2028Log") + 'FakeLog' >>> sanitize_for_logging(123) '123' """ @@ -39,7 +44,9 @@ def sanitize_for_logging(value: Any) -> str: return '' if not isinstance(value, str): value = str(value) - return _CONTROL_CHARS_RE.sub('', value) + value = _CONTROL_CHARS_RE.sub('', value) + value = _UNICODE_NEWLINES_RE.sub('', value) + return value diff --git a/backend/main.py b/backend/main.py index 93ebdd3..f7c02cc 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,6 +3,7 @@ Focuses on essential chat functionality only. 
""" +import asyncio import logging from contextlib import asynccontextmanager from pathlib import Path @@ -217,44 +218,54 @@ async def websocket_endpoint(websocket: WebSocket): chat_service = app_factory.create_chat_service(connection_adapter) logger.info(f"WebSocket connection established for session {sanitize_for_logging(str(session_id))}") - + try: while True: data = await websocket.receive_json() message_type = data.get("type") - + + # Debug: Log ALL incoming messages + logger.info( + "WS RECEIVED message_type=[%s], data keys=%s", + sanitize_for_logging(message_type), + [f"[{sanitize_for_logging(key)}]" for key in data.keys()] + ) + if message_type == "chat": - # Handle chat message with streaming updates - try: - await chat_service.handle_chat_message( - session_id=session_id, - content=data.get("content", ""), - model=data.get("model", ""), - selected_tools=data.get("selected_tools"), - selected_prompts=data.get("selected_prompts"), - selected_data_sources=data.get("selected_data_sources"), - only_rag=data.get("only_rag", False), - tool_choice_required=data.get("tool_choice_required", False), - user_email=data.get("user"), - agent_mode=data.get("agent_mode", False), - agent_max_steps=data.get("agent_max_steps", 10), - temperature=data.get("temperature", 0.7), - agent_loop_strategy=data.get("agent_loop_strategy"), - update_callback=lambda message: websocket_update_callback(websocket, message), - files=data.get("files") - ) - # Final response is already sent via callbacks, but we keep this for backward compatibility - except ValidationError as e: - await websocket.send_json({ - "type": "error", - "message": str(e) - }) - except Exception as e: - logger.error(f"Error in chat handler: {e}", exc_info=True) - await websocket.send_json({ - "type": "error", - "message": "An unexpected error occurred" - }) + # Handle chat message in background so we can still receive approval responses + async def handle_chat(): + try: + await chat_service.handle_chat_message( + session_id=session_id, + content=data.get("content", ""), + model=data.get("model", ""), + selected_tools=data.get("selected_tools"), + selected_prompts=data.get("selected_prompts"), + selected_data_sources=data.get("selected_data_sources"), + only_rag=data.get("only_rag", False), + tool_choice_required=data.get("tool_choice_required", False), + user_email=data.get("user"), + agent_mode=data.get("agent_mode", False), + agent_max_steps=data.get("agent_max_steps", 10), + temperature=data.get("temperature", 0.7), + agent_loop_strategy=data.get("agent_loop_strategy"), + update_callback=lambda message: websocket_update_callback(websocket, message), + files=data.get("files") + ) + except ValidationError as e: + await websocket.send_json({ + "type": "error", + "message": str(e) + }) + except Exception as e: + logger.error(f"Error in chat handler: {e}", exc_info=True) + await websocket.send_json({ + "type": "error", + "message": "An unexpected error occurred" + }) + + # Start chat handling in background + asyncio.create_task(handle_chat()) elif message_type == "download_file": # Handle file download @@ -283,11 +294,34 @@ async def websocket_endpoint(websocket: WebSocket): ) await websocket.send_json(response) + elif message_type == "tool_approval_response": + # Handle tool approval response + logger.info(f"Received tool approval response: {sanitize_for_logging(str(data))}") + from application.chat.approval_manager import get_approval_manager + approval_manager = get_approval_manager() + + tool_call_id = data.get("tool_call_id") + approved = 
data.get("approved", False) + arguments = data.get("arguments") + reason = data.get("reason") + + logger.info(f"Processing approval: tool_call_id={sanitize_for_logging(tool_call_id)}, approved={approved}") + + result = approval_manager.handle_approval_response( + tool_call_id=tool_call_id, + approved=approved, + arguments=arguments, + reason=reason + ) + + logger.info(f"Approval response handled: result={sanitize_for_logging(result)}") + # No response needed - the approval will unblock the waiting tool execution + else: - logger.warning(f"Unknown message type: {message_type}") + logger.warning(f"Unknown message type: {sanitize_for_logging(message_type)}") await websocket.send_json({ "type": "error", - "message": f"Unknown message type: {message_type}" + "message": f"Unknown message type: {sanitize_for_logging(message_type)}" }) except WebSocketDisconnect: diff --git a/backend/modules/config/config_manager.py b/backend/modules/config/config_manager.py index c8334fc..5bac0e3 100644 --- a/backend/modules/config/config_manager.py +++ b/backend/modules/config/config_manager.py @@ -64,6 +64,8 @@ class MCPServerConfig(BaseModel): type: str = "stdio" # Server type: "stdio" or "http" (deprecated, use transport) transport: Optional[str] = None # Explicit transport: "stdio", "http", "sse" - takes priority over auto-detection compliance_level: Optional[str] = None # Compliance/security level (e.g., "SOC2", "HIPAA", "Public") + require_approval: List[str] = Field(default_factory=list) # List of tool names (without server prefix) requiring approval + allow_edit: List[str] = Field(default_factory=list) # List of tool names (without server prefix) allowing argument editing class MCPConfig(BaseModel): @@ -80,6 +82,27 @@ def validate_servers(cls, v): return v +class ToolApprovalConfig(BaseModel): + """Configuration for a single tool's approval settings.""" + require_approval: bool = False + allow_edit: bool = True + + +class ToolApprovalsConfig(BaseModel): + """Configuration for tool approvals.""" + require_approval_by_default: bool = False + tools: Dict[str, ToolApprovalConfig] = Field(default_factory=dict) + + @field_validator('tools', mode='before') + @classmethod + def validate_tools(cls, v): + """Convert dict values to ToolApprovalConfig objects.""" + if isinstance(v, dict): + return {name: ToolApprovalConfig(**config) if isinstance(config, dict) else config + for name, config in v.items()} + return v + + class AppSettings(BaseSettings): """Main application settings loaded from environment variables.""" @@ -115,7 +138,12 @@ class AppSettings(BaseSettings): def agent_mode_available(self) -> bool: """Maintain backward compatibility for code still referencing agent_mode_available.""" return self.feature_agent_mode_available - + + # Tool approval settings + require_tool_approval_by_default: bool = False + # When true, all tools require approval (admin-enforced), overriding per-tool and default settings + force_tool_approval_globally: bool = Field(default=False, validation_alias="FORCE_TOOL_APPROVAL_GLOBALLY") + # LLM Health Check settings llm_health_check_interval: int = 5 # minutes @@ -190,6 +218,7 @@ def agent_mode_available(self) -> bool: llm_config_file: str = Field(default="llmconfig.yml", validation_alias="LLM_CONFIG_FILE") help_config_file: str = Field(default="help-config.json", validation_alias="HELP_CONFIG_FILE") messages_config_file: str = Field(default="messages.txt", validation_alias="MESSAGES_CONFIG_FILE") + tool_approvals_config_file: str = Field(default="tool-approvals.json", 
validation_alias="TOOL_APPROVALS_CONFIG_FILE") # Config directory paths app_config_overrides: str = Field(default="config/overrides", validation_alias="APP_CONFIG_OVERRIDES") @@ -226,6 +255,7 @@ def __init__(self, backend_root: Optional[Path] = None): self._llm_config: Optional[LLMConfig] = None self._mcp_config: Optional[MCPConfig] = None self._rag_mcp_config: Optional[MCPConfig] = None + self._tool_approvals_config: Optional[ToolApprovalsConfig] = None def _search_paths(self, file_name: str) -> List[Path]: """Generate common search paths for a configuration file. @@ -435,6 +465,41 @@ def rag_mcp_config(self) -> MCPConfig: return self._rag_mcp_config + @property + def tool_approvals_config(self) -> ToolApprovalsConfig: + """Get tool approvals configuration built from mcp.json and env variables (cached).""" + if self._tool_approvals_config is None: + try: + # Get default from environment + default_require_approval = self.app_settings.require_tool_approval_by_default + + # Build tool-specific configs from MCP servers (Option B): + # Only include entries explicitly listed under require_approval. + tools_config: Dict[str, ToolApprovalConfig] = {} + + for server_name, server_config in self.mcp_config.servers.items(): + require_approval_list = server_config.require_approval or [] + + for tool_name in require_approval_list: + full_tool_name = f"{server_name}_{tool_name}" + # Mark as explicitly requiring approval; allow_edit is moot for requirement + tools_config[full_tool_name] = ToolApprovalConfig( + require_approval=True, + allow_edit=True # UI always allows edits; keep True for compatibility + ) + + self._tool_approvals_config = ToolApprovalsConfig( + require_approval_by_default=default_require_approval, + tools=tools_config + ) + logger.info(f"Built tool approvals config from mcp.json with {len(tools_config)} tool-specific settings (default: {default_require_approval})") + + except Exception as e: + logger.error(f"Failed to build tool approvals configuration: {e}", exc_info=True) + self._tool_approvals_config = ToolApprovalsConfig() + + return self._tool_approvals_config + def _validate_mcp_compliance_levels(self, config: MCPConfig, config_type: str): """Validate compliance levels for all MCP servers.""" try: @@ -459,6 +524,7 @@ def reload_configs(self) -> None: self._llm_config = None self._mcp_config = None self._rag_mcp_config = None + self._tool_approvals_config = None logger.info("Configuration cache cleared, will reload on next access") def validate_config(self) -> Dict[str, bool]: diff --git a/backend/modules/mcp_tools/client.py b/backend/modules/mcp_tools/client.py index d63fab8..8025191 100644 --- a/backend/modules/mcp_tools/client.py +++ b/backend/modules/mcp_tools/client.py @@ -96,7 +96,7 @@ def _determine_transport_type(self, config: Dict[str, Any]) -> str: async def _initialize_single_client(self, server_name: str, config: Dict[str, Any]) -> Optional[Client]: """Initialize a single MCP client. 
Returns None if initialization fails.""" - logger.info(f"=== Initializing client for server '{server_name}' ===\n\nServer config: {config}") + logger.info(f"=== Initializing client for server '{sanitize_for_logging(server_name)}' ===\n\nServer config: {sanitize_for_logging(str(config))}") try: transport_type = self._determine_transport_type(config) logger.info(f"Determined transport type: {transport_type}") diff --git a/backend/modules/rag/client.py b/backend/modules/rag/client.py index acb1373..6c81ad8 100644 --- a/backend/modules/rag/client.py +++ b/backend/modules/rag/client.py @@ -137,7 +137,7 @@ async def query_rag(self, user_name: str, data_source: str, messages: List[Dict] if self.mock_mode and self.test_client: try: - logger.info(f"Using TestClient to query RAG for {user_name} with data source {data_source}") + logger.info(f"Using TestClient to query RAG for {sanitize_for_logging(user_name)} with data source {sanitize_for_logging(data_source)}") response = self.test_client.post("/v1/chat/completions", json=payload) response.raise_for_status() data = response.json() diff --git a/backend/routes/admin_routes.py b/backend/routes/admin_routes.py index 195fd17..f0e57af 100644 --- a/backend/routes/admin_routes.py +++ b/backend/routes/admin_routes.py @@ -86,9 +86,9 @@ def setup_config_overrides() -> None: dest = overrides_root / file_path.name try: shutil.copy2(file_path, dest) - logger.info(f"Copied seed config {file_path} -> {dest}") + logger.info(f"Copied seed config {sanitize_for_logging(str(file_path))} -> {sanitize_for_logging(str(dest))}") except Exception as e: # noqa: BLE001 - logger.error(f"Failed seeding {file_path}: {e}") + logger.error(f"Failed seeding {sanitize_for_logging(str(file_path))}: {e}") break @@ -236,7 +236,7 @@ async def update_banner_config( messages_file = get_admin_config_path("messages.txt") content = ("\n".join(update.messages) + "\n") if update.messages else "" write_file_content(messages_file, content) - logger.info(f"Banner messages updated by {admin_user}") + logger.info(f"Banner messages updated by {sanitize_for_logging(admin_user)}") return { "message": "Banner messages updated successfully", "messages": update.messages, @@ -660,4 +660,4 @@ async def get_system_status(admin_user: str = Depends(require_admin)): } except Exception as e: # noqa: BLE001 logger.error(f"Error getting system status: {e}") - raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file + raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/routes/config_routes.py b/backend/routes/config_routes.py index 44664f4..7d7c3f7 100644 --- a/backend/routes/config_routes.py +++ b/backend/routes/config_routes.py @@ -259,6 +259,28 @@ async def get_config( model_info["compliance_level"] = model_config.compliance_level models_list.append(model_info) + # Build tool approval settings - only include tools from authorized servers + tool_approvals_config = config_manager.tool_approvals_config + filtered_tool_approvals = {} + + # Get all tool names from authorized servers + authorized_tool_names = set() + for tool_group in tools_info: + server_name = tool_group.get('server') + if server_name in authorized_servers: + # tools is a list of strings (tool names), not dicts + for tool_name in tool_group.get('tools', []): + if isinstance(tool_name, str): + authorized_tool_names.add(tool_name) + + # Only include approval settings for tools the user has access to + for tool_name, approval_config in tool_approvals_config.tools.items(): + if tool_name in 
authorized_tool_names: + filtered_tool_approvals[tool_name] = { + "require_approval": approval_config.require_approval, + "allow_edit": approval_config.allow_edit + } + return { "app_name": app_settings.app_name, "models": models_list, @@ -273,6 +295,10 @@ async def get_config( "agent_mode_available": app_settings.agent_mode_available, # Whether agent mode UI should be shown "banner_enabled": app_settings.banner_enabled, # Whether banner system is enabled "help_config": help_config, # Help page configuration from help-config.json + "tool_approvals": { + "require_approval_by_default": tool_approvals_config.require_approval_by_default, + "tools": filtered_tool_approvals + }, "features": { "workspaces": app_settings.feature_workspaces_enabled, "rag": app_settings.feature_rag_enabled, diff --git a/backend/tests/test_approval_manager.py b/backend/tests/test_approval_manager.py new file mode 100644 index 0000000..32caecd --- /dev/null +++ b/backend/tests/test_approval_manager.py @@ -0,0 +1,437 @@ +"""Tests for the tool approval manager.""" + +import asyncio +import pytest +from application.chat.approval_manager import ( + ToolApprovalManager, + ToolApprovalRequest, + get_approval_manager +) + + +class TestToolApprovalRequest: + """Test ToolApprovalRequest class.""" + + def test_create_approval_request(self): + """Test creating an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=True + ) + assert request.tool_call_id == "test_123" + assert request.tool_name == "test_tool" + assert request.arguments == {"arg1": "value1"} + assert request.allow_edit is True + + @pytest.mark.asyncio + async def test_set_response(self): + """Test setting a response to an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set approved response + request.set_response(approved=True, arguments={"arg1": "edited_value"}) + + # Wait for the response (should be immediate since we already set it) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"] == {"arg1": "edited_value"} + + @pytest.mark.asyncio + async def test_rejection_response(self): + """Test rejecting an approval request.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set rejected response + request.set_response(approved=False, reason="User rejected") + + # Wait for the response + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is False + assert response["reason"] == "User rejected" + + @pytest.mark.asyncio + async def test_timeout(self): + """Test that timeout works correctly.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Should timeout since we don't set a response + with pytest.raises(asyncio.TimeoutError): + await request.wait_for_response(timeout=0.1) + + +class TestToolApprovalManager: + """Test ToolApprovalManager class.""" + + def test_create_approval_request(self): + """Test creating an approval request via manager.""" + manager = ToolApprovalManager() + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=True + ) + + assert request.tool_call_id == "test_123" + assert "test_123" in 
manager.get_pending_requests() + + def test_handle_approval_response(self): + """Test handling an approval response.""" + manager = ToolApprovalManager() + manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Handle approval response + result = manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"arg1": "edited_value"} + ) + + assert result is True + # Request should still be in pending (cleaned up manually later) + assert "test_123" in manager.get_pending_requests() + + def test_handle_unknown_request(self): + """Test handling response for unknown request.""" + manager = ToolApprovalManager() + + result = manager.handle_approval_response( + tool_call_id="unknown_123", + approved=True + ) + + assert result is False + + def test_cleanup_request(self): + """Test cleaning up a completed request.""" + manager = ToolApprovalManager() + manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + assert "test_123" in manager.get_pending_requests() + + manager.cleanup_request("test_123") + + assert "test_123" not in manager.get_pending_requests() + + def test_get_approval_manager_singleton(self): + """Test that get_approval_manager returns a singleton.""" + manager1 = get_approval_manager() + manager2 = get_approval_manager() + + assert manager1 is manager2 + + @pytest.mark.asyncio + async def test_full_approval_workflow(self): + """Test the complete approval workflow.""" + manager = ToolApprovalManager() + + # Create request + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"code": "print('test')"}, + allow_edit=True + ) + + # Simulate async approval (in a separate task) + async def approve_after_delay(): + await asyncio.sleep(0.1) + manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"code": "print('edited test')"} + ) + + # Start approval task + asyncio.create_task(approve_after_delay()) + + # Wait for response + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"]["code"] == "print('edited test')" + + # Cleanup + manager.cleanup_request("test_123") + assert "test_123" not in manager.get_pending_requests() + + @pytest.mark.asyncio + async def test_multiple_concurrent_approvals(self): + """Test handling multiple concurrent approval requests.""" + manager = ToolApprovalManager() + + # Create multiple requests + request1 = manager.create_approval_request( + tool_call_id="test_1", + tool_name="tool_a", + arguments={"arg": "value1"} + ) + request2 = manager.create_approval_request( + tool_call_id="test_2", + tool_name="tool_b", + arguments={"arg": "value2"} + ) + request3 = manager.create_approval_request( + tool_call_id="test_3", + tool_name="tool_c", + arguments={"arg": "value3"} + ) + + assert len(manager.get_pending_requests()) == 3 + + # Approve them in different order + async def approve_requests(): + await asyncio.sleep(0.05) + manager.handle_approval_response("test_2", approved=True) + await asyncio.sleep(0.05) + manager.handle_approval_response("test_1", approved=False, reason="Rejected") + await asyncio.sleep(0.05) + manager.handle_approval_response("test_3", approved=True) + + asyncio.create_task(approve_requests()) + + # Wait for all responses + response1 = await request1.wait_for_response(timeout=1.0) + response2 = await 
request2.wait_for_response(timeout=1.0) + response3 = await request3.wait_for_response(timeout=1.0) + + assert response1["approved"] is False + assert response1["reason"] == "Rejected" + assert response2["approved"] is True + assert response3["approved"] is True + + @pytest.mark.asyncio + async def test_approval_with_no_arguments_change(self): + """Test approval where arguments are returned but not changed.""" + manager = ToolApprovalManager() + + original_args = {"code": "print('hello')"} + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments=original_args, + allow_edit=True + ) + + # Approve with same arguments + async def approve(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_123", + approved=True, + arguments={"code": "print('hello')"} # Same as original + ) + + asyncio.create_task(approve()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"] == original_args + + @pytest.mark.asyncio + async def test_double_response_handling(self): + """Test that setting response twice doesn't cause issues.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + # Set response first time + request.set_response(approved=True, arguments={"arg1": "first"}) + + # Try to set response second time (should be ignored) + request.set_response(approved=False, arguments={"arg1": "second"}) + + # Should get the first response + response = await request.wait_for_response(timeout=0.5) + assert response["approved"] is True + assert response["arguments"]["arg1"] == "first" + + @pytest.mark.asyncio + async def test_rejection_with_empty_reason(self): + """Test rejection with no reason provided.""" + manager = ToolApprovalManager() + + request = manager.create_approval_request( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"} + ) + + async def reject(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_123", + approved=False + # No reason provided + ) + + _ = asyncio.create_task(reject()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is False + assert response.get("reason") is None or response.get("reason") == "" + + @pytest.mark.asyncio + async def test_allow_edit_false(self): + """Test approval request with editing disabled.""" + request = ToolApprovalRequest( + tool_call_id="test_123", + tool_name="test_tool", + arguments={"arg1": "value1"}, + allow_edit=False + ) + + assert request.allow_edit is False + + # Even if arguments are provided, they should be used + request.set_response(approved=True, arguments={"arg1": "edited_value"}) + response = await request.wait_for_response(timeout=0.5) + + # The response will contain the edited arguments, but the UI should + # respect allow_edit=False to prevent showing edit controls + assert response["arguments"] == {"arg1": "edited_value"} + + def test_cleanup_nonexistent_request(self): + """Test cleaning up a request that doesn't exist.""" + manager = ToolApprovalManager() + + # Should not raise an error + manager.cleanup_request("nonexistent_id") + + assert "nonexistent_id" not in manager.get_pending_requests() + + def test_multiple_managers_vs_singleton(self): + """Test that direct instantiation creates different instances but singleton returns same.""" + manager1 = ToolApprovalManager() + manager2 = ToolApprovalManager() + + # 
Direct instantiation creates different instances + assert manager1 is not manager2 + + # But singleton returns the same instance + singleton1 = get_approval_manager() + singleton2 = get_approval_manager() + assert singleton1 is singleton2 + + @pytest.mark.asyncio + async def test_approval_with_complex_arguments(self): + """Test approval with complex nested arguments.""" + manager = ToolApprovalManager() + + complex_args = { + "nested": { + "level1": { + "level2": ["item1", "item2", "item3"] + } + }, + "list_of_dicts": [ + {"key": "value1"}, + {"key": "value2"} + ], + "numbers": [1, 2, 3, 4, 5] + } + + request = manager.create_approval_request( + tool_call_id="test_complex", + tool_name="complex_tool", + arguments=complex_args, + allow_edit=True + ) + + # Modify nested structure + edited_args = { + "nested": { + "level1": { + "level2": ["item1", "modified_item", "item3"] + } + }, + "list_of_dicts": [ + {"key": "value1"}, + {"key": "new_value"} + ], + "numbers": [1, 2, 3, 4, 5, 6] + } + + async def approve(): + await asyncio.sleep(0.05) + manager.handle_approval_response( + tool_call_id="test_complex", + approved=True, + arguments=edited_args + ) + + asyncio.create_task(approve()) + response = await request.wait_for_response(timeout=1.0) + + assert response["approved"] is True + assert response["arguments"]["nested"]["level1"]["level2"][1] == "modified_item" + assert len(response["arguments"]["numbers"]) == 6 + + @pytest.mark.asyncio + async def test_sequential_approvals(self): + """Test approving requests one after another in sequence.""" + manager = ToolApprovalManager() + + # First approval + request1 = manager.create_approval_request( + tool_call_id="seq_1", + tool_name="tool_1", + arguments={"step": 1} + ) + + async def approve1(): + await asyncio.sleep(0.05) + manager.handle_approval_response("seq_1", approved=True) + + task1 = asyncio.create_task(approve1()) + response1 = await request1.wait_for_response(timeout=1.0) + manager.cleanup_request("seq_1") + await task1 + + assert response1["approved"] is True + assert "seq_1" not in manager.get_pending_requests() + + # Second approval after first is complete + request2 = manager.create_approval_request( + tool_call_id="seq_2", + tool_name="tool_2", + arguments={"step": 2} + ) + + async def approve2(): + await asyncio.sleep(0.05) + manager.handle_approval_response("seq_2", approved=True) + + task2 = asyncio.create_task(approve2()) + response2 = await request2.wait_for_response(timeout=1.0) + manager.cleanup_request("seq_2") + await task2 + + assert response2["approved"] is True + assert "seq_2" not in manager.get_pending_requests() diff --git a/backend/tests/test_core_utils.py b/backend/tests/test_core_utils.py index d298f00..610760a 100644 --- a/backend/tests/test_core_utils.py +++ b/backend/tests/test_core_utils.py @@ -126,3 +126,65 @@ def test_preserves_regular_punctuation(self): assert sanitize_for_logging("Hello, World!") == "Hello, World!" 
assert sanitize_for_logging("Cost: $100 (20% off)") == "Cost: $100 (20% off)" assert sanitize_for_logging("Email: test@example.com") == "Email: test@example.com" + + def test_removes_unicode_line_separator(self): + """Test that Unicode LINE SEPARATOR (U+2028) is removed.""" + assert sanitize_for_logging("Hello\u2028World") == "HelloWorld" + assert sanitize_for_logging("\u2028Starting") == "Starting" + assert sanitize_for_logging("Ending\u2028") == "Ending" + assert sanitize_for_logging("Line1\u2028Line2\u2028Line3") == "Line1Line2Line3" + + def test_removes_unicode_paragraph_separator(self): + """Test that Unicode PARAGRAPH SEPARATOR (U+2029) is removed.""" + assert sanitize_for_logging("Para1\u2029Para2") == "Para1Para2" + assert sanitize_for_logging("\u2029Starting") == "Starting" + assert sanitize_for_logging("Ending\u2029") == "Ending" + assert sanitize_for_logging("P1\u2029P2\u2029P3") == "P1P2P3" + + def test_removes_both_unicode_separators(self): + """Test that both Unicode separators are removed together.""" + assert sanitize_for_logging("Text\u2028with\u2029separators") == "Textwithseparators" + assert sanitize_for_logging("\u2028\u2029Mixed") == "Mixed" + assert sanitize_for_logging("A\u2028B\u2029C\u2028D") == "ABCD" + + def test_unicode_separators_with_regular_unicode(self): + """Test Unicode separators mixed with regular Unicode characters.""" + assert sanitize_for_logging("Hello\u2028世界") == "Hello世界" + assert sanitize_for_logging("Café\u2029☕") == "Café☕" + assert sanitize_for_logging("Test\u2028データ\u2029More") == "TestデータMore" + + def test_unicode_separator_log_injection(self): + """Test that Unicode separators can't be used for log injection.""" + malicious_input = "user@example.com\u2028[ERROR] Fake error message\u2029[INFO] Fake info" + sanitized = sanitize_for_logging(malicious_input) + assert "\u2028" not in sanitized + assert "\u2029" not in sanitized + assert sanitized == "user@example.com[ERROR] Fake error message[INFO] Fake info" + + def test_multiple_consecutive_unicode_separators(self): + """Test multiple consecutive Unicode separators are all removed.""" + assert sanitize_for_logging("Text\u2028\u2028\u2028More") == "TextMore" + assert sanitize_for_logging("Text\u2029\u2029\u2029More") == "TextMore" + assert sanitize_for_logging("\u2028\u2029\u2028\u2029Data") == "Data" + + def test_unicode_separators_with_ascii_control_chars(self): + """Test Unicode separators combined with ASCII control characters.""" + assert sanitize_for_logging("Test\n\u2028\rData\u2029\tEnd") == "TestDataEnd" + assert sanitize_for_logging("\x00\u2028Text\u2029\x1b[31m") == "Text[31m" + + def test_complex_unicode_injection_scenario(self): + """Test complex scenario with Unicode separators in structured log attempt.""" + attack = "Normal text\u2028[2025-11-08 10:00:00] CRITICAL: Injected message\u2029admin logged in" + sanitized = sanitize_for_logging(attack) + assert "\u2028" not in sanitized + assert "\u2029" not in sanitized + assert "\n" not in sanitized + assert sanitized == "Normal text[2025-11-08 10:00:00] CRITICAL: Injected messageadmin logged in" + + def test_unicode_separators_do_not_affect_other_unicode(self): + """Test that removing Unicode separators doesn't affect other Unicode characters.""" + text_with_emoji = "Hello\u2028😀\u2029World🌍" + assert sanitize_for_logging(text_with_emoji) == "Hello😀World🌍" + + text_with_chars = "Test\u2028中文\u2029العربية\u2028Ελληνικά" + assert sanitize_for_logging(text_with_chars) == "Test中文العربيةΕλληνικά" diff --git 
a/backend/tests/test_tool_approval_config.py b/backend/tests/test_tool_approval_config.py new file mode 100644 index 0000000..d01eaa6 --- /dev/null +++ b/backend/tests/test_tool_approval_config.py @@ -0,0 +1,93 @@ +"""Tests for tool approval configuration loading and management.""" + +from modules.config.config_manager import ConfigManager + + +class TestToolApprovalConfig: + """Test tool approval configuration loading.""" + + def test_tool_approvals_config_loads(self): + """Test that tool approvals config loads successfully.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + assert approval_config is not None + assert hasattr(approval_config, "require_approval_by_default") + assert hasattr(approval_config, "tools") + + def test_default_approval_config_structure(self): + """Test the structure of default approval config.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Default config should have require_approval_by_default (check it's boolean) + assert isinstance(approval_config.require_approval_by_default, bool) + # Default config should have tools dict (may or may not be empty) + assert isinstance(approval_config.tools, dict) + + def test_tool_specific_config(self): + """Test that tool-specific configurations can be loaded.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Test basic structure - config may have tool-specific configs from overrides + assert hasattr(approval_config, 'tools') + assert isinstance(approval_config.tools, dict) + + # If there are any tool configs, verify they have the right structure + for tool_name, tool_config in approval_config.tools.items(): + assert hasattr(tool_config, 'require_approval') + assert hasattr(tool_config, 'allow_edit') + assert isinstance(tool_config.require_approval, bool) + assert isinstance(tool_config.allow_edit, bool) + + def test_config_has_boolean_default(self): + """Test that require_approval_by_default is a boolean.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + assert isinstance(approval_config.require_approval_by_default, bool) + + def test_tools_config_structure(self): + """Test that tools in config have correct structure.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Each tool config should have require_approval and allow_edit + for tool_name, tool_config in approval_config.tools.items(): + assert hasattr(tool_config, 'require_approval') + assert hasattr(tool_config, 'allow_edit') + assert isinstance(tool_config.require_approval, bool) + assert isinstance(tool_config.allow_edit, bool) + + def test_config_manager_provides_approvals_config(self): + """Test that ConfigManager provides tool_approvals_config.""" + cm = ConfigManager() + + assert hasattr(cm, 'tool_approvals_config') + assert cm.tool_approvals_config is not None + + def test_multiple_config_manager_instances(self): + """Test that multiple ConfigManager instances can coexist.""" + cm1 = ConfigManager() + cm2 = ConfigManager() + + config1 = cm1.tool_approvals_config + config2 = cm2.tool_approvals_config + + # Both should have valid configs + assert config1 is not None + assert config2 is not None + + def test_config_contains_expected_fields(self): + """Test that approval config has all expected fields.""" + cm = ConfigManager() + approval_config = cm.tool_approvals_config + + # Should have these attributes + assert hasattr(approval_config, 'require_approval_by_default') + assert hasattr(approval_config, 'tools') + + # Types should be 
correct + assert isinstance(approval_config.require_approval_by_default, bool) + assert isinstance(approval_config.tools, dict) diff --git a/backend/tests/test_tool_approval_utils.py b/backend/tests/test_tool_approval_utils.py new file mode 100644 index 0000000..5196f83 --- /dev/null +++ b/backend/tests/test_tool_approval_utils.py @@ -0,0 +1,355 @@ +"""Tests for tool approval utilities in tool_utils.py""" + +from unittest.mock import Mock +from application.chat.utilities.tool_utils import ( + requires_approval, + tool_accepts_username, + _sanitize_args_for_ui, + _filter_args_to_schema +) + + +class MockToolConfig: + """Mock tool configuration.""" + def __init__(self, require_approval, allow_edit): + self.require_approval = require_approval + self.allow_edit = allow_edit + + +class MockApprovalsConfig: + """Mock approvals configuration.""" + def __init__(self, require_by_default=True, tools=None): + self.require_approval_by_default = require_by_default + self.tools = tools or {} + + +class TestRequiresApproval: + """Test the requires_approval function.""" + + def test_requires_approval_no_config_manager(self): + """Test requires_approval with no config manager.""" + needs_approval, allow_edit, admin_required = requires_approval("test_tool", None) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_tool_specific_config(self): + """Test requires_approval with tool-specific configuration.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "dangerous_tool": MockToolConfig(require_approval=True, allow_edit=False) + } + ) + + needs_approval, allow_edit, admin_required = requires_approval("dangerous_tool", config_manager) + + assert needs_approval is True + # UI is always editable when approval is required + assert allow_edit is True + assert admin_required is True + + def test_requires_approval_default_true(self): + """Test requires_approval with default set to require approval. + + REQUIRE_TOOL_APPROVAL_BY_DEFAULT=true should be user-level (admin_required=False) + so users can toggle auto-approve via the inline UI. 
+ """ + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=True, + tools={} + ) + + needs_approval, allow_edit, admin_required = requires_approval("any_tool", config_manager) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is False # User CAN toggle auto-approve + + def test_requires_approval_default_false(self): + """Test requires_approval with default set to not require approval.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={} + ) + + needs_approval, allow_edit, admin_required = requires_approval("any_tool", config_manager) + + # Default is False but function returns True with user-level approval + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_exception_handling(self): + """Test requires_approval handles exceptions gracefully.""" + config_manager = Mock() + config_manager.tool_approvals_config = None + + # Should not raise, should return default + needs_approval, allow_edit, admin_required = requires_approval("test_tool", config_manager) + + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + def test_requires_approval_multiple_tools(self): + """Test requires_approval with multiple tool configurations.""" + config_manager = Mock() + config_manager.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "tool_a": MockToolConfig(require_approval=True, allow_edit=True), + "tool_b": MockToolConfig(require_approval=True, allow_edit=False), + "tool_c": MockToolConfig(require_approval=False, allow_edit=True) + } + ) + + # Tool A + needs_approval, allow_edit, admin_required = requires_approval("tool_a", config_manager) + assert needs_approval is True + assert allow_edit is True + assert admin_required is True + + # Tool B (allow_edit False in config is ignored for UI gating) + needs_approval, allow_edit, admin_required = requires_approval("tool_b", config_manager) + assert needs_approval is True + assert allow_edit is True + assert admin_required is True + + # Tool C: with Option B, entries with require_approval=False are not + # considered explicit; fall back to default (which is False here), + # resulting in user-level approval required by design. 
+ config_manager2 = Mock() + config_manager2.tool_approvals_config = MockApprovalsConfig( + require_by_default=False, + tools={ + "tool_a": MockToolConfig(require_approval=True, allow_edit=True), + "tool_b": MockToolConfig(require_approval=True, allow_edit=False), + # tool_c omitted to simulate Option B config building + } + ) + needs_approval, allow_edit, admin_required = requires_approval("tool_c", config_manager2) + assert needs_approval is True + assert allow_edit is True + assert admin_required is False + + +class TestToolAcceptsUsername: + """Test the tool_accepts_username function.""" + + def test_tool_accepts_username_true(self): + """Test tool that accepts username parameter.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "username": {"type": "string"}, + "other_param": {"type": "string"} + } + } + } + } + ] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is True + + def test_tool_accepts_username_false(self): + """Test tool that does not accept username parameter.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "other_param": {"type": "string"} + } + } + } + } + ] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + def test_tool_accepts_username_no_tool_manager(self): + """Test with no tool manager.""" + result = tool_accepts_username("test_tool", None) + assert result is False + + def test_tool_accepts_username_no_schema(self): + """Test when tool schema is not found.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [] + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + def test_tool_accepts_username_exception(self): + """Test exception handling.""" + tool_manager = Mock() + tool_manager.get_tools_schema.side_effect = Exception("Schema error") + + result = tool_accepts_username("test_tool", tool_manager) + assert result is False + + +class TestSanitizeArgsForUI: + """Test the _sanitize_args_for_ui function.""" + + def test_sanitize_simple_args(self): + """Test sanitizing simple arguments.""" + args = {"param1": "value1", "param2": "value2"} + result = _sanitize_args_for_ui(args) + + assert result == args + + def test_sanitize_filename(self): + """Test sanitizing filename with URL.""" + args = {"filename": "http://example.com/path/file.txt?token=secret"} + result = _sanitize_args_for_ui(args) + + # Should extract just the filename + assert "token" not in result["filename"] + assert "file.txt" in result["filename"] + + def test_sanitize_file_names_list(self): + """Test sanitizing list of filenames.""" + args = { + "file_names": [ + "http://example.com/file1.txt?token=abc", + "http://example.com/file2.txt?token=def" + ] + } + result = _sanitize_args_for_ui(args) + + assert len(result["file_names"]) == 2 + for filename in result["file_names"]: + assert "token" not in filename + + def test_sanitize_file_url(self): + """Test sanitizing file_url field.""" + args = {"file_url": "http://example.com/path/file.txt?token=secret"} + result = _sanitize_args_for_ui(args) + + assert "token" not in result["file_url"] + + def test_sanitize_file_urls_list(self): + """Test sanitizing file_urls list.""" + args = { + "file_urls": [ + "http://example.com/file1.txt?token=abc", + "http://example.com/file2.txt?token=def" + ] + } + result = 
_sanitize_args_for_ui(args) + + assert len(result["file_urls"]) == 2 + for url in result["file_urls"]: + assert "token" not in url + + def test_sanitize_mixed_args(self): + """Test sanitizing mixed arguments.""" + args = { + "filename": "http://example.com/file.txt?token=secret", + "other_param": "normal_value", + "file_names": ["file1.txt", "file2.txt"] + } + result = _sanitize_args_for_ui(args) + + assert "token" not in result["filename"] + assert result["other_param"] == "normal_value" + assert len(result["file_names"]) == 2 + + +class TestFilterArgsToSchema: + """Test the _filter_args_to_schema function.""" + + def test_filter_with_schema(self): + """Test filtering arguments with available schema.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [ + { + "function": { + "name": "test_tool", + "parameters": { + "properties": { + "allowed_param": {"type": "string"}, + "another_param": {"type": "number"} + } + } + } + } + ] + + args = { + "allowed_param": "value", + "another_param": 42, + "original_filename": "old.txt", + "file_url": "http://example.com/file.txt", + "extra_param": "should_be_removed" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + assert "allowed_param" in result + assert "another_param" in result + assert "original_filename" not in result + assert "file_url" not in result + assert "extra_param" not in result + + def test_filter_without_schema(self): + """Test filtering when schema is unavailable.""" + tool_manager = Mock() + tool_manager.get_tools_schema.return_value = [] + + args = { + "param": "value", + "original_filename": "old.txt", + "file_url": "http://example.com/file.txt" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + # Should keep param but drop original_* and file_url(s) + assert "param" in result + assert "original_filename" not in result + assert "file_url" not in result + + def test_filter_no_tool_manager(self): + """Test filtering with no tool manager.""" + args = { + "param": "value", + "original_something": "should_be_removed", + "file_urls": ["url1", "url2"] + } + + result = _filter_args_to_schema(args, "test_tool", None) + + assert "param" in result + assert "original_something" not in result + assert "file_urls" not in result + + def test_filter_exception_handling(self): + """Test filtering handles exceptions gracefully.""" + tool_manager = Mock() + tool_manager.get_tools_schema.side_effect = Exception("Schema error") + + args = { + "param": "value", + "original_param": "remove_me" + } + + result = _filter_args_to_schema(args, "test_tool", tool_manager) + + # Should fall back to conservative filtering + assert "param" in result + assert "original_param" not in result diff --git a/config/overrides/mcp.json b/config/overrides/mcp.json index 26a0c6b..da3f9b3 100644 --- a/config/overrides/mcp.json +++ b/config/overrides/mcp.json @@ -10,7 +10,9 @@ "author": "Chat UI Team", "short_description": "Mathematical calculator", "help_email": "support@chatui.example.com", - "compliance_level": "Public" + "compliance_level": "Public", + "require_approval": [], + "allow_edit": ["evaluate"] }, "pptx_generator": { @@ -33,7 +35,9 @@ "author": "Chat UI Team", "short_description": "PDF text extraction and analysis", "help_email": "support@chatui.example.com", - "compliance_level": "SOC2" + "compliance_level": "SOC2", + "require_approval": ["generate_report_about_pdf", "word_frequency_pdf"], + "allow_edit": ["generate_report_about_pdf", "word_frequency_pdf"] }, "file_size_test": { "command": 
["python", "mcp/file_size_test/main.py"], diff --git a/docs/planning/fn-approvals.md b/docs/planning/fn-approvals.md new file mode 100644 index 0000000..4c1a98e --- /dev/null +++ b/docs/planning/fn-approvals.md @@ -0,0 +1,9 @@ +The expected behavior is +1. FORCE_TOOL_APPROVAL_GLOBALLY=false requires all tools to be approves. overrides anything else. +2. REQUIRE_TOOL_APPROVAL_BY_DEFAULT=true make it so if there is an option to auto approve, then approval is needed by default, but the user can turn it off. +All tools when seeking approval in the UI can have their args edited. no need for a special case of allowing edits for some and not others. +3. in the mcp.json default and override, a key:value for require_approval is the key. The admin can set somem functions to always requires approval (for example dangerious commands). The usr cannot turn this off. +4. The usre can easily toggle auto approval via the UI settings panel OR when a tool approval is requested an an inline ui element that is adjancent to the approval requests. the use can always enable tool approval for themselves so it is not auto approved by toggleing the global toggle. +5. the UI should not say "thinking" while the UI is waiting on an approval. + +can you check the code and verify these requirments are met \ No newline at end of file diff --git a/docs/tool-approval-e2e-test-scenarios-2025-11-05.md b/docs/tool-approval-e2e-test-scenarios-2025-11-05.md new file mode 100644 index 0000000..1ce62e1 --- /dev/null +++ b/docs/tool-approval-e2e-test-scenarios-2025-11-05.md @@ -0,0 +1,313 @@ +# Tool Approval E2E Test Scenarios + +**Last updated: 2025-11-05** + +This document provides manual E2E test scenarios for the tool approval and auto-approval features in Atlas UI 3. These tests should be performed in the UI to verify the complete approval workflow. + +## Test Environment Setup + +Before testing, ensure: +- Backend is running with the latest changes +- Frontend is built and loaded +- Configuration file `config/overrides/tool_approvals.yml` exists +- At least one MCP tool is configured (e.g., calculator) + +--- + +## Basic Approval Tests + +### Test 1: Approve a Tool Call +**Objective:** Verify basic approval flow works end-to-end + +**Steps:** +1. Send message: " +use hte tool, what is 9879*tan(0.12354) +2. Wait for approval request modal to appear +3. Review the tool name (calculator_evaluate) and arguments +4. Click "Approve" button + +**Expected Result:** +- Tool executes successfully +- Result is displayed in chat +- Approval modal disappears + +--- + +### Test 2: Reject a Tool Call +**Objective:** Verify rejection prevents tool execution + +**Steps:** +1. Send message: "calculate 500 * 250" +2. Wait for approval request +3. Enter rejection reason: "Not needed right now" +4. Click "Reject" button + +**Expected Result:** +- Tool does not execute +- Chat shows rejection message with reason +- User can continue chatting + +--- + +### Test 3: Edit Arguments Before Approval +**Objective:** Verify argument editing functionality + +**Steps:** +1. Send message: "calculate 100 + 50" +2. Wait for approval request +3. Click "Edit Arguments" button +4. Modify expression to "100 + 500" +5. Click "Approve" button + +**Expected Result:** +- Tool executes with edited arguments (100 + 500 = 600) +- Result reflects the edited calculation + +--- + +### Test 4: Multiple Sequential Tool Calls +**Objective:** Verify approval works for multiple tools in sequence + +**Steps:** +1. Send message: "calculate 10 * 20, then calculate 5 + 5" +2. 
Approve first tool call (10 * 20) +3. Wait for second approval request +4. Approve second tool call (5 + 5) + +**Expected Result:** +- Both tools execute in order +- Both results appear in chat +- Approval requests appear one at a time + +--- + +## Auto-Approval Tests + +### Test 5: Enable User-Level Auto-Approval +**Objective:** Verify user can enable auto-approval for all tools + +**Steps:** +1. Locate auto-approval toggle in UI (user settings or approval modal) +2. Enable "Auto-approve all tools" +3. Send message: "calculate 777 * 3" + +**Expected Result:** +- Tool executes immediately without approval prompt +- Result appears in chat automatically + +--- + +### Test 6: Disable User-Level Auto-Approval +**Objective:** Verify auto-approval can be turned off + +**Steps:** +1. With auto-approval enabled, disable the toggle +2. Send message: "calculate 123 + 456" + +**Expected Result:** +- Approval modal appears as normal +- User must manually approve + +--- + +### Test 7: Function-Specific Auto-Approval +**Objective:** Verify auto-approval for specific functions only + +**Steps:** +1. Edit `config/overrides/tool_approvals.yml`: +```yaml +tools: + calculator_evaluate: + require_approval: false + allow_edit: true +``` +2. Restart backend +3. Send message: "calculate 999 / 3" + +**Expected Result:** +- Calculator tool executes without approval +- Other tools (if available) still require approval + +--- + +### Test 8: Mixed Auto-Approval and Manual Approval +**Objective:** Verify some tools auto-approve while others require approval + +**Steps:** +1. Configure calculator to NOT require approval +2. Configure another tool (e.g., PDF tool) to require approval +3. Send message that uses both tools + +**Expected Result:** +- Calculator executes immediately +- PDF tool shows approval modal +- Workflow continues smoothly + +--- + +## Edge Cases and Error Handling + +### Test 9: Approval Timeout +**Objective:** Verify system handles no response gracefully + +**Steps:** +1. Send message: "calculate 50 * 50" +2. Wait for approval modal +3. Do not click approve or reject +4. Wait 5+ minutes (timeout period) + +**Expected Result:** +- System times out gracefully +- Error message appears: "Tool execution timed out waiting for user approval" +- Chat remains functional + +--- + +### Test 10: Cancel Chat During Approval Wait +**Objective:** Verify cancellation doesn't break system + +**Steps:** +1. Send message: "calculate 88 * 11" +2. Wait for approval modal +3. Refresh page or reset session +4. Send new message + +**Expected Result:** +- Previous approval request is cleared +- New chat works normally +- No hanging approvals + +--- + +### Test 11: Rapid Approve Button Clicks +**Objective:** Verify no duplicate executions from double-clicking + +**Steps:** +1. Send message: "calculate 10 + 10" +2. Wait for approval modal +3. Rapidly click "Approve" button 5 times + +**Expected Result:** +- Tool executes only once +- No duplicate results +- No error messages + +--- + +### Test 12: Invalid Edited Arguments +**Objective:** Verify validation of edited arguments + +**Steps:** +1. Send message: "calculate 5 * 5" +2. Wait for approval modal +3. Edit arguments to invalid JSON: `{expression: "broken}` +4. 
Click "Approve" + +**Expected Result:** +- Either: validation error before approval +- Or: tool execution fails gracefully with error message +- User can retry or cancel + +--- + +## Admin Configuration Tests + +### Test 13: Admin-Mandated Approval (Cannot Override) +**Objective:** Verify admin settings override user preferences + +**Steps:** +1. Edit `config/overrides/tool_approvals.yml`: +```yaml +require_approval_by_default: true +tools: + calculator_evaluate: + require_approval: true + allow_edit: false +``` +2. Restart backend +3. Enable user-level auto-approval toggle +4. Send message: "calculate 100 * 2" + +**Expected Result:** +- Approval modal appears despite user auto-approval setting +- Admin requirement overrides user preference +- "Edit Arguments" button is disabled (allow_edit: false) + +--- + +### Test 14: Disable Argument Editing +**Objective:** Verify admin can prevent argument editing + +**Steps:** +1. Configure tool with `allow_edit: false` +2. Send message: "calculate 7 * 7" +3. Approval modal appears + +**Expected Result:** +- "Edit Arguments" button is hidden or disabled +- User can only approve or reject with original arguments + +--- + +## Agent Mode Tests + +### Test 15: Approvals in Agent Mode +**Objective:** Verify approval works with multi-step agent loops + +**Steps:** +1. Enable agent mode in UI +2. Send message: "calculate 10 * 10, then use that result to calculate another operation" +3. Approve first tool call +4. Wait for agent to process +5. Approve any subsequent tool calls + +**Expected Result:** +- Agent pauses at each approval request +- Agent continues after each approval +- Multi-step reasoning completes successfully +- All intermediate steps and results are visible + +--- + +## Testing Checklist + +Use this checklist to track test completion: + +- [ ] Test 1: Basic approval +- [ ] Test 2: Basic rejection +- [ ] Test 3: Argument editing +- [ ] Test 4: Sequential tools +- [ ] Test 5: Enable auto-approval +- [ ] Test 6: Disable auto-approval +- [ ] Test 7: Function-specific auto-approval +- [ ] Test 8: Mixed approval modes +- [ ] Test 9: Timeout handling +- [ ] Test 10: Session cancellation +- [ ] Test 11: Duplicate click prevention +- [ ] Test 12: Invalid argument validation +- [ ] Test 13: Admin override +- [ ] Test 14: Disabled editing +- [ ] Test 15: Agent mode approvals + +--- + +## Common Issues and Debugging + +**Approval modal doesn't appear:** +- Check `config/overrides/tool_approvals.yml` exists +- Verify `require_approval: true` for the tool +- Check browser console for WebSocket errors + +**Approve button does nothing:** +- Check backend logs for `tool_approval_response` message +- Verify WebSocket connection is open +- Look for approval manager logs + +**Auto-approval not working:** +- Verify configuration is loaded (check backend startup logs) +- Ensure backend was restarted after config changes +- Check user-level setting is enabled in UI + +**Tool executes twice:** +- This is a bug - report immediately +- Check for duplicate WebSocket messages in logs diff --git a/docs/tool-approval-final-implementation.md b/docs/tool-approval-final-implementation.md new file mode 100644 index 0000000..05aaeb5 --- /dev/null +++ b/docs/tool-approval-final-implementation.md @@ -0,0 +1,252 @@ +# Tool Approval System - Final Implementation + +## Overview + +The tool approval system provides a two-tier approval mechanism: +1. **Admin-level**: Administrators can mandate approval for specific tools +2. 
**User-level**: Users control auto-approval for non-admin-required tools + +## User Experience + +### Approval in Chat Area + +Approval requests now appear as inline system messages in the chat, similar to tool calls and agent messages: + +``` +┌─────────────────────────────────────────────────┐ +│ [S] System │ +│ ┌───────────────────────────────────────────┐ │ +│ │ [APPROVAL REQUIRED] code-executor_run_py…│ │ +│ │ │ │ +│ │ ▶ Tool Arguments (1 params) [Edit Args] │ │ +│ │ { │ │ +│ │ "code": "print('Hello, world!')" │ │ +│ │ } │ │ +│ │ │ │ +│ │ Rejection Reason (optional): │ │ +│ │ [________________________] │ │ +│ │ │ │ +│ │ [Reject] [Approve] │ │ +│ └───────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ +``` + +### Large Text Handling + +When editing large inputs (code, markdown, JSON): +- Edit mode shows each argument in a separate textarea +- Each textarea auto-sizes: min 3 rows, max 20 rows +- Container has max-height of 60vh (60% of viewport) +- Scrollable when content exceeds available space +- Full screen width for editing + +Example for large code input: +``` +┌─────────────────────────────────────────────────┐ +│ Edit Mode Active │ +│ ┌───────────────────────────────────────────┐ │ +│ │ code │ │ +│ │ ┌─────────────────────────────────────┐ │ │ +│ │ │ def analyze_data(df): │ │ ← Scrollable +│ │ │ # Process data │ │ area +│ │ │ results = [] │ │ (60vh) +│ │ │ for col in df.columns: │ │ +│ │ │ stats = calculate_stats(c…) │ │ +│ │ │ results.append(stats) │ │ +│ │ │ return results │ │ +│ │ └─────────────────────────────────────┘ │ │ +│ └───────────────────────────────────────────┘ │ +│ [Reject] [Approve (with edits)] │ +└─────────────────────────────────────────────────┘ +``` + +### User Settings + +New toggle in Settings panel: + +``` +┌─────────────────────────────────────────────────┐ +│ Auto-Approve Tool Calls [●────○] │ +│ │ +│ When enabled, tools that don't require admin │ +│ approval will execute automatically without │ +│ prompting. Tools that require admin approval │ +│ will still prompt for confirmation. │ +│ │ +│ ⚠ Currently: You will be prompted to approve │ +│ all tool calls unless admin has disabled │ +│ approval for specific tools. │ +└─────────────────────────────────────────────────┘ +``` + +When auto-approval is enabled: +``` +┌─────────────────────────────────────────────────┐ +│ [S] System │ +│ ┌───────────────────────────────────────────┐ │ +│ │ [APPROVAL REQUIRED] [AUTO-APPROVING...] 
│ │ +│ │ calculator_eval │ │ +│ │ ▶ Tool Arguments (1 params) │ │ +│ └───────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ +``` +(Automatically approves after brief delay) + +## Technical Implementation + +### Backend Logic + +```python +def requires_approval(tool_name, config_manager): + """ + Returns: (needs_approval, allow_edit, admin_required) + + admin_required = True: Admin mandates approval (always enforced) + admin_required = False: User-level approval (can auto-approve) + """ + if tool in admin_config.tools: + if admin_config.tools[tool].require_approval: + return (True, allow_edit, True) # Admin-required + else: + return (False, True, False) # Admin disabled + + if admin_config.require_approval_by_default: + return (True, True, True) # Admin-required by default + + return (True, True, False) # User-level approval +``` + +### Frontend Auto-Approval Logic + +```javascript +// In ToolApprovalMessage component +useEffect(() => { + if (settings?.autoApproveTools && + !message.admin_required && + message.status === 'pending') { + // Auto-approve after brief delay to show message + setTimeout(() => { + sendApprovalResponse({ + tool_call_id: message.tool_call_id, + approved: true, + arguments: message.arguments + }) + }, 100) + } +}, [settings, message]) +``` + +## Configuration Examples + +### Example 1: Admin Requires Specific Tools + +**Backend config** (`config/overrides/tool-approvals.json`): +```json +{ + "require_approval_by_default": false, + "tools": { + "code-executor_run_python": { + "require_approval": true, + "allow_edit": true + }, + "code-executor_run_bash": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +**User experience:** +- Python/Bash execution: ALWAYS prompts (admin-required, no auto-approval) +- Other tools: Prompt by default, can enable auto-approval in settings +- User toggles auto-approval ON: Calculator/other tools auto-approve +- User toggles auto-approval OFF: All tools prompt + +### Example 2: Strict Admin Mode + +**Backend config:** +```json +{ + "require_approval_by_default": true, + "tools": {} +} +``` + +**User experience:** +- ALL tools: ALWAYS prompt (admin-required) +- Auto-approval toggle has no effect (admin overrides) +- Maximum security mode + +### Example 3: User-Controlled Mode + +**Backend config:** +```json +{ + "require_approval_by_default": false, + "tools": {} +} +``` + +**User experience:** +- All tools: Prompt by default +- User can enable auto-approval for all tools +- Maximum user flexibility + +## Workflow Examples + +### Scenario 1: Code Execution with Editing + +1. User: "Write and run a Python script to analyze this data" +2. LLM generates code +3. System shows approval message in chat: + - Yellow badge: "APPROVAL REQUIRED" + - Tool name: "code-executor_run_python" + - Arguments collapsed by default +4. User clicks expand arrow to view code +5. User clicks "Edit Arguments" +6. Large textarea appears (60vh height, scrollable) +7. User modifies code +8. User clicks "Approve (with edits)" +9. Tool executes with edited code +10. Result appears in chat + +### Scenario 2: Auto-Approval Enabled + +1. User enables "Auto-Approve Tool Calls" in Settings +2. User: "Calculate 15% of 250" +3. LLM calls calculator_eval +4. System shows approval message with "AUTO-APPROVING..." badge +5. After 100ms, automatically approves +6. Tool executes +7. Result appears in chat + +### Scenario 3: Admin-Required Tool + +1. User enables auto-approval in Settings +2. 
User: "Run this bash command" +3. LLM calls code-executor_run_bash +4. System shows approval message (admin-required flag set) +5. Auto-approval does NOT activate (admin override) +6. User must manually approve or reject +7. User reviews command and approves +8. Tool executes + +## Benefits + +1. **Inline Experience**: Approval fits naturally in chat flow +2. **Contextual**: See approval request in context of conversation +3. **Flexible**: Large text editing works well for code/markdown +4. **Two-Tier Security**: + - Admins enforce critical tool approval + - Users control convenience vs. security for other tools +5. **Clear Indicators**: Visual feedback for auto-approval status +6. **Backward Compatible**: Works with existing agent modes + +## Migration Notes + +- Modal dialog code removed but not deleted (ToolApprovalDialog.jsx still exists for reference) +- Settings persist in localStorage +- Default setting: auto-approval OFF (safe default) +- Existing admin configs work unchanged +- New `admin_required` flag is backward compatible (defaults to false if missing) diff --git a/docs/tool-approval-system.md b/docs/tool-approval-system.md new file mode 100644 index 0000000..57386af --- /dev/null +++ b/docs/tool-approval-system.md @@ -0,0 +1,308 @@ +# Tool Approval System + +## Overview + +The tool approval system allows administrators to configure which tools require user approval before execution. This provides an additional security layer for potentially sensitive or dangerous operations. + +## Features + +- **Configurable Approval Requirements**: Specify which tools require approval on a per-tool basis +- **Global Default Setting**: Set a default approval requirement for all tools +- **Argument Editing**: Allow users to edit tool arguments before approving execution +- **Timeout Handling**: Automatically reject tool calls that don't receive approval within a configurable timeout period +- **Real-time UI**: Modal dialog shows pending approval requests with full argument details + +## Configuration + +### Configuration Files + +Tool approval settings are managed in JSON configuration files: + +- **Defaults**: `config/defaults/tool-approvals.json` +- **Overrides**: `config/overrides/tool-approvals.json` + +### Configuration Format + +```json +{ + "require_approval_by_default": false, + "tools": { + "server_toolname": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +### Configuration Fields + +- `require_approval_by_default` (boolean): If true, all tools require approval unless explicitly configured otherwise +- `tools` (object): Tool-specific approval settings + - Key: Tool name in the format `server_toolname` (e.g., `code-executor_run_python`) + - Value: Tool approval configuration object: + - `require_approval` (boolean): Whether this tool requires approval + - `allow_edit` (boolean): Whether users can edit the tool arguments before approval + +### Example Configuration + +```json +{ + "require_approval_by_default": false, + "tools": { + "code-executor_run_python": { + "require_approval": true, + "allow_edit": true + }, + "code-executor_run_bash": { + "require_approval": true, + "allow_edit": true + }, + "filesystem_write_file": { + "require_approval": true, + "allow_edit": false + } + } +} +``` + +In this example: +- Most tools don't require approval (default is false) +- Python and Bash code execution require approval with argument editing allowed +- File write operations require approval but don't allow argument editing + +## User Experience + +### Approval Dialog + 
+When a tool requiring approval is called, the user sees a modal dialog with: + +1. **Tool Information**: Tool name and server +2. **Arguments Display**: Full JSON view of the tool arguments +3. **Edit Mode** (if allowed): Ability to modify arguments before approval +4. **Actions**: + - **Approve**: Execute the tool with current (or edited) arguments + - **Reject**: Cancel the tool execution with optional reason + +### Timeout Behavior + +- If the user doesn't respond within 5 minutes (300 seconds), the tool call is automatically rejected +- The timeout error is displayed in the chat interface + +## Backend Architecture + +### Components + +1. **Configuration Manager** (`backend/modules/config/config_manager.py`) + - Loads and validates tool approval configuration + - Provides `ToolApprovalsConfig` model + +2. **Approval Manager** (`backend/application/chat/approval_manager.py`) + - Manages pending approval requests + - Handles approval/rejection responses + - Implements timeout logic using asyncio futures + +3. **Tool Execution** (`backend/application/chat/utilities/tool_utils.py`) + - Checks if tool requires approval before execution + - Sends approval request to frontend + - Waits for user response + - Executes tool with approved (potentially edited) arguments + +4. **WebSocket Handler** (`backend/main.py`) + - Handles `tool_approval_response` messages from frontend + - Routes responses to approval manager + +### Workflow + +``` +1. LLM decides to call a tool +2. Tool execution checks if approval is required +3. If approval required: + a. Send tool_approval_request to frontend + b. Create approval request in approval manager + c. Wait for response (with timeout) + d. Handle approval/rejection +4. Execute tool (if approved) +5. Return result to LLM +``` + +## Frontend Architecture + +### Components + +1. **ToolApprovalDialog** (`frontend/src/components/ToolApprovalDialog.jsx`) + - React component that displays the approval dialog + - Handles argument editing (if allowed) + - Sends approval/rejection response + +2. **WebSocket Handler** (`frontend/src/handlers/chat/websocketHandlers.js`) + - Handles `tool_approval_request` messages from backend + - Updates approval request state + +3. **ChatContext** (`frontend/src/contexts/ChatContext.jsx`) + - Manages approval request state + - Provides methods to send responses and clear requests + +4. **App** (`frontend/src/App.jsx`) + - Renders the approval dialog when request is present + - Handles approval response submission + +### Message Protocol + +#### Backend → Frontend: `tool_approval_request` + +```json +{ + "type": "tool_approval_request", + "tool_call_id": "unique-call-id", + "tool_name": "server_toolname", + "arguments": { + "param1": "value1", + "param2": "value2" + }, + "allow_edit": true +} +``` + +#### Frontend → Backend: `tool_approval_response` + +```json +{ + "type": "tool_approval_response", + "tool_call_id": "unique-call-id", + "approved": true, + "arguments": { + "param1": "edited_value1", + "param2": "value2" + }, + "reason": "Optional rejection reason" +} +``` + +## Testing + +### Running Tests + +```bash +# Test approval manager +python -m pytest backend/tests/test_approval_manager.py -v + +# Test configuration loading +python -m pytest backend/tests/test_config_manager.py -v +``` + +### Test Coverage + +The test suite includes: +- Approval request creation and lifecycle +- Approval/rejection handling +- Timeout behavior +- Manager singleton pattern +- Full approval workflow simulation + +## Security Considerations + +1. 
**Default Deny**: When in doubt, configure tools to require approval +2. **Least Privilege**: Only enable argument editing when necessary +3. **Audit Trail**: All approval decisions are logged +4. **Timeout Protection**: Prevents indefinite hanging on approval requests +5. **User Authentication**: Approval responses are tied to authenticated sessions + +## Future Enhancements + +Potential improvements: +- Role-based approval requirements (different settings per user group) +- Approval history and audit log +- Bulk approval for multiple tool calls +- Approval delegation (assign to another user) +- Custom timeout per tool +- Pre-approved argument patterns (e.g., allow specific file paths) + +## Troubleshooting + +### Tools Not Requiring Approval + +Check: +1. Configuration file is loaded correctly +2. Tool name matches format `server_toolname` +3. `require_approval` is set to `true` in config +4. Configuration cache is cleared if recently changed + +### Approval Dialog Not Appearing + +Check: +1. WebSocket connection is active +2. Frontend has loaded the latest build +3. Browser console for JavaScript errors +4. Backend logs for approval request sending + +### Timeouts + +If approval requests timeout frequently: +1. Check network connectivity +2. Verify user is actively monitoring the chat +3. Consider increasing timeout in `execute_single_tool` (currently 300 seconds) + +## Example Usage Scenarios + +### Scenario 1: Code Execution Review + +**Configuration**: +```json +{ + "tools": { + "code-executor_run_python": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +**User Experience**: +1. User asks: "Create a Python script to analyze this data" +2. LLM generates code and wants to execute it +3. User sees approval dialog with the Python code +4. User reviews code, optionally edits it +5. User approves or rejects execution + +### Scenario 2: File System Protection + +**Configuration**: +```json +{ + "tools": { + "filesystem_delete_file": { + "require_approval": true, + "allow_edit": false + }, + "filesystem_write_file": { + "require_approval": true, + "allow_edit": true + } + } +} +``` + +**User Experience**: +- File deletions always require approval (no editing to prevent accidents) +- File writes require approval and allow editing the content/path + +### Scenario 3: Strict Mode + +**Configuration**: +```json +{ + "require_approval_by_default": true, + "tools": { + "calculator_eval": { + "require_approval": false + } + } +} +``` + +**User Experience**: +- All tools require approval by default +- Only safe tools like calculator are auto-approved +- Maximum control and security diff --git a/fn-calling-plan.md b/fn-calling-plan.md new file mode 100644 index 0000000..62ed116 --- /dev/null +++ b/fn-calling-plan.md @@ -0,0 +1,52 @@ +# Tool Approval: Option B Implementation Plan + +This plan implements Option B (cleaner) and addresses all failing requirements: + +- Only add ToolApprovalConfig entries when a tool is explicitly listed under `require_approval` in `mcp.json`. +- Treat `allow_edit` as moot for approval requirement. UI will always allow editing when approval is requested. +- Add FORCE flag with correct semantics: `FORCE_TOOL_APPROVAL_GLOBALLY=true` forces approval for all tools (admin-required) and overrides everything else. +- Ensure the UI does not display “Thinking…” while waiting on user approval. 
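As a rough illustration of the first bullet above (Option B config building), the approvals map would be derived only from each server's explicit `require_approval` list in `mcp.json`, keyed by the fully-qualified `server_tool` name. This is a sketch only; the helper name and dict shapes are assumptions, not the actual `ConfigManager` code.

```python
# Hedged sketch: build approval entries only from explicit require_approval lists (Option B).
def build_tool_approvals(mcp_servers: dict) -> dict:
    tools = {}
    for server_name, server_cfg in mcp_servers.items():
        for fn in server_cfg.get("require_approval", []):
            # Fully-qualified key, e.g. "calculator_evaluate".
            tools[f"{server_name}_{fn}"] = {"require_approval": True, "allow_edit": True}
        # Tools listed only under "allow_edit" get no entry; allow_edit does not
        # affect whether approval is required, and the UI always permits editing.
    return tools
```

With the `mcp.json` override above, only the two PDF functions would get entries; the calculator server (empty `require_approval`, `allow_edit: ["evaluate"]`) would get none.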
+ +## Backend changes + +1) AppSettings: FORCE flag +- Add `force_tool_approval_globally: bool = Field(default=False, validation_alias="FORCE_TOOL_APPROVAL_GLOBALLY")` to `AppSettings`. + +2) Approval config build (Option B) +- In `ConfigManager.tool_approvals_config`: + - Build `tools` map ONLY for tools that are explicitly in a server’s `require_approval` list (fully-qualified `server_tool`). + - Do NOT add entries for tools that appear only in `allow_edit` lists; `allow_edit` does not affect approval requirement. + +3) requires_approval(tool_name, config_manager) +- Short-circuit: if `config_manager.app_settings.force_tool_approval_globally` is true, return `(True, True, True)` — approval required, admin-enforced, always editable. +- Per-tool: if tool present in `tool_approvals_config.tools` (i.e., explicitly require_approval), return `(True, True, True)`. +- Default: fall back to `tool_approvals_config.require_approval_by_default`: + - If true: `(True, True, True)` (admin-required by default). + - If false: `(True, True, False)` (user-level approval; UI can auto-approve if user enables it). + +Notes: +- “Always editable” is enforced at the UI level; backend can keep sending `allow_edit` for compatibility but it no longer gates editing. + +## Frontend changes + +4) Always-allow editing in approval UI +- In `Message.jsx` ToolApprovalMessage, remove the `message.allow_edit` condition and always render the Edit controls. + +5) Hide Thinking during approval wait +- In `websocketHandlers.js`, when receiving `tool_approval_request`, call `setIsThinking(false)` before adding the approval message. + +## Docs and env + +6) .env example +- Add `FORCE_TOOL_APPROVAL_GLOBALLY=false` with comment: “true = force approval for all tools (admin-required).” + +## Tests (follow-up) + +- Backend unit tests: FORCE short-circuit, per-tool precedence, allow_edit-only doesn’t disable defaults, default true vs false behavior. +- Frontend tests: thinking hidden on approval request; edit controls visible regardless of `allow_edit`. + +## Acceptance criteria + +- With `REQUIRE_TOOL_APPROVAL_BY_DEFAULT=true` (and FORCE=false), any tool call triggers an approval request (admin-required). UI shows approval with editable args; no “Thinking…” while waiting. +- With `FORCE_TOOL_APPROVAL_GLOBALLY=true`, same approval behavior regardless of per-tool/default settings. +- With `REQUIRE_TOOL_APPROVAL_BY_DEFAULT=false` and user auto-approve ON, non-admin-required approvals auto-approve; admin-required approvals still prompt; inline toggle visible. 
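To make the precedence concrete, here is a minimal sketch of `requires_approval` following the plan above; it is illustrative, not the actual code in `tool_utils.py`. Note that the unit tests in this diff pin the fallback/default case to user-level approval (`admin_required=False`) so the inline auto-approve toggle can apply, and the sketch returns that.

```python
# Hedged sketch of the approval precedence; not the repo's actual implementation.
def requires_approval(tool_name, config_manager):
    """Return (needs_approval, allow_edit, admin_required)."""
    if config_manager is None:
        return (True, True, False)  # no config: still ask, user can auto-approve
    try:
        # 1) FORCE flag short-circuits everything (admin-enforced).
        if config_manager.app_settings.force_tool_approval_globally:
            return (True, True, True)
        # 2) Explicit require_approval entry built from mcp.json (admin-enforced).
        approvals = config_manager.tool_approvals_config
        tool_cfg = approvals.tools.get(tool_name)
        if tool_cfg is not None and tool_cfg.require_approval:
            return (True, True, True)
        # 3) Fallback: always ask, but at user level so auto-approve can apply.
        return (True, True, False)
    except Exception:
        return (True, True, False)  # fail safe: prompt, user-level
```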
diff --git a/frontend/e2e/tool-approval.spec.js b/frontend/e2e/tool-approval.spec.js new file mode 100644 index 0000000..23d51d5 --- /dev/null +++ b/frontend/e2e/tool-approval.spec.js @@ -0,0 +1,299 @@ +import { test, expect } from '@playwright/test' + +test.describe('Tool Approval E2E Tests', () => { + test.beforeEach(async ({ page }) => { + await page.goto('/') + await page.waitForLoadState('networkidle') + }) + + test('should display tool approval dialog when tool requires approval', async ({ page }) => { + // Send a message that triggers a tool requiring approval + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Execute a dangerous operation') + await messageInput.press('Enter') + + // Wait for approval dialog to appear + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Verify dialog elements + await expect(page.locator('text=Tool Approval Required')).toBeVisible() + await expect(page.locator('button:has-text("Approve")')).toBeVisible() + await expect(page.locator('button:has-text("Reject")')).toBeVisible() + } + }) + + test('should allow approving a tool call', async ({ page }) => { + // Trigger tool approval + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Run approved tool') + await messageInput.press('Enter') + + // Wait for approval dialog + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Click approve button + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Dialog should close + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + + // Tool should execute (look for success indicators) + await page.waitForTimeout(2000) + } + }) + + test('should allow rejecting a tool call', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Run rejected tool') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Click reject button + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + + // Dialog should close + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + + // Should show rejection message + await page.waitForTimeout(2000) + } + }) + + test('should allow editing tool arguments before approval', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Execute editable tool') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Check if edit button is present + const editButton = page.locator('button:has-text("Edit Mode")').first() + if (await editButton.isVisible()) { + await editButton.click() + + // Switch to edit mode + await expect(page.locator('button:has-text("View Mode")')).toBeVisible() + + // Find and edit an argument (this is generic - actual implementation may vary) + const textarea = page.locator('textarea').first() + if (await textarea.isVisible()) { + await 
textarea.fill('edited_value') + } + + // Approve with edits + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + } + } + }) + + test('should show rejection reason input', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Tool to reject with reason') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Find rejection reason input + const reasonInput = page.locator('input[placeholder*="reason"], input[placeholder*="Rejection"]').first() + if (await reasonInput.isVisible()) { + await reasonInput.fill('This is not safe') + + // Reject with reason + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + + await expect(page.locator('text=Tool Approval Required')).not.toBeVisible({ timeout: 5000 }) + } + } + }) + + test('should handle multiple pending approvals', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + // Trigger multiple tools + await messageInput.fill('Execute multiple tools requiring approval') + await messageInput.press('Enter') + + // Wait for first approval dialog + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Approve first one + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Wait a bit + await page.waitForTimeout(1000) + + // Second approval might appear + const secondApproval = page.locator('text=Tool Approval Required') + if (await secondApproval.isVisible({ timeout: 3000 }).catch(() => false)) { + const approveButton2 = page.locator('button:has-text("Approve")').first() + await approveButton2.click() + } + } + }) + + test('should display tool name and arguments in approval dialog', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Show tool details') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Check for "Arguments" section + await expect(page.locator('text=Arguments')).toBeVisible() + + // Dialog should show some content (tool name or arguments) + const dialogContent = page.locator('[class*="dialog"], [class*="modal"]').first() + if (await dialogContent.isVisible()) { + const content = await dialogContent.textContent() + expect(content.length).toBeGreaterThan(20) + } + } + }) + + test('should close approval dialog after approval', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Quick approval test') + await messageInput.press('Enter') + + await page.waitForSelector('text=Tool Approval Required', { timeout: 10000 }) + + // Verify dialog is visible + const dialog = page.locator('text=Tool Approval Required') + await expect(dialog).toBeVisible() + + // Approve + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + // Dialog should disappear + await 
expect(dialog).not.toBeVisible({ timeout: 5000 }) + } + }) + + test('should handle approval timeout gracefully', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Test timeout scenario') + await messageInput.press('Enter') + + // Wait for approval dialog + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Wait for a long time without approving (simulating timeout) + // In production, this would trigger timeout - here we just verify the UI stays responsive + await page.waitForTimeout(2000) + + // Dialog should still be visible and functional + await expect(page.locator('button:has-text("Approve")')).toBeVisible() + await expect(page.locator('button:has-text("Reject")')).toBeVisible() + } + } + }) + + test('should preserve message history after approval workflow', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + // Send initial message + await messageInput.fill('First message') + await messageInput.press('Enter') + await page.waitForTimeout(1000) + + // Send message that triggers approval + await messageInput.fill('Tool requiring approval') + await messageInput.press('Enter') + + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Approve + const approveButton = page.locator('button:has-text("Approve")').first() + await approveButton.click() + + await page.waitForTimeout(1000) + + // Check that previous messages are still visible + const chatArea = page.locator('[class*="chat"], [class*="message"]').first() + if (await chatArea.isVisible()) { + const content = await chatArea.textContent() + // Should contain indication of message history + expect(content.length).toBeGreaterThan(0) + } + } + } + }) +}) + +test.describe('Tool Approval Configuration E2E Tests', () => { + test('should respect tool approval settings', async ({ page }) => { + await page.goto('/') + await page.waitForLoadState('networkidle') + + // Look for settings panel or configuration + const settingsButton = page.locator('[aria-label*="settings"], button:has-text("Settings")').first() + + if (await settingsButton.isVisible({ timeout: 3000 }).catch(() => false)) { + await settingsButton.click() + await page.waitForTimeout(500) + + // Look for approval-related settings + const approvalSettings = page.locator('text=/approval/i, text=/require.*approval/i') + if (await approvalSettings.isVisible({ timeout: 3000 }).catch(() => false)) { + // Settings panel exists - verify it's accessible + expect(await approvalSettings.isVisible()).toBeTruthy() + } + } + }) + + test('should display appropriate UI for admin-required approvals', async ({ page }) => { + const messageInput = page.locator('textarea[placeholder*="message"], input[type="text"]').first() + + if (await messageInput.isVisible()) { + await messageInput.fill('Admin-restricted tool') + await messageInput.press('Enter') + + const hasDialog = await page.waitForSelector('text=Tool Approval Required', { + timeout: 10000 + }).catch(() => null) + + if (hasDialog) { + // Check for admin-required indicator (this is implementation-specific) + const dialogText = await page.locator('text=Tool Approval Required').textContent() + 
expect(dialogText).toBeTruthy() + + // Close the dialog + const rejectButton = page.locator('button:has-text("Reject")').first() + await rejectButton.click() + } + } + }) +}) diff --git a/frontend/src/components/Message.jsx b/frontend/src/components/Message.jsx index 23044a2..f315419 100644 --- a/frontend/src/components/Message.jsx +++ b/frontend/src/components/Message.jsx @@ -496,6 +496,221 @@ const downloadReturnedFile = (filename, base64Data) => { } } +// Tool Approval Message Component +const ToolApprovalMessage = ({ message }) => { + const { sendApprovalResponse, settings, updateSettings } = useChat() + const [isEditing, setIsEditing] = useState(false) + const [editedArgs, setEditedArgs] = useState(message.arguments) + const [reason, setReason] = useState('') + const [isExpanded, setIsExpanded] = useState(true) + + // Auto-approve if user has the setting enabled and this is not an admin-required approval + useEffect(() => { + if (settings?.autoApproveTools && !message.admin_required && message.status === 'pending') { + // Auto-approve after a brief delay to show the message + const timer = setTimeout(() => { + sendApprovalResponse({ + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: true, + arguments: message.arguments, + }) + }, 100) + return () => clearTimeout(timer) + } + }, [settings?.autoApproveTools, message.admin_required, message.status, message.tool_call_id, message.arguments, sendApprovalResponse]) + + const handleApprove = () => { + console.log('[ToolApproval] Approve button clicked', { + tool_call_id: message.tool_call_id, + tool_name: message.tool_name, + isEditing, + arguments: isEditing ? editedArgs : message.arguments + }) + + const response = { + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: true, + arguments: isEditing ? editedArgs : message.arguments, + } + + console.log('[ToolApproval] Sending approval response:', JSON.stringify(response, null, 2)) + sendApprovalResponse(response) + console.log('[ToolApproval] Approval response sent') + } + + const handleReject = () => { + console.log('[ToolApproval] Reject button clicked', { + tool_call_id: message.tool_call_id, + tool_name: message.tool_name, + reason + }) + + const response = { + type: 'tool_approval_response', + tool_call_id: message.tool_call_id, + approved: false, + reason: reason || 'User rejected the tool call', + } + + console.log('[ToolApproval] Sending rejection response:', JSON.stringify(response, null, 2)) + sendApprovalResponse(response) + console.log('[ToolApproval] Rejection response sent') + } + + const handleArgumentChange = (key, value) => { + setEditedArgs(prev => ({ + ...prev, + [key]: value + })) + } + + // Don't show if already responded + if (message.status === 'approved' || message.status === 'rejected') { + return ( +
+ {JSON.stringify(message.arguments, null, 2)}
+
+ + When enabled, tools that don't require admin approval will execute automatically without prompting. + Tools that require admin approval will still prompt for confirmation. +
+ {!ctxSettings?.autoApproveTools && ( ++ ⚠ Currently: You will be prompted to approve all tool calls unless admin has disabled approval for specific tools. +
+ )} ++ This tool requires your approval before execution. Please review the arguments and choose to approve or reject. +
+
+ {JSON.stringify(request.arguments, null, 2)}
+
+ No arguments provided
+
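The backend architecture section above describes the approval manager as tracking pending requests and implementing the timeout with asyncio futures. A minimal sketch of that pattern follows; the class, method, and field names are assumptions for illustration, not the actual `approval_manager.py` API.

```python
import asyncio
from typing import Any, Dict, Optional


class ApprovalManagerSketch:
    """Illustrative only: pending approvals keyed by tool_call_id, resolved via futures."""

    def __init__(self) -> None:
        self._pending: Dict[str, asyncio.Future] = {}

    async def wait_for_approval(self, tool_call_id: str, timeout: float = 300.0) -> Dict[str, Any]:
        # Create a future that the WebSocket handler can resolve later.
        future: asyncio.Future = asyncio.get_running_loop().create_future()
        self._pending[tool_call_id] = future
        try:
            # Raises asyncio.TimeoutError if the user never responds.
            return await asyncio.wait_for(future, timeout=timeout)
        finally:
            self._pending.pop(tool_call_id, None)

    def resolve(self, tool_call_id: str, approved: bool,
                arguments: Optional[Dict[str, Any]] = None, reason: str = "") -> None:
        # Called when a tool_approval_response message arrives from the frontend.
        future = self._pending.get(tool_call_id)
        if future is not None and not future.done():
            future.set_result({"approved": approved, "arguments": arguments, "reason": reason})
```

A caller in the tool-execution path would await `wait_for_approval(...)` and surface `asyncio.TimeoutError` as the "timed out waiting for user approval" error described in the docs above.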