diff --git a/README.md b/README.md index 9b1b9ad..4337444 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,44 @@ # ➡️ browser-use mcp server -[browser-use](https://github.com/browser-use/browser-use) MCP Server with SSE -transport +[![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/cobrowser.svg?style=social&label=Follow%20%40cobrowser)](https://x.com/cobrowser) +[![PyPI version](https://badge.fury.io/py/browser-use-mcp-server.svg)](https://pypi.org/project/browser-use-mcp-server/) -### requirements +[browser-use](https://github.com/browser-use/browser-use) MCP Server with SSE + +stdio transport -- uv +### Requirements + +- [uv](https://github.com/astral-sh/uv) +- [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy) (for stdio) ``` +# 1. Install uv curl -LsSf https://astral.sh/uv/install.sh | sh +# 2. Install mcp-proxy pypi package via uv +uv tool install mcp-proxy ``` -### quickstart +### Quickstart -``` +Starting in SSE mode: + +```bash uv sync uv pip install playwright uv run playwright install --with-deps --no-shell chromium uv run server --port 8000 ``` +With stdio mode: + +```bash +# Run with stdio mode and specify a proxy port +uv run server --stdio --proxy-port 8001 + +# Or just stdio mode (random proxy port) +uv run server --stdio +``` + - the .env requires the following: ``` @@ -27,12 +46,9 @@ OPENAI_API_KEY=[your api key] CHROME_PATH=[only change this if you have a custom chrome build] ``` -- we will be adding support for other LLM providers to power browser-use - (claude, grok, bedrock, etc) +When building the docker image, you can use Docker secrets for VNC password: -when building the docker image, you can use Docker secrets for VNC password: - -``` +```bash # With Docker secrets (recommended for production) echo "your-secure-password" > vnc_password.txt docker run -v $(pwd)/vnc_password.txt:/run/secrets/vnc_password your-image-name @@ -41,24 +57,25 @@ docker run -v $(pwd)/vnc_password.txt:/run/secrets/vnc_password your-image-name 
docker build . ``` -### tools +### Tools - [x] SSE transport +- [x] stdio transport (via mcp-proxy) - [x] browser_use - Initiates browser tasks with URL and action - [x] browser_get_result - Retrieves results of async browser tasks -### supported clients +### Supported Clients - cursor.ai - claude desktop - claude code -- windsurf ([windsurf](https://codeium.com/windsurf) doesn't support SSE - yet) +- windsurf ([windsurf](https://codeium.com/windsurf) doesn't support SSE, only + stdio) -### usage +#### SSE Mode -after running the server, add http://localhost:8000/sse to your client UI, or in -a mcp.json file: +After running the server in SSE mode, add http://localhost:8000/sse to your +client UI, or in a mcp.json file: ```json { @@ -70,28 +87,66 @@ a mcp.json file: } ``` -#### cursor +#### stdio Mode + +When running in stdio mode, the server will automatically start both the SSE +server and mcp-proxy. The proxy handles the conversion between stdio and SSE +protocols. No additional configuration is needed - just start your client and it +will communicate with the server through stdin/stdout. + +Install the cli + +```bash +uv pip install -e . 
+``` + +And then e.g., in Windsurf, paste: + +```json +{ + "mcpServers": { + "browser-server": { + "command": "browser-use-mcp-server", + "args": [ + "run", + "server", + "--port", + "8000", + "--stdio", + "--proxy-port", + "9000" + ] + } + } +} +``` + +### Client Configuration Paths + +#### Cursor - `./.cursor/mcp.json` -#### windsurf +#### Windsurf - `~/.codeium/windsurf/mcp_config.json` -#### claude +#### Claude - `~/Library/Application Support/Claude/claude_desktop_config.json` - `%APPDATA%\Claude\claude_desktop_config.json` -then try asking your LLM the following: +### Example Usage + +Try asking your LLM the following: `open https://news.ycombinator.com and return the top ranked article` -### help +### Help for issues or interest reach out @ https://cobrowser.xyz -# stars +# Stars diff --git a/pyproject.toml b/pyproject.toml index 9679c70..7abfc66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,3 +73,6 @@ browser-use-mcp-server = "browser_use_mcp_server.cli:cli" [tool.hatch.build] packages = ["src/browser_use_mcp_server"] + +[tool.hatch.build.targets.wheel] +packages = ["src/browser_use_mcp_server"] diff --git a/server/server.py b/server/server.py index 301e585..8bc5541 100644 --- a/server/server.py +++ b/server/server.py @@ -18,6 +18,7 @@ import uuid from datetime import datetime from typing import Any, Dict, Optional, Tuple, Union +import time # Third-party imports import click @@ -602,6 +603,12 @@ async def read_resource(uri: str) -> list[types.ResourceContents]: @click.command() @click.option("--port", default=8000, help="Port to listen on for SSE") +@click.option( + "--proxy-port", + default=None, + type=int, + help="Port for the proxy to listen on. 
If specified, enables proxy mode.", +) @click.option("--chrome-path", default=None, help="Path to Chrome executable") @click.option( "--window-width", @@ -619,13 +626,21 @@ async def read_resource(uri: str) -> list[types.ResourceContents]: default=CONFIG["DEFAULT_TASK_EXPIRY_MINUTES"], help="Minutes after which tasks are considered expired", ) +@click.option( + "--stdio", + is_flag=True, + default=False, + help="Enable stdio mode. If specified, enables proxy mode.", +) def main( port: int, + proxy_port: Optional[int], chrome_path: str, window_width: int, window_height: int, locale: str, task_expiry_minutes: int, + stdio: bool, ) -> int: """ Run the browser-use MCP server. @@ -633,13 +648,19 @@ def main( This function initializes the MCP server and runs it with the SSE transport. Each browser task will create its own isolated browser context. + The server can run in two modes: + 1. Direct SSE mode (default): Just runs the SSE server + 2. Proxy mode (enabled by --stdio or --proxy-port): Runs both SSE server and mcp-proxy + Args: port: Port to listen on for SSE + proxy_port: Port for the proxy to listen on. If specified, enables proxy mode. chrome_path: Path to Chrome executable window_width: Browser window width window_height: Browser window height locale: Browser locale task_expiry_minutes: Minutes after which tasks are considered expired + stdio: Enable stdio mode. If specified, enables proxy mode. 
Returns: Exit code (0 for success) @@ -670,9 +691,12 @@ def main( from starlette.applications import Starlette from starlette.routing import Mount, Route import uvicorn + import asyncio + import threading sse = SseServerTransport("/messages/") + # Create the Starlette app for SSE async def handle_sse(request): """Handle SSE connections from clients.""" try: @@ -694,7 +718,7 @@ async def handle_sse(request): ], ) - # Add a startup event + # Add startup event @starlette_app.on_event("startup") async def startup_event(): """Initialize the server on startup.""" @@ -719,8 +743,49 @@ async def startup_event(): asyncio.create_task(app.cleanup_old_tasks()) logger.info("Task cleanup process scheduled") - # Run uvicorn server - uvicorn.run(starlette_app, host="0.0.0.0", port=port) + # Function to run uvicorn in a separate thread + def run_uvicorn(): + uvicorn.run(starlette_app, host="0.0.0.0", port=port) + + # If proxy mode is enabled, run both the SSE server and mcp-proxy + # NOTE(review): --proxy-port alone does not enable proxy mode here, + # although its help text says it does - confirm which is intended. + if stdio: + import subprocess + + # Start the SSE server in a separate thread + sse_thread = threading.Thread(target=run_uvicorn) + sse_thread.daemon = True + sse_thread.start() + + # Give the SSE server a moment to start + time.sleep(1) + + proxy_cmd = [ + "mcp-proxy", + f"http://localhost:{port}/sse", + "--allow-origin", + "*", + ] + # Omit --sse-port when no port was given; str(None) is not a valid port. + if proxy_port is not None: + proxy_cmd.extend(["--sse-port", str(proxy_port)]) + + logger.info(f"Running proxy command: {' '.join(proxy_cmd)}") + logger.info( + f"SSE server running on port {port}, proxy running on port {proxy_port}" + ) + + try: + with subprocess.Popen(proxy_cmd) as proxy_process: + proxy_process.wait() + except Exception as e: + logger.error(f"Error starting mcp-proxy: {str(e)}") + logger.error(f"Command was: {' '.join(proxy_cmd)}") + return 1 + else: + logger.info(f"Running in direct SSE mode on port {port}") + run_uvicorn() return 0 diff --git a/src/browser_use_mcp_server/__init__.py b/src/browser_use_mcp_server/__init__.py new file mode 100644 index 0000000..853878c --- /dev/null +++ 
b/src/browser_use_mcp_server/__init__.py @@ -0,0 +1,8 @@ +""" +Browser-Use MCP Server Package + +This package provides a Model-Control-Protocol (MCP) server for browser automation +using the browser_use library. +""" + +__version__ = "0.1.3" diff --git a/src/browser_use_mcp_server/cli.py b/src/browser_use_mcp_server/cli.py new file mode 100644 index 0000000..5642b2b --- /dev/null +++ b/src/browser_use_mcp_server/cli.py @@ -0,0 +1,143 @@ +""" +Command line interface for browser-use-mcp-server. + +This module provides a command-line interface for starting the browser-use MCP server. +It wraps the existing server functionality with a CLI. +""" + +import os +import sys +import click +import importlib.util + + +def import_server_module(): + """ + Import the server module from the server directory. + This allows us to reuse the existing server code. + """ + # Add the root directory to the Python path to find server module + root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + sys.path.insert(0, root_dir) + + try: + # Try to import the server module + import server.server + + return server.server + except ImportError: + # If running as an installed package, the server module might be elsewhere + try: + # Look in common locations + if os.path.exists(os.path.join(root_dir, "server", "server.py")): + spec = importlib.util.spec_from_file_location( + "server.server", os.path.join(root_dir, "server", "server.py") + ) + server_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(server_module) + return server_module + except Exception as e: + raise ImportError(f"Could not import server module: {e}") + + raise ImportError( + "Could not find server module. Make sure it's installed correctly." 
+ ) + + +@click.group() +def cli(): + """Browser-use MCP server command line interface.""" + pass + + +@cli.command() +@click.argument("subcommand") +@click.option("--port", default=8000, help="Port to listen on for SSE") +@click.option( + "--proxy-port", + default=None, + type=int, + help="Port for the proxy to listen on (when using stdio mode)", +) +@click.option("--chrome-path", default=None, help="Path to Chrome executable") +@click.option("--window-width", default=1280, help="Browser window width") +@click.option("--window-height", default=1100, help="Browser window height") +@click.option("--locale", default="en-US", help="Browser locale") +@click.option( + "--task-expiry-minutes", + default=60, + help="Minutes after which tasks are considered expired", +) +@click.option( + "--stdio", is_flag=True, default=False, help="Enable stdio mode with mcp-proxy" +) +def run( + subcommand, + port, + proxy_port, + chrome_path, + window_width, + window_height, + locale, + task_expiry_minutes, + stdio, +): + """Run the browser-use MCP server. + + SUBCOMMAND: should be 'server' + """ + if subcommand != "server": + click.echo( + f"Unknown subcommand: {subcommand}. 
Only 'server' is supported.", err=True + ) + sys.exit(1) + + try: + # Import the server module + server_module = import_server_module() + + # We need to construct the command line arguments to pass to the server's Click command + old_argv = sys.argv.copy() + + # Build a new argument list for the server command + new_argv = [ + "server", # Program name + "--port", + str(port), + ] + + if chrome_path: + new_argv.extend(["--chrome-path", chrome_path]) + + if proxy_port is not None: + new_argv.extend(["--proxy-port", str(proxy_port)]) + + new_argv.extend(["--window-width", str(window_width)]) + new_argv.extend(["--window-height", str(window_height)]) + new_argv.extend(["--locale", locale]) + new_argv.extend(["--task-expiry-minutes", str(task_expiry_minutes)]) + + if stdio: + new_argv.append("--stdio") + + # Replace sys.argv temporarily + sys.argv = new_argv + + # Run the server's command directly + try: + return server_module.main() + finally: + # Restore original sys.argv + sys.argv = old_argv + + except Exception as e: + import traceback + + click.echo(f"Error starting server: {e}", err=True) + click.echo("Detailed error:", err=True) + click.echo(traceback.format_exc(), err=True) + sys.exit(1) + + +if __name__ == "__main__": + cli() diff --git a/src/browser_use_mcp_server/server.py b/src/browser_use_mcp_server/server.py new file mode 100644 index 0000000..a101b80 --- /dev/null +++ b/src/browser_use_mcp_server/server.py @@ -0,0 +1,36 @@ +""" +Server module that re-exports the main server module. + +This provides a clean import path for the CLI and other code. 
+""" + +import os +import sys + +# Add the root directory to the Python path first so the server module can be found +root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +sys.path.insert(0, root_dir) +from server.server import ( # noqa: E402 + Server, + main, + create_browser_context_for_task, + run_browser_task_async, + cleanup_old_tasks, + create_mcp_server, + init_configuration, + CONFIG, + task_store, +) + +# Re-export everything we imported +__all__ = [ + "Server", + "main", + "create_browser_context_for_task", + "run_browser_task_async", + "cleanup_old_tasks", + "create_mcp_server", + "init_configuration", + "CONFIG", + "task_store", +]