diff --git a/apps/docs/content/docs/en/tools/browser_use.mdx b/apps/docs/content/docs/en/tools/browser_use.mdx index c8e5df7ec5e..26c1bc1e503 100644 --- a/apps/docs/content/docs/en/tools/browser_use.mdx +++ b/apps/docs/content/docs/en/tools/browser_use.mdx @@ -42,9 +42,18 @@ Runs a browser automation task using BrowserUse | Parameter | Type | Required | Description | | --------- | ---- | -------- | ----------- | | `task` | string | Yes | What should the browser agent do | -| `variables` | json | No | Optional variables to use as secrets \(format: \{key: value\}\) | -| `save_browser_data` | boolean | No | Whether to save browser data | -| `model` | string | No | LLM model to use \(default: gpt-4o\) | +| `startUrl` | string | No | Initial page URL to start the agent on \(reduces navigation steps\) | +| `variables` | json | No | Optional secrets injected into the task \(format: \{key: value\}\) | +| `allowedDomains` | string | No | Comma-separated list of domains the agent is allowed to visit | +| `maxSteps` | number | No | Maximum number of steps the agent may take \(default 100, max 10000\) | +| `flashMode` | boolean | No | Enable flash mode \(faster, less careful navigation\) | +| `thinking` | boolean | No | Enable extended reasoning mode | +| `vision` | string | No | Vision capability: "true", "false", or "auto" | +| `systemPromptExtension` | string | No | Optional text appended to the agent system prompt \(max 2000 chars\) | +| `structuredOutput` | string | No | Stringified JSON schema for the structured output | +| `highlightElements` | boolean | No | Highlight interactive elements on the page \(default true\) | +| `metadata` | json | No | Custom key-value metadata \(up to 10 pairs\) for tracking | +| `model` | string | No | LLM model identifier \(e.g. browser-use-2.0\) | | `apiKey` | string | Yes | API key for BrowserUse API | | `profile_id` | string | No | Browser profile ID for persistent sessions \(cookies, login state\) | @@ -54,7 +63,18 @@ Runs a browser automation task using BrowserUse | --------- | ---- | ----------- | | `id` | string | Task execution identifier | | `success` | boolean | Task completion status | -| `output` | json | Task output data | -| `steps` | json | Execution steps taken | +| `output` | json | Final task output \(string or structured\) | +| `steps` | array | Steps the agent executed \(number, memory, nextGoal, url, actions, duration\) | +| ↳ `number` | number | Sequential step number | +| ↳ `memory` | string | Agent memory at this step | +| ↳ `evaluationPreviousGoal` | string | Evaluation of previous goal completion | +| ↳ `nextGoal` | string | Goal for the next step | +| ↳ `url` | string | Current URL of the browser | +| ↳ `screenshotUrl` | string | Optional screenshot URL | +| ↳ `actions` | array | Stringified JSON actions performed | +| ↳ `duration` | number | Step duration in seconds | +| `liveUrl` | string | Embeddable live browser session URL \(active during execution\) | +| `shareUrl` | string | Public shareable URL for the recorded session \(post-run\) | +| `sessionId` | string | Browser Use session identifier | diff --git a/apps/docs/content/docs/en/tools/stagehand.mdx b/apps/docs/content/docs/en/tools/stagehand.mdx index d03ba626a77..c83d0cf5431 100644 --- a/apps/docs/content/docs/en/tools/stagehand.mdx +++ b/apps/docs/content/docs/en/tools/stagehand.mdx @@ -72,6 +72,8 @@ Run an autonomous web agent to complete tasks and extract structured data | `provider` | string | No | AI provider to use: openai or anthropic | | `apiKey` | string | Yes | API key for the selected provider | | `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return | +| `mode` | string | No | Agent tool mode: dom \(default\), hybrid, or cua | +| `maxSteps` | number | No | Maximum agent steps \(default 20, max 200\) | #### Output @@ -92,5 +94,7 @@ Run an autonomous web agent to complete tasks and extract structured data | ↳ `timestamp` | number | Unix timestamp when the action was performed | | ↳ `timeMs` | number | Time in milliseconds \(for wait actions\) | | `structuredOutput` | object | Extracted data matching the provided output schema | +| `liveViewUrl` | string | Embeddable Browserbase live view URL \(active only while the session is running\) | +| `sessionId` | string | Browserbase session identifier | diff --git a/apps/sim/app/api/tools/stagehand/agent/route.ts b/apps/sim/app/api/tools/stagehand/agent/route.ts index afc32d5bc6a..3c17d60eeb4 100644 --- a/apps/sim/app/api/tools/stagehand/agent/route.ts +++ b/apps/sim/app/api/tools/stagehand/agent/route.ts @@ -22,6 +22,8 @@ const requestSchema = z.object({ variables: z.any(), provider: z.enum(['openai', 'anthropic']).optional().default('openai'), apiKey: z.string(), + mode: z.enum(['dom', 'hybrid', 'cua']).optional().default('dom'), + maxSteps: z.number().int().min(1).max(200).optional().default(20), }) /** @@ -121,7 +123,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } const params = validationResult.data - const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey } = params + const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey, mode, maxSteps } = params const variablesObject = processVariables(params.variables) const startUrl = normalizeUrl(rawStartUrl) @@ -165,8 +167,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return NextResponse.json({ error: 'Invalid Anthropic API key format' }, { status: 400 }) } - const modelName = - provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5' + const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5' + + let sessionId: string | null = null + let liveViewUrl: string | null = null try { logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName }) @@ -190,6 +194,35 @@ export const POST = withRouteHandler(async (request: NextRequest) => { await stagehand.init() logger.info('Stagehand initialized successfully') + sessionId = stagehand.browserbaseSessionID ?? null + if (sessionId) { + try { + const debugResponse = await fetch( + `https://api.browserbase.com/v1/sessions/${sessionId}/debug`, + { + method: 'GET', + headers: { + 'X-BB-API-Key': BROWSERBASE_API_KEY, + }, + } + ) + if (debugResponse.ok) { + const debugData = (await debugResponse.json()) as { + debuggerFullscreenUrl?: string + debuggerUrl?: string + } + liveViewUrl = debugData.debuggerFullscreenUrl ?? debugData.debuggerUrl ?? null + if (liveViewUrl) { + logger.info(`Browserbase live view URL: ${liveViewUrl}`) + } + } else { + logger.warn(`Failed to fetch Browserbase debug URL: ${debugResponse.statusText}`) + } + } catch (debugError) { + logger.warn('Error fetching Browserbase debug URL', { error: debugError }) + } + } + const page = stagehand.context.pages()[0] logger.info(`Navigating to ${startUrl}`) await page.goto(startUrl, { waitUntil: 'networkidle' }) @@ -223,13 +256,14 @@ export const POST = withRouteHandler(async (request: NextRequest) => { apiKey: apiKey, }, systemPrompt: agentInstructions, + mode, }) - logger.info('Executing agent task', { task: taskWithVariables }) + logger.info('Executing agent task', { task: taskWithVariables, mode, maxSteps }) const agentExecutionResult = await agent.execute({ instruction: taskWithVariables, - maxSteps: 20, + maxSteps, }) const agentResult = { @@ -293,6 +327,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return NextResponse.json({ agentResult, structuredOutput, + liveViewUrl, + sessionId, }) } catch (error) { logger.error('Stagehand agent execution error', { @@ -327,6 +363,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { { error: errorMessage, details: errorDetails, + liveViewUrl, + sessionId, }, { status: 500 } ) diff --git a/apps/sim/app/api/tools/stagehand/extract/route.ts b/apps/sim/app/api/tools/stagehand/extract/route.ts index c39f5c78534..1ec99a182d9 100644 --- a/apps/sim/app/api/tools/stagehand/extract/route.ts +++ b/apps/sim/app/api/tools/stagehand/extract/route.ts @@ -17,8 +17,6 @@ const BROWSERBASE_PROJECT_ID = env.BROWSERBASE_PROJECT_ID const requestSchema = z.object({ instruction: z.string(), schema: z.record(z.any()), - useTextExtract: z.boolean().optional().default(false), - selector: z.string().nullable().optional(), provider: z.enum(['openai', 'anthropic']).optional().default('openai'), apiKey: z.string(), url: z.string().url(), @@ -51,7 +49,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } const params = validationResult.data - const { url: rawUrl, instruction, selector, provider, apiKey, schema } = params + const { url: rawUrl, instruction, provider, apiKey, schema } = params const url = normalizeUrl(rawUrl) const urlValidation = await validateUrlWithDNS(url, 'url') if (!urlValidation.isValid) { @@ -101,8 +99,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } try { - const modelName = - provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5' + const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5' logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName }) @@ -162,14 +159,11 @@ export const POST = withRouteHandler(async (request: NextRequest) => { logger.info('Calling stagehand.extract with options', { hasInstruction: !!instruction, hasSchema: !!zodSchema, - hasSelector: !!selector, }) let extractedData if (zodSchema) { - extractedData = await stagehand.extract(instruction, zodSchema, { - selector: selector || undefined, - }) + extractedData = await stagehand.extract(instruction, zodSchema) } else { extractedData = await stagehand.extract(instruction) } diff --git a/apps/sim/blocks/blocks/browser_use.ts b/apps/sim/blocks/blocks/browser_use.ts index 1e059cf25c1..1d8c8c28999 100644 --- a/apps/sim/blocks/blocks/browser_use.ts +++ b/apps/sim/blocks/blocks/browser_use.ts @@ -23,6 +23,12 @@ export const BrowserUseBlock: BlockConfig = { placeholder: 'Describe what the browser agent should do...', required: true, }, + { + id: 'startUrl', + title: 'Start URL', + type: 'short-input', + placeholder: 'https://example.com (optional starting URL)', + }, { id: 'variables', title: 'Variables (Secrets)', @@ -51,22 +57,85 @@ export const BrowserUseBlock: BlockConfig = { { label: 'Claude 3.7 Sonnet', id: 'claude-3-7-sonnet-20250219' }, { label: 'Claude Sonnet 4', id: 'claude-sonnet-4-20250514' }, { label: 'Claude Sonnet 4.5', id: 'claude-sonnet-4-5-20250929' }, + { label: 'Claude Sonnet 4.6', id: 'claude-sonnet-4-6' }, { label: 'Claude Opus 4.5', id: 'claude-opus-4-5-20251101' }, { label: 'Llama 4 Maverick', id: 'llama-4-maverick-17b-128e-instruct' }, ], }, - { - id: 'save_browser_data', - title: 'Save Browser Data', - type: 'switch', - placeholder: 'Save browser data', - }, { id: 'profile_id', title: 'Profile ID', type: 'short-input', placeholder: 'Enter browser profile ID (optional)', }, + { + id: 'maxSteps', + title: 'Max Steps', + type: 'short-input', + placeholder: '100', + mode: 'advanced', + }, + { + id: 'allowedDomains', + title: 'Allowed Domains', + type: 'short-input', + placeholder: 'example.com, docs.example.com', + mode: 'advanced', + }, + { + id: 'vision', + title: 'Vision', + type: 'dropdown', + options: [ + { label: 'Auto (default)', id: 'auto' }, + { label: 'Enabled', id: 'true' }, + { label: 'Disabled', id: 'false' }, + ], + mode: 'advanced', + }, + { + id: 'flashMode', + title: 'Flash Mode', + type: 'switch', + placeholder: 'Faster but less careful navigation', + mode: 'advanced', + }, + { + id: 'thinking', + title: 'Thinking', + type: 'switch', + placeholder: 'Enable extended reasoning', + mode: 'advanced', + }, + { + id: 'highlightElements', + title: 'Highlight Elements', + type: 'switch', + placeholder: 'Visually mark interactive elements', + mode: 'advanced', + }, + { + id: 'systemPromptExtension', + title: 'System Prompt Extension', + type: 'long-input', + placeholder: 'Append custom instructions to the agent system prompt (max 2000 chars)', + mode: 'advanced', + }, + { + id: 'structuredOutput', + title: 'Structured Output Schema', + type: 'code', + language: 'json', + placeholder: 'Stringified JSON schema for structured output', + mode: 'advanced', + }, + { + id: 'metadata', + title: 'Metadata', + type: 'table', + columns: ['Key', 'Value'], + mode: 'advanced', + }, { id: 'apiKey', title: 'API Key', @@ -78,19 +147,68 @@ export const BrowserUseBlock: BlockConfig = { ], tools: { access: ['browser_use_run_task'], + config: { + tool: () => 'browser_use_run_task', + params: (params) => { + const next: Record = { ...params } + if (typeof next.maxSteps === 'string') { + const trimmed = next.maxSteps.trim() + if (trimmed === '') { + next.maxSteps = undefined + } else { + const n = Number(trimmed) + next.maxSteps = Number.isFinite(n) ? n : undefined + } + } + if (next.vision === 'true') next.vision = true + else if (next.vision === 'false') next.vision = false + if (next.metadata && Array.isArray(next.metadata)) { + const obj: Record = {} + for (const row of next.metadata as Array>) { + const key = row?.cells?.Key ?? row?.Key + const value = row?.cells?.Value ?? row?.Value + if (key) obj[key] = String(value ?? '') + } + next.metadata = obj + } + return next + }, + }, }, inputs: { task: { type: 'string', description: 'Browser automation task' }, + startUrl: { type: 'string', description: 'Starting URL for the agent' }, apiKey: { type: 'string', description: 'BrowserUse API key' }, - variables: { type: 'json', description: 'Task variables' }, - model: { type: 'string', description: 'AI model to use' }, - save_browser_data: { type: 'boolean', description: 'Save browser data' }, + variables: { type: 'json', description: 'Secrets to inject into the task' }, + model: { type: 'string', description: 'LLM model to use' }, profile_id: { type: 'string', description: 'Browser profile ID for persistent sessions' }, + maxSteps: { type: 'number', description: 'Maximum agent steps' }, + allowedDomains: { type: 'string', description: 'Comma-separated allowed domains' }, + vision: { type: 'string', description: 'Vision capability (auto / true / false)' }, + flashMode: { type: 'boolean', description: 'Enable flash mode' }, + thinking: { type: 'boolean', description: 'Enable extended reasoning' }, + highlightElements: { type: 'boolean', description: 'Highlight interactive elements' }, + systemPromptExtension: { type: 'string', description: 'Custom system prompt extension' }, + structuredOutput: { type: 'string', description: 'Stringified JSON schema' }, + metadata: { type: 'json', description: 'Custom key-value metadata' }, }, outputs: { id: { type: 'string', description: 'Task execution identifier' }, success: { type: 'boolean', description: 'Task completion status' }, - output: { type: 'json', description: 'Task output data' }, - steps: { type: 'json', description: 'Execution steps taken' }, + output: { type: 'json', description: 'Final task output (string or structured)' }, + steps: { + type: 'json', + description: + 'Steps the agent executed (number, memory, evaluationPreviousGoal, nextGoal, url, screenshotUrl, actions, duration)', + }, + liveUrl: { + type: 'string', + description: 'Embeddable live browser session URL (active during execution)', + }, + shareUrl: { + type: 'string', + description: 'Public shareable URL for the session (post-run)', + }, + sessionId: { type: 'string', description: 'Browser Use session identifier' }, }, } diff --git a/apps/sim/blocks/blocks/stagehand.ts b/apps/sim/blocks/blocks/stagehand.ts index 385328c1944..6c7b0c11e3a 100644 --- a/apps/sim/blocks/blocks/stagehand.ts +++ b/apps/sim/blocks/blocks/stagehand.ts @@ -1,28 +1,6 @@ import { StagehandIcon } from '@/components/icons' import { AuthMode, type BlockConfig, IntegrationType } from '@/blocks/types' -import type { ToolResponse } from '@/tools/types' - -export interface StagehandExtractResponse extends ToolResponse { - output: { - data: Record - } -} - -export interface StagehandAgentResponse extends ToolResponse { - output: { - agentResult: { - success: boolean - completed: boolean - message: string - actions?: Array<{ - type: string - description: string - result?: string - }> - } - structuredOutput?: Record - } -} +import type { StagehandAgentResponse, StagehandExtractResponse } from '@/tools/stagehand/types' export type StagehandResponse = StagehandExtractResponse | StagehandAgentResponse @@ -345,6 +323,27 @@ Example 3 (Data Collection): generationType: 'json-schema', }, }, + { + id: 'mode', + title: 'Agent Mode', + type: 'dropdown', + options: [ + { label: 'DOM (default)', id: 'dom' }, + { label: 'Hybrid', id: 'hybrid' }, + { label: 'CUA', id: 'cua' }, + ], + value: () => 'dom', + condition: { field: 'operation', value: 'agent' }, + mode: 'advanced', + }, + { + id: 'maxSteps', + title: 'Max Steps', + type: 'short-input', + placeholder: '20', + condition: { field: 'operation', value: 'agent' }, + mode: 'advanced', + }, // Shared API key field { id: 'apiKey', @@ -361,6 +360,19 @@ Example 3 (Data Collection): tool: (params) => { return params.operation === 'agent' ? 'stagehand_agent' : 'stagehand_extract' }, + params: (params) => { + const next: Record = { ...params } + if (typeof next.maxSteps === 'string') { + const trimmed = next.maxSteps.trim() + if (trimmed === '') { + next.maxSteps = undefined + } else { + const n = Number(trimmed) + next.maxSteps = Number.isFinite(n) ? n : undefined + } + } + return next + }, }, }, inputs: { @@ -376,6 +388,8 @@ Example 3 (Data Collection): task: { type: 'string', description: 'Task description (agent operation)' }, variables: { type: 'json', description: 'Task variables (agent operation)' }, outputSchema: { type: 'json', description: 'Output schema (agent operation)' }, + mode: { type: 'string', description: 'Agent mode: dom, hybrid, or cua (agent operation)' }, + maxSteps: { type: 'number', description: 'Max agent steps (agent operation)' }, }, outputs: { // Extract outputs @@ -383,5 +397,10 @@ Example 3 (Data Collection): // Agent outputs agentResult: { type: 'json', description: 'Agent execution result (agent operation)' }, structuredOutput: { type: 'json', description: 'Structured output data (agent operation)' }, + liveViewUrl: { + type: 'string', + description: 'Embeddable Browserbase live view URL (agent operation)', + }, + sessionId: { type: 'string', description: 'Browserbase session identifier (agent operation)' }, }, } diff --git a/apps/sim/package.json b/apps/sim/package.json index 1322e2302c2..d9430284dc6 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -55,7 +55,7 @@ "@azure/storage-blob": "12.27.0", "@better-auth/sso": "1.3.12", "@better-auth/stripe": "1.3.12", - "@browserbasehq/stagehand": "^3.0.5", + "@browserbasehq/stagehand": "^3.2.1", "@cerebras/cerebras_cloud_sdk": "^1.23.0", "@e2b/code-interpreter": "^2.0.0", "@google/genai": "1.34.0", diff --git a/apps/sim/tools/browser_use/run_task.ts b/apps/sim/tools/browser_use/run_task.ts index e87f1f965f2..f4ea48859f9 100644 --- a/apps/sim/tools/browser_use/run_task.ts +++ b/apps/sim/tools/browser_use/run_task.ts @@ -9,13 +9,14 @@ const logger = createLogger('BrowserUseTool') const POLL_INTERVAL_MS = 5000 const MAX_POLL_TIME_MS = getMaxExecutionTimeout() const MAX_CONSECUTIVE_ERRORS = 3 +const API_BASE = 'https://api.browser-use.com/api/v2' async function createSessionWithProfile( profileId: string, apiKey: string ): Promise<{ sessionId: string } | { error: string }> { try { - const response = await fetch('https://api.browser-use.com/api/v2/sessions', { + const response = await fetch(`${API_BASE}/sessions`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -43,7 +44,7 @@ async function createSessionWithProfile( async function stopSession(sessionId: string, apiKey: string): Promise { try { - const response = await fetch(`https://api.browser-use.com/api/v2/sessions/${sessionId}`, { + const response = await fetch(`${API_BASE}/sessions/${sessionId}`, { method: 'PATCH', headers: { 'Content-Type': 'application/json', @@ -62,58 +63,92 @@ async function stopSession(sessionId: string, apiKey: string): Promise { } } -function buildRequestBody( - params: BrowserUseRunTaskParams, - sessionId?: string -): Record { - const requestBody: Record = { - task: params.task, +async function fetchSessionLiveUrl( + sessionId: string, + apiKey: string +): Promise<{ liveUrl: string | null; publicShareUrl: string | null }> { + try { + const response = await fetch(`${API_BASE}/sessions/${sessionId}`, { + method: 'GET', + headers: { 'X-Browser-Use-API-Key': apiKey }, + }) + if (!response.ok) { + return { liveUrl: null, publicShareUrl: null } + } + const data = (await response.json()) as { liveUrl?: string; publicShareUrl?: string } + return { + liveUrl: data.liveUrl ?? null, + publicShareUrl: data.publicShareUrl ?? null, + } + } catch (error: any) { + logger.warn(`Error fetching session ${sessionId}:`, error) + return { liveUrl: null, publicShareUrl: null } } +} - if (sessionId) { - requestBody.sessionId = sessionId - logger.info(`Using session ${sessionId} for task`) - } +function normalizeSecrets(variables: BrowserUseRunTaskParams['variables']): Record { + const secrets: Record = {} + if (!variables) return secrets - if (params.variables) { - let secrets: Record = {} - - if (Array.isArray(params.variables)) { - logger.info('Converting variables array to dictionary format') - params.variables.forEach((row: any) => { - if (row.cells?.Key && row.cells.Value !== undefined) { - secrets[row.cells.Key] = row.cells.Value - logger.info(`Added secret for key: ${row.cells.Key}`) - } else if (row.Key && row.Value !== undefined) { - secrets[row.Key] = row.Value - logger.info(`Added secret for key: ${row.Key}`) - } - }) - } else if (typeof params.variables === 'object' && params.variables !== null) { - logger.info('Using variables object directly') - secrets = params.variables + if (Array.isArray(variables)) { + for (const row of variables as Array>) { + if (row?.cells?.Key && row.cells.Value !== undefined) { + secrets[row.cells.Key] = row.cells.Value + } else if (row?.Key && row.Value !== undefined) { + secrets[row.Key] = row.Value + } } - - if (Object.keys(secrets).length > 0) { - logger.info(`Found ${Object.keys(secrets).length} secrets to include`) - requestBody.secrets = secrets - } else { - logger.warn('No usable secrets found in variables') + } else if (typeof variables === 'object') { + for (const [k, v] of Object.entries(variables)) { + if (typeof v === 'string') secrets[k] = v } } + return secrets +} - if (params.model) { - requestBody.llm_model = params.model - } - - if (params.save_browser_data) { - requestBody.save_browser_data = params.save_browser_data - } +function parseAllowedDomains(input?: string | string[]): string[] | undefined { + if (!input) return undefined + const arr = Array.isArray(input) + ? input + : input + .split(',') + .map((s) => s.trim()) + .filter(Boolean) + return arr.length > 0 ? arr : undefined +} - requestBody.use_adblock = true - requestBody.highlight_elements = true +function buildRequestBody( + params: BrowserUseRunTaskParams, + sessionId?: string +): Record { + const body: Record = { task: params.task } + + if (sessionId) body.sessionId = sessionId + if (params.model) body.llm = params.model + if (params.startUrl?.trim()) body.startUrl = params.startUrl.trim() + if (typeof params.maxSteps === 'number' && params.maxSteps > 0) body.maxSteps = params.maxSteps + if (params.structuredOutput) body.structuredOutput = params.structuredOutput + if (typeof params.flashMode === 'boolean') body.flashMode = params.flashMode + if (typeof params.thinking === 'boolean') body.thinking = params.thinking + if (typeof params.vision === 'boolean' || params.vision === 'auto') body.vision = params.vision + if (params.systemPromptExtension) body.systemPromptExtension = params.systemPromptExtension + if (typeof params.highlightElements === 'boolean') + body.highlightElements = params.highlightElements + + const allowedDomains = parseAllowedDomains(params.allowedDomains) + if (allowedDomains) body.allowedDomains = allowedDomains + + const secrets = normalizeSecrets(params.variables) + if (Object.keys(secrets).length > 0) body.secrets = secrets + + if ( + params.metadata && + typeof params.metadata === 'object' && + Object.keys(params.metadata).length > 0 + ) + body.metadata = params.metadata - return requestBody + return body } async function fetchTaskStatus( @@ -121,30 +156,36 @@ async function fetchTaskStatus( apiKey: string ): Promise<{ ok: true; data: any } | { ok: false; error: string }> { try { - const response = await fetch(`https://api.browser-use.com/api/v2/tasks/${taskId}`, { + const response = await fetch(`${API_BASE}/tasks/${taskId}`, { method: 'GET', - headers: { - 'X-Browser-Use-API-Key': apiKey, - }, + headers: { 'X-Browser-Use-API-Key': apiKey }, }) if (!response.ok) { return { ok: false, error: `HTTP ${response.status}: ${response.statusText}` } } - const data = await response.json() - return { ok: true, data } + return { ok: true, data: await response.json() } } catch (error: any) { return { ok: false, error: error.message || 'Network error' } } } -async function pollForCompletion( - taskId: string, - apiKey: string -): Promise<{ success: boolean; output: any; steps: any[]; error?: string }> { - let liveUrlLogged = false +interface PollResult { + success: boolean + output: any + steps: any[] + sessionId: string | null + liveUrl: string | null + publicShareUrl: string | null + error?: string +} + +async function pollForCompletion(taskId: string, apiKey: string): Promise { let consecutiveErrors = 0 + let sessionId: string | null = null + let liveUrl: string | null = null + let publicShareUrl: string | null = null const startTime = Date.now() while (Date.now() - startTime < MAX_POLL_TIME_MS) { @@ -157,11 +198,13 @@ async function pollForCompletion( ) if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) { - logger.error(`Max consecutive errors reached for task ${taskId}`) return { success: false, output: null, steps: [], + sessionId, + liveUrl, + publicShareUrl, error: `Failed to poll task status after ${MAX_CONSECUTIVE_ERRORS} attempts: ${result.error}`, } } @@ -172,23 +215,31 @@ async function pollForCompletion( consecutiveErrors = 0 const taskData = result.data + if (taskData.sessionId) sessionId = taskData.sessionId const status = taskData.status logger.info(`BrowserUse task ${taskId} status: ${status}`) + if (sessionId && !liveUrl) { + const session = await fetchSessionLiveUrl(sessionId, apiKey) + if (session.liveUrl) { + liveUrl = session.liveUrl + logger.info(`BrowserUse live URL: ${liveUrl}`) + } + if (session.publicShareUrl) publicShareUrl = session.publicShareUrl + } + if (['finished', 'failed', 'stopped'].includes(status)) { return { success: status === 'finished', output: taskData.output ?? null, steps: taskData.steps || [], + sessionId, + liveUrl, + publicShareUrl, } } - if (!liveUrlLogged && taskData.live_url) { - logger.info(`BrowserUse task ${taskId} live URL: ${taskData.live_url}`) - liveUrlLogged = true - } - await sleep(POLL_INTERVAL_MS) } @@ -198,20 +249,58 @@ async function pollForCompletion( success: finalResult.data.status === 'finished', output: finalResult.data.output ?? null, steps: finalResult.data.steps || [], + sessionId: finalResult.data.sessionId ?? sessionId, + liveUrl, + publicShareUrl, } } - logger.warn( - `Task ${taskId} did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)` - ) return { success: false, output: null, steps: [], + sessionId, + liveUrl, + publicShareUrl, error: `Task did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)`, } } +async function createShareUrl(sessionId: string, apiKey: string): Promise { + try { + const response = await fetch(`${API_BASE}/sessions/${sessionId}/public-share`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Browser-Use-API-Key': apiKey, + }, + }) + + if (!response.ok) { + logger.warn(`Failed to create share URL for session ${sessionId}: ${response.statusText}`) + return null + } + + const data = (await response.json()) as { shareUrl?: string; shareToken?: string } + return data.shareUrl ?? null + } catch (error: any) { + logger.warn(`Error creating share URL for session ${sessionId}:`, error) + return null + } +} + +function emptyOutput(): BrowserUseRunTaskResponse['output'] { + return { + id: '', + success: false, + output: null, + steps: [], + liveUrl: null, + shareUrl: null, + sessionId: null, + } +} + export const runTaskTool: ToolConfig = { id: 'browser_use_run_task', name: 'Browser Use', @@ -225,23 +314,77 @@ export const runTaskTool: ToolConfig ({ 'Content-Type': 'application/json', @@ -273,16 +416,7 @@ export const runTaskTool: ToolConfig + variables?: Record | Array> model?: string - save_browser_data?: boolean + startUrl?: string + allowedDomains?: string | string[] + maxSteps?: number + flashMode?: boolean + thinking?: boolean + vision?: boolean | 'auto' + systemPromptExtension?: string + structuredOutput?: string + highlightElements?: boolean + metadata?: Record profile_id?: string } export interface BrowserUseTaskStep { - id: string - step: number - evaluation_previous_goal: string - next_goal: string - url?: string - extracted_data?: Record + number: number + memory: string + evaluationPreviousGoal: string + nextGoal: string + url: string + screenshotUrl?: string | null + actions: string[] + duration?: number | null } export interface BrowserUseTaskOutput { id: string success: boolean - output: any + output: string | null steps: BrowserUseTaskStep[] + liveUrl: string | null + shareUrl: string | null + sessionId: string | null } export interface BrowserUseRunTaskResponse extends ToolResponse { @@ -30,10 +44,5 @@ export interface BrowserUseRunTaskResponse extends ToolResponse { } export interface BrowserUseResponse extends ToolResponse { - output: { - id: string - success: boolean - output: any - steps: BrowserUseTaskStep[] - } + output: BrowserUseTaskOutput } diff --git a/apps/sim/tools/stagehand/agent.ts b/apps/sim/tools/stagehand/agent.ts index f3d055a8ea4..042600f89b6 100644 --- a/apps/sim/tools/stagehand/agent.ts +++ b/apps/sim/tools/stagehand/agent.ts @@ -49,6 +49,18 @@ export const agentTool: ToolConfig visibility: 'user-only', description: 'Optional JSON schema defining the structure of data the agent should return', }, + mode: { + type: 'string', + required: false, + visibility: 'user-only', + description: 'Agent tool mode: dom (default), hybrid, or cua', + }, + maxSteps: { + type: 'number', + required: false, + visibility: 'user-only', + description: 'Maximum agent steps (default 20, max 200)', + }, }, request: { @@ -71,6 +83,8 @@ export const agentTool: ToolConfig variables: params.variables, provider: params.provider || 'openai', apiKey: params.apiKey, + mode: params.mode, + maxSteps: params.maxSteps, } }, }, @@ -82,6 +96,8 @@ export const agentTool: ToolConfig output: { agentResult: data.agentResult, structuredOutput: data.structuredOutput || {}, + liveViewUrl: data.liveViewUrl ?? null, + sessionId: data.sessionId ?? null, }, } }, @@ -96,5 +112,16 @@ export const agentTool: ToolConfig type: 'object', description: 'Extracted data matching the provided output schema', }, + liveViewUrl: { + type: 'string', + description: + 'Embeddable Browserbase live view URL (active only while the session is running)', + optional: true, + }, + sessionId: { + type: 'string', + description: 'Browserbase session identifier', + optional: true, + }, }, } diff --git a/apps/sim/tools/stagehand/types.ts b/apps/sim/tools/stagehand/types.ts index e301254bce9..9fc6bf69edf 100644 --- a/apps/sim/tools/stagehand/types.ts +++ b/apps/sim/tools/stagehand/types.ts @@ -247,6 +247,8 @@ export interface StagehandAgentParams { variables?: Record provider?: 'openai' | 'anthropic' apiKey: string + mode?: 'dom' | 'hybrid' | 'cua' + maxSteps?: number options?: { useTextExtract?: boolean selector?: string @@ -286,6 +288,8 @@ export interface StagehandAgentResponse extends ToolResponse { output: { agentResult: StagehandAgentResult structuredOutput?: Record + liveViewUrl?: string | null + sessionId?: string | null } } diff --git a/bun.lock b/bun.lock index 916bc86fb35..daa75296d82 100644 --- a/bun.lock +++ b/bun.lock @@ -109,7 +109,7 @@ "@azure/storage-blob": "12.27.0", "@better-auth/sso": "1.3.12", "@better-auth/stripe": "1.3.12", - "@browserbasehq/stagehand": "^3.0.5", + "@browserbasehq/stagehand": "^3.2.1", "@cerebras/cerebras_cloud_sdk": "^1.23.0", "@e2b/code-interpreter": "^2.0.0", "@google/genai": "1.34.0", @@ -777,7 +777,7 @@ "@browserbasehq/sdk": ["@browserbasehq/sdk@2.9.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-Xzm1+6suzQypXjley4Phqer++pjnYyST6S7CArUn3kWyGA8aruXjAV5wkmqE21lgXo9K3/OQJvCu48bKEZFNDQ=="], - "@browserbasehq/stagehand": ["@browserbasehq/stagehand@3.2.0", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@anthropic-ai/sdk": "0.39.0", "@browserbasehq/sdk": "^2.7.0", "@google/genai": "^1.22.0", "@langchain/openai": "^0.4.4", "@modelcontextprotocol/sdk": "^1.17.2", "ai": "^5.0.133", "devtools-protocol": "^0.0.1464554", "fetch-cookie": "^3.1.0", "openai": "^4.87.1", "pino": "^9.6.0", "pino-pretty": "^13.0.0", "uuid": "^11.1.0", "ws": "^8.18.0", "zod-to-json-schema": "^3.25.0" }, "optionalDependencies": { "@ai-sdk/amazon-bedrock": "^3.0.73", "@ai-sdk/anthropic": "^2.0.34", "@ai-sdk/azure": "^2.0.54", "@ai-sdk/cerebras": "^1.0.25", "@ai-sdk/deepseek": "^1.0.23", "@ai-sdk/google": "^2.0.53", "@ai-sdk/google-vertex": "^3.0.70", "@ai-sdk/groq": "^2.0.24", "@ai-sdk/mistral": "^2.0.19", "@ai-sdk/openai": "^2.0.53", "@ai-sdk/perplexity": "^2.0.13", "@ai-sdk/togetherai": "^1.0.23", "@ai-sdk/xai": "^2.0.26", "@langchain/core": "^0.3.80", "bufferutil": "^4.0.9", "chrome-launcher": "^1.2.0", "ollama-ai-provider-v2": "^1.5.0", "patchright-core": "^1.55.2", "playwright": "^1.52.0", "playwright-core": "^1.54.1", "puppeteer-core": "^22.8.0" }, "peerDependencies": { "deepmerge": "^4.3.1", "zod": "^3.25.76 || ^4.2.0" } }, "sha512-X9s3sZuTL3zf8gt1o9yr4mvT2JmDRigkmBinlKF6LD+rlAIOh+nH6Cmz6xfRjZ4RgTfR0wRoE1iUTKa39YtWfA=="], + "@browserbasehq/stagehand": ["@browserbasehq/stagehand@3.2.1", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@anthropic-ai/sdk": "0.39.0", "@browserbasehq/sdk": "^2.7.0", "@google/genai": "^1.22.0", "@langchain/openai": "^0.4.4", "@modelcontextprotocol/sdk": "^1.17.2", "ai": "^5.0.133", "devtools-protocol": "^0.0.1464554", "fetch-cookie": "^3.1.0", "openai": "^4.87.1", "pino": "^9.6.0", "pino-pretty": "^13.0.0", "uuid": "^11.1.0", "ws": "^8.18.0", "zod-to-json-schema": "^3.25.0" }, "optionalDependencies": { "@ai-sdk/amazon-bedrock": "^3.0.73", "@ai-sdk/anthropic": "^2.0.34", "@ai-sdk/azure": "^2.0.54", "@ai-sdk/cerebras": "^1.0.25", "@ai-sdk/deepseek": "^1.0.23", "@ai-sdk/google": "^2.0.53", "@ai-sdk/google-vertex": "^3.0.70", "@ai-sdk/groq": "^2.0.24", "@ai-sdk/mistral": "^2.0.19", "@ai-sdk/openai": "^2.0.53", "@ai-sdk/perplexity": "^2.0.13", "@ai-sdk/togetherai": "^1.0.23", "@ai-sdk/xai": "^2.0.26", "@langchain/core": "^0.3.80", "bufferutil": "^4.0.9", "chrome-launcher": "^1.2.0", "ollama-ai-provider-v2": "^1.5.0", "patchright-core": "^1.55.2", "playwright": "^1.52.0", "playwright-core": "^1.54.1", "puppeteer-core": "^22.8.0" }, "peerDependencies": { "deepmerge": "^4.3.1", "zod": "^3.25.76 || ^4.2.0" } }, "sha512-h7KAAaNK7JUMw97w7sj0CBsBVtjLXyEorbUoYmCwLYYWrL2IUd9WFS7gRFspCp0ww2hpVPJEKMxHumwFCPEC8g=="], "@bufbuild/protobuf": ["@bufbuild/protobuf@2.11.0", "", {}, "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ=="], @@ -2533,8 +2533,6 @@ "fast-glob": ["fast-glob@3.3.3", "", { "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", "micromatch": "^4.0.8" } }, "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg=="], - "fast-json-stable-stringify": ["fast-json-stable-stringify@2.1.0", "", {}, "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="], - "fast-safe-stringify": ["fast-safe-stringify@2.1.1", "", {}, "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA=="], "fast-sha256": ["fast-sha256@1.3.0", "", {}, "sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ=="], @@ -3989,8 +3987,6 @@ "update-browserslist-db": ["update-browserslist-db@1.2.3", "", { "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" }, "peerDependencies": { "browserslist": ">= 4.21.0" }, "bin": { "update-browserslist-db": "cli.js" } }, "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w=="], - "uri-js": ["uri-js@4.4.1", "", { "dependencies": { "punycode": "^2.1.0" } }, "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg=="], - "urlpattern-polyfill": ["urlpattern-polyfill@10.0.0", "", {}, "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="], "use-callback-ref": ["use-callback-ref@1.3.3", "", { "dependencies": { "tslib": "^2.0.0" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg=="], @@ -4971,8 +4967,6 @@ "@browserbasehq/stagehand/@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.39.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-eMyDIPRZbt1CCLErRCi3exlAvNkBtRe+kW5vvJyef93PmNr/clstYgHhtvmkxN82nlKgzyGPCyGxrm0JQ1ZIdg=="], - "@browserbasehq/stagehand/@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.20.2", "", { "dependencies": { "ajv": "^6.12.6", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.0.1", "express-rate-limit": "^7.5.0", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.23.8", "zod-to-json-schema": "^3.24.1" } }, "sha512-6rqTdFt67AAAzln3NOKsXRmv5ZzPkgbfaebKBqUbts7vK1GZudqnrun5a8d3M/h955cam9RHZ6Jb4Y1XhnmFPg=="], - "@cerebras/cerebras_cloud_sdk/@types/node": ["@types/node@18.19.130", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg=="], "@cerebras/cerebras_cloud_sdk/node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], @@ -6065,8 +6059,6 @@ "@browserbasehq/stagehand/@anthropic-ai/sdk/node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], - "@browserbasehq/stagehand/@modelcontextprotocol/sdk/ajv": ["ajv@6.14.0", "", { "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", "json-schema-traverse": "^0.4.1", "uri-js": "^4.2.2" } }, "sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw=="], - "@cerebras/cerebras_cloud_sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], "@cerebras/cerebras_cloud_sdk/node-fetch/whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], @@ -6785,8 +6777,6 @@ "@browserbasehq/stagehand/@anthropic-ai/sdk/node-fetch/whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], - "@browserbasehq/stagehand/@modelcontextprotocol/sdk/ajv/json-schema-traverse": ["json-schema-traverse@0.4.1", "", {}, "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="], - "@cerebras/cerebras_cloud_sdk/node-fetch/whatwg-url/tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "@cerebras/cerebras_cloud_sdk/node-fetch/whatwg-url/webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="],