Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions apps/docs/content/docs/en/tools/browser_use.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,18 @@ Runs a browser automation task using BrowserUse
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `task` | string | Yes | What should the browser agent do |
| `variables` | json | No | Optional variables to use as secrets \(format: \{key: value\}\) |
| `save_browser_data` | boolean | No | Whether to save browser data |
| `model` | string | No | LLM model to use \(default: gpt-4o\) |
| `startUrl` | string | No | Initial page URL to start the agent on \(reduces navigation steps\) |
| `variables` | json | No | Optional secrets injected into the task \(format: \{key: value\}\) |
| `allowedDomains` | string | No | Comma-separated list of domains the agent is allowed to visit |
| `maxSteps` | number | No | Maximum number of steps the agent may take \(default 100, max 10000\) |
| `flashMode` | boolean | No | Enable flash mode \(faster, less careful navigation\) |
| `thinking` | boolean | No | Enable extended reasoning mode |
| `vision` | string | No | Vision capability: "true", "false", or "auto" |
| `systemPromptExtension` | string | No | Optional text appended to the agent system prompt \(max 2000 chars\) |
| `structuredOutput` | string | No | Stringified JSON schema for the structured output |
| `highlightElements` | boolean | No | Highlight interactive elements on the page \(default true\) |
| `metadata` | json | No | Custom key-value metadata \(up to 10 pairs\) for tracking |
| `model` | string | No | LLM model identifier \(e.g. browser-use-2.0\) |
| `apiKey` | string | Yes | API key for BrowserUse API |
| `profile_id` | string | No | Browser profile ID for persistent sessions \(cookies, login state\) |

Expand All @@ -54,7 +63,18 @@ Runs a browser automation task using BrowserUse
| --------- | ---- | ----------- |
| `id` | string | Task execution identifier |
| `success` | boolean | Task completion status |
| `output` | json | Task output data |
| `steps` | json | Execution steps taken |
| `output` | json | Final task output \(string or structured\) |
| `steps` | array | Steps the agent executed \(number, memory, evaluationPreviousGoal, nextGoal, url, screenshotUrl, actions, duration\) |
| ↳ `number` | number | Sequential step number |
| ↳ `memory` | string | Agent memory at this step |
| ↳ `evaluationPreviousGoal` | string | Evaluation of previous goal completion |
| ↳ `nextGoal` | string | Goal for the next step |
| ↳ `url` | string | Current URL of the browser |
| ↳ `screenshotUrl` | string | Optional screenshot URL |
| ↳ `actions` | array | Stringified JSON actions performed |
| ↳ `duration` | number | Step duration in seconds |
| `liveUrl` | string | Embeddable live browser session URL \(active during execution\) |
| `shareUrl` | string | Public shareable URL for the recorded session \(post-run\) |
| `sessionId` | string | Browser Use session identifier |


4 changes: 4 additions & 0 deletions apps/docs/content/docs/en/tools/stagehand.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ Run an autonomous web agent to complete tasks and extract structured data
| `provider` | string | No | AI provider to use: openai or anthropic |
| `apiKey` | string | Yes | API key for the selected provider |
| `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
| `mode` | string | No | Agent tool mode: dom \(default\), hybrid, or cua |
| `maxSteps` | number | No | Maximum agent steps \(default 20, max 200\) |

#### Output

Expand All @@ -92,5 +94,7 @@ Run an autonomous web agent to complete tasks and extract structured data
| ↳ `timestamp` | number | Unix timestamp when the action was performed |
| ↳ `timeMs` | number | Time in milliseconds \(for wait actions\) |
| `structuredOutput` | object | Extracted data matching the provided output schema |
| `liveViewUrl` | string | Embeddable Browserbase live view URL \(active only while the session is running\) |
| `sessionId` | string | Browserbase session identifier |


48 changes: 43 additions & 5 deletions apps/sim/app/api/tools/stagehand/agent/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const requestSchema = z.object({
variables: z.any(),
provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
apiKey: z.string(),
mode: z.enum(['dom', 'hybrid', 'cua']).optional().default('dom'),
maxSteps: z.number().int().min(1).max(200).optional().default(20),
})

/**
Expand Down Expand Up @@ -121,7 +123,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
}

const params = validationResult.data
const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey } = params
const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey, mode, maxSteps } = params
const variablesObject = processVariables(params.variables)

const startUrl = normalizeUrl(rawStartUrl)
Expand Down Expand Up @@ -165,8 +167,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({ error: 'Invalid Anthropic API key format' }, { status: 400 })
}

const modelName =
provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5'
const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5'

let sessionId: string | null = null
let liveViewUrl: string | null = null

try {
logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })
Expand All @@ -190,6 +194,35 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
await stagehand.init()
logger.info('Stagehand initialized successfully')

sessionId = stagehand.browserbaseSessionID ?? null
if (sessionId) {
try {
const debugResponse = await fetch(
`https://api.browserbase.com/v1/sessions/${sessionId}/debug`,
{
method: 'GET',
headers: {
'X-BB-API-Key': BROWSERBASE_API_KEY,
},
}
)
if (debugResponse.ok) {
const debugData = (await debugResponse.json()) as {
debuggerFullscreenUrl?: string
debuggerUrl?: string
}
liveViewUrl = debugData.debuggerFullscreenUrl ?? debugData.debuggerUrl ?? null
if (liveViewUrl) {
logger.info(`Browserbase live view URL: ${liveViewUrl}`)
}
} else {
logger.warn(`Failed to fetch Browserbase debug URL: ${debugResponse.statusText}`)
}
} catch (debugError) {
logger.warn('Error fetching Browserbase debug URL', { error: debugError })
}
}

const page = stagehand.context.pages()[0]
logger.info(`Navigating to ${startUrl}`)
await page.goto(startUrl, { waitUntil: 'networkidle' })
Expand Down Expand Up @@ -223,13 +256,14 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
apiKey: apiKey,
},
systemPrompt: agentInstructions,
mode,
})

logger.info('Executing agent task', { task: taskWithVariables })
logger.info('Executing agent task', { task: taskWithVariables, mode, maxSteps })

const agentExecutionResult = await agent.execute({
instruction: taskWithVariables,
maxSteps: 20,
maxSteps,
})

const agentResult = {
Expand Down Expand Up @@ -293,6 +327,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({
agentResult,
structuredOutput,
liveViewUrl,
sessionId,
})
} catch (error) {
logger.error('Stagehand agent execution error', {
Expand Down Expand Up @@ -327,6 +363,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
{
error: errorMessage,
details: errorDetails,
liveViewUrl,
sessionId,
},
{ status: 500 }
)
Expand Down
12 changes: 3 additions & 9 deletions apps/sim/app/api/tools/stagehand/extract/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ const BROWSERBASE_PROJECT_ID = env.BROWSERBASE_PROJECT_ID
const requestSchema = z.object({
instruction: z.string(),
schema: z.record(z.any()),
useTextExtract: z.boolean().optional().default(false),
selector: z.string().nullable().optional(),
provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
apiKey: z.string(),
url: z.string().url(),
Expand Down Expand Up @@ -51,7 +49,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
}

const params = validationResult.data
const { url: rawUrl, instruction, selector, provider, apiKey, schema } = params
const { url: rawUrl, instruction, provider, apiKey, schema } = params
const url = normalizeUrl(rawUrl)
const urlValidation = await validateUrlWithDNS(url, 'url')
if (!urlValidation.isValid) {
Expand Down Expand Up @@ -101,8 +99,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
}

try {
const modelName =
provider === 'anthropic' ? 'anthropic/claude-sonnet-4-5-20250929' : 'openai/gpt-5'
const modelName = provider === 'anthropic' ? 'anthropic/claude-sonnet-4-6' : 'openai/gpt-5'

logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })

Expand Down Expand Up @@ -162,14 +159,11 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
logger.info('Calling stagehand.extract with options', {
hasInstruction: !!instruction,
hasSchema: !!zodSchema,
hasSelector: !!selector,
})

let extractedData
if (zodSchema) {
extractedData = await stagehand.extract(instruction, zodSchema, {
selector: selector || undefined,
})
extractedData = await stagehand.extract(instruction, zodSchema)
} else {
extractedData = await stagehand.extract(instruction)
}
Expand Down
140 changes: 129 additions & 11 deletions apps/sim/blocks/blocks/browser_use.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
placeholder: 'Describe what the browser agent should do...',
required: true,
},
{
id: 'startUrl',
title: 'Start URL',
type: 'short-input',
placeholder: 'https://example.com (optional starting URL)',
},
{
id: 'variables',
title: 'Variables (Secrets)',
Expand Down Expand Up @@ -51,22 +57,85 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
{ label: 'Claude 3.7 Sonnet', id: 'claude-3-7-sonnet-20250219' },
{ label: 'Claude Sonnet 4', id: 'claude-sonnet-4-20250514' },
{ label: 'Claude Sonnet 4.5', id: 'claude-sonnet-4-5-20250929' },
{ label: 'Claude Sonnet 4.6', id: 'claude-sonnet-4-6' },
{ label: 'Claude Opus 4.5', id: 'claude-opus-4-5-20251101' },
{ label: 'Llama 4 Maverick', id: 'llama-4-maverick-17b-128e-instruct' },
],
},
{
id: 'save_browser_data',
title: 'Save Browser Data',
type: 'switch',
placeholder: 'Save browser data',
},
{
id: 'profile_id',
title: 'Profile ID',
type: 'short-input',
placeholder: 'Enter browser profile ID (optional)',
},
{
id: 'maxSteps',
title: 'Max Steps',
type: 'short-input',
placeholder: '100',
mode: 'advanced',
},
{
id: 'allowedDomains',
title: 'Allowed Domains',
type: 'short-input',
placeholder: 'example.com, docs.example.com',
mode: 'advanced',
},
{
id: 'vision',
title: 'Vision',
type: 'dropdown',
options: [
{ label: 'Auto (default)', id: 'auto' },
{ label: 'Enabled', id: 'true' },
{ label: 'Disabled', id: 'false' },
],
mode: 'advanced',
},
{
id: 'flashMode',
title: 'Flash Mode',
type: 'switch',
placeholder: 'Faster but less careful navigation',
mode: 'advanced',
},
{
id: 'thinking',
title: 'Thinking',
type: 'switch',
placeholder: 'Enable extended reasoning',
mode: 'advanced',
},
{
id: 'highlightElements',
title: 'Highlight Elements',
type: 'switch',
placeholder: 'Visually mark interactive elements',
mode: 'advanced',
},
{
id: 'systemPromptExtension',
title: 'System Prompt Extension',
type: 'long-input',
placeholder: 'Append custom instructions to the agent system prompt (max 2000 chars)',
mode: 'advanced',
},
{
id: 'structuredOutput',
title: 'Structured Output Schema',
type: 'code',
language: 'json',
placeholder: 'Stringified JSON schema for structured output',
mode: 'advanced',
},
{
id: 'metadata',
title: 'Metadata',
type: 'table',
columns: ['Key', 'Value'],
mode: 'advanced',
},
{
id: 'apiKey',
title: 'API Key',
Expand All @@ -78,19 +147,68 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
],
tools: {
access: ['browser_use_run_task'],
config: {
tool: () => 'browser_use_run_task',
/**
 * Normalizes the raw block inputs and returns the adjusted copy.
 *
 * - `maxSteps`: a string value is trimmed and converted with `Number`;
 *   blank or non-finite input becomes `undefined`. Non-string values are untouched.
 * - `vision`: the strings 'true' / 'false' become booleans; any other value is kept.
 * - `metadata`: an array of rows is collapsed into a flat key -> string-value object.
 *
 * @param params - Raw values keyed by input id.
 * @returns A shallow copy of `params` with the conversions above applied.
 */
params: (params) => {
// Work on a shallow copy so the incoming object is not modified.
const next: Record<string, any> = { ...params }
// Only string values are converted; anything else passes through unchanged.
if (typeof next.maxSteps === 'string') {
const trimmed = next.maxSteps.trim()
if (trimmed === '') {
// Blank input: leave maxSteps unset.
next.maxSteps = undefined
} else {
const n = Number(trimmed)
// Non-numeric text (NaN) and +/-Infinity are dropped instead of being forwarded.
next.maxSteps = Number.isFinite(n) ? n : undefined
}
}
Comment thread
waleedlatif1 marked this conversation as resolved.
// 'true' / 'false' become real booleans; other values (e.g. 'auto') are left as-is.
if (next.vision === 'true') next.vision = true
else if (next.vision === 'false') next.vision = false
Comment thread
waleedlatif1 marked this conversation as resolved.
// Collapse an array of rows into a plain { key: value } object.
if (next.metadata && Array.isArray(next.metadata)) {
const obj: Record<string, string> = {}
for (const row of next.metadata as Array<Record<string, any>>) {
// A row may hold its values under `cells` or directly on the row itself.
const key = row?.cells?.Key ?? row?.Key
const value = row?.cells?.Value ?? row?.Value
// Rows with a falsy key are skipped; null/undefined values are stored as ''.
if (key) obj[key] = String(value ?? '')
}
next.metadata = obj
}
return next
},
},
},
inputs: {
task: { type: 'string', description: 'Browser automation task' },
startUrl: { type: 'string', description: 'Starting URL for the agent' },
apiKey: { type: 'string', description: 'BrowserUse API key' },
variables: { type: 'json', description: 'Task variables' },
model: { type: 'string', description: 'AI model to use' },
save_browser_data: { type: 'boolean', description: 'Save browser data' },
variables: { type: 'json', description: 'Secrets to inject into the task' },
model: { type: 'string', description: 'LLM model to use' },
profile_id: { type: 'string', description: 'Browser profile ID for persistent sessions' },
maxSteps: { type: 'number', description: 'Maximum agent steps' },
allowedDomains: { type: 'string', description: 'Comma-separated allowed domains' },
vision: { type: 'string', description: 'Vision capability (auto / true / false)' },
flashMode: { type: 'boolean', description: 'Enable flash mode' },
thinking: { type: 'boolean', description: 'Enable extended reasoning' },
highlightElements: { type: 'boolean', description: 'Highlight interactive elements' },
systemPromptExtension: { type: 'string', description: 'Custom system prompt extension' },
structuredOutput: { type: 'string', description: 'Stringified JSON schema' },
metadata: { type: 'json', description: 'Custom key-value metadata' },
},
outputs: {
id: { type: 'string', description: 'Task execution identifier' },
success: { type: 'boolean', description: 'Task completion status' },
output: { type: 'json', description: 'Task output data' },
steps: { type: 'json', description: 'Execution steps taken' },
output: { type: 'json', description: 'Final task output (string or structured)' },
steps: {
type: 'json',
description:
'Steps the agent executed (number, memory, evaluationPreviousGoal, nextGoal, url, screenshotUrl, actions, duration)',
},
liveUrl: {
type: 'string',
description: 'Embeddable live browser session URL (active during execution)',
},
shareUrl: {
type: 'string',
description: 'Public shareable URL for the session (post-run)',
},
sessionId: { type: 'string', description: 'Browser Use session identifier' },
},
}
Loading
Loading