diff --git a/.gitleaksignore b/.gitleaksignore index d5c038bc..d782fc0b 100644 --- a/.gitleaksignore +++ b/.gitleaksignore @@ -4,3 +4,11 @@ # False positive: test value in historical commit (fixed in current code) 12323684c2a470321a34fea845a9556eb8b644d1:test/cli/provider.test.ts:generic-api-key:223 + +# False positives: OpenRouter test values in historical commits +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:74 +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:80 +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:106 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:74 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:80 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:106 diff --git a/README.md b/README.md index bfb7ec29..b13c152f 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,42 @@ pilo run --browser bidi --bidi-url "ws://127.0.0.1:9222/session" "what's the wea - 📝 **Rich Context**: Pass structured data to help with form filling and complex tasks - ☁️ **Tabstack API Integration**: Extract markdown, structured JSON, or AI-transformed data from any URL using [Tabstack](https://tabstack.ai) cloud tools — especially useful for PDFs which browsers cannot read directly +## Security Model + +Pilo treats every web page as untrusted input. By default, an **action firewall** prevents the agent from filling freeform form fields (textareas, contact-info inputs, password fields, etc.) and from submitting any form containing agent-filled values that the user did not explicitly approve. This is the structural defense against prompt-injection attacks where a page tries to coax the agent into exfiltrating data through a form. 
+ +Two caller-supplied controls relax this protection. Both are off by default. **Enabling either weakens the firewall's data-protection guarantees.** + +### `trusted_hostnames` + +A list of hostnames on which the firewall is bypassed for fills and submissions. The bypass applies only when the current page hostname **and every form-action hostname** (the form's `action` plus any submitter `formaction` override) are all in the list. + +```bash +pilo config set trusted_hostnames example.com,app.example.com +``` + +WARNING: on listed hosts, prompt injection from page content can drive the agent to fill and submit any field, including personal and credential data. Use only for sites you fully trust to receive your data. + +### `unsafe_mode` + +A global firewall disable. When enabled, neither the fill gate nor the submit gate applies, regardless of page or form-action hostname. + +```bash +pilo config set unsafe_mode true +``` + +WARNING: prompt injection from page content can then cause the agent to submit your data, including credentials, personal information, and conversation context, to attacker-controlled forms. Only enable for trusted, controlled environments. + +### Remediation when a block fires + +When the firewall blocks a fill or submission and the agent is not running in interactive mode (no `UserDataCallback`), the CLI prints a footer listing the three ways the user can enable the workflow: + +- Add the involved hostnames to `trusted_hostnames`. +- Run in interactive mode so the agent can request per-field approval through `request_user_data`. +- Enable `unsafe_mode` (with the data-protection warning above). + +The footer is shown only to the user; the model that drives the agent never sees these remediation suggestions, so prompt-injected page content cannot ask the user to enable the bypasses. 
+ ## Configuration Pilo supports multiple AI providers and stores configuration globally at `~/.config/pilo/config.json` (XDG standard; `%APPDATA%/pilo/config.json` on Windows). diff --git a/packages/cli/src/commands/config.ts b/packages/cli/src/commands/config.ts index 651786cd..76c2973b 100644 --- a/packages/cli/src/commands/config.ts +++ b/packages/cli/src/commands/config.ts @@ -1,7 +1,7 @@ import chalk from "chalk"; import { Command } from "commander"; import { existsSync } from "fs"; -import { config, getAIProviderInfo } from "pilo-core"; +import { config, getAIProviderInfo, normalizeHostname } from "pilo-core"; import { getPackageInfo, parseConfigValue } from "../utils.js"; /** @@ -180,9 +180,13 @@ function getConfigurationValue(key: string): void { */ function setConfigurationValue(key: string, value: string): void { try { - const parsedValue = parseConfigValue(value); + let parsedValue = parseConfigValue(value, key as any); + if (key === "trusted_hostnames" && Array.isArray(parsedValue)) { + parsedValue = parsedValue.map((h: string) => normalizeHostname(h)); + } config.set(key as any, parsedValue); - console.log(chalk.green(`✅ Set ${key} = ${value}`)); + const displayValue = Array.isArray(parsedValue) ? parsedValue.join(",") : value; + console.log(chalk.green(`✅ Set ${key} = ${displayValue}`)); } catch (error) { console.error(chalk.red("❌ Error:"), error instanceof Error ? 
error.message : String(error)); console.log(chalk.gray("Example: pilo config set browser chrome")); diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index 6ef02db1..6ff4d73c 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -14,7 +14,13 @@ import { MetricsCollector, SecretsRedactor, } from "pilo-core"; -import type { Logger, UserDataCallback, UserDataRequest, UserDataResponse } from "pilo-core"; +import type { + Logger, + UserDataCallback, + UserDataRequest, + UserDataResponse, + FirewallBlockedNonInteractiveEventData, +} from "pilo-core"; import { validateBrowser, getValidBrowsers, parseJsonData, parseResourcesList } from "../utils.js"; import * as fs from "fs"; import * as path from "path"; @@ -306,6 +312,10 @@ async function executeRunCommand(task: string, options: any): Promise { }); } + eventEmitter.onEvent(WebAgentEventType.FIREWALL_BLOCKED_NON_INTERACTIVE, (data: unknown) => { + printFirewallRemediation(data as FirewallBlockedNonInteractiveEventData); + }); + // Create WebAgent const webAgent = new WebAgent(browser, { debug: debugMode, @@ -321,6 +331,8 @@ async function executeRunCommand(task: string, options: any): Promise { searchApiKey: cfg.parallel_api_key, tabstackApiKey: options.tabstackApiKey ?? cfg.tabstack_api_key, tabstackApiUrl: options.tabstackApiUrl ?? cfg.tabstack_api_url, + trustedHostnames: options.trustedHostnames ?? cfg.trusted_hostnames, + unsafeMode: options.unsafe ?? 
cfg.unsafe_mode, providerConfig, logger, eventEmitter, @@ -340,3 +352,40 @@ async function executeRunCommand(task: string, options: any): Promise { process.exit(1); } } + +export function printFirewallRemediation(data: FirewallBlockedNonInteractiveEventData): void { + const lines: string[] = []; + lines.push(""); + lines.push(chalk.yellow.bold("Pilo: an action was blocked by the prompt-injection firewall.")); + lines.push(chalk.yellow(`Reason: ${data.reason}`)); + + const involvedHosts = Array.from( + new Set( + [data.pageHostname, ...data.formActionHostnames].filter((h): h is string => Boolean(h)), + ), + ); + if (involvedHosts.length > 0) { + lines.push(chalk.yellow(`Hostnames involved: ${involvedHosts.join(", ")}`)); + } + + lines.push(chalk.yellow("To allow this action, you can:")); + for (const r of data.remediations) { + if (r.kind === "add-trusted-hostnames") { + const cmd = + r.hostnames.length > 0 + ? `pilo config set trusted_hostnames ${r.hostnames.join(",")}` + : "pilo config set trusted_hostnames "; + lines.push(` - ${r.description}`); + lines.push(` Run: ${chalk.cyan(cmd)}`); + } else if (r.kind === "enable-interactive-mode") { + lines.push(` - ${r.description}`); + } else if (r.kind === "enable-unsafe-mode") { + lines.push(` - ${r.description}`); + lines.push(` Run: ${chalk.cyan("pilo config set unsafe_mode true")}`); + } + } + + for (const line of lines) { + console.warn(line); + } +} diff --git a/packages/cli/src/utils.ts b/packages/cli/src/utils.ts index caa87867..3bbcfde1 100644 --- a/packages/cli/src/utils.ts +++ b/packages/cli/src/utils.ts @@ -1,7 +1,7 @@ import { existsSync, readFileSync } from "fs"; import { dirname, join } from "path"; import { fileURLToPath } from "url"; -import { BROWSERS } from "pilo-core"; +import { BROWSERS, FIELDS, type PiloConfig } from "pilo-core"; /** * CLI-specific utilities and helpers @@ -101,7 +101,15 @@ export function parseConfigKeyValue(keyValue: string): { key: string; value: str /** * Parse configuration 
value to appropriate type */ -export function parseConfigValue(value: string): any { +export function parseConfigValue(value: string, key?: keyof PiloConfig): any { + // When the field type is known and is string[], CSV-split the value. + if (key && FIELDS[key]?.type === "string[]") { + return value + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + } + // Parse boolean values if (value === "true") return true; if (value === "false") return false; diff --git a/packages/cli/test/commands/config.test.ts b/packages/cli/test/commands/config.test.ts index 6fe410c3..912a5dd0 100644 --- a/packages/cli/test/commands/config.test.ts +++ b/packages/cli/test/commands/config.test.ts @@ -174,6 +174,26 @@ describe("CLI Config Command (subcommands)", () => { expect(mockExit).toHaveBeenCalledWith(1); }); + + it("should parse trusted_hostnames as an array and persist normalized entries", async () => { + const cmd = getCommand(); + await cmd.parseAsync(["set", "trusted_hostnames", "Example.COM,app.example.com."], { + from: "user", + }); + + expect(mockConfig.set).toHaveBeenCalledWith("trusted_hostnames", [ + "example.com", + "app.example.com", + ]); + }); + + it("should exit(1) on invalid hostname in trusted_hostnames", async () => { + const cmd = getCommand(); + await cmd.parseAsync(["set", "trusted_hostnames", "good.com,bad value"], { from: "user" }); + + expect(mockExit).toHaveBeenCalledWith(1); + expect(mockConfig.set).not.toHaveBeenCalledWith("trusted_hostnames", expect.anything()); + }); }); // ------------------------------------------------------------------------- diff --git a/packages/cli/test/commands/run.test.ts b/packages/cli/test/commands/run.test.ts index 717f2737..cc157ef3 100644 --- a/packages/cli/test/commands/run.test.ts +++ b/packages/cli/test/commands/run.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { Command } from "commander"; -import { createRunCommand } from "../../src/commands/run.js"; 
+import { createRunCommand, printFirewallRemediation } from "../../src/commands/run.js"; +import type { FirewallBlockedNonInteractiveEventData } from "pilo-core"; import { getConfigDefaults } from "pilo-core"; // Get defaults from schema (used for mocking config.getConfig) @@ -37,6 +38,7 @@ vi.mock("pilo-core", async (importOriginal) => { }), WebAgentEventType: { AI_GENERATION: "ai:generation", + FIREWALL_BLOCKED_NON_INTERACTIVE: "firewall:blocked_non_interactive", }, WebAgentEventEmitter: vi.fn().mockImplementation(function () { return { @@ -587,3 +589,86 @@ describe("CLI Run Command", () => { }); }); }); + +describe("printFirewallRemediation", () => { + let warnSpy: ReturnType; + + beforeEach(() => { + warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + }); + + afterEach(() => { + warnSpy.mockRestore(); + }); + + it("prints all three remediation options with the blocked hostname", () => { + const data: FirewallBlockedNonInteractiveEventData = { + timestamp: Date.now(), + iterationId: "", + reason: "Security policy blocked submitting a form containing unauthorized agent-filled data", + kind: "form-submission", + pageHostname: "untrusted.com", + formActionHostnames: ["untrusted.com"], + remediations: [ + { + kind: "add-trusted-hostnames", + hostnames: ["untrusted.com"], + description: "Add untrusted.com to trusted_hostnames to allow this action on this site.", + }, + { + kind: "enable-interactive-mode", + description: + "Run in interactive mode by providing a UserDataCallback so the agent can ask the user to approve sensitive fields per-action via request_user_data.", + }, + { + kind: "enable-unsafe-mode", + description: "Set unsafe_mode=true to disable the action firewall entirely. 
WARNING: ...", + }, + ], + }; + + printFirewallRemediation(data); + const output = warnSpy.mock.calls + .map((c: unknown[]) => c.join(" ")) + .join("\n") + .replace(/\x1b\[[0-9;]*m/g, ""); + expect(output).toContain("untrusted.com"); + expect(output).toContain("trusted_hostnames untrusted.com"); + expect(output).toContain("interactive mode"); + expect(output).toContain("unsafe_mode true"); + }); + + it("falls back to a generic command when no hostnames are listed", () => { + const data: FirewallBlockedNonInteractiveEventData = { + timestamp: Date.now(), + iterationId: "", + reason: "Security policy blocked filling a submittable form field without user approval", + kind: "freeform-fill", + pageHostname: null, + formActionHostnames: [], + remediations: [ + { + kind: "add-trusted-hostnames", + hostnames: [], + description: + "Add the page hostname to trusted_hostnames to allow this action on this site.", + }, + { + kind: "enable-interactive-mode", + description: "Run in interactive mode...", + }, + { + kind: "enable-unsafe-mode", + description: "Set unsafe_mode=true...", + }, + ], + }; + + printFirewallRemediation(data); + const output = warnSpy.mock.calls + .map((c: unknown[]) => c.join(" ")) + .join("\n") + .replace(/\x1b\[[0-9;]*m/g, ""); + expect(output).toContain("trusted_hostnames "); + }); +}); diff --git a/packages/cli/test/utils.test.ts b/packages/cli/test/utils.test.ts index d42410eb..f418c4e0 100644 --- a/packages/cli/test/utils.test.ts +++ b/packages/cli/test/utils.test.ts @@ -113,6 +113,25 @@ describe("CLI Utils", () => { expect(parseConfigValue("sk-test123")).toBe("sk-test123"); expect(parseConfigValue("")).toBe(""); }); + + it("should CSV-split values for known string[] keys", () => { + expect(parseConfigValue("a.com,b.com", "trusted_hostnames")).toEqual(["a.com", "b.com"]); + expect(parseConfigValue("a.com", "trusted_hostnames")).toEqual(["a.com"]); + expect(parseConfigValue(" a.com , b.com ", "trusted_hostnames")).toEqual(["a.com", "b.com"]); + 
expect(parseConfigValue("", "trusted_hostnames")).toEqual([]); + }); + + it("should CSV-split values for pw_cdp_endpoints (regression for pre-existing bug)", () => { + expect(parseConfigValue("ws://a:9222,ws://b:9222", "pw_cdp_endpoints" as any)).toEqual([ + "ws://a:9222", + "ws://b:9222", + ]); + }); + + it("should still coerce booleans/numbers when key is omitted", () => { + expect(parseConfigValue("true")).toBe(true); + expect(parseConfigValue("42")).toBe(42); + }); }); describe("getPackageInfo", () => { diff --git a/packages/core/schemas/webagent-event.json b/packages/core/schemas/webagent-event.json index 296a72d6..6bd2d031 100644 --- a/packages/core/schemas/webagent-event.json +++ b/packages/core/schemas/webagent-event.json @@ -3368,6 +3368,117 @@ ], "type": "object" }, + "FirewallBlockedNonInteractiveEventData": { + "additionalProperties": false, + "properties": { + "formActionHostnames": { + "items": { + "type": "string" + }, + "type": "array" + }, + "iterationId": { + "type": "string" + }, + "kind": { + "enum": [ + "freeform-fill", + "form-submission" + ], + "type": "string" + }, + "pageHostname": { + "type": [ + "string", + "null" + ] + }, + "reason": { + "type": "string" + }, + "remediations": { + "items": { + "$ref": "#/definitions/FirewallRemediation" + }, + "type": "array" + }, + "timestamp": { + "type": "number" + } + }, + "required": [ + "formActionHostnames", + "iterationId", + "kind", + "pageHostname", + "reason", + "remediations", + "timestamp" + ], + "type": "object" + }, + "FirewallRemediation": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "description": { + "type": "string" + }, + "hostnames": { + "items": { + "type": "string" + }, + "type": "array" + }, + "kind": { + "const": "add-trusted-hostnames", + "type": "string" + } + }, + "required": [ + "kind", + "hostnames", + "description" + ], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "description": { + "type": "string" + }, + 
"kind": { + "const": "enable-interactive-mode", + "type": "string" + } + }, + "required": [ + "kind", + "description" + ], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "description": { + "type": "string" + }, + "kind": { + "const": "enable-unsafe-mode", + "type": "string" + } + }, + "required": [ + "kind", + "description" + ], + "type": "object" + } + ] + }, "FormFieldRequest": { "additionalProperties": false, "description": "A single form field the agent needs data for.", @@ -4656,6 +4767,23 @@ "data" ], "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "data": { + "$ref": "#/definitions/FirewallBlockedNonInteractiveEventData" + }, + "type": { + "const": "firewall:blocked_non_interactive", + "type": "string" + } + }, + "required": [ + "type", + "data" + ], + "type": "object" } ], "description": "Union type of all event data types" diff --git a/packages/core/src/browser/ariaBrowser.ts b/packages/core/src/browser/ariaBrowser.ts index 06fbf382..43bad48d 100644 --- a/packages/core/src/browser/ariaBrowser.ts +++ b/packages/core/src/browser/ariaBrowser.ts @@ -57,6 +57,40 @@ export interface TemporaryTab { waitForLoadState(state: LoadState, options?: { timeout?: number }): Promise; } +export interface FieldMetadata { + ref: string; + tagName: string; + inputType: string | null; + role: string | null; + name: string | null; + label: string | null; + placeholder: string | null; + autocomplete: string | null; + isContentEditable: boolean; + formId: string | null; + formAction: string | null; + formMethod: string | null; +} + +export interface FormFieldState { + ref: string | null; + name: string | null; + tagName: string; + inputType: string | null; + autocomplete: string | null; +} + +export interface FormSubmissionContext { + submitterRef: string; + formId: string | null; + actionUrl: string | null; + submitterActionUrl: string | null; + method: string | null; + fields: FormFieldState[]; +} + +export type 
FormSubmissionTrigger = "click" | "enter"; + export interface AriaBrowser { /** The name of the browser being used */ browserName: string; @@ -99,6 +133,15 @@ export interface AriaBrowser { */ performAction(ref: string, action: PageAction, value?: string): Promise; + /** Returns structural metadata for an element ref used in form/action policy checks. */ + getFieldMetadata(ref: string): Promise; + + /** Returns the form that would be submitted by activating this ref, if any. */ + getFormSubmissionContext( + ref: string, + trigger?: FormSubmissionTrigger, + ): Promise; + /** * Look up element identity (role + accessible name) for a ref from the * most recent ariaTree snapshot. Returns null if the ref is unknown. diff --git a/packages/core/src/browser/bidiBrowser.ts b/packages/core/src/browser/bidiBrowser.ts index a17c6296..1bd0f6bd 100644 --- a/packages/core/src/browser/bidiBrowser.ts +++ b/packages/core/src/browser/bidiBrowser.ts @@ -1,5 +1,13 @@ import TurndownService from "turndown"; -import { AriaBrowser, PageAction, LoadState, TemporaryTab } from "./ariaBrowser.js"; +import { + AriaBrowser, + PageAction, + LoadState, + TemporaryTab, + type FieldMetadata, + type FormSubmissionContext, + type FormSubmissionTrigger, +} from "./ariaBrowser.js"; import { BiDiConnection } from "./bidiConnection.js"; import { ARIA_TREE_SCRIPT } from "./ariaTree/bundle.js"; import { BrowserActionException, InvalidRefException } from "../errors.js"; @@ -263,6 +271,41 @@ export class BiDiBrowser implements AriaBrowser { }); } + // Action-firewall introspection. NOT yet implemented for the BiDi backend — + // porting PlaywrightBrowser's in-page field/form logic is a follow-up and is + // untestable without a live BiDi session. 
Until then these are deliberately + // fail-safe, never fail-open: + // - getFieldMetadata reports a generic freeform text input, so the firewall + // classifies every BiDi-driven fill as non-operational and blocks it on + // untrusted pages (it is allowed on caller-trusted hosts, where the + // firewall bypasses field classification anyway). + // - getFormSubmissionContext returns null, so no submitter context is + // produced. This cannot weaken protection because no agent-filled freeform + // value reaches a field on an untrusted page in the first place. + async getFieldMetadata(ref: string): Promise { + return { + ref, + tagName: "input", + inputType: "text", + role: null, + name: null, + label: null, + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, + }; + } + + async getFormSubmissionContext( + _ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return null; + } + async performAction(ref: string, action: PageAction, value?: string): Promise { return withSpan( SpanName.BROWSER_PERFORM, diff --git a/packages/core/src/browser/playwrightBrowser.ts b/packages/core/src/browser/playwrightBrowser.ts index 3744ae36..2ef96910 100644 --- a/packages/core/src/browser/playwrightBrowser.ts +++ b/packages/core/src/browser/playwrightBrowser.ts @@ -11,7 +11,15 @@ import { Locator, errors as playwrightErrors, } from "playwright"; -import { AriaBrowser, PageAction, LoadState, TemporaryTab } from "./ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + LoadState, + PageAction, + TemporaryTab, +} from "./ariaBrowser.js"; import { PlaywrightBlocker } from "@ghostery/adblocker-playwright"; import fetch from "cross-fetch"; import TurndownService from "turndown"; @@ -839,6 +847,210 @@ export class PlaywrightBrowser implements AriaBrowser { } } + async getFieldMetadata(ref: string): Promise { + const locator = await 
this.validateElementRef(ref); + + try { + return await locator.evaluate((element, elementRef): FieldMetadata => { + const el = element as HTMLElement; + const input = el instanceof HTMLInputElement ? el : null; + const form = getElementForm(el); + + return { + ref: elementRef, + tagName: el.tagName.toLowerCase(), + inputType: input?.type?.toLowerCase() ?? null, + role: el.getAttribute("role"), + name: getElementName(el), + label: getElementLabel(el), + placeholder: getElementPlaceholder(el), + autocomplete: getElementAutocomplete(el), + isContentEditable: el.isContentEditable, + formId: form?.id || null, + formAction: form?.action || null, + formMethod: form?.method?.toLowerCase() || null, + }; + + function getElementForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.form; + } + return node.closest("form"); + } + + function getElementName(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.name || null; + } + return node.getAttribute("name"); + } + + function getElementLabel(node: HTMLElement): string | null { + const ariaLabel = node.getAttribute("aria-label"); + if (ariaLabel?.trim()) return ariaLabel.trim(); + + const labelledBy = node.getAttribute("aria-labelledby"); + if (labelledBy) { + const text = labelledBy + .split(/\s+/) + .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + if ("labels" in node) { + const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement) + .labels; + const text = Array.from(labels || []) + .map((label) => label.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if 
(text) return text; + } + + return null; + } + + function getElementPlaceholder(node: HTMLElement): string | null { + if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) { + return node.placeholder || null; + } + return null; + } + + function getElementAutocomplete(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.autocomplete || null; + } + return null; + } + }, ref); + } catch (error) { + throw new BrowserActionException( + "getFieldMetadata", + `Failed to get field metadata: ${error instanceof Error ? error.message : String(error)}`, + { ref, originalError: error }, + ); + } + } + + async getFormSubmissionContext( + ref: string, + trigger: FormSubmissionTrigger = "click", + ): Promise { + const locator = await this.validateElementRef(ref); + + try { + return await locator.evaluate( + (element, { submitterRef, trigger }): FormSubmissionContext | null => { + const el = element as HTMLElement; + if (!canSubmitForm(el, trigger)) return null; + + const form = getSubmissionForm(el); + if (!form) return null; + + const fields = Array.from(form.elements) + .filter( + (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement => + field instanceof HTMLInputElement || + field instanceof HTMLTextAreaElement || + field instanceof HTMLSelectElement, + ) + .filter((field) => !field.disabled) + .map((field) => ({ + ref: field.getAttribute("data-pilo-ref"), + name: field.name || null, + tagName: field.tagName.toLowerCase(), + inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null, + autocomplete: "autocomplete" in field ? 
field.autocomplete || null : null, + })); + + const submitterActionUrl = (() => { + if (!(el instanceof HTMLButtonElement) && !(el instanceof HTMLInputElement)) + return null; + if (el instanceof HTMLInputElement && el.type !== "submit" && el.type !== "image") + return null; + if (el instanceof HTMLButtonElement && el.type !== "submit") return null; + if (!el.hasAttribute("formaction")) return null; + return el.formAction || null; + })(); + + return { + submitterRef, + formId: form.id || null, + actionUrl: form.action || null, + submitterActionUrl, + method: form.method?.toLowerCase() || null, + fields, + }; + + function getSubmissionForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLButtonElement || + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.form; + } + return node.closest("form"); + } + + function canSubmitForm(node: HTMLElement, submitTrigger: FormSubmissionTrigger): boolean { + if (submitTrigger === "click") { + if (node instanceof HTMLButtonElement) { + return node.type === "submit"; + } + if (node instanceof HTMLInputElement) { + return node.type === "submit" || node.type === "image"; + } + return false; + } + + if (node instanceof HTMLTextAreaElement || node instanceof HTMLSelectElement) + return false; + if (!(node instanceof HTMLInputElement)) return false; + return ![ + "button", + "checkbox", + "color", + "file", + "hidden", + "radio", + "range", + "reset", + "submit", + ].includes(node.type); + } + }, + { submitterRef: ref, trigger }, + ); + } catch (error) { + throw new BrowserActionException( + "getFormSubmissionContext", + `Failed to get form submission context: ${ + error instanceof Error ? 
error.message : String(error) + }`, + { ref, trigger, originalError: error }, + ); + } + } + async performAction(ref: string, action: PageAction, value?: string): Promise { if (!this.page) throw new Error("Browser not started"); return withSpan( diff --git a/packages/core/src/config/defaults.ts b/packages/core/src/config/defaults.ts index 29da4b5b..6e20754e 100644 --- a/packages/core/src/config/defaults.ts +++ b/packages/core/src/config/defaults.ts @@ -125,6 +125,8 @@ export interface PiloConfig { // Action Configuration action_timeout_ms?: number; + trusted_hostnames?: string[]; + unsafe_mode?: boolean; // Search Configuration search_provider?: SearchProviderName; @@ -197,6 +199,8 @@ export interface PiloConfigResolved { // Action Configuration action_timeout_ms: number; + trusted_hostnames: string[]; + unsafe_mode: boolean; // Search Configuration search_provider: SearchProviderName; @@ -627,6 +631,25 @@ export const FIELDS: Record = { description: "Timeout for page load and element actions in milliseconds", category: "action", }, + trusted_hostnames: { + default: [], + type: "string[]", + cli: "--trusted-hostnames", + placeholder: "host1,host2,...", + env: ["PILO_TRUSTED_HOSTNAMES"], + description: + "Comma-separated hostnames where the action firewall is bypassed for fills and submissions. WARNING: on listed hosts, prompt injection from page content can drive the agent to fill and submit any field, including personal and credential data. Use only for sites you fully trust to receive your data.", + category: "action", + }, + unsafe_mode: { + default: false, + type: "boolean", + cli: "--unsafe", + env: ["PILO_UNSAFE_MODE"], + description: + "Disables the action firewall entirely. WARNING: prompt injection from page content can then cause the agent to submit your data, including credentials, personal info, and conversation context, to attacker-controlled forms. 
Only enable for trusted, controlled environments.", + category: "action", + }, // Search Configuration search_provider: { @@ -703,6 +726,8 @@ function buildDefaults(): PiloConfigResolved { "navigation_max_attempts", "navigation_timeout_multiplier", "action_timeout_ms", + "trusted_hostnames", + "unsafe_mode", "search_provider", ]; diff --git a/packages/core/src/core.ts b/packages/core/src/core.ts index 75e4f2c7..ebd7e5b1 100644 --- a/packages/core/src/core.ts +++ b/packages/core/src/core.ts @@ -5,7 +5,12 @@ */ export { WebAgent } from "./webAgent.js"; -export type { AriaBrowser } from "./browser/ariaBrowser.js"; +export type { + AriaBrowser, + FieldMetadata, + FormSubmissionContext, + FormSubmissionTrigger, +} from "./browser/ariaBrowser.js"; export { PageAction, LoadState } from "./browser/ariaBrowser.js"; export type { TaskExecutionResult, TaskError, WebAgentOptions } from "./webAgent.js"; export { TaskErrorCode } from "./webAgent.js"; @@ -35,6 +40,8 @@ export type { ValidationErrorEventData, InteractiveFormDataRequestEventData, InteractiveFormDataErrorEventData, + FirewallBlockedNonInteractiveEventData, + FirewallRemediation, AutomateStreamEvent, StreamCompleteEventData, StreamDoneEventData, @@ -52,11 +59,16 @@ export type { Action, TaskValidationResult } from "./schemas.js"; export { RecoverableError, BrowserException, + BrowserActionException, + InvalidRefException, NavigationTimeoutException, PlanningError, NoStartingUrlError, } from "./errors.js"; +// Action firewall helpers (for CLI-side validation at config-set time) +export { normalizeHostname, InvalidHostnameError } from "./security/actionFirewall.js"; + // Navigation retry configuration export type { NavigationRetryConfig } from "./browser/navigationRetry.js"; export { calculateTimeout, DEFAULT_NAVIGATION_RETRY_CONFIG } from "./browser/navigationRetry.js"; diff --git a/packages/core/src/events.ts b/packages/core/src/events.ts index 31b2f205..5a57416b 100644 --- a/packages/core/src/events.ts +++ 
b/packages/core/src/events.ts @@ -52,6 +52,9 @@ export enum WebAgentEventType { // Interactive mode events INTERACTIVE_FORM_DATA_REQUEST = "interactive:form_data:request", INTERACTIVE_FORM_DATA_ERROR = "interactive:form_data:error", + + // Firewall events + FIREWALL_BLOCKED_NON_INTERACTIVE = "firewall:blocked_non_interactive", } /** @@ -363,6 +366,19 @@ export interface InteractiveFormDataErrorEventData extends WebAgentEventData { fieldErrors: Record; } +export type FirewallRemediation = + | { kind: "add-trusted-hostnames"; hostnames: string[]; description: string } + | { kind: "enable-interactive-mode"; description: string } + | { kind: "enable-unsafe-mode"; description: string }; + +export interface FirewallBlockedNonInteractiveEventData extends WebAgentEventData { + reason: string; + kind: "freeform-fill" | "form-submission"; + pageHostname: string | null; + formActionHostnames: string[]; + remediations: FirewallRemediation[]; +} + /** * Union type of all event data types */ @@ -405,6 +421,10 @@ export type WebAgentEvent = | { type: WebAgentEventType.INTERACTIVE_FORM_DATA_ERROR; data: InteractiveFormDataErrorEventData; + } + | { + type: WebAgentEventType.FIREWALL_BLOCKED_NON_INTERACTIVE; + data: FirewallBlockedNonInteractiveEventData; }; // ============================================================================ diff --git a/packages/core/src/security/actionFirewall.ts b/packages/core/src/security/actionFirewall.ts new file mode 100644 index 00000000..0dde6d03 --- /dev/null +++ b/packages/core/src/security/actionFirewall.ts @@ -0,0 +1,282 @@ +import type { FieldMetadata, FormSubmissionContext } from "../browser/ariaBrowser.js"; + +export const SECURITY_BLOCKED_UNAUTHORIZED_FILL = + "Security policy blocked filling a submittable form field without user approval"; + +export const SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT = + "Security policy blocked submitting a form containing unauthorized agent-filled data"; + +export const 
SECURITY_BLOCKED_CROSS_SITE_OPERATIONAL_SUBMIT = + "Security policy blocked submitting operational field data to a site other than the current page"; + +export type FillSource = "agent" | "user-approved"; + +export type ActionFirewallResult = + | { allowed: true; operational?: boolean } + | { allowed: false; reason: string; isRecoverable: true }; + +export interface FirewallConfig { + trustedHostnames: ReadonlySet; + unsafeMode: boolean; +} + +const OPERATIONAL_INPUT_TYPES = new Set([ + "search", + "number", + "date", + "datetime-local", + "month", + "time", + "week", + "color", + "range", +]); + +const OPERATIONAL_ROLES = new Set(["searchbox", "combobox", "spinbutton", "slider"]); + +const SENSITIVE_AUTOCOMPLETE_TOKENS = new Set([ + "name", + "honorific-prefix", + "given-name", + "additional-name", + "family-name", + "honorific-suffix", + "nickname", + "email", + "username", + "new-password", + "current-password", + "one-time-code", + "organization", + "street-address", + "address-line1", + "address-line2", + "address-line3", + "address-level1", + "address-level2", + "address-level3", + "address-level4", + "country", + "country-name", + "postal-code", + "cc-name", + "cc-given-name", + "cc-additional-name", + "cc-family-name", + "cc-number", + "cc-exp", + "cc-exp-month", + "cc-exp-year", + "cc-csc", + "cc-type", + "transaction-currency", + "transaction-amount", + "language", + "bday", + "bday-day", + "bday-month", + "bday-year", + "sex", + "tel", + "tel-country-code", + "tel-national", + "tel-area-code", + "tel-local", + "tel-local-prefix", + "tel-local-suffix", + "tel-extension", + "impp", + "url", + "photo", +]); + +export function assessFill(input: { + field: FieldMetadata; + source: FillSource; + pageHostname: string | null; + firewall: FirewallConfig; +}): ActionFirewallResult { + if (input.firewall.unsafeMode) { + return { allowed: true }; + } + + if (input.pageHostname !== null && input.firewall.trustedHostnames.has(input.pageHostname)) { + return { allowed: 
true }; + } + + if (input.source === "user-approved") { + return { allowed: true }; + } + + if (isOperationalField(input.field)) { + return { allowed: true, operational: true }; + } + + return { + allowed: false, + reason: SECURITY_BLOCKED_UNAUTHORIZED_FILL, + isRecoverable: true, + }; +} + +export function assessFormSubmission(input: { + form: FormSubmissionContext; + approvedRefs: ReadonlySet; + agentFilledRefs: ReadonlySet; + operationalRefs: ReadonlySet; + pageHostname: string | null; + firewall: FirewallConfig; +}): ActionFirewallResult { + if (input.firewall.unsafeMode) { + return { allowed: true }; + } + + if (input.pageHostname !== null && input.firewall.trustedHostnames.has(input.pageHostname)) { + const formActionHost = extractHostname(input.form.actionUrl); + const submitterActionHost = extractHostname(input.form.submitterActionUrl); + + const formActionTrusted = + formActionHost !== null && input.firewall.trustedHostnames.has(formActionHost); + + const submitterTrusted = + input.form.submitterActionUrl === null + ? true + : submitterActionHost !== null && input.firewall.trustedHostnames.has(submitterActionHost); + + if (formActionTrusted && submitterTrusted) { + return { allowed: true }; + } + } + + let hasOperationalAgentFill = false; + for (const field of input.form.fields) { + if (!field.ref || !input.agentFilledRefs.has(field.ref)) continue; + if (input.approvedRefs.has(field.ref)) continue; + if (input.operationalRefs.has(field.ref)) { + hasOperationalAgentFill = true; + continue; + } + + return { + allowed: false, + reason: SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT, + isRecoverable: true, + }; + } + + // Operational fields (search/filter boxes, comboboxes, etc.) are classified + // from page-controlled attributes (inputType/role) and may carry agent-typed + // text. They are exempt from the unauthorized-submit gate so the agent can + // search and filter — but that exemption must not become an exfiltration + // channel. 
An attacker page can label its collector field as a search box and + // point the form action at its own host. So operational agent-filled data may + // only be submitted to the current page's own host. A null page host + // (non-http(s) page, or a getUrl failure) cannot be matched, so we fail closed. + // Approved fields are excluded from this restriction: they hold the user's own + // data, entered through request_user_data, and legitimately post cross-host + // (e.g. a payment processor on a separate domain). + if (hasOperationalAgentFill) { + const sameHost = (url: string | null): boolean => + input.pageHostname !== null && extractHostname(url) === input.pageHostname; + const submitterSameHost = + input.form.submitterActionUrl === null ? true : sameHost(input.form.submitterActionUrl); + if (!sameHost(input.form.actionUrl) || !submitterSameHost) { + return { + allowed: false, + reason: SECURITY_BLOCKED_CROSS_SITE_OPERATIONAL_SUBMIT, + isRecoverable: true, + }; + } + } + + return { allowed: true }; +} + +function isOperationalField(field: FieldMetadata): boolean { + const inputType = field.inputType?.toLowerCase() ?? null; + const role = field.role?.toLowerCase() ?? 
null; + + if (hasSensitiveAutocomplete(field.autocomplete)) return false; + if (field.tagName.toLowerCase() === "textarea" || field.isContentEditable) return false; + if (inputType && OPERATIONAL_INPUT_TYPES.has(inputType)) return true; + if (role && OPERATIONAL_ROLES.has(role)) return true; + return false; +} + +function hasSensitiveAutocomplete(autocomplete: string | null): boolean { + if (!autocomplete) return false; + const tokens = autocomplete.toLowerCase().split(/\s+/); + return tokens.some((token) => SENSITIVE_AUTOCOMPLETE_TOKENS.has(token)); +} + +export class InvalidHostnameError extends Error { + constructor(input: string, reason: string) { + super(`Invalid hostname "${input}": ${reason}`); + this.name = "InvalidHostnameError"; + } +} + +const HOSTNAME_DISALLOWED_CHARS = /[\s/:*]/; + +export function normalizeHostname(input: string): string { + if (typeof input !== "string") { + throw new InvalidHostnameError(String(input), "not a string"); + } + const trimmed = input.trim(); + if (trimmed.length === 0) { + throw new InvalidHostnameError(input, "empty"); + } + if (HOSTNAME_DISALLOWED_CHARS.test(trimmed)) { + throw new InvalidHostnameError(input, "contains whitespace, '/', ':', or '*'"); + } + if (trimmed.startsWith("[") || trimmed.endsWith("]")) { + throw new InvalidHostnameError(input, "bracketed IPv6 is not supported"); + } + let withoutTrailingDot = trimmed; + if (withoutTrailingDot.endsWith(".")) { + withoutTrailingDot = withoutTrailingDot.slice(0, -1); + } + if (withoutTrailingDot.length === 0) { + throw new InvalidHostnameError(input, "empty after trimming trailing dot"); + } + return withoutTrailingDot.toLowerCase(); +} + +/** + * Return a FirewallConfig with the start URL's host added to the trusted set. + * + * Used to trust the hostname of a caller-provided start URL: navigating somewhere + * the caller explicitly named is treated as consent to interact with that host's + * forms. 
Only call this with a caller-supplied URL — never a planner-chosen or + * agent-navigated URL, which are model/page-influenced and must not grant trust. + * + * Non-http(s) or unparseable URLs (null host) leave the firewall unchanged, as + * does a host that is already trusted. The input is never mutated. + */ +export function withTrustedStartHost( + firewall: FirewallConfig, + startUrl: string | null, +): FirewallConfig { + const host = extractHostname(startUrl); + if (host === null || firewall.trustedHostnames.has(host)) return firewall; + return Object.freeze({ + trustedHostnames: new Set([...firewall.trustedHostnames, host]), + unsafeMode: firewall.unsafeMode, + }); +} + +export function extractHostname(url: string | null): string | null { + if (url === null || url === undefined) return null; + if (typeof url !== "string" || url.length === 0) return null; + let parsed: URL; + try { + parsed = new URL(url); + } catch { + return null; + } + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null; + let host = parsed.hostname.toLowerCase(); + if (host.endsWith(".")) host = host.slice(0, -1); + if (host.length === 0) return null; + return host; +} diff --git a/packages/core/src/tools/interactiveTools.ts b/packages/core/src/tools/interactiveTools.ts index 9f215e18..4869c679 100644 --- a/packages/core/src/tools/interactiveTools.ts +++ b/packages/core/src/tools/interactiveTools.ts @@ -26,21 +26,7 @@ interface InteractiveToolContext { * Used by the fill gate to prevent the agent from filling form fields with * generated data when interactive mode is on. */ -export class ApprovedRefs { - private refs = new Set(); - - add(ref: string): void { - this.refs.add(ref); - } - - has(ref: string): boolean { - return this.refs.has(ref); - } - - clear(): void { - this.refs.clear(); - } -} +export class ApprovedRefs extends Set {} /** * Maps field types from the request schema to the appropriate browser action. 
diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index 800b7482..9a1256c2 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -14,6 +14,13 @@ import type { ProviderConfig } from "../provider.js"; import { BrowserException } from "../errors.js"; import { generateTextWithRetry } from "../utils/retry.js"; import { wrapExternalContentWithWarning, ExternalContentLabel } from "../utils/promptSecurity.js"; +import { + assessFill, + assessFormSubmission, + extractHostname, + type FirewallConfig, +} from "../security/actionFirewall.js"; +import type { FirewallBlockedNonInteractiveEventData, FirewallRemediation } from "../events.js"; import { withSpan, SpanStatusCode, @@ -26,6 +33,11 @@ interface WebActionContext { eventEmitter: WebAgentEventEmitter; providerConfig: ProviderConfig; abortSignal?: AbortSignal; + approvedRefs?: ReadonlySet; + agentFilledRefs: Set; + operationalRefs: Set; + firewall: FirewallConfig; + interactive: boolean; } /** @@ -51,6 +63,127 @@ type ActionResult = { targetIdentity?: { role: string; name: string }; }; +const EMPTY_APPROVED_REFS = new Set(); + +function buildRemediations(blockedHostnames: string[]): FirewallRemediation[] { + const uniqueHosts = Array.from(new Set(blockedHostnames.filter((h): h is string => Boolean(h)))); + return [ + { + kind: "add-trusted-hostnames", + hostnames: uniqueHosts, + description: + uniqueHosts.length > 0 + ? `Add ${uniqueHosts.join(", ")} to trusted_hostnames to allow this action on this site.` + : "Add the page hostname to trusted_hostnames to allow this action on this site.", + }, + { + kind: "enable-interactive-mode", + description: + "Run in interactive mode by providing a UserDataCallback so the agent can ask the user to approve sensitive fields per-action via request_user_data.", + }, + { + kind: "enable-unsafe-mode", + description: + "Set unsafe_mode=true to disable the action firewall entirely. 
WARNING: prompt injection from page content can then drive the agent to submit any field, including personal and credential data, to attacker-controlled forms.", + }, + ]; +} + +function emitNonInteractiveBlock( + context: WebActionContext, + kind: "freeform-fill" | "form-submission", + reason: string, + pageHostname: string | null, + formActionHostnames: string[], +): void { + if (context.interactive) return; + const hostsForRemediation = + pageHostname === null ? formActionHostnames : [pageHostname, ...formActionHostnames]; + const data: FirewallBlockedNonInteractiveEventData = { + timestamp: Date.now(), + iterationId: "", + reason, + kind, + pageHostname, + formActionHostnames, + remediations: buildRemediations(hostsForRemediation), + }; + context.eventEmitter.emit(WebAgentEventType.FIREWALL_BLOCKED_NON_INTERACTIVE, data); +} + +function failedActionResult( + action: string, + error: string, + context: WebActionContext, + ref?: string, + value?: string | number, +): ActionResult { + context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { + success: false, + action, + error, + isRecoverable: true, + }); + + return { + success: false, + action, + ...(ref && { ref }), + ...(value !== undefined && { value }), + error, + isRecoverable: true, + }; +} + +async function assessFormSubmissionForAction( + action: PageAction.Click | PageAction.Enter, + context: WebActionContext, + ref: string, +): Promise { + try { + const [form, pageUrl] = await Promise.all([ + context.browser.getFormSubmissionContext( + ref, + action === PageAction.Click ? "click" : "enter", + ), + context.browser.getUrl(), + ]); + if (!form) return null; + const pageHostname = extractHostname(pageUrl); + const formActionHostnames = [ + extractHostname(form.actionUrl), + extractHostname(form.submitterActionUrl), + ].filter((h): h is string => h !== null); + + const assessment = assessFormSubmission({ + form, + approvedRefs: context.approvedRefs ?? 
EMPTY_APPROVED_REFS, + agentFilledRefs: context.agentFilledRefs, + operationalRefs: context.operationalRefs, + pageHostname, + firewall: context.firewall, + }); + + if (!assessment.allowed) { + emitNonInteractiveBlock( + context, + "form-submission", + assessment.reason, + pageHostname, + formActionHostnames, + ); + return failedActionResult(action, assessment.reason, context, ref); + } + } catch (error) { + if (error instanceof BrowserException) { + return failedActionResult(action, error.message, context, ref); + } + throw error; + } + + return null; +} + /** * Helper function to perform an action with full error handling and logging * Handles browser exceptions and converts them to recoverable errors for the agent @@ -157,6 +290,16 @@ async function performActionWithValidation( } export function createWebActionTools(context: WebActionContext) { + if (!context.agentFilledRefs || !context.operationalRefs) { + throw new Error("Web action provenance tracking sets are required"); + } + if (!context.firewall) { + throw new Error("FirewallConfig is required on WebActionContext"); + } + if (typeof context.interactive !== "boolean") { + throw new Error("interactive flag is required on WebActionContext"); + } + return { click: tool({ description: TOOL_STRINGS.webActions.click.description, @@ -164,6 +307,9 @@ export function createWebActionTools(context: WebActionContext) { ref: z.string().describe(TOOL_STRINGS.webActions.common.elementRef), }), execute: async ({ ref }) => { + const blocked = await assessFormSubmissionForAction(PageAction.Click, context, ref); + if (blocked) return blocked; + return await performActionWithValidation(PageAction.Click, context, ref); }, }), @@ -175,7 +321,39 @@ export function createWebActionTools(context: WebActionContext) { value: z.string().describe(TOOL_STRINGS.webActions.common.textValue), }), execute: async ({ ref, value }) => { - return await performActionWithValidation(PageAction.Fill, context, ref, value); + try { + const [metadata, 
pageUrl] = await Promise.all([ + context.browser.getFieldMetadata(ref), + context.browser.getUrl(), + ]); + const pageHostname = extractHostname(pageUrl); + const userApproved = Boolean(context.approvedRefs?.has(ref)); + const assessment = assessFill({ + field: metadata, + source: userApproved ? "user-approved" : "agent", + pageHostname, + firewall: context.firewall, + }); + + if (!assessment.allowed) { + emitNonInteractiveBlock(context, "freeform-fill", assessment.reason, pageHostname, []); + return failedActionResult(PageAction.Fill, assessment.reason, context, ref); + } + + const result = await performActionWithValidation(PageAction.Fill, context, ref, value); + if (result.success && !userApproved) { + context.agentFilledRefs.add(ref); + if (assessment.operational) { + context.operationalRefs.add(ref); + } + } + return result; + } catch (error) { + if (error instanceof BrowserException) { + return failedActionResult(PageAction.Fill, error.message, context, ref); + } + throw error; + } }, }), @@ -236,6 +414,9 @@ export function createWebActionTools(context: WebActionContext) { ref: z.string().describe(TOOL_STRINGS.webActions.common.elementRef), }), execute: async ({ ref }) => { + const blocked = await assessFormSubmissionForAction(PageAction.Enter, context, ref); + if (blocked) return blocked; + return await performActionWithValidation(PageAction.Enter, context, ref); }, }), diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 888a1cb9..3b02497b 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -44,7 +44,7 @@ import { SearchService } from "./search/searchService.js"; import { createPlanningTools } from "./tools/planningTools.js"; import { createValidationTools } from "./tools/validationTools.js"; import { createTabstackTools } from "./tools/tabstackTools.js"; -import { createInteractiveTools, ApprovedRefs, FILL_GATE_ERROR } from "./tools/interactiveTools.js"; +import { createInteractiveTools, 
ApprovedRefs } from "./tools/interactiveTools.js"; import { createTabstackClient } from "./tabstack/client.js"; import type { UserDataCallback } from "./types/interactive.js"; import { nanoid } from "nanoid"; @@ -60,6 +60,11 @@ import { SpanName, recordSanitizedException, } from "./telemetry/tracing.js"; +import { + normalizeHostname, + withTrustedStartHost, + type FirewallConfig, +} from "./security/actionFirewall.js"; // === Type Definitions === @@ -100,6 +105,26 @@ export interface WebAgentOptions { onUserDataRequired?: UserDataCallback; /** Correlation ID for this task, propagated to logs and traces. */ taskId?: string; + /** + * Hostnames where the action firewall is bypassed for fills and submissions. + * + * @warning On listed hosts, prompt injection from page content can drive the + * agent to fill and submit any field, including personal and credential data. + * Use only for sites you fully trust to receive your data. The bypass applies + * only when the current page hostname AND every form-action hostname (the + * form's `action` plus any submitter `formaction` override) are all in this + * list. + */ + trustedHostnames?: readonly string[]; + /** + * Disables the action firewall entirely. + * + * @warning When true, prompt injection from page content can cause the agent + * to submit your data, including credentials, personal information, and + * conversation context, to attacker-controlled forms. Only enable for + * trusted, controlled environments. + */ + unsafeMode?: boolean; } export interface ExecuteOptions { @@ -229,6 +254,12 @@ export class WebAgent { private readonly tabstackApiUrl: string | undefined; private readonly onUserDataRequired: UserDataCallback | undefined; private readonly taskId: string | undefined; + private readonly firewall: FirewallConfig; + // Host of the caller-provided start URL (options.startingUrl), captured at + // execute() time. 
Trusted by the firewall — navigating somewhere the caller + // explicitly named is consent to interact with that host. NOT set from the + // planner-chosen URL, which is model-influenced and must not grant trust. + private callerStartHostUrl: string | null = null; // Actions where same-action-same-value repetition is legitimate workflow // (e.g. scrolling an infinite feed, waiting for a slow page) rather than a @@ -262,6 +293,10 @@ export class WebAgent { this.tabstackApiUrl = options.tabstackApiUrl; this.onUserDataRequired = options.onUserDataRequired; this.taskId = options.taskId; + this.firewall = Object.freeze({ + trustedHostnames: new Set((options.trustedHostnames ?? []).map((h) => normalizeHostname(h))), + unsafeMode: Boolean(options.unsafeMode), + }); if (this.searchProvider === "parallel-api" && !this.searchApiKey) { throw new Error("parallel_api_key is required when search_provider is 'parallel-api'"); @@ -319,6 +354,10 @@ export class WebAgent { // 1. Validate input parameters (let validation errors throw) this.validateTaskAndOptions(task, options); + // Capture only the caller-provided start URL (not the planner's choice) + // so the firewall can trust that host for fills/submissions. + this.callerStartHostUrl = options.startingUrl ?? null; + // 2. 
Initialize browser and internal state await this.initializeBrowserAndState(task, options); @@ -397,12 +436,32 @@ export class WebAgent { task: string, executionState: ExecutionState, ): Promise<{ success: boolean; finalAnswer: string | null; error?: TaskError }> { + // Only include interactive tools if a callback is provided + let interactiveToolSet: Record = {}; + let approvedRefs: ApprovedRefs | null = null; + const agentFilledRefs = new Set(); + const operationalRefs = new Set(); + if (this.onUserDataRequired) { + const result = createInteractiveTools({ + callback: this.onUserDataRequired, + browser: this.browser, + eventEmitter: this.eventEmitter, + }); + interactiveToolSet = result.tools; + approvedRefs = result.approvedRefs; + } + // Setup tools once const webActionTools = createWebActionTools({ browser: this.browser, eventEmitter: this.eventEmitter, providerConfig: this.providerConfig, abortSignal: this.abortSignal, + approvedRefs: approvedRefs ?? undefined, + agentFilledRefs, + operationalRefs, + firewall: withTrustedStartHost(this.firewall, this.callerStartHostUrl), + interactive: Boolean(this.onUserDataRequired), }); // Only include search tools if a search service was created @@ -418,51 +477,6 @@ export class WebAgent { }) : {}; - // Only include interactive tools if a callback is provided - let interactiveToolSet: Record = {}; - let approvedRefs: ApprovedRefs | null = null; - if (this.onUserDataRequired) { - const result = createInteractiveTools({ - callback: this.onUserDataRequired, - browser: this.browser, - eventEmitter: this.eventEmitter, - }); - interactiveToolSet = result.tools; - approvedRefs = result.approvedRefs; - } - - // When interactive mode is on, gate fill/select/check to require approved refs. - // On first unapproved attempt, return an error. If the agent retries the same ref - // (indicating it's a navigation/search field, not a user-data form field), allow it - // through on the second attempt to avoid a deadlock. 
- if (approvedRefs) { - const warnedRefs = new Set(); - const gatedActions = ["fill", "select", "check"] as const; - for (const actionName of gatedActions) { - const originalTool = webActionTools[actionName]; - if (originalTool) { - const originalExecute = originalTool.execute!; - (originalTool as any).execute = async (args: any, options: any) => { - if (args.ref && !approvedRefs!.has(args.ref)) { - if (!warnedRefs.has(args.ref)) { - // First attempt: warn and block - warnedRefs.add(args.ref); - return { - success: false, - action: actionName, - ref: args.ref, - error: FILL_GATE_ERROR, - isRecoverable: true, - }; - } - // Second attempt: agent confirmed this is a navigation/search field, allow it - } - return originalExecute(args, options); - }; - } - } - } - // Merge all tools const allTools = { ...webActionTools, ...searchTools, ...tabstackTools, ...interactiveToolSet }; @@ -516,9 +530,13 @@ export class WebAgent { if (needsPageSnapshot) { // Clear approved refs when page changes: ARIA refs reset on each snapshot, // so old ref strings may now point to different DOM elements. + // Recoverable blocked action errors deliberately keep needsPageSnapshot=false + // so a blocked submit retry remains tied to the same agent-filled refs. if (approvedRefs) { approvedRefs.clear(); } + agentFilledRefs.clear(); + operationalRefs.clear(); await this.addPageSnapshot(); } @@ -1120,8 +1138,7 @@ export class WebAgent { throw new Error(actionOutput.error); } - // Determine if page changed (most actions change the page, except extract and webSearch) - const pageChanged = actionOutput.action !== "extract" && actionOutput.action !== "webSearch"; + const pageChanged = WebAgent.shouldRefreshPageSnapshotAfterAction(actionOutput.action); // Check for terminal actions if (actionOutput.isTerminal) { @@ -1189,6 +1206,15 @@ export class WebAgent { }; } + // Fill keeps the current snapshot so refs and agent-filled provenance remain + // valid for a following submit check. 
This trades off immediate visibility + // into dynamic validation UI until a later action refreshes the snapshot. + private static readonly ACTIONS_WITHOUT_PAGE_REFRESH = new Set(["extract", "webSearch", "fill"]); + + private static shouldRefreshPageSnapshotAfterAction(action: string): boolean { + return !WebAgent.ACTIONS_WITHOUT_PAGE_REFRESH.has(action); + } + /** * Check for repeated actions and handle accordingly * @returns Action result if intervention is needed, null otherwise diff --git a/packages/core/test/config.test.ts b/packages/core/test/config.test.ts index 4539b4b8..114da941 100644 --- a/packages/core/test/config.test.ts +++ b/packages/core/test/config.test.ts @@ -190,6 +190,8 @@ describe("ConfigManager", () => { "navigation_max_attempts", "navigation_timeout_multiplier", "action_timeout_ms", + "trusted_hostnames", + "unsafe_mode", "search_provider", "parallel_api_key", "tabstack_api_key", diff --git a/packages/core/test/config/commander.test.ts b/packages/core/test/config/commander.test.ts new file mode 100644 index 00000000..ce6081a7 --- /dev/null +++ b/packages/core/test/config/commander.test.ts @@ -0,0 +1,33 @@ +import { describe, it, expect } from "vitest"; +import { Command } from "commander"; +import { addConfigOptions } from "../../src/config/commander.js"; + +describe("CLI: firewall flags", () => { + it("parses --trusted-hostnames as comma-separated list", () => { + const cmd = new Command().exitOverride(); + addConfigOptions(cmd); + cmd.action(() => {}); + cmd.parse(["node", "test", "--trusted-hostnames", "a.com,b.com"]); + const opts = cmd.opts(); + expect(opts.trustedHostnames).toEqual(["a.com", "b.com"]); + }); + + it("parses --unsafe as boolean true", () => { + const cmd = new Command().exitOverride(); + addConfigOptions(cmd); + cmd.action(() => {}); + cmd.parse(["node", "test", "--unsafe"]); + const opts = cmd.opts(); + expect(opts.unsafe).toBe(true); + }); + + it("does not set firewall opts when flags omitted", () => { + const cmd = new 
Command().exitOverride(); + addConfigOptions(cmd); + cmd.action(() => {}); + cmd.parse(["node", "test"]); + const opts = cmd.opts(); + expect(opts.trustedHostnames).toBeUndefined(); + expect(opts.unsafe).toBeUndefined(); + }); +}); diff --git a/packages/core/test/config/defaults.test.ts b/packages/core/test/config/defaults.test.ts new file mode 100644 index 00000000..4f97795c --- /dev/null +++ b/packages/core/test/config/defaults.test.ts @@ -0,0 +1,38 @@ +import { describe, it, expect } from "vitest"; +import { FIELDS, DEFAULTS } from "../../src/config/defaults.js"; + +describe("config defaults: firewall fields", () => { + it("declares trusted_hostnames as string[] with empty default", () => { + expect(FIELDS.trusted_hostnames).toBeDefined(); + expect(FIELDS.trusted_hostnames.type).toBe("string[]"); + expect(FIELDS.trusted_hostnames.category).toBe("action"); + expect(DEFAULTS.trusted_hostnames).toEqual([]); + }); + + it("declares unsafe_mode as boolean with false default", () => { + expect(FIELDS.unsafe_mode).toBeDefined(); + expect(FIELDS.unsafe_mode.type).toBe("boolean"); + expect(FIELDS.unsafe_mode.category).toBe("action"); + expect(DEFAULTS.unsafe_mode).toBe(false); + }); + + it("trusted_hostnames description warns about data risk", () => { + expect(FIELDS.trusted_hostnames.description).toMatch(/WARNING/); + expect(FIELDS.trusted_hostnames.description.toLowerCase()).toContain("trust"); + }); + + it("unsafe_mode description warns about data risk", () => { + expect(FIELDS.unsafe_mode.description).toMatch(/WARNING/); + expect(FIELDS.unsafe_mode.description.toLowerCase()).toContain("firewall"); + }); + + it("trusted_hostnames has a CLI flag and env var", () => { + expect(FIELDS.trusted_hostnames.cli).toBe("--trusted-hostnames"); + expect(FIELDS.trusted_hostnames.env).toContain("PILO_TRUSTED_HOSTNAMES"); + }); + + it("unsafe_mode has a CLI flag and env var", () => { + expect(FIELDS.unsafe_mode.cli).toBe("--unsafe"); + 
expect(FIELDS.unsafe_mode.env).toContain("PILO_UNSAFE_MODE"); + }); +}); diff --git a/packages/core/test/config/env.test.ts b/packages/core/test/config/env.test.ts new file mode 100644 index 00000000..5e76ce2d --- /dev/null +++ b/packages/core/test/config/env.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { parseEnvConfig } from "../../src/config/env.js"; + +describe("env: firewall fields", () => { + const originalEnv = { ...process.env }; + + beforeEach(() => { + delete process.env.PILO_TRUSTED_HOSTNAMES; + delete process.env.PILO_UNSAFE_MODE; + }); + + afterEach(() => { + process.env = { ...originalEnv }; + }); + + it("parses PILO_TRUSTED_HOSTNAMES as comma-separated list", () => { + process.env.PILO_TRUSTED_HOSTNAMES = "a.com,b.com"; + const result = parseEnvConfig(); + expect(result.trusted_hostnames).toEqual(["a.com", "b.com"]); + }); + + it("parses PILO_UNSAFE_MODE=true as boolean true", () => { + process.env.PILO_UNSAFE_MODE = "true"; + const result = parseEnvConfig(); + expect(result.unsafe_mode).toBe(true); + }); + + it("parses PILO_UNSAFE_MODE=false as boolean false", () => { + process.env.PILO_UNSAFE_MODE = "false"; + const result = parseEnvConfig(); + expect(result.unsafe_mode).toBe(false); + }); + + it("returns undefined when env vars are not set", () => { + const result = parseEnvConfig(); + expect(result.trusted_hostnames).toBeUndefined(); + expect(result.unsafe_mode).toBeUndefined(); + }); +}); diff --git a/packages/core/test/events.test.ts b/packages/core/test/events.test.ts index f8f1b271..bcd73117 100644 --- a/packages/core/test/events.test.ts +++ b/packages/core/test/events.test.ts @@ -141,6 +141,7 @@ describe("WebAgentEventEmitter", () => { "browser:reconnected", "interactive:form_data:request", "interactive:form_data:error", + "firewall:blocked_non_interactive", ]; const actualEventTypes = Object.values(WebAgentEventType); diff --git a/packages/core/test/playwrightBrowser.test.ts 
b/packages/core/test/playwrightBrowser.test.ts index 0a978ada..e3f47d39 100644 --- a/packages/core/test/playwrightBrowser.test.ts +++ b/packages/core/test/playwrightBrowser.test.ts @@ -941,6 +941,87 @@ describe("PlaywrightBrowser", () => { expect(error.ref).toBe("missing"); }); }); + + describe("metadata error handling", () => { + it("should wrap field metadata evaluation errors in BrowserActionException", async () => { + const mockLocator = { + count: vi.fn().mockResolvedValue(1), + evaluate: vi.fn().mockRejectedValue(new Error("Execution context was destroyed")), + }; + const mockPage = { + locator: vi.fn().mockReturnValue(mockLocator), + }; + (browser as any).page = mockPage; + + await expect(browser.getFieldMetadata("input1")).rejects.toThrow(BrowserActionException); + await expect(browser.getFieldMetadata("input1")).rejects.toThrow( + "Failed to get field metadata: Execution context was destroyed", + ); + }); + + it("should wrap form submission context evaluation errors in BrowserActionException", async () => { + const mockLocator = { + count: vi.fn().mockResolvedValue(1), + evaluate: vi.fn().mockRejectedValue(new Error("Execution context was destroyed")), + }; + const mockPage = { + locator: vi.fn().mockReturnValue(mockLocator), + }; + (browser as any).page = mockPage; + + await expect(browser.getFormSubmissionContext("submit1")).rejects.toThrow( + BrowserActionException, + ); + await expect(browser.getFormSubmissionContext("submit1")).rejects.toThrow( + "Failed to get form submission context: Execution context was destroyed", + ); + }); + + it("returns submitterActionUrl from the evaluate result", async () => { + const mockLocator = { + count: vi.fn().mockResolvedValue(1), + evaluate: vi.fn().mockResolvedValue({ + submitterRef: "btn", + formId: null, + actionUrl: "https://example.com/normal", + submitterActionUrl: "https://override.example.com/special", + method: "post", + fields: [], + }), + }; + const mockPage = { + locator: 
vi.fn().mockReturnValue(mockLocator), + }; + (browser as any).page = mockPage; + + const ctx = await browser.getFormSubmissionContext("btn", "click"); + expect(ctx).not.toBeNull(); + expect(ctx!.actionUrl).toBe("https://example.com/normal"); + expect(ctx!.submitterActionUrl).toBe("https://override.example.com/special"); + }); + + it("returns null submitterActionUrl when the evaluate result has none", async () => { + const mockLocator = { + count: vi.fn().mockResolvedValue(1), + evaluate: vi.fn().mockResolvedValue({ + submitterRef: "btn", + formId: null, + actionUrl: "https://example.com/normal", + submitterActionUrl: null, + method: "post", + fields: [], + }), + }; + const mockPage = { + locator: vi.fn().mockReturnValue(mockLocator), + }; + (browser as any).page = mockPage; + + const ctx = await browser.getFormSubmissionContext("btn", "click"); + expect(ctx).not.toBeNull(); + expect(ctx!.submitterActionUrl).toBeNull(); + }); + }); }); describe("CDP endpoint failover", () => { diff --git a/packages/core/test/security/actionFirewall.test.ts b/packages/core/test/security/actionFirewall.test.ts new file mode 100644 index 00000000..e032bcee --- /dev/null +++ b/packages/core/test/security/actionFirewall.test.ts @@ -0,0 +1,619 @@ +import { describe, expect, it } from "vitest"; +import type { FieldMetadata, FormSubmissionContext } from "../../src/browser/ariaBrowser.js"; +import { + assessFill, + assessFormSubmission, + normalizeHostname, + extractHostname, + withTrustedStartHost, + InvalidHostnameError, + SECURITY_BLOCKED_UNAUTHORIZED_FILL, + SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT, + SECURITY_BLOCKED_CROSS_SITE_OPERATIONAL_SUBMIT, + type FirewallConfig, +} from "../../src/security/actionFirewall.js"; + +function field(overrides: Partial = {}): FieldMetadata { + return { + ref: "E1", + tagName: "input", + inputType: "text", + role: null, + name: null, + label: null, + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: "form-1", + formAction: 
"https://example.com/search", + formMethod: "get", + ...overrides, + }; +} + +function form(overrides: Partial = {}): FormSubmissionContext { + return { + submitterRef: "E9", + formId: "form-1", + actionUrl: "https://example.com/submit", + submitterActionUrl: null, + method: "post", + fields: [], + ...overrides, + }; +} + +describe("actionFirewall", () => { + it("allows agent fills for operational search fields", () => { + const result = assessFill({ + field: field({ inputType: "search", label: "Search products" }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(true); + if (!result.allowed) throw new Error("Expected fill to be allowed"); + expect(result.operational).toBe(true); + }); + + it("blocks agent fills for freeform text fields", () => { + const result = assessFill({ + field: field({ label: "Message" }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected fill to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_FILL); + }); + + it("does not classify fields as operational from label text alone", () => { + const result = assessFill({ + field: field({ inputType: "text", label: "Search products", placeholder: "Search" }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks inherently freeform fields even when they have operational roles", () => { + const result = assessFill({ + field: field({ tagName: "textarea", inputType: null, role: "searchbox" }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks fields with sensitive autocomplete even when the input type 
looks operational", () => { + const result = assessFill({ + field: field({ inputType: "url", autocomplete: "url" }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks agent fills for URL fields without user approval", () => { + const result = assessFill({ + field: field({ inputType: "url", autocomplete: null }), + source: "agent", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected URL fill to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_FILL); + }); + + it("allows user-approved freeform fields", () => { + const result = assessFill({ + field: field({ label: "Message" }), + source: "user-approved", + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(true); + }); + + it("blocks submitting forms with unauthorized agent-filled fields", () => { + const result = assessFormSubmission({ + form: form({ + fields: [ + { + ref: "E1", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(), + pageHostname: null, + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected submit to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT); + expect(result.reason).not.toContain("do not leak this value"); + }); + + it("allows submitting forms when agent-filled fields are approved or operational", () => { + const result = assessFormSubmission({ + form: form({ + // form() defaults actionUrl to https://example.com/submit, so the + // operational field submits same-site as the page below. 
+ fields: [ + { + ref: "E1", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "E2", + name: "email", + tagName: "input", + inputType: "email", + autocomplete: "email", + }, + ], + }), + approvedRefs: new Set(["E2"]), + agentFilledRefs: new Set(["E1", "E2"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + }); + + expect(result.allowed).toBe(true); + }); +}); + +describe("normalizeHostname", () => { + it("lowercases input", () => { + expect(normalizeHostname("Example.COM")).toBe("example.com"); + }); + + it("strips a single trailing dot", () => { + expect(normalizeHostname("example.com.")).toBe("example.com"); + }); + + it("accepts bare hostnames", () => { + expect(normalizeHostname("app.example.com")).toBe("app.example.com"); + }); + + it("accepts IDN punycode", () => { + expect(normalizeHostname("xn--mnich-kva.de")).toBe("xn--mnich-kva.de"); + }); + + it("accepts bare IPv4 literals", () => { + expect(normalizeHostname("127.0.0.1")).toBe("127.0.0.1"); + }); + + it("rejects empty string", () => { + expect(() => normalizeHostname("")).toThrow(InvalidHostnameError); + }); + + it("rejects whitespace-only", () => { + expect(() => normalizeHostname(" ")).toThrow(InvalidHostnameError); + }); + + it("rejects strings with whitespace", () => { + expect(() => normalizeHostname("ex ample.com")).toThrow(InvalidHostnameError); + }); + + it("rejects strings with slashes", () => { + expect(() => normalizeHostname("example.com/path")).toThrow(InvalidHostnameError); + }); + + it("rejects strings with colons", () => { + expect(() => normalizeHostname("example.com:8080")).toThrow(InvalidHostnameError); + }); + + it("rejects strings with wildcards", () => { + expect(() => normalizeHostname("*.example.com")).toThrow(InvalidHostnameError); + }); + + it("rejects URL inputs with scheme", () => { + expect(() => 
normalizeHostname("https://example.com")).toThrow(InvalidHostnameError); + }); + + it("rejects bracketed IPv6 in v1", () => { + expect(() => normalizeHostname("[::1]")).toThrow(InvalidHostnameError); + }); + + it("error message names the bad entry", () => { + try { + normalizeHostname("bad value"); + } catch (e) { + expect(e).toBeInstanceOf(InvalidHostnameError); + expect((e as Error).message).toContain("bad value"); + } + }); +}); + +describe("withTrustedStartHost", () => { + const base: FirewallConfig = { trustedHostnames: new Set(["already.com"]), unsafeMode: false }; + + it("adds the start URL's host to the trusted set", () => { + const result = withTrustedStartHost(base, "https://github.com/signup"); + expect(result.trustedHostnames.has("github.com")).toBe(true); + }); + + it("preserves existing trusted hosts and unsafeMode", () => { + const result = withTrustedStartHost( + { trustedHostnames: new Set(["already.com"]), unsafeMode: true }, + "https://github.com/", + ); + expect(result.trustedHostnames.has("already.com")).toBe(true); + expect(result.unsafeMode).toBe(true); + }); + + it("lowercases the host (via extractHostname)", () => { + const result = withTrustedStartHost(base, "https://GitHub.COM/x"); + expect(result.trustedHostnames.has("github.com")).toBe(true); + }); + + it("does not mutate the input firewall", () => { + withTrustedStartHost(base, "https://github.com/"); + expect(base.trustedHostnames.has("github.com")).toBe(false); + }); + + it("returns the firewall unchanged for a null start URL", () => { + expect(withTrustedStartHost(base, null)).toBe(base); + }); + + it("returns the firewall unchanged for a non-http(s) start URL", () => { + expect(withTrustedStartHost(base, "about:blank")).toBe(base); + expect(withTrustedStartHost(base, "file:///tmp/x.html")).toBe(base); + }); + + it("returns the firewall unchanged when the host is already trusted", () => { + expect(withTrustedStartHost(base, "https://already.com/path")).toBe(base); + }); +}); + 
+describe("extractHostname", () => { + it("returns lowercase hostname for https URLs", () => { + expect(extractHostname("https://Example.COM/path?q=1")).toBe("example.com"); + }); + + it("returns lowercase hostname for http URLs", () => { + expect(extractHostname("http://app.example.com")).toBe("app.example.com"); + }); + + it("strips trailing dot", () => { + expect(extractHostname("https://example.com./")).toBe("example.com"); + }); + + it("returns null for null input", () => { + expect(extractHostname(null)).toBeNull(); + }); + + it("returns null for about:blank", () => { + expect(extractHostname("about:blank")).toBeNull(); + }); + + it("returns null for data: URLs", () => { + expect(extractHostname("data:text/html,

x

")).toBeNull(); + }); + + it("returns null for file: URLs", () => { + expect(extractHostname("file:///tmp/foo.html")).toBeNull(); + }); + + it("returns null for javascript: URLs", () => { + expect(extractHostname("javascript:alert(1)")).toBeNull(); + }); + + it("returns null for malformed URLs", () => { + expect(extractHostname("not a url")).toBeNull(); + }); + + it("returns null for empty string", () => { + expect(extractHostname("")).toBeNull(); + }); +}); + +const freeformField: FieldMetadata = { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, +}; + +function withTrusted(hosts: string[]): FirewallConfig { + return { trustedHostnames: new Set(hosts), unsafeMode: false }; +} + +const unsafeFirewall: FirewallConfig = { + trustedHostnames: new Set(), + unsafeMode: true, +}; + +describe("assessFill bypass branches", () => { + it("unsafeMode allows any field regardless of source", () => { + const result = assessFill({ + field: freeformField, + source: "agent", + pageHostname: null, + firewall: unsafeFirewall, + }); + expect(result.allowed).toBe(true); + }); + + it("trusted page hostname allows freeform fill", () => { + const result = assessFill({ + field: freeformField, + source: "agent", + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(true); + }); + + it("untrusted page hostname falls through to existing rules and blocks freeform", () => { + const result = assessFill({ + field: freeformField, + source: "agent", + pageHostname: "attacker.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(false); + }); + + it("pageHostname=null never bypasses", () => { + const result = assessFill({ + field: freeformField, + source: "agent", + pageHostname: null, + firewall: withTrusted(["example.com"]), + 
}); + expect(result.allowed).toBe(false); + }); +}); + +const baseForm: FormSubmissionContext = { + submitterRef: "submit-1", + formId: null, + actionUrl: "https://example.com/submit", + submitterActionUrl: null, + method: "post", + fields: [ + { + ref: "ref-1", + name: "comment", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], +}; + +describe("assessFormSubmission bypass branches", () => { + it("unsafeMode allows any form", () => { + const result = assessFormSubmission({ + form: baseForm, + approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "attacker.com", + firewall: unsafeFirewall, + }); + expect(result.allowed).toBe(true); + }); + + it("trusted page + trusted form action allows submission", () => { + const result = assessFormSubmission({ + form: baseForm, + approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(true); + }); + + it("trusted page + untrusted form action falls through and blocks", () => { + const result = assessFormSubmission({ + form: { ...baseForm, actionUrl: "https://attacker.com/exfil" }, + approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(false); + }); + + it("trusted page + null form action hostname falls through", () => { + const result = assessFormSubmission({ + form: { ...baseForm, actionUrl: "about:blank" }, + approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(false); + }); + + it("untrusted page + trusted form action falls through", () => { + const result = assessFormSubmission({ + form: baseForm, + 
approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "attacker.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(false); + }); + + it("checks submitter action URL when present", () => { + const result = assessFormSubmission({ + form: { + ...baseForm, + actionUrl: "https://example.com/normal", + submitterActionUrl: "https://attacker.com/override", + }, + approvedRefs: new Set(), + agentFilledRefs: new Set(["ref-1"]), + operationalRefs: new Set(), + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(false); + }); + + it("falls through (no bypass) when nothing is agent-filled but submitter is untrusted", () => { + const result = assessFormSubmission({ + form: { ...baseForm, actionUrl: "https://attacker.com/exfil" }, + approvedRefs: new Set(), + agentFilledRefs: new Set(), + operationalRefs: new Set(), + pageHostname: "example.com", + firewall: withTrusted(["example.com"]), + }); + expect(result.allowed).toBe(true); // existing rule: no agent-filled => allowed + }); +}); + +describe("assessFormSubmission same-site operational restriction", () => { + const operationalForm = (overrides: Partial = {}): FormSubmissionContext => + form({ + fields: [{ ref: "E1", name: "q", tagName: "input", inputType: "search", autocomplete: null }], + ...overrides, + }); + + it("allows operational agent-filled submission to a same-site form action", () => { + const result = assessFormSubmission({ + form: operationalForm({ actionUrl: "https://example.com/search" }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: withTrusted([]), + }); + expect(result.allowed).toBe(true); + }); + + it("blocks operational agent-filled submission to a cross-site form action", () => { + const result = assessFormSubmission({ + form: operationalForm({ actionUrl: 
"https://attacker.example/collect" }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: withTrusted([]), + }); + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected cross-site operational submit to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_CROSS_SITE_OPERATIONAL_SUBMIT); + // Error must not echo the attempted field value or its content. + expect(result.reason).not.toContain("q"); + }); + + it("blocks operational submission when the submitter formaction overrides cross-site", () => { + const result = assessFormSubmission({ + form: operationalForm({ + actionUrl: "https://example.com/search", + submitterActionUrl: "https://attacker.example/collect", + }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: withTrusted([]), + }); + expect(result.allowed).toBe(false); + }); + + it("blocks operational submission when the page hostname is unknown (fail closed)", () => { + const result = assessFormSubmission({ + form: operationalForm({ actionUrl: "https://example.com/search" }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: null, + firewall: withTrusted([]), + }); + expect(result.allowed).toBe(false); + }); + + it("does not host-restrict user-approved (non-operational) submissions", () => { + // Approved fields are filled with the user's data via request_user_data and + // are never tracked as agent-filled, so they keep submitting cross-site + // (e.g. a payment processor on a separate domain). The user authorized them. 
+ const result = assessFormSubmission({ + form: form({ + actionUrl: "https://payments.example.net/charge", + fields: [ + { + ref: "E2", + name: "card", + tagName: "input", + inputType: "text", + autocomplete: "cc-number", + }, + ], + }), + approvedRefs: new Set(["E2"]), + agentFilledRefs: new Set(), + operationalRefs: new Set(), + pageHostname: "shop.example.com", + firewall: withTrusted([]), + }); + expect(result.allowed).toBe(true); + }); + + it("allows operational cross-site submission when unsafeMode is on", () => { + const result = assessFormSubmission({ + form: operationalForm({ actionUrl: "https://attacker.example/collect" }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: unsafeFirewall, + }); + expect(result.allowed).toBe(true); + }); + + it("allows operational cross-host submission when both page and action hosts are trusted", () => { + const result = assessFormSubmission({ + form: operationalForm({ actionUrl: "https://api.example.com/search" }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(["E1"]), + pageHostname: "example.com", + firewall: withTrusted(["example.com", "api.example.com"]), + }); + expect(result.allowed).toBe(true); + }); +}); diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index c4754188..f66b4ec5 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -1,6 +1,12 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { createWebActionTools } from "../../src/tools/webActionTools.js"; -import { AriaBrowser, PageAction } from "../../src/browser/ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, 
WebAgentEventType } from "../../src/events.js"; import { LanguageModel } from "ai"; import { z } from "zod"; @@ -30,6 +36,8 @@ class MockBrowser implements AriaBrowser { browserName = "mock-browser"; public url = "https://example.com"; public title = "Example Page"; + public fieldMetadata = new Map(); + public formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -73,6 +81,32 @@ class MockBrowser implements AriaBrowser { // Mock implementation - can be configured to throw errors for testing } + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? 
null; + } + async getRefIdentity(_ref: string): Promise<{ role: string; name: string } | null> { return null; } @@ -109,6 +143,10 @@ describe("Web Action Tools", () => { eventEmitter, providerConfig: { model: mockProvider }, abortSignal: undefined, + agentFilledRefs: new Set(), + operationalRefs: new Set(), + firewall: { trustedHostnames: new Set(), unsafeMode: false }, + interactive: false, }; tools = createWebActionTools(context); @@ -119,6 +157,16 @@ describe("Web Action Tools", () => { }); describe("Tool Structure", () => { + it("should require provenance tracking sets", () => { + expect(() => + createWebActionTools({ + browser: mockBrowser, + eventEmitter, + providerConfig: { model: mockProvider }, + } as any), + ).toThrow("Web action provenance tracking sets are required"); + }); + it("should create all expected tools", () => { expect(tools).toBeDefined(); expect(tools.click).toBeDefined(); @@ -305,6 +353,76 @@ describe("Web Action Tools", () => { }); }); + it("should block agent fill of freeform submittable fields", async () => { + mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, + name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + + const result = await tools.fill.execute({ ref: "input1", value: "generated payload" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result).toEqual({ + success: false, + action: "fill", + ref: "input1", + error: "Security policy blocked filling a submittable form field without user approval", + isRecoverable: true, + }); + expect(result.value).toBeUndefined(); + }); + + it("should allow approved freeform field fills", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + 
mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, + name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + context.approvedRefs = new Set(["input1"]); + tools = createWebActionTools(context); + + const result = await tools.fill.execute({ ref: "input1", value: "user-provided value" }); + + expect(performActionSpy).toHaveBeenCalledWith( + "input1", + PageAction.Fill, + "user-provided value", + ); + expect(result.success).toBe(true); + }); + + it("should track agent-filled operational refs", async () => { + context.agentFilledRefs = new Set(); + context.operationalRefs = new Set(); + tools = createWebActionTools(context); + + await tools.fill.execute({ ref: "input1", value: "pilo" }); + + expect(context.agentFilledRefs.has("input1")).toBe(true); + expect(context.operationalRefs.has("input1")).toBe(true); + }); + it("should emit browser action events", async () => { const emitSpy = vi.spyOn(eventEmitter, "emit"); @@ -541,6 +659,144 @@ describe("Web Action Tools", () => { expect(invalid.success).toBe(false); }); + it("should block click submit when form contains unauthorized agent-filled values", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "contact", + actionUrl: "https://example.com/contact", + submitterActionUrl: null, + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + 
expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting a form containing unauthorized agent-filled data", + ); + expect(JSON.stringify(result)).not.toContain("generated payload"); + }); + + it("should allow click submit when form fields are approved or operational", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["query", "email"]); + context.operationalRefs = new Set(["query"]); + context.approvedRefs = new Set(["email"]); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "search", + actionUrl: "https://example.com/search", + submitterActionUrl: null, + method: "get", + fields: [ + { + ref: "query", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "email", + name: "email", + tagName: "input", + inputType: "email", + autocomplete: "email", + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).toHaveBeenCalledWith("submit1", PageAction.Click, undefined); + expect(result.success).toBe(true); + }); + + it("should block click submit when an operational field posts to a cross-site action", async () => { + // The reported bypass: an attacker page labels its collector field as a + // search box (operational) and points the form action at its own host. 
+ const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + mockBrowser.url = "https://example.com/search"; + context.agentFilledRefs = new Set(["query"]); + context.operationalRefs = new Set(["query"]); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "search", + actionUrl: "https://attacker.example/collect", + submitterActionUrl: null, + method: "get", + fields: [ + { + ref: "query", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting operational field data to a site other than the current page", + ); + }); + + it("should block enter submit when form contains unauthorized agent-filled fields", async () => { + const formContextSpy = vi.spyOn(mockBrowser, "getFormSubmissionContext"); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("input1", { + submitterRef: "input1", + formId: "contact", + actionUrl: "https://example.com/contact", + submitterActionUrl: null, + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.enter.execute({ ref: "input1" }); + + expect(formContextSpy).toHaveBeenCalledWith("input1", "enter"); + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting a form containing unauthorized agent-filled data", + ); + }); + 
it("should execute back action successfully", async () => { const performActionSpy = vi.spyOn(mockBrowser, "performAction"); @@ -833,4 +1089,159 @@ describe("Web Action Tools", () => { expect(result.value).toBe(longText); }); }); + + describe("firewall bypass and remediation", () => { + it("trustedHostnames allows freeform fill on a trusted page", async () => { + mockBrowser.url = "https://example.com/page"; + mockBrowser.fieldMetadata.set("ref-1", { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, + }); + const performSpy = vi.spyOn(mockBrowser, "performAction"); + const trustedContext = { + ...context, + firewall: { trustedHostnames: new Set(["example.com"]), unsafeMode: false }, + }; + const trustedTools: any = createWebActionTools(trustedContext); + + const result = await trustedTools.fill.execute({ ref: "ref-1", value: "hi" }); + expect(result.success).toBe(true); + expect(performSpy).toHaveBeenCalled(); + }); + + it("unsafeMode allows fill of any field on any page", async () => { + mockBrowser.url = "https://attacker.com/"; + mockBrowser.fieldMetadata.set("ref-1", { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, + }); + const performSpy = vi.spyOn(mockBrowser, "performAction"); + const unsafeContext = { + ...context, + firewall: { trustedHostnames: new Set(), unsafeMode: true }, + }; + const unsafeTools: any = createWebActionTools(unsafeContext); + + const result = await unsafeTools.fill.execute({ ref: "ref-1", value: "hi" }); + expect(result.success).toBe(true); + expect(performSpy).toHaveBeenCalled(); + }); + + it("emits FIREWALL_BLOCKED_NON_INTERACTIVE on fill block when 
interactive=false", async () => { + mockBrowser.url = "https://untrusted.com/"; + mockBrowser.fieldMetadata.set("ref-1", { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, + }); + const performSpy = vi.spyOn(mockBrowser, "performAction"); + const events: unknown[] = []; + eventEmitter.on(WebAgentEventType.FIREWALL_BLOCKED_NON_INTERACTIVE, (data) => + events.push(data), + ); + + const result = await tools.fill.execute({ ref: "ref-1", value: "hi" }); + expect(result.success).toBe(false); + expect(performSpy).not.toHaveBeenCalled(); + expect(events).toHaveLength(1); + const data = events[0] as { + kind: string; + pageHostname: string | null; + formActionHostnames: string[]; + reason: string; + timestamp: number; + remediations: Array<{ kind: string; hostnames?: string[]; description: string }>; + }; + expect(data.kind).toBe("freeform-fill"); + expect(data.pageHostname).toBe("untrusted.com"); + expect(data.formActionHostnames).toEqual([]); + expect(typeof data.reason).toBe("string"); + expect(data.reason.length).toBeGreaterThan(0); + expect(typeof data.timestamp).toBe("number"); + expect(data.remediations.map((r) => r.kind).sort()).toEqual( + ["add-trusted-hostnames", "enable-interactive-mode", "enable-unsafe-mode"].sort(), + ); + const trusted = data.remediations.find((r) => r.kind === "add-trusted-hostnames"); + expect(trusted?.hostnames).toEqual(["untrusted.com"]); + }); + + it("does NOT emit FIREWALL_BLOCKED_NON_INTERACTIVE when interactive=true", async () => { + mockBrowser.url = "https://untrusted.com/"; + mockBrowser.fieldMetadata.set("ref-1", { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: 
null, + }); + const events: unknown[] = []; + eventEmitter.on(WebAgentEventType.FIREWALL_BLOCKED_NON_INTERACTIVE, (data) => + events.push(data), + ); + const interactiveContext = { ...context, interactive: true }; + const interactiveTools: any = createWebActionTools(interactiveContext); + + const result = await interactiveTools.fill.execute({ ref: "ref-1", value: "hi" }); + expect(result.success).toBe(false); + expect(events).toHaveLength(0); + }); + + it("model-visible error string does not include unsafe_mode or trusted_hostnames", async () => { + mockBrowser.url = "https://untrusted.com/"; + mockBrowser.fieldMetadata.set("ref-1", { + ref: "ref-1", + tagName: "textarea", + inputType: null, + role: null, + name: "comment", + label: "Comment", + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: null, + formAction: null, + formMethod: null, + }); + const result = await tools.fill.execute({ ref: "ref-1", value: "hi" }); + expect(result.success).toBe(false); + expect(result.error).toBeDefined(); + expect(result.error).not.toMatch(/unsafe_mode|trusted_hostnames|untrusted\.com/); + }); + }); }); diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index a693d7ff..cce1a8fb 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -1,6 +1,13 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { WebAgent, WebAgentOptions } from "../src/webAgent.js"; -import { AriaBrowser, PageAction } from "../src/browser/ariaBrowser.js"; +import { InvalidHostnameError } from "../src/security/actionFirewall.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, WebAgentEventType } from "../src/events.js"; import { LanguageModel, streamText } from "ai"; import { Logger } from "../src/loggers/types.js"; @@ -156,6 +163,8 @@ class 
MockBrowser implements AriaBrowser { `; private markdown = "# Mock Page\nContent here"; + fieldMetadata = new Map(); + formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -195,6 +204,32 @@ class MockBrowser implements AriaBrowser { async performAction(_ref: string, _action: PageAction, _value?: string): Promise {} + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? null; + } + async getRefIdentity(_ref: string): Promise<{ role: string; name: string } | null> { return null; } @@ -871,6 +906,77 @@ describe("WebAgent", () => { expect(navigatedEvent?.data.url).toBe(startingUrl); }); + it("should keep the same snapshot after fill so form refs remain valid for submit", async () => { + mockGenerateTextWithRetry.mockResolvedValueOnce({ + text: "Planning", + toolResults: [ + { + type: "tool-result", + toolCallId: "plan_1", + toolName: "create_plan", + output: { + successCriteria: "Fill then submit", + plan: "1. Fill the form\n2. 
Submit the form", + }, + }, + ], + } as any); + + const snapshotSpy = vi.spyOn(mockBrowser, "getTreeWithRefs"); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Fill", + toolResults: [ + { + type: "tool-result", + toolCallId: "fill_1", + toolName: "fill", + input: { ref: "input1", value: "context" }, + output: { + success: true, + action: "fill", + ref: "input1", + value: "context", + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Fill" }], + }, + }) as any, + ); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Done", + toolResults: [ + { + type: "tool-result", + toolCallId: "done_1", + toolName: "done", + input: { result: "Complete" }, + output: { + success: true, + action: "done", + result: "Complete", + isTerminal: true, + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Done" }], + }, + }) as any, + ); + + mockGenerateTextWithRetry.mockResolvedValueOnce(mockValidationResponse("complete")); + + await webAgent.execute("Fill then submit", { startingUrl: "https://example.com" }); + + expect(snapshotSpy).toHaveBeenCalledTimes(1); + }); + it("should pass webSearchEnabled to planning prompt when search provider is set", async () => { // Create a WebAgent with a search provider enabled const searchAgent = new WebAgent(mockBrowser, { @@ -4362,3 +4468,45 @@ describe("WebAgent", () => { }); }); }); + +describe("WebAgent firewall options", () => { + let mockProvider: any; + + beforeEach(() => { + vi.clearAllMocks(); + mockProvider = { specificationVersion: "v1" } as unknown as any; + }); + + it("throws InvalidHostnameError when trustedHostnames contains an invalid entry", () => { + const browser = new MockBrowser(); + expect( + () => + new WebAgent(browser, { + providerConfig: { model: mockProvider }, + trustedHostnames: ["bad value"], + }), + ).toThrow(InvalidHostnameError); + }); + + it("normalizes trustedHostnames at construction", () => { + const browser = new 
MockBrowser(); + expect( + () => + new WebAgent(browser, { + providerConfig: { model: mockProvider }, + trustedHostnames: ["Example.COM", "app.example.com."], + }), + ).not.toThrow(); + }); + + it("accepts unsafeMode true", () => { + const browser = new MockBrowser(); + expect( + () => + new WebAgent(browser, { + providerConfig: { model: mockProvider }, + unsafeMode: true, + }), + ).not.toThrow(); + }); +}); diff --git a/packages/extension/src/background/ExtensionBrowser.ts b/packages/extension/src/background/ExtensionBrowser.ts index 9f4803fb..7c02c3e0 100644 --- a/packages/extension/src/background/ExtensionBrowser.ts +++ b/packages/extension/src/background/ExtensionBrowser.ts @@ -1,6 +1,11 @@ import browser from "webextension-polyfill"; -import type { AriaBrowser } from "pilo-core/core"; -import { PageAction, LoadState } from "pilo-core/core"; +import type { + AriaBrowser, + FieldMetadata, + FormSubmissionContext, + FormSubmissionTrigger, +} from "pilo-core/core"; +import { BrowserActionException, InvalidRefException, PageAction, LoadState } from "pilo-core/core"; import type { Tabs } from "webextension-polyfill"; import { createLogger } from "../shared/utils/logger"; import TurndownService from "turndown"; @@ -11,6 +16,10 @@ interface ActionResult { message?: string; } +type MetadataScriptResult = + | { success: true; data: T } + | { success: false; error: string; errorType?: "invalid-ref" }; + interface AriaSnapshotWindow { generateAndRenderAriaTree: (root: Element, counter?: { value: number }) => string; } @@ -302,6 +311,279 @@ export class ExtensionBrowser implements AriaBrowser { } } + async getFieldMetadata(ref: string): Promise { + try { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const results = await browser.scripting.executeScript({ + target: { tabId: tab.id! 
}, + func: (elementRef: string): MetadataScriptResult => { + const element = document.querySelector(`[data-pilo-ref="${elementRef}"]`); + if (!(element instanceof HTMLElement)) { + return { + success: false, + error: `Element with ref ${elementRef} not found in DOM`, + errorType: "invalid-ref", + }; + } + + const input = element instanceof HTMLInputElement ? element : null; + const form = getElementForm(element); + + return { + success: true, + data: { + ref: elementRef, + tagName: element.tagName.toLowerCase(), + inputType: input?.type?.toLowerCase() ?? null, + role: element.getAttribute("role"), + name: getElementName(element), + label: getElementLabel(element), + placeholder: getElementPlaceholder(element), + autocomplete: getElementAutocomplete(element), + isContentEditable: element.isContentEditable, + formId: form?.id || null, + formAction: form?.action || null, + formMethod: form?.method?.toLowerCase() || null, + }, + }; + + function getElementForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.form; + } + return node.closest("form"); + } + + function getElementName(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.name || null; + } + return node.getAttribute("name"); + } + + function getElementLabel(node: HTMLElement): string | null { + const ariaLabel = node.getAttribute("aria-label"); + if (ariaLabel?.trim()) return ariaLabel.trim(); + + const labelledBy = node.getAttribute("aria-labelledby"); + if (labelledBy) { + const text = labelledBy + .split(/\s+/) + .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + if ("labels" in 
node) { + const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement) + .labels; + const text = Array.from(labels || []) + .map((label) => label.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + return null; + } + + function getElementPlaceholder(node: HTMLElement): string | null { + if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) { + return node.placeholder || null; + } + return null; + } + + function getElementAutocomplete(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.autocomplete || null; + } + return null; + } + }, + args: [ref], + }); + + return this.unwrapMetadataResult( + results[0]?.result as MetadataScriptResult | undefined, + ref, + "getFieldMetadata", + ); + } catch (error) { + if (error instanceof InvalidRefException || error instanceof BrowserActionException) { + throw error; + } + throw new BrowserActionException( + "getFieldMetadata", + `Failed to get field metadata: ${error instanceof Error ? error.message : String(error)}`, + { ref, originalError: error }, + ); + } + } + + async getFormSubmissionContext( + ref: string, + trigger: FormSubmissionTrigger = "click", + ): Promise { + try { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const results = await browser.scripting.executeScript({ + target: { tabId: tab.id! 
}, + func: (paramsJson: string): MetadataScriptResult => { + const { ref: submitterRef, trigger: submitTrigger } = JSON.parse(paramsJson) as { + ref: string; + trigger: FormSubmissionTrigger; + }; + const element = document.querySelector(`[data-pilo-ref="${submitterRef}"]`); + if (!(element instanceof HTMLElement)) { + return { + success: false, + error: `Element with ref ${submitterRef} not found in DOM`, + errorType: "invalid-ref", + }; + } + if (!canSubmitForm(element, submitTrigger)) return { success: true, data: null }; + + const form = getSubmissionForm(element); + if (!form) return { success: true, data: null }; + + const fields = Array.from(form.elements) + .filter( + (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement => + field instanceof HTMLInputElement || + field instanceof HTMLTextAreaElement || + field instanceof HTMLSelectElement, + ) + .filter((field) => !field.disabled) + .map((field) => ({ + ref: field.getAttribute("data-pilo-ref"), + name: field.name || null, + tagName: field.tagName.toLowerCase(), + inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null, + autocomplete: "autocomplete" in field ? 
field.autocomplete || null : null, + })); + + return { + success: true, + data: { + submitterRef, + formId: form.id || null, + actionUrl: form.action || null, + // A submit control's formaction attribute overrides the form's action for that + // submission, so it is reported separately instead of being hard-coded to null. + submitterActionUrl: getSubmitterActionUrl(element, form, submitTrigger), + method: form.method?.toLowerCase() || null, + fields, + }, + }; + + function getSubmissionForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLButtonElement || + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.form; + } + return node.closest("form"); + } + + // Returns the resolved formaction URL of the control that effectively submits the form, + // or null when that control declares no formaction override. + function getSubmitterActionUrl( + node: HTMLElement, + ownerForm: HTMLFormElement, + submitTrigger: FormSubmissionTrigger, + ): string | null { + const isSubmitControl = ( + control: Element, + ): control is HTMLButtonElement | HTMLInputElement => + (control instanceof HTMLButtonElement && control.type === "submit") || + (control instanceof HTMLInputElement && + (control.type === "submit" || control.type === "image")); + // A click submits through the clicked control; Enter submits through the form's default + // button, i.e. the first submit control in tree order that belongs to the form. + // form.elements omits image buttons, so the document is queried instead. + const submitter = + submitTrigger === "click" + ? node + : (Array.from(ownerForm.ownerDocument.querySelectorAll("button, input")).find( + (control) => isSubmitControl(control) && control.form === ownerForm, + ) ?? null); + if (!submitter || !isSubmitControl(submitter)) return null; + // Without the attribute, the formAction property falls back to the document URL. + if (!submitter.hasAttribute("formaction")) return null; + return submitter.formAction || null; + } + + function canSubmitForm(node: HTMLElement, submitTrigger: FormSubmissionTrigger): boolean { + if (submitTrigger === "click") { + if (node instanceof HTMLButtonElement) return node.type === "submit"; + if (node instanceof HTMLInputElement) { + return node.type === "submit" || node.type === "image"; + } + return false; + } + + if (node instanceof HTMLTextAreaElement || node instanceof HTMLSelectElement) + return false; + if (!(node instanceof HTMLInputElement)) return false; + return ![ + "button", + "checkbox", + "color", + "file", + "hidden", + "radio", + "range", + "reset", + "submit", + ].includes(node.type); + } + }, + args: [JSON.stringify({ ref, trigger })], + }); + + return this.unwrapMetadataResult( + results[0]?.result as MetadataScriptResult | undefined, + ref, + "getFormSubmissionContext", + ); + } catch (error) { + if (error instanceof InvalidRefException || error instanceof BrowserActionException) { + throw error; + } + throw new BrowserActionException( + "getFormSubmissionContext", + `Failed to get form submission context: ${ + error instanceof Error ? error.message : String(error) + }`, + { ref, trigger, originalError: error }, + ); + } + } + + private unwrapMetadataResult( + result: MetadataScriptResult | undefined, + ref: string, + action: "getFieldMetadata" | "getFormSubmissionContext", + ): T { + if (!result) { + const actionDescription = + action === "getFieldMetadata" ?
"get field metadata" : "get form submission context"; + throw new BrowserActionException( + action, + `Failed to ${actionDescription}: script returned no result`, + { + ref, + }, + ); + } + + if (!result.success) { + if (result.errorType === "invalid-ref") { + const invalidRefError = new InvalidRefException(ref); + if (result.error) { + invalidRefError.message = `${invalidRefError.message} ${result.error}`; + } + throw invalidRefError; + } + throw new BrowserActionException(action, result.error, { ref }); + } + + return result.data; + } + async getRefIdentity(ref: string): Promise<{ role: string; name: string } | null> { try { const tab = await this.getActiveTab(); diff --git a/packages/extension/test/ExtensionBrowser.test.ts b/packages/extension/test/ExtensionBrowser.test.ts index 9b48d7d1..92b2964d 100644 --- a/packages/extension/test/ExtensionBrowser.test.ts +++ b/packages/extension/test/ExtensionBrowser.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { ExtensionBrowser } from "../src/background/ExtensionBrowser"; import browser from "webextension-polyfill"; +import { BrowserActionException, InvalidRefException } from "pilo-core/core"; vi.mock("webextension-polyfill", () => ({ default: { @@ -94,4 +95,84 @@ describe("ExtensionBrowser", () => { expect(browser.scripting.executeScript).toHaveBeenCalled(); }); }); + + describe("metadata error handling", () => { + it("should translate missing field metadata refs into InvalidRefException", async () => { + vi.mocked(browser.scripting.executeScript).mockResolvedValue([ + { + result: { + success: false, + error: "Element with ref missing-input not found in DOM", + errorType: "invalid-ref", + }, + } as any, + ]); + + await expect(extensionBrowser.getFieldMetadata("missing-input")).rejects.toThrow( + InvalidRefException, + ); + }); + + it("should translate missing form submission refs into InvalidRefException", async () => { + 
vi.mocked(browser.scripting.executeScript).mockResolvedValue([ + { + result: { + success: false, + error: "Element with ref missing-submit not found in DOM", + errorType: "invalid-ref", + }, + } as any, + ]); + + await expect(extensionBrowser.getFormSubmissionContext("missing-submit")).rejects.toThrow( + InvalidRefException, + ); + }); + + it("should wrap field metadata script failures in BrowserActionException", async () => { + vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockRejectedValueOnce(new Error("Cannot access contents of url")); + + const error = await extensionBrowser.getFieldMetadata("input1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain( + "Failed to get field metadata: Cannot access contents of url", + ); + }); + + it("should wrap empty field metadata script results in BrowserActionException", async () => { + vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockResolvedValueOnce([]); + + const error = await extensionBrowser.getFieldMetadata("input1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain("Failed to get field metadata: script returned no result"); + }); + + it("should wrap form submission script failures in BrowserActionException", async () => { + vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockRejectedValueOnce(new Error("Cannot access contents of url")); + + const error = await extensionBrowser.getFormSubmissionContext("submit1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain( + "Failed to get form submission context: Cannot access contents of url", + ); + }); + + it("should wrap empty form submission script results in BrowserActionException", async () => { + 
vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockResolvedValueOnce([]); + + const error = await extensionBrowser.getFormSubmissionContext("submit1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain( + "Failed to get form submission context: script returned no result", + ); + }); + }); });