From 9fb6d511674776c1a792b3ef414bd4985a116644 Mon Sep 17 00:00:00 2001 From: muly Date: Mon, 17 Nov 2025 12:11:48 +0000 Subject: [PATCH 1/4] feat: add remote Chrome DevTools Protocol support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable connecting to a remote Chrome/Edge browser via CDP for browser automation mode. This allows running Oracle on headless servers while using a browser with GUI on another machine (e.g., Windows desktop). New CLI flag: --remote-chrome <host:port> Features: - Connect to remote Chrome via CDP instead of launching local browser - Skip cookie sync (uses existing browser session) - Automatic model selection and prompt submission - Full compatibility with existing browser mode features Example usage: oracle --engine browser --remote-chrome 192.168.1.10:9222 --prompt "Hello" 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- bin/oracle-cli.ts | 4 + scripts/test-remote-chrome.ts | 80 +++++++++++++++++ src/browser/chromeLifecycle.ts | 10 +++ src/browser/config.ts | 1 + src/browser/index.ts | 159 ++++++++++++++++++++++++++++++++- src/browser/types.ts | 4 +- src/cli/browserConfig.ts | 16 ++++ src/sessionManager.ts | 1 + 8 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 scripts/test-remote-chrome.ts diff --git a/bin/oracle-cli.ts b/bin/oracle-cli.ts index 0f400ca9..23c1172c 100755 --- a/bin/oracle-cli.ts +++ b/bin/oracle-cli.ts @@ -72,6 +72,7 @@ interface CliOptions extends OptionValues { browserKeepBrowser?: boolean; browserAllowCookieErrors?: boolean; browserInlineFiles?: boolean; + remoteChrome?: string; verbose?: boolean; debugHelp?: boolean; heartbeat?: number; @@ -174,6 +175,9 @@ program .addOption( new Option('--browser-allow-cookie-errors', 'Continue even if Chrome cookies cannot be copied.').hideHelp(), ) + .addOption( + new Option('--remote-chrome ', 'Connect to remote Chrome DevTools Protocol (e.g., 192.168.1.10:9222).'), + ) .addOption( 
new Option('--browser-inline-files', 'Paste files directly into the ChatGPT composer instead of uploading attachments.').default(false), ) diff --git a/scripts/test-remote-chrome.ts b/scripts/test-remote-chrome.ts new file mode 100644 index 00000000..6c174def --- /dev/null +++ b/scripts/test-remote-chrome.ts @@ -0,0 +1,80 @@ +#!/usr/bin/env npx tsx +/** + * POC: Test connecting to remote Chrome instance + * + * On remote machine with display, run: + * google-chrome --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 + * + * Then run this script: + * npx tsx scripts/test-remote-chrome.ts [port] + */ + +import CDP from 'chrome-remote-interface'; + +async function main() { + const host = process.argv[2] || 'localhost'; + const port = parseInt(process.argv[3] || '9222', 10); + + console.log(`Attempting to connect to Chrome at ${host}:${port}...`); + + try { + // Test connection + const client = await CDP({ host, port }); + console.log('āœ“ Connected to Chrome DevTools Protocol'); + + const { Network, Page, Runtime } = client; + + // Enable domains + await Promise.all([Network.enable(), Page.enable()]); + console.log('āœ“ Enabled Network and Page domains'); + + // Get browser version info + const version = await CDP.Version({ host, port }); + console.log(`āœ“ Browser: ${version.Browser}`); + console.log(`āœ“ Protocol: ${version['Protocol-Version']}`); + + // Navigate to ChatGPT + console.log('\nNavigating to ChatGPT...'); + await Page.navigate({ url: 'https://chatgpt.com/' }); + await Page.loadEventFired(); + console.log('āœ“ Page loaded'); + + // Check current URL + const evalResult = await Runtime.evaluate({ expression: 'window.location.href' }); + console.log(`āœ“ Current URL: ${evalResult.result.value}`); + + // Check if logged in (look for specific elements) + const checkLogin = await Runtime.evaluate({ + expression: ` + // Check for composer textarea (indicates logged in) + const composer = document.querySelector('textarea, [contenteditable="true"]'); 
+ const hasComposer = !!composer; + + // Check for login button (indicates logged out) + const loginBtn = document.querySelector('a[href*="login"], button[data-testid*="login"]'); + const hasLogin = !!loginBtn; + + ({ hasComposer, hasLogin, loggedIn: hasComposer && !hasLogin }) + `, + }); + console.log(`āœ“ Login status: ${JSON.stringify(checkLogin.result.value)}`); + + await client.close(); + console.log('\nāœ“ POC successful! Remote Chrome connection works.'); + console.log('\nTo use Oracle with remote Chrome, you would need to:'); + console.log('1. Ensure cookies are loaded in remote Chrome'); + console.log('2. Modify Oracle to accept --remote-chrome-host and --remote-chrome-port flags'); + console.log('3. Skip local Chrome launch when remote is specified'); + + } catch (error) { + console.error('āœ— Connection failed:', error instanceof Error ? error.message : error); + console.log('\nTroubleshooting:'); + console.log('1. Ensure Chrome is running on remote machine with:'); + console.log(` google-chrome --remote-debugging-port=${port} --remote-debugging-address=0.0.0.0`); + console.log('2. Check firewall allows connections to port', port); + console.log('3. 
Verify network connectivity to', host); + process.exit(1); + } +} + +main(); diff --git a/src/browser/chromeLifecycle.ts b/src/browser/chromeLifecycle.ts index 4cfc280d..693f7282 100644 --- a/src/browser/chromeLifecycle.ts +++ b/src/browser/chromeLifecycle.ts @@ -89,6 +89,16 @@ export async function connectToChrome(port: number, logger: BrowserLogger): Prom return client; } +export async function connectToRemoteChrome( + host: string, + port: number, + logger: BrowserLogger, +): Promise { + const client = await CDP({ host, port }); + logger(`Connected to remote Chrome DevTools protocol at ${host}:${port}`); + return client; +} + function buildChromeFlags(headless: boolean): string[] { const flags = [ '--disable-background-networking', diff --git a/src/browser/config.ts b/src/browser/config.ts index f2200a53..fa6166b5 100644 --- a/src/browser/config.ts +++ b/src/browser/config.ts @@ -14,6 +14,7 @@ export const DEFAULT_BROWSER_CONFIG: ResolvedBrowserConfig = { desiredModel: DEFAULT_MODEL_TARGET, debug: false, allowCookieErrors: false, + remoteChrome: null, }; export function resolveBrowserConfig(config: BrowserAutomationConfig | undefined): ResolvedBrowserConfig { diff --git a/src/browser/index.ts b/src/browser/index.ts index a49a4a8d..fb0da34a 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -3,7 +3,13 @@ import path from 'node:path'; import os from 'node:os'; import { resolveBrowserConfig } from './config.js'; import type { BrowserRunOptions, BrowserRunResult, BrowserLogger, ChromeClient, BrowserAttachment } from './types.js'; -import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToChrome } from './chromeLifecycle.js'; +import { + launchChrome, + registerTerminationHooks, + hideChromeWindow, + connectToChrome, + connectToRemoteChrome, +} from './chromeLifecycle.js'; import { syncCookies } from './cookies.js'; import { navigateToChatGPT, @@ -49,6 +55,11 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise, 
+ logger: BrowserLogger, + options: BrowserRunOptions, +): Promise { + const { host, port } = config.remoteChrome!; + logger(`Connecting to remote Chrome at ${host}:${port}`); + + let client: ChromeClient | null = null; + const startedAt = Date.now(); + let answerText = ''; + let answerMarkdown = ''; + let answerHtml = ''; + let connectionClosedUnexpectedly = false; + let stopThinkingMonitor: (() => void) | null = null; + + try { + client = await connectToRemoteChrome(host, port, logger); + const markConnectionLost = () => { + connectionClosedUnexpectedly = true; + }; + client.on('disconnect', markConnectionLost); + const { Network, Page, Runtime, Input, DOM } = client; + + const domainEnablers = [Network.enable({}), Page.enable(), Runtime.enable()]; + if (DOM && typeof DOM.enable === 'function') { + domainEnablers.push(DOM.enable()); + } + await Promise.all(domainEnablers); + + // Skip cookie sync for remote Chrome - it already has cookies + logger('Skipping cookie sync for remote Chrome (using existing session)'); + + await navigateToChatGPT(Page, Runtime, config.url, logger); + await ensureNotBlocked(Runtime, config.headless, logger); + await ensurePromptReady(Runtime, config.inputTimeoutMs, logger); + logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`); + + if (config.desiredModel) { + await withRetries( + () => ensureModelSelection(Runtime, config.desiredModel as string, logger), + { + retries: 2, + delayMs: 300, + onRetry: (attempt, error) => { + if (options.verbose) { + logger(`[retry] Model picker attempt ${attempt + 1}: ${error instanceof Error ? 
error.message : error}`); + } + }, + }, + ); + await ensurePromptReady(Runtime, config.inputTimeoutMs, logger); + logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`); + } + + if (attachments.length > 0) { + if (!DOM) { + throw new Error('Chrome DOM domain unavailable while uploading attachments.'); + } + for (const attachment of attachments) { + logger(`Uploading attachment: ${attachment.displayPath}`); + await uploadAttachmentFile({ runtime: Runtime, dom: DOM }, attachment, logger); + } + const waitBudget = Math.max(config.inputTimeoutMs ?? 30_000, 30_000); + await waitForAttachmentCompletion(Runtime, waitBudget, logger); + logger('All attachments uploaded'); + } + + await submitPrompt({ runtime: Runtime, input: Input }, promptText, logger); + stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false); + const answer = await waitForAssistantResponse(Runtime, config.timeoutMs, logger); + answerText = answer.text; + answerHtml = answer.html ?? ''; + + const copiedMarkdown = await withRetries( + async () => { + const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger); + if (!attempt) { + throw new Error('copy-missing'); + } + return attempt; + }, + { + retries: 2, + delayMs: 350, + onRetry: (attempt, error) => { + if (options.verbose) { + logger( + `[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`, + ); + } + }, + }, + ).catch(() => null); + + answerMarkdown = copiedMarkdown ?? answerText; + stopThinkingMonitor?.(); + + const durationMs = Date.now() - startedAt; + const answerChars = answerText.length; + const answerTokens = estimateTokenCount(answerMarkdown); + + return { + answerText, + answerMarkdown, + answerHtml: answerHtml.length > 0 ? 
answerHtml : undefined, + tookMs: durationMs, + answerTokens, + answerChars, + chromePid: undefined, + chromePort: port, + userDataDir: undefined, + }; + } catch (error) { + const normalizedError = error instanceof Error ? error : new Error(String(error)); + stopThinkingMonitor?.(); + const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError); + connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed; + + if (!socketClosed) { + logger(`Failed to complete ChatGPT run: ${normalizedError.message}`); + if ((config.debug || process.env.CHATGPT_DEVTOOLS_TRACE === '1') && normalizedError.stack) { + logger(normalizedError.stack); + } + throw normalizedError; + } + + throw new Error('Remote Chrome connection lost before Oracle finished.', { + cause: normalizedError, + }); + } finally { + try { + if (!connectionClosedUnexpectedly && client) { + await client.close(); + } + } catch { + // ignore + } + // Don't kill remote Chrome - it's not ours to manage + const totalSeconds = (Date.now() - startedAt) / 1000; + logger(`Remote session complete • ${totalSeconds.toFixed(1)}s total`); + } +} + export { estimateTokenCount } from './utils.js'; export { resolveBrowserConfig, DEFAULT_BROWSER_CONFIG } from './config.js'; export { syncCookies } from './cookies.js'; diff --git a/src/browser/types.ts b/src/browser/types.ts index f6008318..e3246244 100644 --- a/src/browser/types.ts +++ b/src/browser/types.ts @@ -48,6 +48,7 @@ export interface BrowserAutomationConfig { desiredModel?: string | null; debug?: boolean; allowCookieErrors?: boolean; + remoteChrome?: { host: string; port: number } | null; } export interface BrowserRunOptions { @@ -72,9 +73,10 @@ export interface BrowserRunResult { } export type ResolvedBrowserConfig = Required< - Omit + Omit > & { chromeProfile?: string | null; chromePath?: string | null; desiredModel?: string | null; + remoteChrome?: { host: string; port: number } | null; }; diff --git 
a/src/cli/browserConfig.ts b/src/cli/browserConfig.ts index 0c5584a3..5a0796e6 100644 --- a/src/cli/browserConfig.ts +++ b/src/cli/browserConfig.ts @@ -23,6 +23,7 @@ export interface BrowserFlagOptions { browserKeepBrowser?: boolean; browserModelLabel?: string; browserAllowCookieErrors?: boolean; + remoteChrome?: string; model: ModelName; verbose?: boolean; } @@ -32,6 +33,20 @@ export function buildBrowserConfig(options: BrowserFlagOptions): BrowserSessionC const normalizedOverride = desiredModelOverride?.toLowerCase() ?? ''; const baseModel = options.model.toLowerCase(); const shouldUseOverride = normalizedOverride.length > 0 && normalizedOverride !== baseModel; + + let remoteChrome: { host: string; port: number } | undefined; + if (options.remoteChrome) { + const parts = options.remoteChrome.split(':'); + if (parts.length === 2) { + remoteChrome = { + host: parts[0], + port: parseInt(parts[1], 10), + }; + } else { + throw new Error(`Invalid remote-chrome format: ${options.remoteChrome}. Expected host:port`); + } + } + return { chromeProfile: options.browserChromeProfile ?? DEFAULT_CHROME_PROFILE, chromePath: options.browserChromePath ?? null, @@ -47,6 +62,7 @@ export function buildBrowserConfig(options: BrowserFlagOptions): BrowserSessionC desiredModel: shouldUseOverride ? desiredModelOverride : mapModelToBrowserLabel(options.model), debug: options.verbose ? true : undefined, allowCookieErrors: options.browserAllowCookieErrors ? 
true : undefined, + remoteChrome, }; } diff --git a/src/sessionManager.ts b/src/sessionManager.ts index 1e9debed..b4a95755 100644 --- a/src/sessionManager.ts +++ b/src/sessionManager.ts @@ -20,6 +20,7 @@ export interface BrowserSessionConfig { desiredModel?: string | null; debug?: boolean; allowCookieErrors?: boolean; + remoteChrome?: { host: string; port: number }; } export interface BrowserRuntimeMetadata { From 33d1010d08e5babd37d1a9b1b2a2f6fc38fd3b49 Mon Sep 17 00:00:00 2001 From: muly Date: Tue, 18 Nov 2025 05:04:53 +0000 Subject: [PATCH 2/4] fix: add file transfer support for remote Chrome mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve P1 issue where remote Chrome couldn't access local files. Solution: Transfer files via CDP using DataTransfer API - Read file content from CLI host filesystem - Base64 encode and inject into remote browser's DOM - Create File object via JavaScript DataTransfer API - Trigger change event on file input This enables full file attachment support in remote mode, allowing users to send local files from headless servers to ChatGPT via a remote browser on another machine. 
Fixes #8 (P1 Badge: Remote mode cannot upload local files) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/browser/actions/remoteFileTransfer.ts | 168 ++++++++++++++++++++++ src/browser/index.ts | 4 +- 2 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 src/browser/actions/remoteFileTransfer.ts diff --git a/src/browser/actions/remoteFileTransfer.ts b/src/browser/actions/remoteFileTransfer.ts new file mode 100644 index 00000000..f6caea03 --- /dev/null +++ b/src/browser/actions/remoteFileTransfer.ts @@ -0,0 +1,168 @@ +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; +import type { ChromeClient, BrowserAttachment, BrowserLogger } from '../types.js'; +import { FILE_INPUT_SELECTOR, GENERIC_FILE_INPUT_SELECTOR } from '../constants.js'; +import { delay } from '../utils.js'; +import { logDomFailure } from '../domDebug.js'; + +/** + * Upload file to remote Chrome by transferring content via CDP + * Used when browser is on a different machine than CLI + */ +export async function uploadAttachmentViaDataTransfer( + deps: { runtime: ChromeClient['Runtime']; dom?: ChromeClient['DOM'] }, + attachment: BrowserAttachment, + logger: BrowserLogger, +): Promise { + const { runtime, dom } = deps; + if (!dom) { + throw new Error('DOM domain unavailable while uploading attachments.'); + } + + // Read file content from local filesystem + const fileContent = await readFile(attachment.path); + const base64Content = fileContent.toString('base64'); + const fileName = path.basename(attachment.path); + const mimeType = guessMimeType(fileName); + + logger(`Transferring ${fileName} (${fileContent.length} bytes) to remote browser...`); + + // Find file input element + const documentNode = await dom.getDocument(); + const selectors = [FILE_INPUT_SELECTOR, GENERIC_FILE_INPUT_SELECTOR]; + let fileInputSelector: string | undefined; + + for (const selector of selectors) { + const result = await 
dom.querySelector({ nodeId: documentNode.root.nodeId, selector }); + if (result.nodeId) { + fileInputSelector = selector; + break; + } + } + + if (!fileInputSelector) { + await logDomFailure(runtime, logger, 'file-input'); + throw new Error('Unable to locate ChatGPT file attachment input.'); + } + + // Inject file via JavaScript DataTransfer API + const expression = ` + (function() { + const fileInput = document.querySelector(${JSON.stringify(fileInputSelector)}); + if (!fileInput) { + return { success: false, error: 'File input not found' }; + } + + // Convert base64 to Blob + const base64Data = ${JSON.stringify(base64Content)}; + const binaryString = atob(base64Data); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + const blob = new Blob([bytes], { type: ${JSON.stringify(mimeType)} }); + + // Create File object + const file = new File([blob], ${JSON.stringify(fileName)}, { + type: ${JSON.stringify(mimeType)}, + lastModified: Date.now() + }); + + // Create DataTransfer and assign to input + const dataTransfer = new DataTransfer(); + dataTransfer.items.add(file); + fileInput.files = dataTransfer.files; + + // Trigger change event + const event = new Event('change', { bubbles: true }); + fileInput.dispatchEvent(event); + + return { success: true, fileName: file.name, size: file.size }; + })() + `; + + const { result } = await runtime.evaluate({ expression, returnByValue: true }); + const uploadResult = result.value as { success?: boolean; error?: string; fileName?: string; size?: number }; + + if (!uploadResult?.success) { + throw new Error(`Failed to transfer file to remote browser: ${uploadResult?.error || 'Unknown error'}`); + } + + logger(`File transferred: ${uploadResult.fileName} (${uploadResult.size} bytes)`); + + // Give ChatGPT a moment to process the file + await delay(500); + logger(`Attachment queued: ${attachment.displayPath}`); +} + +async function 
waitForAttachmentRecognition( + Runtime: ChromeClient['Runtime'], + expectedFileName: string, + timeoutMs: number, +): Promise { + const deadline = Date.now() + timeoutMs; + const checkExpression = ` + (() => { + // Check for any file attachment indicators in the composer + const indicators = [ + // Look for file name in any element + ...Array.from(document.querySelectorAll('*')).filter(el => { + const text = el.textContent || ''; + return text.includes(${JSON.stringify(expectedFileName)}) && + el.getBoundingClientRect().height > 0; + }), + // Look for file input that has files + ...Array.from(document.querySelectorAll('input[type="file"]')).filter(input => { + return input.files && input.files.length > 0; + }) + ]; + + return indicators.length > 0; + })() + `; + + while (Date.now() < deadline) { + const { result } = await Runtime.evaluate({ expression: checkExpression, returnByValue: true }); + if (result.value === true) { + return; + } + await delay(250); + } + + throw new Error(`Attachment ${expectedFileName} did not register with ChatGPT composer in time.`); +} + +function guessMimeType(fileName: string): string { + const ext = path.extname(fileName).toLowerCase(); + const mimeTypes: Record = { + '.txt': 'text/plain', + '.md': 'text/markdown', + '.json': 'application/json', + '.js': 'text/javascript', + '.ts': 'text/typescript', + '.jsx': 'text/javascript', + '.tsx': 'text/typescript', + '.py': 'text/x-python', + '.java': 'text/x-java', + '.c': 'text/x-c', + '.cpp': 'text/x-c++', + '.h': 'text/x-c', + '.hpp': 'text/x-c++', + '.html': 'text/html', + '.css': 'text/css', + '.xml': 'text/xml', + '.yaml': 'text/yaml', + '.yml': 'text/yaml', + '.sh': 'text/x-sh', + '.bash': 'text/x-sh', + '.pdf': 'application/pdf', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.svg': 'image/svg+xml', + '.zip': 'application/zip', + }; + + return mimeTypes[ext] || 'application/octet-stream'; +} diff --git a/src/browser/index.ts 
b/src/browser/index.ts index fb0da34a..60b6520b 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -23,6 +23,7 @@ import { waitForAttachmentCompletion, readAssistantSnapshot, } from './pageActions.js'; +import { uploadAttachmentViaDataTransfer } from './actions/remoteFileTransfer.js'; import { estimateTokenCount, withRetries } from './utils.js'; import { formatElapsed } from '../oracle/format.js'; @@ -298,9 +299,10 @@ async function runRemoteBrowserMode( if (!DOM) { throw new Error('Chrome DOM domain unavailable while uploading attachments.'); } + // Use remote file transfer for remote Chrome (reads local files and injects via CDP) for (const attachment of attachments) { logger(`Uploading attachment: ${attachment.displayPath}`); - await uploadAttachmentFile({ runtime: Runtime, dom: DOM }, attachment, logger); + await uploadAttachmentViaDataTransfer({ runtime: Runtime, dom: DOM }, attachment, logger); } const waitBudget = Math.max(config.inputTimeoutMs ?? 30_000, 30_000); await waitForAttachmentCompletion(Runtime, waitBudget, logger); From 2a0463912833444b35bae8703865ab5be389ecd4 Mon Sep 17 00:00:00 2001 From: muly Date: Tue, 18 Nov 2025 05:53:23 +0000 Subject: [PATCH 3/4] refactor: implement GPT-5 Pro code review improvements for remote file transfer - Add error handling for Runtime.evaluate exceptions with exceptionDetails - Remove unused waitForAttachmentRecognition function (dead code) - Add 20MB file size limit to prevent CDP protocol issues - Expand MIME type coverage (CSV, Office docs, WebP, archives) - Dispatch both input and change events for better compatibility - Add browser API validation (File, Blob, DataTransfer, atob) - Add stricter file input element validation (instanceof, type check) All improvements suggested by GPT-5 Pro code review implemented and tested. Multi-file upload verified with 3 files (test-file.txt, remoteFileTransfer.ts, package.json). 
--- src/browser/actions/remoteFileTransfer.ts | 98 +++++++++++++---------- 1 file changed, 55 insertions(+), 43 deletions(-) diff --git a/src/browser/actions/remoteFileTransfer.ts b/src/browser/actions/remoteFileTransfer.ts index f6caea03..40a60f80 100644 --- a/src/browser/actions/remoteFileTransfer.ts +++ b/src/browser/actions/remoteFileTransfer.ts @@ -21,6 +21,15 @@ export async function uploadAttachmentViaDataTransfer( // Read file content from local filesystem const fileContent = await readFile(attachment.path); + + // Enforce file size limit to avoid CDP protocol issues + const MAX_BYTES = 20 * 1024 * 1024; // 20MB limit for CDP transfer + if (fileContent.length > MAX_BYTES) { + throw new Error( + `Attachment ${path.basename(attachment.path)} is too large for remote upload (${fileContent.length} bytes). Maximum size is ${MAX_BYTES} bytes.` + ); + } + const base64Content = fileContent.toString('base64'); const fileName = path.basename(attachment.path); const mimeType = guessMimeType(fileName); @@ -48,11 +57,21 @@ export async function uploadAttachmentViaDataTransfer( // Inject file via JavaScript DataTransfer API const expression = ` (function() { + // Check for required file APIs + if (!('File' in window) || !('Blob' in window) || !('DataTransfer' in window) || typeof atob !== 'function') { + return { success: false, error: 'Required file APIs are not available in this browser' }; + } + const fileInput = document.querySelector(${JSON.stringify(fileInputSelector)}); if (!fileInput) { return { success: false, error: 'File input not found' }; } + // Validate that the element is actually a file input + if (!(fileInput instanceof HTMLInputElement) || fileInput.type !== 'file') { + return { success: false, error: 'Found element is not a file input' }; + } + // Convert base64 to Blob const base64Data = ${JSON.stringify(base64Content)}; const binaryString = atob(base64Data); @@ -73,16 +92,23 @@ export async function uploadAttachmentViaDataTransfer( 
dataTransfer.items.add(file); fileInput.files = dataTransfer.files; - // Trigger change event - const event = new Event('change', { bubbles: true }); - fileInput.dispatchEvent(event); + // Trigger both input and change events for better compatibility + fileInput.dispatchEvent(new Event('input', { bubbles: true })); + fileInput.dispatchEvent(new Event('change', { bubbles: true })); return { success: true, fileName: file.name, size: file.size }; })() `; - const { result } = await runtime.evaluate({ expression, returnByValue: true }); - const uploadResult = result.value as { success?: boolean; error?: string; fileName?: string; size?: number }; + const evalResult = await runtime.evaluate({ expression, returnByValue: true }); + + // Check for JavaScript exceptions during evaluation + if ('exceptionDetails' in evalResult && evalResult.exceptionDetails) { + const description = evalResult.exceptionDetails.text ?? 'JS evaluation failed'; + throw new Error(`Failed to transfer file to remote browser: ${description}`); + } + + const uploadResult = evalResult.result.value as { success?: boolean; error?: string; fileName?: string; size?: number }; if (!uploadResult?.success) { throw new Error(`Failed to transfer file to remote browser: ${uploadResult?.error || 'Unknown error'}`); @@ -95,48 +121,16 @@ export async function uploadAttachmentViaDataTransfer( logger(`Attachment queued: ${attachment.displayPath}`); } -async function waitForAttachmentRecognition( - Runtime: ChromeClient['Runtime'], - expectedFileName: string, - timeoutMs: number, -): Promise { - const deadline = Date.now() + timeoutMs; - const checkExpression = ` - (() => { - // Check for any file attachment indicators in the composer - const indicators = [ - // Look for file name in any element - ...Array.from(document.querySelectorAll('*')).filter(el => { - const text = el.textContent || ''; - return text.includes(${JSON.stringify(expectedFileName)}) && - el.getBoundingClientRect().height > 0; - }), - // Look for 
file input that has files - ...Array.from(document.querySelectorAll('input[type="file"]')).filter(input => { - return input.files && input.files.length > 0; - }) - ]; - - return indicators.length > 0; - })() - `; - - while (Date.now() < deadline) { - const { result } = await Runtime.evaluate({ expression: checkExpression, returnByValue: true }); - if (result.value === true) { - return; - } - await delay(250); - } - - throw new Error(`Attachment ${expectedFileName} did not register with ChatGPT composer in time.`); -} function guessMimeType(fileName: string): string { const ext = path.extname(fileName).toLowerCase(); const mimeTypes: Record = { + // Text files '.txt': 'text/plain', '.md': 'text/markdown', + '.csv': 'text/csv', + + // Code files '.json': 'application/json', '.js': 'text/javascript', '.ts': 'text/typescript', @@ -148,20 +142,38 @@ function guessMimeType(fileName: string): string { '.cpp': 'text/x-c++', '.h': 'text/x-c', '.hpp': 'text/x-c++', + '.sh': 'text/x-sh', + '.bash': 'text/x-sh', + + // Web files '.html': 'text/html', '.css': 'text/css', '.xml': 'text/xml', '.yaml': 'text/yaml', '.yml': 'text/yaml', - '.sh': 'text/x-sh', - '.bash': 'text/x-sh', + + // Documents '.pdf': 'application/pdf', + '.doc': 'application/msword', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.xls': 'application/vnd.ms-excel', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.ppt': 'application/vnd.ms-powerpoint', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + + // Images '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.gif': 'image/gif', '.svg': 'image/svg+xml', + '.webp': 'image/webp', + + // Archives '.zip': 'application/zip', + '.tar': 'application/x-tar', + '.gz': 'application/gzip', + '.7z': 'application/x-7z-compressed', }; return mimeTypes[ext] || 'application/octet-stream'; From 106f8cf1108414c5efc6dea7e02eee8a705246f7 Mon Sep 
17 00:00:00 2001 From: muly Date: Tue, 18 Nov 2025 07:58:12 +0000 Subject: [PATCH 4/4] refactor: improve remote Chrome error handling and validation Based on GPT-5 Pro code review feedback, address critical robustness issues: - Port validation: Add Number.isFinite() check and enforce valid range (1-65535) to prevent NaN and out-of-range port errors in browserConfig.ts - CDP error handling: Validate evalResult.result structure before accessing to prevent crashes from malformed CDP protocol responses in remoteFileTransfer.ts - User warnings: Alert users when local-only browser flags are ignored in remote Chrome mode to prevent silent misconfiguration in index.ts - Documentation: Update test script comments to reflect actual --remote-chrome flag syntax instead of deprecated split flags All fixes reviewed and rated 'Good' or 'Excellent' by GPT-5 Pro. --- scripts/test-remote-chrome.ts | 4 ++-- src/browser/actions/remoteFileTransfer.ts | 11 ++++++++--- src/browser/index.ts | 8 ++++++++ src/cli/browserConfig.ts | 14 ++++++++++---- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/scripts/test-remote-chrome.ts b/scripts/test-remote-chrome.ts index 6c174def..b9821965 100644 --- a/scripts/test-remote-chrome.ts +++ b/scripts/test-remote-chrome.ts @@ -63,8 +63,8 @@ async function main() { console.log('\nāœ“ POC successful! Remote Chrome connection works.'); console.log('\nTo use Oracle with remote Chrome, you would need to:'); console.log('1. Ensure cookies are loaded in remote Chrome'); - console.log('2. Modify Oracle to accept --remote-chrome-host and --remote-chrome-port flags'); - console.log('3. Skip local Chrome launch when remote is specified'); + console.log('2. Configure Oracle with --remote-chrome to use this instance'); + console.log('3. Ensure Oracle skips local Chrome launch when --remote-chrome is specified'); } catch (error) { console.error('āœ— Connection failed:', error instanceof Error ? 
error.message : error); diff --git a/src/browser/actions/remoteFileTransfer.ts b/src/browser/actions/remoteFileTransfer.ts index 40a60f80..d42f8761 100644 --- a/src/browser/actions/remoteFileTransfer.ts +++ b/src/browser/actions/remoteFileTransfer.ts @@ -103,15 +103,20 @@ export async function uploadAttachmentViaDataTransfer( const evalResult = await runtime.evaluate({ expression, returnByValue: true }); // Check for JavaScript exceptions during evaluation - if ('exceptionDetails' in evalResult && evalResult.exceptionDetails) { + if (evalResult.exceptionDetails) { const description = evalResult.exceptionDetails.text ?? 'JS evaluation failed'; throw new Error(`Failed to transfer file to remote browser: ${description}`); } + // Validate result structure before accessing + if (!evalResult.result || typeof evalResult.result.value !== 'object' || evalResult.result.value == null) { + throw new Error('Failed to transfer file to remote browser: unexpected evaluation result'); + } + const uploadResult = evalResult.result.value as { success?: boolean; error?: string; fileName?: string; size?: number }; - if (!uploadResult?.success) { - throw new Error(`Failed to transfer file to remote browser: ${uploadResult?.error || 'Unknown error'}`); + if (!uploadResult.success) { + throw new Error(`Failed to transfer file to remote browser: ${uploadResult.error || 'Unknown error'}`); } logger(`File transferred: ${uploadResult.fileName} (${uploadResult.size} bytes)`); diff --git a/src/browser/index.ts b/src/browser/index.ts index 60b6520b..c8ab5309 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -58,6 +58,14 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise 65535) { + throw new Error( + `Invalid remote-chrome port: "${parts[1]}". Expected a number between 1 and 65535.` + ); + } + + remoteChrome = { host: parts[0], port }; } else { throw new Error(`Invalid remote-chrome format: ${options.remoteChrome}. Expected host:port`); }