diff --git a/bin/oracle-cli.ts b/bin/oracle-cli.ts index 3d39cd90..ada84a86 100755 --- a/bin/oracle-cli.ts +++ b/bin/oracle-cli.ts @@ -93,6 +93,7 @@ interface CliOptions extends OptionValues { browserKeepBrowser?: boolean; browserAllowCookieErrors?: boolean; browserInlineFiles?: boolean; + remoteChrome?: string; browserBundleFiles?: boolean; verbose?: boolean; debugHelp?: boolean; @@ -257,6 +258,9 @@ program .addOption( new Option('--browser-allow-cookie-errors', 'Continue even if Chrome cookies cannot be copied.').hideHelp(), ) + .addOption( + new Option('--remote-chrome ', 'Connect to remote Chrome DevTools Protocol (e.g., 192.168.1.10:9222).'), + ) .addOption( new Option('--browser-inline-files', 'Paste files directly into the ChatGPT composer instead of uploading attachments.').default(false), ) diff --git a/scripts/test-remote-chrome.ts b/scripts/test-remote-chrome.ts new file mode 100644 index 00000000..b9821965 --- /dev/null +++ b/scripts/test-remote-chrome.ts @@ -0,0 +1,80 @@ +#!/usr/bin/env npx tsx +/** + * POC: Test connecting to remote Chrome instance + * + * On remote machine with display, run: + * google-chrome --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 + * + * Then run this script: + * npx tsx scripts/test-remote-chrome.ts [port] + */ + +import CDP from 'chrome-remote-interface'; + +async function main() { + const host = process.argv[2] || 'localhost'; + const port = parseInt(process.argv[3] || '9222', 10); + + console.log(`Attempting to connect to Chrome at ${host}:${port}...`); + + try { + // Test connection + const client = await CDP({ host, port }); + console.log('✓ Connected to Chrome DevTools Protocol'); + + const { Network, Page, Runtime } = client; + + // Enable domains + await Promise.all([Network.enable(), Page.enable()]); + console.log('✓ Enabled Network and Page domains'); + + // Get browser version info + const version = await CDP.Version({ host, port }); + console.log(`✓ Browser: ${version.Browser}`); + console.log(`✓ Protocol: ${version['Protocol-Version']}`); + + // Navigate to ChatGPT + console.log('\nNavigating to ChatGPT...'); + await Page.navigate({ url: 'https://chatgpt.com/' }); + await Page.loadEventFired(); + console.log('✓ Page loaded'); + + // Check current URL + const evalResult = await Runtime.evaluate({ expression: 'window.location.href' }); + console.log(`✓ Current URL: ${evalResult.result.value}`); + + // Check if logged in (look for specific elements) + const checkLogin = await Runtime.evaluate({ + expression: ` + // Check for composer textarea (indicates logged in) + const composer = document.querySelector('textarea, [contenteditable="true"]'); + const hasComposer = !!composer; + + // Check for login button (indicates logged out) + const loginBtn = document.querySelector('a[href*="login"], button[data-testid*="login"]'); + const hasLogin = !!loginBtn; + + ({ hasComposer, hasLogin, loggedIn: hasComposer && !hasLogin }) + `, + }); + console.log(`✓ Login status: ${JSON.stringify(checkLogin.result.value)}`); + + await client.close(); + console.log('\n✓ POC successful! Remote Chrome connection works.'); + console.log('\nTo use Oracle with remote Chrome, you would need to:'); + console.log('1. Ensure cookies are loaded in remote Chrome'); + console.log('2. Configure Oracle with --remote-chrome to use this instance'); + console.log('3. Ensure Oracle skips local Chrome launch when --remote-chrome is specified'); + + } catch (error) { + console.error('✗ Connection failed:', error instanceof Error ? error.message : error); + console.log('\nTroubleshooting:'); + console.log('1. Ensure Chrome is running on remote machine with:'); + console.log(` google-chrome --remote-debugging-port=${port} --remote-debugging-address=0.0.0.0`); + console.log('2. Check firewall allows connections to port', port); + console.log('3. Verify network connectivity to', host); + process.exit(1); + } +} + +main(); diff --git a/src/browser/actions/remoteFileTransfer.ts b/src/browser/actions/remoteFileTransfer.ts new file mode 100644 index 00000000..d42f8761 --- /dev/null +++ b/src/browser/actions/remoteFileTransfer.ts @@ -0,0 +1,185 @@ +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; +import type { ChromeClient, BrowserAttachment, BrowserLogger } from '../types.js'; +import { FILE_INPUT_SELECTOR, GENERIC_FILE_INPUT_SELECTOR } from '../constants.js'; +import { delay } from '../utils.js'; +import { logDomFailure } from '../domDebug.js'; + +/** + * Upload file to remote Chrome by transferring content via CDP + * Used when browser is on a different machine than CLI + */ +export async function uploadAttachmentViaDataTransfer( + deps: { runtime: ChromeClient['Runtime']; dom?: ChromeClient['DOM'] }, + attachment: BrowserAttachment, + logger: BrowserLogger, +): Promise { + const { runtime, dom } = deps; + if (!dom) { + throw new Error('DOM domain unavailable while uploading attachments.'); + } + + // Read file content from local filesystem + const fileContent = await readFile(attachment.path); + + // Enforce file size limit to avoid CDP protocol issues + const MAX_BYTES = 20 * 1024 * 1024; // 20MB limit for CDP transfer + if (fileContent.length > MAX_BYTES) { + throw new Error( + `Attachment ${path.basename(attachment.path)} is too large for remote upload (${fileContent.length} bytes). Maximum size is ${MAX_BYTES} bytes.` + ); + } + + const base64Content = fileContent.toString('base64'); + const fileName = path.basename(attachment.path); + const mimeType = guessMimeType(fileName); + + logger(`Transferring ${fileName} (${fileContent.length} bytes) to remote browser...`); + + // Find file input element + const documentNode = await dom.getDocument(); + const selectors = [FILE_INPUT_SELECTOR, GENERIC_FILE_INPUT_SELECTOR]; + let fileInputSelector: string | undefined; + + for (const selector of selectors) { + const result = await dom.querySelector({ nodeId: documentNode.root.nodeId, selector }); + if (result.nodeId) { + fileInputSelector = selector; + break; + } + } + + if (!fileInputSelector) { + await logDomFailure(runtime, logger, 'file-input'); + throw new Error('Unable to locate ChatGPT file attachment input.'); + } + + // Inject file via JavaScript DataTransfer API + const expression = ` + (function() { + // Check for required file APIs + if (!('File' in window) || !('Blob' in window) || !('DataTransfer' in window) || typeof atob !== 'function') { + return { success: false, error: 'Required file APIs are not available in this browser' }; + } + + const fileInput = document.querySelector(${JSON.stringify(fileInputSelector)}); + if (!fileInput) { + return { success: false, error: 'File input not found' }; + } + + // Validate that the element is actually a file input + if (!(fileInput instanceof HTMLInputElement) || fileInput.type !== 'file') { + return { success: false, error: 'Found element is not a file input' }; + } + + // Convert base64 to Blob + const base64Data = ${JSON.stringify(base64Content)}; + const binaryString = atob(base64Data); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + const blob = new Blob([bytes], { type: ${JSON.stringify(mimeType)} }); + + // Create File object + const file = new File([blob], ${JSON.stringify(fileName)}, { + type: ${JSON.stringify(mimeType)}, + lastModified: Date.now() + }); + + // Create DataTransfer and assign to input + const dataTransfer = new DataTransfer(); + dataTransfer.items.add(file); + fileInput.files = dataTransfer.files; + + // Trigger both input and change events for better compatibility + fileInput.dispatchEvent(new Event('input', { bubbles: true })); + fileInput.dispatchEvent(new Event('change', { bubbles: true })); + + return { success: true, fileName: file.name, size: file.size }; + })() + `; + + const evalResult = await runtime.evaluate({ expression, returnByValue: true }); + + // Check for JavaScript exceptions during evaluation + if (evalResult.exceptionDetails) { + const description = evalResult.exceptionDetails.text ?? 'JS evaluation failed'; + throw new Error(`Failed to transfer file to remote browser: ${description}`); + } + + // Validate result structure before accessing + if (!evalResult.result || typeof evalResult.result.value !== 'object' || evalResult.result.value == null) { + throw new Error('Failed to transfer file to remote browser: unexpected evaluation result'); + } + + const uploadResult = evalResult.result.value as { success?: boolean; error?: string; fileName?: string; size?: number }; + + if (!uploadResult.success) { + throw new Error(`Failed to transfer file to remote browser: ${uploadResult.error || 'Unknown error'}`); + } + + logger(`File transferred: ${uploadResult.fileName} (${uploadResult.size} bytes)`); + + // Give ChatGPT a moment to process the file + await delay(500); + logger(`Attachment queued: ${attachment.displayPath}`); +} + + +function guessMimeType(fileName: string): string { + const ext = path.extname(fileName).toLowerCase(); + const mimeTypes: Record = { + // Text files + '.txt': 'text/plain', + '.md': 'text/markdown', + '.csv': 'text/csv', + + // Code files + '.json': 'application/json', + '.js': 'text/javascript', + '.ts': 'text/typescript', + '.jsx': 'text/javascript', + '.tsx': 'text/typescript', + '.py': 'text/x-python', + '.java': 'text/x-java', + '.c': 'text/x-c', + '.cpp': 'text/x-c++', + '.h': 'text/x-c', + '.hpp': 'text/x-c++', + '.sh': 'text/x-sh', + '.bash': 'text/x-sh', + + // Web files + '.html': 'text/html', + '.css': 'text/css', + '.xml': 'text/xml', + '.yaml': 'text/yaml', + '.yml': 'text/yaml', + + // Documents + '.pdf': 'application/pdf', + '.doc': 'application/msword', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.xls': 'application/vnd.ms-excel', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.ppt': 'application/vnd.ms-powerpoint', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + + // Images + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.svg': 'image/svg+xml', + '.webp': 'image/webp', + + // Archives + '.zip': 'application/zip', + '.tar': 'application/x-tar', + '.gz': 'application/gzip', + '.7z': 'application/x-7z-compressed', + }; + + return mimeTypes[ext] || 'application/octet-stream'; +} diff --git a/src/browser/chromeLifecycle.ts b/src/browser/chromeLifecycle.ts index 4cfc280d..693f7282 100644 --- a/src/browser/chromeLifecycle.ts +++ b/src/browser/chromeLifecycle.ts @@ -89,6 +89,16 @@ export async function connectToChrome(port: number, logger: BrowserLogger): Prom return client; } +export async function connectToRemoteChrome( + host: string, + port: number, + logger: BrowserLogger, +): Promise { + const client = await CDP({ host, port }); + logger(`Connected to remote Chrome DevTools protocol at ${host}:${port}`); + return client; +} + function buildChromeFlags(headless: boolean): string[] { const flags = [ '--disable-background-networking', diff --git a/src/browser/config.ts b/src/browser/config.ts index d0e28c59..c0bf15ac 100644 --- a/src/browser/config.ts +++ b/src/browser/config.ts @@ -17,6 +17,7 @@ export const DEFAULT_BROWSER_CONFIG: ResolvedBrowserConfig = { desiredModel: DEFAULT_MODEL_TARGET, debug: false, allowCookieErrors: false, + remoteChrome: null, }; export function resolveBrowserConfig(config: BrowserAutomationConfig | undefined): ResolvedBrowserConfig { diff --git a/src/browser/index.ts b/src/browser/index.ts index 6d717611..da529f19 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -3,7 +3,13 @@ import path from 'node:path'; import os from 'node:os'; import { resolveBrowserConfig } from './config.js'; import type { BrowserRunOptions, BrowserRunResult, BrowserLogger, ChromeClient, BrowserAttachment } from './types.js'; -import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToChrome } from './chromeLifecycle.js'; +import { + launchChrome, + registerTerminationHooks, + hideChromeWindow, + connectToChrome, + connectToRemoteChrome, +} from './chromeLifecycle.js'; import { syncCookies } from './cookies.js'; import { navigateToChatGPT, @@ -17,6 +23,7 @@ import { waitForAttachmentCompletion, readAssistantSnapshot, } from './pageActions.js'; +import { uploadAttachmentViaDataTransfer } from './actions/remoteFileTransfer.js'; import { estimateTokenCount, withRetries } from './utils.js'; import { formatElapsed } from '../oracle/format.js'; @@ -49,6 +56,19 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise, + logger: BrowserLogger, + options: BrowserRunOptions, +): Promise { + const { host, port } = config.remoteChrome!; + logger(`Connecting to remote Chrome at ${host}:${port}`); + + let client: ChromeClient | null = null; + const startedAt = Date.now(); + let answerText = ''; + let answerMarkdown = ''; + let answerHtml = ''; + let connectionClosedUnexpectedly = false; + let stopThinkingMonitor: (() => void) | null = null; + + try { + client = await connectToRemoteChrome(host, port, logger); + const markConnectionLost = () => { + connectionClosedUnexpectedly = true; + }; + client.on('disconnect', markConnectionLost); + const { Network, Page, Runtime, Input, DOM } = client; + + const domainEnablers = [Network.enable({}), Page.enable(), Runtime.enable()]; + if (DOM && typeof DOM.enable === 'function') { + domainEnablers.push(DOM.enable()); + } + await Promise.all(domainEnablers); + + // Skip cookie sync for remote Chrome - it already has cookies + logger('Skipping cookie sync for remote Chrome (using existing session)'); + + await navigateToChatGPT(Page, Runtime, config.url, logger); + await ensureNotBlocked(Runtime, config.headless, logger); + await ensurePromptReady(Runtime, config.inputTimeoutMs, logger); + logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`); + + if (config.desiredModel) { + await withRetries( + () => ensureModelSelection(Runtime, config.desiredModel as string, logger), + { + retries: 2, + delayMs: 300, + onRetry: (attempt, error) => { + if (options.verbose) { + logger(`[retry] Model picker attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`); + } + }, + }, + ); + await ensurePromptReady(Runtime, config.inputTimeoutMs, logger); + logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`); + } + + if (attachments.length > 0) { + if (!DOM) { + throw new Error('Chrome DOM domain unavailable while uploading attachments.'); + } + // Use remote file transfer for remote Chrome (reads local files and injects via CDP) + for (const attachment of attachments) { + logger(`Uploading attachment: ${attachment.displayPath}`); + await uploadAttachmentViaDataTransfer({ runtime: Runtime, dom: DOM }, attachment, logger); + } + const waitBudget = Math.max(config.inputTimeoutMs ?? 30_000, 30_000); + await waitForAttachmentCompletion(Runtime, waitBudget, logger); + logger('All attachments uploaded'); + } + + await submitPrompt({ runtime: Runtime, input: Input }, promptText, logger); + stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false); + const answer = await waitForAssistantResponse(Runtime, config.timeoutMs, logger); + answerText = answer.text; + answerHtml = answer.html ?? ''; + + const copiedMarkdown = await withRetries( + async () => { + const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger); + if (!attempt) { + throw new Error('copy-missing'); + } + return attempt; + }, + { + retries: 2, + delayMs: 350, + onRetry: (attempt, error) => { + if (options.verbose) { + logger( + `[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`, + ); + } + }, + }, + ).catch(() => null); + + answerMarkdown = copiedMarkdown ?? answerText; + stopThinkingMonitor?.(); + + const durationMs = Date.now() - startedAt; + const answerChars = answerText.length; + const answerTokens = estimateTokenCount(answerMarkdown); + + return { + answerText, + answerMarkdown, + answerHtml: answerHtml.length > 0 ? answerHtml : undefined, + tookMs: durationMs, + answerTokens, + answerChars, + chromePid: undefined, + chromePort: port, + userDataDir: undefined, + }; + } catch (error) { + const normalizedError = error instanceof Error ? error : new Error(String(error)); + stopThinkingMonitor?.(); + const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError); + connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed; + + if (!socketClosed) { + logger(`Failed to complete ChatGPT run: ${normalizedError.message}`); + if ((config.debug || process.env.CHATGPT_DEVTOOLS_TRACE === '1') && normalizedError.stack) { + logger(normalizedError.stack); + } + throw normalizedError; + } + + throw new Error('Remote Chrome connection lost before Oracle finished.', { + cause: normalizedError, + }); + } finally { + try { + if (!connectionClosedUnexpectedly && client) { + await client.close(); + } + } catch { + // ignore + } + // Don't kill remote Chrome - it's not ours to manage + const totalSeconds = (Date.now() - startedAt) / 1000; + logger(`Remote session complete • ${totalSeconds.toFixed(1)}s total`); + } +} + export { estimateTokenCount } from './utils.js'; export { resolveBrowserConfig, DEFAULT_BROWSER_CONFIG } from './config.js'; export { syncCookies } from './cookies.js'; diff --git a/src/browser/types.ts b/src/browser/types.ts index 53aa51c7..1a5d9480 100644 --- a/src/browser/types.ts +++ b/src/browser/types.ts @@ -51,6 +51,7 @@ export interface BrowserAutomationConfig { desiredModel?: string | null; debug?: boolean; allowCookieErrors?: boolean; + remoteChrome?: { host: string; port: number } | null; } export interface BrowserRunOptions { @@ -75,10 +76,11 @@ export interface BrowserRunResult { } export type ResolvedBrowserConfig = Required< - Omit + Omit > & { chromeProfile?: string | null; chromePath?: string | null; desiredModel?: string | null; + remoteChrome?: { host: string; port: number } | null; inlineCookiesSource?: string | null; }; diff --git a/src/cli/browserConfig.ts b/src/cli/browserConfig.ts index 21c2a169..1fa8ab82 100644 --- a/src/cli/browserConfig.ts +++ b/src/cli/browserConfig.ts @@ -31,6 +31,7 @@ export interface BrowserFlagOptions { browserKeepBrowser?: boolean; browserModelLabel?: string; browserAllowCookieErrors?: boolean; + remoteChrome?: string; model: ModelName; verbose?: boolean; } @@ -40,6 +41,26 @@ export async function buildBrowserConfig(options: BrowserFlagOptions): Promise 0 && normalizedOverride !== baseModel; + + let remoteChrome: { host: string; port: number } | undefined; + if (options.remoteChrome) { + const parts = options.remoteChrome.split(':'); + if (parts.length === 2) { + const port = Number.parseInt(parts[1], 10); + + // Validate port is a valid number in valid range + if (!Number.isFinite(port) || port <= 0 || port > 65535) { + throw new Error( + `Invalid remote-chrome port: "${parts[1]}". Expected a number between 1 and 65535.` + ); + } + + remoteChrome = { host: parts[0], port }; + } else { + throw new Error(`Invalid remote-chrome format: ${options.remoteChrome}. Expected host:port`); + } + } + const cookieNames = parseCookieNames(options.browserCookieNames ?? process.env.ORACLE_BROWSER_COOKIE_NAMES); const inline = await resolveInlineCookies({ inlineArg: options.browserInlineCookies, @@ -66,6 +87,7 @@ export async function buildBrowserConfig(options: BrowserFlagOptions): Promise