From 604a89efde9f62995e02b7aad0493cc893f3739d Mon Sep 17 00:00:00 2001 From: Denys Kuchma Date: Thu, 21 May 2026 22:52:41 +0300 Subject: [PATCH 1/4] upd minor fix --- bin/explorbot-cli.ts | 7 +++- src/ai/navigator.ts | 45 ++++++++++++++++++++++- src/ai/pilot.ts | 5 +++ src/ai/planner.ts | 26 +++++++++++-- src/ai/researcher.ts | 8 +++- src/ai/researcher/deep-analysis.ts | 27 ++++++++++---- src/ai/session-analyst.ts | 24 ++++++++++++ src/ai/tester.ts | 4 +- src/ai/tools.ts | 5 ++- src/commands/explore-command.ts | 7 +++- tests/unit/navigator-origin-guard.test.ts | 40 ++++++++++++++++++++ 11 files changed, 178 insertions(+), 20 deletions(-) create mode 100644 tests/unit/navigator-origin-guard.test.ts diff --git a/bin/explorbot-cli.ts b/bin/explorbot-cli.ts index 5499742..d2b1176 100755 --- a/bin/explorbot-cli.ts +++ b/bin/explorbot-cli.ts @@ -43,6 +43,11 @@ interface CLIOptions { } function buildExplorBotOptions(from: string | undefined, options: CLIOptions): ExplorBotOptions { + const sessionFile = + options.session === true + ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') + : options.session; + return { from, verbose: options.verbose || options.debug, @@ -51,7 +56,7 @@ function buildExplorBotOptions(from: string | undefined, options: CLIOptions): E show: options.show, headless: options.headless, incognito: options.incognito, - session: options.session === true ? 'output/session.json' : options.session, + session: sessionFile, } as ExplorBotOptions; } diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts index 8b67fd7..f21461a 100644 --- a/src/ai/navigator.ts +++ b/src/ai/navigator.ts @@ -80,8 +80,48 @@ class Navigator implements Agent { this.hooksRunner = new HooksRunner(explorer, explorer.getConfig()); } + private getBaseOrigin(): string | null { + const baseUrl = this.explorer.getConfig().playwright.url; + try { + return new URL(baseUrl).origin; + } catch { + return null; + } + } + + private getComparableCurrentUrl(stateManager: any, expectedUrl: string): string { + const currentState = stateManager.getCurrentState(); + if (!currentState) return ''; + const current = /^https?:\/\//i.test(expectedUrl) ? currentState.fullUrl || currentState.url || '' : currentState.url || ''; + return current; + } + + private isSameExpectedOrigin(expectedUrl: string, stateManager: any): boolean { + const currentState = stateManager.getCurrentState(); + if (!currentState) return false; + + const currentFullUrl = currentState.fullUrl || currentState.url || ''; + if (!currentFullUrl) return false; + + try { + const currentOrigin = new URL(currentFullUrl).origin; + if (/^https?:\/\//i.test(expectedUrl)) { + return currentOrigin === new URL(expectedUrl).origin; + } + + const baseOrigin = this.getBaseOrigin(); + if (!baseOrigin) return true; + return currentOrigin === baseOrigin; + } catch { + return !/^https?:\/\//i.test(expectedUrl); + } + } + private isOnExpectedPage(expectedUrl: string, stateManager: any): boolean { - const currentUrl = stateManager.getCurrentState()?.url || ''; + if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) { + return false; + } + const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl); return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl); } @@ -282,7 +322,8 @@ class Navigator implements Agent { } } const freshState = await action.capturePageState(); - const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl); + const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || ''; + const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl); const stateChanged = freshState.getStateHash() !== actionResult.getStateHash(); resolved = urlMatches && stateChanged; diff --git a/src/ai/pilot.ts b/src/ai/pilot.ts index 676290c..4cfd84b 100644 --- a/src/ai/pilot.ts +++ b/src/ai/pilot.ts @@ -313,6 +313,9 @@ export class Pilot implements Agent { overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always cross-check against actual actions and state. Visual screenshot analysis is strong for UI state (active tabs, visible counts, colors). + If the final page clearly shows an equivalent success state in a different UI form, do not fail only + because one narrow assertion targeted a specific badge, count, toast, or wording that the product + represents differently. SCENARIO TITLE defines what must happen. Action verbs require persisted evidence: - "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough. @@ -355,6 +358,8 @@ export class Pilot implements Agent { GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what to verify, how to record. Do not suggest repeating actions that already succeeded. + If progress is blocked only because the page lacks target data for the scenario, prefer precondition() + over repeated UI attempts. `; } diff --git a/src/ai/planner.ts b/src/ai/planner.ts index 638ef82..e523f95 100644 --- a/src/ai/planner.ts +++ b/src/ai/planner.ts @@ -80,6 +80,10 @@ export class Planner extends PlannerBase implements Agent { return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS); } + private getDefaultStartUrl(state: { url: string; fullUrl?: string }): string { + return state.fullUrl || state.url; + } + getSystemMessage(feature?: string): string { const currentUrl = this.stateManager.getCurrentState()?.url; const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl); @@ -187,7 +191,8 @@ export class Planner extends PlannerBase implements Agent { throw new Error('No tasks were created successfully'); } - const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || state.url, s.steps || [])); + const defaultStartUrl = this.getDefaultStartUrl(state); + const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || [])); return { tests: fromPlanning, planName: aiResult.object.planName }; }); @@ -199,7 +204,8 @@ export class Planner extends PlannerBase implements Agent { const cached = state.url ? getRegisteredPlan(state.url) : null; const planName = feature || cached?.plan.title || result.planName || state.url; this.currentPlan = new Plan(planName); - this.currentPlan.url = state.url; + this.currentPlan.url = this.getDefaultStartUrl(state); + const defaultStartUrl = this.getDefaultStartUrl(state); if (parentPlan) this.currentPlan.parentPlan = parentPlan; const allPreviousScenarios = this.getPreviousSessionScenarios(); const existingTestScenarios = this.getExistingTestFileScenarios(state.url); @@ -207,13 +213,13 @@ export class Planner extends PlannerBase implements Agent { for (const t of tests) { if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue; t.style = this.lastStyleName; - t.startUrl = state.url; + t.startUrl = defaultStartUrl; this.currentPlan.addTest(t); } } else { tag('step').log(`Expanding plan: "${this.currentPlan.title}"`); this.currentPlan.nextIteration(); - const newTests = this.addNewTests(tests, state.url); + const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state)); if (newTests.length > 0) { const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`; tag('multiline').log(summary); @@ -330,6 +336,13 @@ export class Planner extends PlannerBase implements Agent { Focus on URL page change or data persistency after page reload. If there are subpages (pages with same URL path) plan testing of those subpages as well If you plan to test CRUD operations, plan them in correct order: create, read, update. + Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows. + If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text. + If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug. + Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context. + If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it. + If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state. + Do not assume hidden data exists just because a control is present. DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists. Every test must complete a meaningful action that changes application state or produces a business outcome. Opening a modal is NOT a test — performing an action INSIDE the modal IS a test. @@ -565,10 +578,15 @@ export class Planner extends PlannerBase implements Agent { - Good: "New suite 'My New Suite' appears in the suite list" - Good: "Suite appears under Starred filter tab" - Good: "Success message 'Suite created' is displayed" + - Good when wording is unknown: "An authentication error is displayed" + - Good when route is unknown: "The workspace home page is displayed" - Bad: "Modal is displayed" (just verifying existence, no business value) - Bad: "Dropdown menu is visible" (just verifying existence) + - Bad: "Welcome message is displayed" if no welcome message is visible in research + - Bad: "Redirected to /dashboard" if no such route was observed - Each outcome should be independently verifiable - Avoid combining multiple checks into one outcome + - Prefer durable user-facing results over fragile micro-signals - Expected outcomes describe WHAT TO VERIFY FORMATTING RULES: diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts index ad8e4db..5d3c4cf 100644 --- a/src/ai/researcher.ts +++ b/src/ai/researcher.ts @@ -125,7 +125,7 @@ export class Researcher extends ResearcherBase implements Agent { tag('info').log(`Researching ${displayUrl} to understand the context...`); setActivity(`${this.emoji} Researching...`, 'action'); - await this.ensureNavigated(state.url, screenshot && this.provider.hasVision()); + await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision()); await this.hooksRunner.runBeforeHook('researcher', state.url); const annotatedElements = await this.explorer.annotateElements(); @@ -285,7 +285,11 @@ export class Researcher extends ResearcherBase implements Agent { } if (!interrupted() && deep) { - await this.performDeepAnalysis(state, result); + try { + await this.performDeepAnalysis(state, result); + } catch (err) { + tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`); + } } if (!interrupted() && data) { diff --git a/src/ai/researcher/deep-analysis.ts b/src/ai/researcher/deep-analysis.ts index f87b0eb..1f7296b 100644 --- a/src/ai/researcher/deep-analysis.ts +++ b/src/ai/researcher/deep-analysis.ts @@ -24,7 +24,7 @@ export function WithDeepAnalysis(Base: T) { async performDeepAnalysis(state: WebPageState, result: ResearchResult): Promise { tag('info').log('Starting deep analysis of expandable elements'); - await (this as any).navigateTo(state.url); + await (this as any).navigateTo(state.fullUrl || state.url); let expandables = await this._discoverExpandables(result.text); if (expandables.length === 0) { @@ -35,7 +35,7 @@ export function WithDeepAnalysis(Base: T) { const maxClicks = (this.explorer.getConfig().ai?.agents?.researcher as any)?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS; if (expandables.length > maxClicks) { - expandables = await this._selectExpandables(expandables, state.url, maxClicks); + expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks); tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`); } @@ -177,7 +177,14 @@ export function WithDeepAnalysis(Base: T) { visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64')); } - const [textRes, visionRes] = await Promise.all([textCall, visionCall]); + let textRes: { text?: string } | null = null; + let visionRes: { text?: string } | null = null; + try { + [textRes, visionRes] = await Promise.all([textCall, visionCall]); + } catch (err) { + tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`); + return []; + } const eidxSet = new Set(); const parseRefs = (text: string | undefined) => { @@ -244,10 +251,16 @@ export function WithDeepAnalysis(Base: T) { `; const model = this.provider.getModelForAgent('researcher'); - const r = await this.provider.chat([{ role: 'user', content: prompt }], model, { - agentName: 'researcher', - telemetryFunctionId: 'researcher.selectExpandables', - }); + let r: { text?: string }; + try { + r = await this.provider.chat([{ role: 'user', content: prompt }], model, { + agentName: 'researcher', + telemetryFunctionId: 'researcher.selectExpandables', + }); + } catch (err) { + tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`); + return expandables.slice(0, maxClicks); + } const nums = (r.text || '').match(/\d+/g)?.map(Number) || []; const selected = expandables.filter((_, i) => nums.includes(i + 1)); diff --git a/src/ai/session-analyst.ts b/src/ai/session-analyst.ts index c87dc03..a53caa3 100644 --- a/src/ai/session-analyst.ts +++ b/src/ai/session-analyst.ts @@ -120,6 +120,24 @@ export class SessionAnalyst implements Agent { .slice(-30) .map((entry) => ` - [${entry.type}] ${entry.content}`) .join('\n'); + const checked = test.getCheckedExpectations().join(' | ') || '(none)'; + const remaining = test.getRemainingExpectations().join(' | ') || '(none)'; + const notes = test + .getPrintableNotes() + .slice(-12) + .map((note) => ` - ${note}`) + .join('\n'); + const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)'; + const verification = test.verification + ? dedent` + verification_status: ${test.verification.status || 'unknown'} + verification_message: ${test.verification.message || '(none)'} + verification_url: ${test.verification.url || '(none)'} + verification_page: ${test.verification.pageLabel || '(none)'} + verification_details: + ${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')} + ` + : 'verification_status: none'; return dedent` @@ -127,6 +145,12 @@ export class SessionAnalyst implements Agent { scenario: ${test.scenario} result: ${test.result || 'unknown'} expected: ${test.expected.join(' | ') || '(none)'} + checked_expectations: ${checked} + remaining_expectations: ${remaining} + visited_urls: ${visitedUrls} + ${verification} + notes: + ${notes || ' - (none)'} log: ${log} diff --git a/src/ai/tester.ts b/src/ai/tester.ts index 1e8ad0e..65961e4 100644 --- a/src/ai/tester.ts +++ b/src/ai/tester.ts @@ -880,7 +880,9 @@ export class Tester extends TaskAgent implements Agent { await this.explorer.switchToMainFrame(); } - if (this.explorer.getStateManager().getCurrentState()?.url === resetUrl!) { + const currentState = this.explorer.getStateManager().getCurrentState(); + const currentUrl = currentState?.fullUrl || currentState?.url; + if (currentUrl === resetUrl!) { return { success: false, message: 'Reset failed - already on initial page!', diff --git a/src/ai/tools.ts b/src/ai/tools.ts index d4da1fd..c7fe136 100644 --- a/src/ai/tools.ts +++ b/src/ai/tools.ts @@ -854,12 +854,13 @@ export function createAgentTools({ }), execute: async ({ reason }) => { const stateManager = explorer.getStateManager(); - const currentUrl = stateManager.getCurrentState()?.url; + const currentState = stateManager.getCurrentState(); + const currentUrl = currentState?.fullUrl || currentState?.url; const history = stateManager.getStateHistory(); let targetUrl: string | null = null; for (let i = history.length - 1; i >= 0; i--) { - const url = history[i].toState.url; + const url = history[i].toState.fullUrl || history[i].toState.url; if (url !== currentUrl) { targetUrl = url; break; diff --git a/src/commands/explore-command.ts b/src/commands/explore-command.ts index 2c5b81b..3dcec70 100644 --- a/src/commands/explore-command.ts +++ b/src/commands/explore-command.ts @@ -38,6 +38,11 @@ export class ExploreCommand extends BaseCommand { private oldTestRefs = new Set(); private priorityFilter?: Set; + private getCurrentPageUrl(): string | undefined { + const state = this.explorBot.getExplorer().getStateManager().getCurrentState(); + return state?.fullUrl || state?.url; + } + async execute(args: string): Promise { const { opts, args: remaining } = this.parseArgs(args); if (opts.maxTests) { @@ -51,7 +56,7 @@ export class ExploreCommand extends BaseCommand { if (this.dryRun) tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped'); Stats.mode ??= 'explore'; Stats.focus ??= feature; - const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url; + const mainUrl = this.getCurrentPageUrl(); if (cfg.enabled) { await this.runReuseMode(mainUrl, feature, cfg); diff --git a/tests/unit/navigator-origin-guard.test.ts b/tests/unit/navigator-origin-guard.test.ts new file mode 100644 index 0000000..0d1e9da --- /dev/null +++ b/tests/unit/navigator-origin-guard.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from 'bun:test'; +import { Navigator } from '../../src/ai/navigator.ts'; + +describe('Navigator origin guard', () => { + function createNavigator(baseUrl = 'http://192.168.1.162:3000') { + const navigator = Object.create(Navigator.prototype) as Navigator & { explorer: any }; + navigator.explorer = { + getConfig: () => ({ + playwright: { + url: baseUrl, + }, + }), + }; + return navigator; + } + + it('rejects external origins for relative expected URLs', () => { + const navigator = createNavigator(); + const stateManager = { + getCurrentState: () => ({ + url: '/', + fullUrl: 'https://your-domain.com/', + }), + }; + + expect((navigator as any).isOnExpectedPage('/', stateManager)).toBe(false); + }); + + it('accepts the configured origin for relative expected URLs', () => { + const navigator = createNavigator(); + const stateManager = { + getCurrentState: () => ({ + url: '/', + fullUrl: 'http://192.168.1.162:3000/', + }), + }; + + expect((navigator as any).isOnExpectedPage('/', stateManager)).toBe(true); + }); +}); From a75fd4b487cb141d47bb80a39a84c0888ab58303 Mon Sep 17 00:00:00 2001 From: Denys Kuchma Date: Thu, 21 May 2026 22:56:43 +0300 Subject: [PATCH 2/4] fix --- bin/explorbot-cli.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bin/explorbot-cli.ts b/bin/explorbot-cli.ts index d2b1176..cdab5ca 100755 --- a/bin/explorbot-cli.ts +++ b/bin/explorbot-cli.ts @@ -43,10 +43,7 @@ interface CLIOptions { } function buildExplorBotOptions(from: string | undefined, options: CLIOptions): ExplorBotOptions { - const sessionFile = - options.session === true - ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') - : options.session; + const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session; return { from, From b4a2adc7fa23d9fb76a0e81faa8e1632f1afe45e Mon Sep 17 00:00:00 2001 From: Denys Kuchma Date: Thu, 21 May 2026 23:00:49 +0300 Subject: [PATCH 3/4] fix flaky test --- tests/unit/action-result-diff.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/action-result-diff.test.ts b/tests/unit/action-result-diff.test.ts index a6f98df..d91088f 100644 --- a/tests/unit/action-result-diff.test.ts +++ b/tests/unit/action-result-diff.test.ts @@ -4,7 +4,8 @@ import { ConfigParser } from '../../src/config.ts'; describe('ActionResult Diff', () => { beforeEach(async () => { - await ConfigParser.getInstance().loadConfig('./explorbot.config.js'); + ConfigParser.resetForTesting(); + await ConfigParser.getInstance().loadConfig({ config: './explorbot.config.js' }); }); test('should create diff with previous state', () => { const previous = new ActionResult({ From ffbed21ee239a4b44c4c29d72164cd8b88041e56 Mon Sep 17 00:00:00 2001 From: Denys Kuchma Date: Thu, 21 May 2026 23:12:29 +0300 Subject: [PATCH 4/4] fix flaky test --- tests/unit/action-result-diff.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/action-result-diff.test.ts b/tests/unit/action-result-diff.test.ts index d91088f..27ef371 100644 --- a/tests/unit/action-result-diff.test.ts +++ b/tests/unit/action-result-diff.test.ts @@ -3,9 +3,9 @@ import { ActionResult, Diff } from '../../src/action-result.ts'; import { ConfigParser } from '../../src/config.ts'; describe('ActionResult Diff', () => { - beforeEach(async () => { + beforeEach(() => { ConfigParser.resetForTesting(); - await ConfigParser.getInstance().loadConfig({ config: './explorbot.config.js' }); + ConfigParser.setupTestConfig(); }); test('should create diff with previous state', () => { const previous = new ActionResult({