Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bin/explorbot-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ interface CLIOptions {
}

function buildExplorBotOptions(from: string | undefined, options: CLIOptions): ExplorBotOptions {
const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session;

return {
from,
verbose: options.verbose || options.debug,
Expand All @@ -51,7 +53,7 @@ function buildExplorBotOptions(from: string | undefined, options: CLIOptions): E
show: options.show,
headless: options.headless,
incognito: options.incognito,
session: options.session === true ? 'output/session.json' : options.session,
session: sessionFile,
} as ExplorBotOptions;
}

Expand Down
45 changes: 43 additions & 2 deletions src/ai/navigator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,48 @@ class Navigator implements Agent {
this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
}

/**
 * Derives the origin of the base URL configured under `playwright.url`.
 *
 * @returns The `origin` of the configured URL, or `null` when the `URL`
 *          constructor rejects the configured value.
 */
private getBaseOrigin(): string | null {
  const configuredUrl = this.explorer.getConfig().playwright.url;
  let origin: string | null;
  try {
    origin = new URL(configuredUrl).origin;
  } catch {
    // The configured value is not parseable as an absolute URL: report "no origin".
    origin = null;
  }
  return origin;
}

/**
 * Picks the form of the current page URL that should be compared with `expectedUrl`.
 *
 * When `expectedUrl` starts with `http://` or `https://` (case-insensitive), the
 * state's `fullUrl` is preferred and `url` is the fallback; otherwise only `url`
 * is used.
 *
 * @param stateManager Object exposing `getCurrentState()`.
 * @param expectedUrl  The URL the caller expects to be on.
 * @returns The selected URL, or an empty string when there is no current state
 *          or the selected field is empty.
 */
private getComparableCurrentUrl(stateManager: any, expectedUrl: string): string {
  const state = stateManager.getCurrentState();
  if (!state) return '';

  if (/^https?:\/\//i.test(expectedUrl)) {
    return state.fullUrl || state.url || '';
  }
  return state.url || '';
}

/**
 * Checks whether the current page lives on the origin implied by `expectedUrl`.
 *
 * - Absolute `expectedUrl` (`http://` / `https://`): the current origin must equal
 *   the origin of `expectedUrl`.
 * - Any other `expectedUrl`: the current origin must equal the configured base
 *   origin; when no base origin is available the check passes.
 *
 * @param expectedUrl  The URL the caller expects to be on.
 * @param stateManager Object exposing `getCurrentState()`.
 * @returns `false` when there is no current state or it carries no URL; otherwise
 *          the result of the origin comparison described above.
 */
private isSameExpectedOrigin(expectedUrl: string, stateManager: any): boolean {
  const state = stateManager.getCurrentState();
  if (!state) return false;

  const liveUrl = state.fullUrl || state.url || '';
  if (!liveUrl) return false;

  const expectedIsAbsolute = /^https?:\/\//i.test(expectedUrl);

  try {
    const liveOrigin = new URL(liveUrl).origin;

    if (expectedIsAbsolute) {
      return liveOrigin === new URL(expectedUrl).origin;
    }

    const baseOrigin = this.getBaseOrigin();
    return !baseOrigin || liveOrigin === baseOrigin;
  } catch {
    // URL parsing failed (e.g. the state only holds a relative URL): no origin can
    // be compared, so pass for relative expectations and fail for absolute ones.
    return !expectedIsAbsolute;
  }
}

/**
 * Tells whether the current page is the one identified by `expectedUrl`.
 *
 * The origin guard runs first, so a page on a different origin never matches even
 * if its URL looks identical to `expectedUrl`. Only then are the normalized URLs
 * compared.
 *
 * @param expectedUrl  The URL the caller expects to be on (absolute or relative).
 * @param stateManager Object exposing `getCurrentState()`.
 * @returns `true` when the origin check passes and the normalized URLs are equal.
 */
private isOnExpectedPage(expectedUrl: string, stateManager: any): boolean {
  if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) {
    return false;
  }
  // Single declaration of currentUrl: the earlier `getCurrentState()?.url` read
  // duplicated this binding (a redeclaration error) and ignored `fullUrl`.
  const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl);
  return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
}

Expand Down Expand Up @@ -282,7 +322,8 @@ class Navigator implements Agent {
}
}
const freshState = await action.capturePageState();
const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl);
const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || '';
const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
resolved = urlMatches && stateChanged;

Expand Down
5 changes: 5 additions & 0 deletions src/ai/pilot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,9 @@ export class Pilot implements Agent {
overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always
cross-check against actual actions and state. Visual screenshot analysis is strong for UI state
(active tabs, visible counts, colors).
If the final page clearly shows an equivalent success state in a different UI form, do not fail only
because one narrow assertion targeted a specific badge, count, toast, or wording that the product
represents differently.

SCENARIO TITLE defines what must happen. Action verbs require persisted evidence:
- "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough.
Expand Down Expand Up @@ -355,6 +358,8 @@ export class Pilot implements Agent {

GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what
to verify, how to record. Do not suggest repeating actions that already succeeded.
If progress is blocked only because the page lacks target data for the scenario, prefer precondition()
over repeated UI attempts.
`;
}

Expand Down
26 changes: 22 additions & 4 deletions src/ai/planner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ export class Planner extends PlannerBase implements Agent {
return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS);
}

/**
 * Chooses the start URL to fall back on for a page state.
 *
 * @param state Page state carrying `url` and, optionally, `fullUrl`.
 * @returns `state.fullUrl` when it is set and non-empty, otherwise `state.url`.
 */
private getDefaultStartUrl(state: { url: string; fullUrl?: string }): string {
  const preferred = state.fullUrl;
  return preferred ? preferred : state.url;
}

getSystemMessage(feature?: string): string {
const currentUrl = this.stateManager.getCurrentState()?.url;
const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl);
Expand Down Expand Up @@ -187,7 +191,8 @@ export class Planner extends PlannerBase implements Agent {
throw new Error('No tasks were created successfully');
}

const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || state.url, s.steps || []));
const defaultStartUrl = this.getDefaultStartUrl(state);
const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || []));

return { tests: fromPlanning, planName: aiResult.object.planName };
});
Expand All @@ -199,21 +204,22 @@ export class Planner extends PlannerBase implements Agent {
const cached = state.url ? getRegisteredPlan(state.url) : null;
const planName = feature || cached?.plan.title || result.planName || state.url;
this.currentPlan = new Plan(planName);
this.currentPlan.url = state.url;
this.currentPlan.url = this.getDefaultStartUrl(state);
const defaultStartUrl = this.getDefaultStartUrl(state);
if (parentPlan) this.currentPlan.parentPlan = parentPlan;
const allPreviousScenarios = this.getPreviousSessionScenarios();
const existingTestScenarios = this.getExistingTestFileScenarios(state.url);
for (const s of existingTestScenarios) allPreviousScenarios.add(s);
for (const t of tests) {
if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue;
t.style = this.lastStyleName;
t.startUrl = state.url;
t.startUrl = defaultStartUrl;
this.currentPlan.addTest(t);
}
} else {
tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
this.currentPlan.nextIteration();
const newTests = this.addNewTests(tests, state.url);
const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state));
if (newTests.length > 0) {
const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`;
tag('multiline').log(summary);
Expand Down Expand Up @@ -330,6 +336,13 @@ export class Planner extends PlannerBase implements Agent {
Focus on URL page change or data persistency after page reload.
If there are subpages (pages with same URL path) plan testing of those subpages as well
If you plan to test CRUD operations, plan them in correct order: create, read, update.
Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows.
If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text.
If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug.
Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context.
If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it.
If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state.
Do not assume hidden data exists just because a control is present.
DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists.
Every test must complete a meaningful action that changes application state or produces a business outcome.
Opening a modal is NOT a test — performing an action INSIDE the modal IS a test.
Expand Down Expand Up @@ -565,10 +578,15 @@ export class Planner extends PlannerBase implements Agent {
- Good: "New suite 'My New Suite' appears in the suite list"
- Good: "Suite appears under Starred filter tab"
- Good: "Success message 'Suite created' is displayed"
- Good when wording is unknown: "An authentication error is displayed"
- Good when route is unknown: "The workspace home page is displayed"
- Bad: "Modal is displayed" (just verifying existence, no business value)
- Bad: "Dropdown menu is visible" (just verifying existence)
- Bad: "Welcome message is displayed" if no welcome message is visible in research
- Bad: "Redirected to /dashboard" if no such route was observed
- Each outcome should be independently verifiable
- Avoid combining multiple checks into one outcome
- Prefer durable user-facing results over fragile micro-signals
- Expected outcomes describe WHAT TO VERIFY

FORMATTING RULES:
Expand Down
8 changes: 6 additions & 2 deletions src/ai/researcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ export class Researcher extends ResearcherBase implements Agent {
tag('info').log(`Researching ${displayUrl} to understand the context...`);
setActivity(`${this.emoji} Researching...`, 'action');

await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision());
await this.hooksRunner.runBeforeHook('researcher', state.url);

const annotatedElements = await this.explorer.annotateElements();
Expand Down Expand Up @@ -285,7 +285,11 @@ export class Researcher extends ResearcherBase implements Agent {
}

if (!interrupted() && deep) {
await this.performDeepAnalysis(state, result);
try {
await this.performDeepAnalysis(state, result);
} catch (err) {
tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`);
}
}

if (!interrupted() && data) {
Expand Down
27 changes: 20 additions & 7 deletions src/ai/researcher/deep-analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {

async performDeepAnalysis(state: WebPageState, result: ResearchResult): Promise<void> {
tag('info').log('Starting deep analysis of expandable elements');
await (this as any).navigateTo(state.url);
await (this as any).navigateTo(state.fullUrl || state.url);

let expandables = await this._discoverExpandables(result.text);
if (expandables.length === 0) {
Expand All @@ -35,7 +35,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {

const maxClicks = (this.explorer.getConfig().ai?.agents?.researcher as any)?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS;
if (expandables.length > maxClicks) {
expandables = await this._selectExpandables(expandables, state.url, maxClicks);
expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks);
tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`);
}

Expand Down Expand Up @@ -177,7 +177,14 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64'));
}

const [textRes, visionRes] = await Promise.all([textCall, visionCall]);
let textRes: { text?: string } | null = null;
let visionRes: { text?: string } | null = null;
try {
[textRes, visionRes] = await Promise.all([textCall, visionCall]);
} catch (err) {
tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`);
return [];
}

const eidxSet = new Set<string>();
const parseRefs = (text: string | undefined) => {
Expand Down Expand Up @@ -244,10 +251,16 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
`;

const model = this.provider.getModelForAgent('researcher');
const r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
agentName: 'researcher',
telemetryFunctionId: 'researcher.selectExpandables',
});
let r: { text?: string };
try {
r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
agentName: 'researcher',
telemetryFunctionId: 'researcher.selectExpandables',
});
} catch (err) {
tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`);
return expandables.slice(0, maxClicks);
}

const nums = (r.text || '').match(/\d+/g)?.map(Number) || [];
const selected = expandables.filter((_, i) => nums.includes(i + 1));
Expand Down
24 changes: 24 additions & 0 deletions src/ai/session-analyst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,37 @@ export class SessionAnalyst implements Agent {
.slice(-30)
.map((entry) => ` - [${entry.type}] ${entry.content}`)
.join('\n');
const checked = test.getCheckedExpectations().join(' | ') || '(none)';
const remaining = test.getRemainingExpectations().join(' | ') || '(none)';
const notes = test
.getPrintableNotes()
.slice(-12)
.map((note) => ` - ${note}`)
.join('\n');
const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)';
const verification = test.verification
? dedent`
verification_status: ${test.verification.status || 'unknown'}
verification_message: ${test.verification.message || '(none)'}
verification_url: ${test.verification.url || '(none)'}
verification_page: ${test.verification.pageLabel || '(none)'}
verification_details:
${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')}
`
: 'verification_status: none';

return dedent`
<test ref="#${ref}">
url: ${test.startUrl || '/'}
scenario: ${test.scenario}
result: ${test.result || 'unknown'}
expected: ${test.expected.join(' | ') || '(none)'}
checked_expectations: ${checked}
remaining_expectations: ${remaining}
visited_urls: ${visitedUrls}
${verification}
notes:
${notes || ' - (none)'}
log:
${log}
</test>
Expand Down
4 changes: 3 additions & 1 deletion src/ai/tester.ts
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,9 @@ export class Tester extends TaskAgent implements Agent {
await this.explorer.switchToMainFrame();
}

if (this.explorer.getStateManager().getCurrentState()?.url === resetUrl!) {
const currentState = this.explorer.getStateManager().getCurrentState();
const currentUrl = currentState?.fullUrl || currentState?.url;
if (currentUrl === resetUrl!) {
return {
success: false,
message: 'Reset failed - already on initial page!',
Expand Down
5 changes: 3 additions & 2 deletions src/ai/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -854,12 +854,13 @@ export function createAgentTools({
}),
execute: async ({ reason }) => {
const stateManager = explorer.getStateManager();
const currentUrl = stateManager.getCurrentState()?.url;
const currentState = stateManager.getCurrentState();
const currentUrl = currentState?.fullUrl || currentState?.url;
const history = stateManager.getStateHistory();

let targetUrl: string | null = null;
for (let i = history.length - 1; i >= 0; i--) {
const url = history[i].toState.url;
const url = history[i].toState.fullUrl || history[i].toState.url;
if (url !== currentUrl) {
targetUrl = url;
break;
Expand Down
7 changes: 6 additions & 1 deletion src/commands/explore-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ export class ExploreCommand extends BaseCommand {
private oldTestRefs = new Set<Test>();
private priorityFilter?: Set<string>;

/**
 * Reads the URL of the page the explorer is currently on.
 *
 * @returns The current state's `fullUrl` when set, otherwise its `url`;
 *          `undefined` when there is no current state.
 */
private getCurrentPageUrl(): string | undefined {
  const stateManager = this.explorBot.getExplorer().getStateManager();
  const current = stateManager.getCurrentState();
  if (!current) return undefined;
  return current.fullUrl || current.url;
}

async execute(args: string): Promise<void> {
const { opts, args: remaining } = this.parseArgs(args);
if (opts.maxTests) {
Expand All @@ -51,7 +56,7 @@ export class ExploreCommand extends BaseCommand {
if (this.dryRun) tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
Stats.mode ??= 'explore';
Stats.focus ??= feature;
const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
const mainUrl = this.getCurrentPageUrl();

if (cfg.enabled) {
await this.runReuseMode(mainUrl, feature, cfg);
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/action-result-diff.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ import { ActionResult, Diff } from '../../src/action-result.ts';
import { ConfigParser } from '../../src/config.ts';

describe('ActionResult Diff', () => {
beforeEach(async () => {
await ConfigParser.getInstance().loadConfig('./explorbot.config.js');
beforeEach(() => {
ConfigParser.resetForTesting();
ConfigParser.setupTestConfig();
});
test('should create diff with previous state', () => {
const previous = new ActionResult({
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/navigator-origin-guard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { describe, expect, it } from 'bun:test';
import { Navigator } from '../../src/ai/navigator.ts';

describe('Navigator origin guard', () => {
  // Builds a Navigator without running its constructor; only the config lookup
  // that the origin guard reads (`explorer.getConfig().playwright.url`) is stubbed.
  function createNavigator(baseUrl = 'http://192.168.1.162:3000') {
    const explorer = {
      getConfig: () => ({
        playwright: {
          url: baseUrl,
        },
      }),
    };
    const navigator = Object.create(Navigator.prototype) as Navigator & { explorer: any };
    navigator.explorer = explorer;
    return navigator;
  }

  // Minimal state manager stub: the page is at path '/' with the given absolute URL.
  function stateManagerAt(fullUrl: string) {
    return {
      getCurrentState: () => ({
        url: '/',
        fullUrl,
      }),
    };
  }

  it('rejects external origins for relative expected URLs', () => {
    const navigator = createNavigator();
    const onExpectedPage = (navigator as any).isOnExpectedPage('/', stateManagerAt('https://your-domain.com/'));

    expect(onExpectedPage).toBe(false);
  });

  it('accepts the configured origin for relative expected URLs', () => {
    const navigator = createNavigator();
    const onExpectedPage = (navigator as any).isOnExpectedPage('/', stateManagerAt('http://192.168.1.162:3000/'));

    expect(onExpectedPage).toBe(true);
  });
});
Loading