Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions pkg/templates/python/cua/providers/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,10 @@ async def _execute_action(
await asyncio.sleep(1.5)

elif name == "key_combination":
combo = args.get("key_combination", "")
parts = [k.strip() for k in combo.split("+")]
# Gemini sends the combo as a single "+"-joined string in `keys`.
if "keys" not in args:
return {"error": "key_combination requires keys"}
parts = [k.strip() for k in str(args["keys"]).split("+")]
hold_keys = parts[:-1] if len(parts) > 1 else []
keys = parts[-1:] if parts else []
kwargs: dict = {"keys": keys or parts}
Expand All @@ -222,10 +224,11 @@ async def _execute_action(
)

elif name == "drag_and_drop":
sx = self._denorm(args.get("start_x"), width)
sy = self._denorm(args.get("start_y"), height)
ex = self._denorm(args.get("end_x"), width)
ey = self._denorm(args.get("end_y"), height)
# Gemini's drag schema uses x/y for the start and destination_x/destination_y for the end.
sx = self._denorm(args.get("x"), width)
sy = self._denorm(args.get("y"), height)
ex = self._denorm(args.get("destination_x"), width)
ey = self._denorm(args.get("destination_y"), height)
await asyncio.to_thread(
computer.drag_mouse, options.session_id, path=[[sx, sy], [ex, ey]],
)
Expand Down
17 changes: 10 additions & 7 deletions pkg/templates/python/cua/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,26 @@ async def stop(self) -> SessionInfo:
info = self.info

if self._session_id:
session_id = self._session_id
try:
if self.opts.record_replay and self._replay_id:
if self.opts.replay_grace_period > 0:
await asyncio.sleep(self.opts.replay_grace_period)
await self._stop_replay()
info.replay_view_url = self._replay_view_url
finally:
print(f"Destroying browser session: {self._session_id}")
# Reset state up front so that if browser deletion or a thrown replay
# error propagates, a follow-up stop() call from the caller's error path
# is a no-op instead of attempting to delete the same session twice.
self._session_id = None
self._live_view_url = None
self._replay_id = None
self._replay_view_url = None
print(f"Destroying browser session: {session_id}")
await asyncio.to_thread(
self.kernel.browsers.delete_by_id, self._session_id,
self.kernel.browsers.delete_by_id, session_id,
)

self._session_id = None
self._live_view_url = None
self._replay_id = None
self._replay_view_url = None

return info

async def _stop_replay(self) -> None:
Expand Down
22 changes: 11 additions & 11 deletions pkg/templates/typescript/cua/providers/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,11 @@ interface GeminiArgs {
y?: number;
text?: string;
url?: string;
key_combination?: string;
keys?: string;
direction?: string;
magnitude?: number;
start_x?: number;
start_y?: number;
end_x?: number;
end_y?: number;
destination_x?: number;
destination_y?: number;
safety_decision?: { decision: string; explanation?: string };
[key: string]: unknown;
}
Expand Down Expand Up @@ -221,8 +219,9 @@ export class GeminiProvider implements CuaProvider {
break;
}
case 'key_combination': {
const combo = args.key_combination ?? '';
const parts = combo.split('+').map(k => k.trim());
// Gemini sends the combo as a single "+"-joined string in `keys`.
if (!args.keys) return { error: 'key_combination requires keys' };
const parts = args.keys.split('+').map(k => k.trim());
const holdKeys = parts.slice(0, -1);
const keys = parts.slice(-1);
await computer.pressKey(sessionId, {
Expand All @@ -232,10 +231,11 @@ export class GeminiProvider implements CuaProvider {
break;
}
case 'drag_and_drop': {
const sx = this.denormalize(args.start_x, width);
const sy = this.denormalize(args.start_y, height);
const ex = this.denormalize(args.end_x, width);
const ey = this.denormalize(args.end_y, height);
// Gemini's drag schema uses x/y for the start and destination_x/destination_y for the end.
const sx = this.denormalize(args.x, width);
const sy = this.denormalize(args.y, height);
const ex = this.denormalize(args.destination_x, width);
const ey = this.denormalize(args.destination_y, height);
await computer.dragMouse(sessionId, { path: [[sx, sy], [ex, ey]] });
break;
}
Expand Down
17 changes: 10 additions & 7 deletions pkg/templates/typescript/cua/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ export class KernelBrowserSession {
const info = this.info;

if (this._sessionId) {
const sessionId = this._sessionId;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Second stop() call throws instead of being a no-op

Medium Severity

The `stop()` method reads `this.info` (TS) / `self.info` (Python) unconditionally at the top, before the `if (this._sessionId)` guard. The `info` getter delegates to the `sessionId` getter, which throws when `_sessionId` is null. Because the newly added state reset in `finally` sets `_sessionId` to null, a follow-up `stop()` call from the caller's error path throws "Session not started" instead of being the safe no-op described in the code comment and PR description: the `if` guard that would make it a no-op is never reached.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 1c6b554. Configure here.

try {
if (this.opts.recordReplay && this._replayId) {
if (this.opts.replayGracePeriod > 0) {
Expand All @@ -115,16 +116,18 @@ export class KernelBrowserSession {
info.replayViewUrl = this._replayViewUrl || undefined;
}
} finally {
console.log(`Destroying browser session: ${this._sessionId}`);
await this.kernel.browsers.deleteByID(this._sessionId);
// Reset state up front so that if browser deletion or a thrown replay error
// propagates, a follow-up stop() call from the caller's error path is a no-op
// instead of attempting to delete the same session twice.
this._sessionId = null;
this._liveViewUrl = null;
this._replayId = null;
this._replayViewUrl = null;
console.log(`Destroying browser session: ${sessionId}`);
await this.kernel.browsers.deleteByID(sessionId);
}
}

this._sessionId = null;
this._liveViewUrl = null;
this._replayId = null;
this._replayViewUrl = null;

return info;
}

Expand Down