Skip to content

Commit 04a3c61

Browse files
committed
Fix key translation for goto/back batch actions
1 parent 6f67377 commit 04a3c61

File tree

3 files changed, with 14 additions and 60 deletions.

pkg/templates/python/openai-computer-use/agent/agent.py

Lines changed: 8 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
import json
22
import time
33
from typing import Any, Callable
4-
from computers.kernel_computer import KernelComputer
4+
from computers.kernel_computer import (
5+
KernelComputer,
6+
_describe_action,
7+
_describe_batch_actions,
8+
)
59
from utils import (
610
create_response,
711
show_image,
@@ -186,56 +190,6 @@ def _extract_prompt_text(self, item: dict[str, Any]) -> str | None:
186190
parts.append(text)
187191
return " ".join(parts) if parts else None
188192

189-
def _describe_action(self, action_type: str, action_args: dict[str, Any]) -> str:
190-
if action_type == "click":
191-
x = int(action_args.get("x", 0))
192-
y = int(action_args.get("y", 0))
193-
button = action_args.get("button", "left")
194-
if button in ("", "left"):
195-
return f"click({x}, {y})"
196-
return f"click({x}, {y}, {button})"
197-
if action_type == "double_click":
198-
return f"double_click({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))})"
199-
if action_type == "type":
200-
text = str(action_args.get("text", ""))
201-
if len(text) > 60:
202-
text = f"{text[:57]}..."
203-
return f"type({text!r})"
204-
if action_type == "keypress":
205-
keys = action_args.get("keys", [])
206-
hold_keys = action_args.get("hold_keys", [])
207-
if hold_keys:
208-
return f"keypress(hold={hold_keys}, keys={keys})"
209-
return f"keypress({keys})"
210-
if action_type == "scroll":
211-
return (
212-
f"scroll({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))}, "
213-
f"dx={int(action_args.get('scroll_x', 0))}, dy={int(action_args.get('scroll_y', 0))})"
214-
)
215-
if action_type == "move":
216-
return f"move({int(action_args.get('x', 0))}, {int(action_args.get('y', 0))})"
217-
if action_type == "drag":
218-
return "drag(...)"
219-
if action_type == "wait":
220-
return f"wait({int(action_args.get('ms', 1000))}ms)"
221-
if action_type == "goto":
222-
return f"goto({action_args.get('url', '')!r})"
223-
if action_type == "back":
224-
return "back()"
225-
if action_type == "url":
226-
return "url()"
227-
if action_type == "screenshot":
228-
return "screenshot()"
229-
return action_type
230-
231-
def _describe_batch_actions(self, actions: list[dict[str, Any]]) -> str:
232-
pieces: list[str] = []
233-
for action in actions:
234-
action_type = str(action.get("type", "unknown"))
235-
action_args = {k: v for k, v in action.items() if k != "type"}
236-
pieces.append(self._describe_action(action_type, action_args))
237-
return "batch[" + " -> ".join(pieces) + "]"
238-
239193
def _batch_terminal_read_action(self, actions: list[dict[str, Any]]) -> str:
240194
if not actions:
241195
return ""
@@ -269,7 +223,7 @@ def handle_item(self, item):
269223
typed_actions = [a for a in actions if isinstance(a, dict)]
270224
payload = {
271225
"action_type": "batch",
272-
"description": self._describe_batch_actions(typed_actions),
226+
"description": _describe_batch_actions(typed_actions),
273227
"action": {"type": "batch", "actions": typed_actions},
274228
}
275229
if elapsed_ms is not None:
@@ -315,14 +269,14 @@ def handle_item(self, item):
315269
if len(typed_actions) == 1:
316270
action_type = str(typed_actions[0].get("type", "unknown"))
317271
action_payload: dict[str, Any] = typed_actions[0]
318-
description = self._describe_action(
272+
description = _describe_action(
319273
action_type,
320274
{k: v for k, v in typed_actions[0].items() if k != "type"},
321275
)
322276
else:
323277
action_type = "batch"
324278
action_payload = {"type": "batch", "actions": typed_actions}
325-
description = self._describe_batch_actions(typed_actions)
279+
description = _describe_batch_actions(typed_actions)
326280

327281
payload = {
328282
"action_type": action_type,

pkg/templates/python/openai-computer-use/computers/kernel_computer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -189,15 +189,15 @@ def _goto_batch_actions(url: str) -> List[Dict[str, Any]]:
189189
return [
190190
{
191191
"type": "press_key",
192-
"press_key": {"hold_keys": ["Ctrl"], "keys": ["l"]},
192+
"press_key": {"hold_keys": ["Control_L"], "keys": ["l"]},
193193
},
194194
{
195195
"type": "sleep",
196196
"sleep": {"duration_ms": GOTO_CHORD_DELAY_MS},
197197
},
198198
{
199199
"type": "press_key",
200-
"press_key": {"hold_keys": ["Ctrl"], "keys": ["a"]},
200+
"press_key": {"hold_keys": ["Control_L"], "keys": ["a"]},
201201
},
202202
{
203203
"type": "type_text",
@@ -214,7 +214,7 @@ def _back_batch_actions() -> List[Dict[str, Any]]:
214214
return [
215215
{
216216
"type": "press_key",
217-
"press_key": {"hold_keys": ["Alt"], "keys": ["Left"]},
217+
"press_key": {"hold_keys": ["Alt_L"], "keys": ["Left"]},
218218
}
219219
]
220220

pkg/templates/typescript/openai-computer-use/lib/kernel-computer.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -220,17 +220,17 @@ function isBatchComputerActionType(actionType: string): boolean {
220220

221221
function gotoBatchActions(url: string): BatchAction[] {
222222
return [
223-
{ type: 'press_key', press_key: { hold_keys: ['Ctrl'], keys: ['l'] } },
223+
{ type: 'press_key', press_key: { hold_keys: ['Control_L'], keys: ['l'] } },
224224
{ type: 'sleep', sleep: { duration_ms: GOTO_CHORD_DELAY_MS } },
225-
{ type: 'press_key', press_key: { hold_keys: ['Ctrl'], keys: ['a'] } },
225+
{ type: 'press_key', press_key: { hold_keys: ['Control_L'], keys: ['a'] } },
226226
{ type: 'type_text', type_text: { text: url } },
227227
{ type: 'press_key', press_key: { keys: ['Return'] } },
228228
];
229229
}
230230

231231
function backBatchActions(): BatchAction[] {
232232
return [
233-
{ type: 'press_key', press_key: { hold_keys: ['Alt'], keys: ['Left'] } },
233+
{ type: 'press_key', press_key: { hold_keys: ['Alt_L'], keys: ['Left'] } },
234234
];
235235
}
236236

0 commit comments

Comments
 (0)