From 3d2077724b47aa1abf4af0e98d1bd36a241cb8f4 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 17 Oct 2025 15:21:12 -0400 Subject: [PATCH 1/2] docs for computer controls --- browsers/computer-controls.mdx | 362 +++++++++++++++++++++++++++++++++ docs.json | 3 +- 2 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 browsers/computer-controls.mdx diff --git a/browsers/computer-controls.mdx b/browsers/computer-controls.mdx new file mode 100644 index 0000000..6c8a5ef --- /dev/null +++ b/browsers/computer-controls.mdx @@ -0,0 +1,362 @@ +--- +title: "Computer Controls" +description: "Control the computer's mouse, keyboard, and screen" +--- + +Use OS-level controls to move and click the mouse, type and press keys, scroll, drag, and capture screenshots from a running browser session. + +## Click the mouse + +Simulate mouse clicks at specific coordinates. You can select the button, click type (down, up, click), number of clicks, and optional modifier keys to hold. + + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +// Basic left click at (100, 200) +await kernel.browsers.computer.clickMouse(kernelBrowser.session_id, { + x: 100, + y: 200, +}); + +// Double right-click while holding Shift +await kernel.browsers.computer.clickMouse(kernelBrowser.session_id, { + x: 100, + y: 200, + button: 'right', + click_type: 'click', + num_clicks: 2, + hold_keys: ['Shift'], +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +# Basic left click at (100, 200) +client.browsers.computer.click_mouse( + id=kernel_browser.session_id, + x=100, + y=200, +) + +# Double right-click while holding Shift +client.browsers.computer.click_mouse( + id=kernel_browser.session_id, + x=100, + y=200, + button="right", + click_type="click", + num_clicks=2, + hold_keys=["Shift"], +) +``` + +```bash CLI +# 
Click the mouse at coordinates (100, 200) +kernel browsers computer click-mouse --x 100 --y 200 + +# Double-click the right mouse button +kernel browsers computer click-mouse --x 100 --y 200 --num-clicks 2 --button right +``` + + +## Move the mouse + +Move the cursor to specific screen coordinates. Optionally hold modifier keys during the move. + + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +await kernel.browsers.computer.moveMouse(kernelBrowser.session_id, { + x: 500, + y: 300, + hold_keys: ['Alt'], +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +client.browsers.computer.move_mouse( + id=kernel_browser.session_id, + x=500, + y=300, + hold_keys=["Alt"], +) +``` + +```bash CLI +# Move the mouse to coordinates (500, 300) +kernel browsers computer move-mouse --x 500 --y 300 +``` + + +## Take screenshots + +Capture a full-screen PNG or a specific region. 
+ + +```typescript Typescript/Javascript +import fs from 'fs'; +import { Buffer } from 'buffer'; +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +// Full screenshot +{ + const response = await kernel.browsers.computer.captureScreenshot(kernelBrowser.session_id); + const blob = await response.blob(); + const buffer = Buffer.from(await blob.arrayBuffer()); + fs.writeFileSync('screenshot.png', buffer); +} + +// Region screenshot +{ + const response = await kernel.browsers.computer.captureScreenshot(kernelBrowser.session_id, { + region: { x: 0, y: 0, width: 800, height: 600 }, + }); + const blob = await response.blob(); + const buffer = Buffer.from(await blob.arrayBuffer()); + fs.writeFileSync('region.png', buffer); +} +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +# Full screenshot +with open('screenshot.png', 'wb') as f: + image_data = client.browsers.computer.capture_screenshot(id=kernel_browser.session_id) + f.write(image_data.read()) + +# Region screenshot +with open('region.png', 'wb') as f: + image_data = client.browsers.computer.capture_screenshot( + id=kernel_browser.session_id, + region={"x": 0, "y": 0, "width": 800, "height": 600}, + ) + f.write(image_data.read()) +``` + +```bash CLI +# Take a full screenshot +kernel browsers computer screenshot --to screenshot.png + +# Take a screenshot of a specific region +kernel browsers computer screenshot --to region.png --x 0 --y 0 --width 800 --height 600 +``` + + +## Type text + +Type literal text, optionally with a delay in milliseconds between keystrokes. 
+ + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +await kernel.browsers.computer.typeText(kernelBrowser.session_id, { + text: 'Hello, World!', +}); + +await kernel.browsers.computer.typeText(kernelBrowser.session_id, { + text: 'Slow typing...', + delay: 100, +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +client.browsers.computer.type_text( + id=kernel_browser.session_id, + text="Hello, World!", +) + +client.browsers.computer.type_text( + id=kernel_browser.session_id, + text="Slow typing...", + delay=100, +) +``` + +```bash CLI +# Type text in the browser +kernel browsers computer type --text "Hello, World!" + +# Type text with a 100ms delay between keystrokes +kernel browsers computer type --text "Slow typing..." --delay 100 +``` + + +## Press keys + +Press one or more key symbols (including combinations like "Ctrl+t" or "Ctrl+Shift+Tab"). Optionally hold additional modifier keys and/or set a duration (in milliseconds) to keep the keys pressed. 
+ + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +// Tap a key combination +await kernel.browsers.computer.pressKey(kernelBrowser.session_id, { + keys: ['Ctrl+t'], +}); + +// Hold keys for 250ms while also holding Alt +await kernel.browsers.computer.pressKey(kernelBrowser.session_id, { + keys: ['Ctrl+Shift+Tab'], + duration: 250, + hold_keys: ['Alt'], +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +# Tap a key combination +client.browsers.computer.press_key( + id=kernel_browser.session_id, + keys=["Ctrl+t"], +) + +# Hold keys for 250ms while also holding Alt +client.browsers.computer.press_key( + id=kernel_browser.session_id, + keys=["Ctrl+Shift+Tab"], + duration=250, + hold_keys=["Alt"], +) +``` + +```bash CLI +# Press one or more keys (repeatable --key) +kernel browsers computer press-key --key Ctrl+t + +# Hold for a duration and add optional modifiers +kernel browsers computer press-key --key Ctrl+Shift+Tab --duration 250 --hold-key Alt +``` + + +## Scroll + +Scroll the mouse wheel at a specific position. Positive `delta_y` scrolls down; negative scrolls up. Positive `delta_x` scrolls right; negative scrolls left. 
+ + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +await kernel.browsers.computer.scroll(kernelBrowser.session_id, { + x: 300, + y: 400, + delta_x: 0, + delta_y: 120, +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +client.browsers.computer.scroll( + id=kernel_browser.session_id, + x=300, + y=400, + delta_x=0, + delta_y=120, +) +``` + +```bash CLI +# Scroll at a position +kernel browsers computer scroll --x 300 --y 400 --delta-y 120 +``` + + +## Drag the mouse + +Drag by pressing a button, moving along a path of points, then releasing. You can set a delay before the drag starts, control the granularity and speed of the drag via `steps_per_segment` and `step_delay_ms`, and optionally hold modifier keys. + + +```typescript Typescript/Javascript +import { Kernel } from '@onkernel/sdk'; + +const kernel = new Kernel(); +const kernelBrowser = await kernel.browsers.create(); + +await kernel.browsers.computer.dragMouse(kernelBrowser.session_id, { + path: [ + [100, 200], + [150, 220], + [200, 260], + ], + button: 'left', + delay: 0, + steps_per_segment: 10, + step_delay_ms: 50, + hold_keys: ['Shift'], +}); +``` + +```python Python +import kernel + +client = kernel.Kernel() +kernel_browser = client.browsers.create() + +client.browsers.computer.drag_mouse( + id=kernel_browser.session_id, + path=[[100, 200], [150, 220], [200, 260]], + button="left", + delay=0, + steps_per_segment=10, + step_delay_ms=50, + hold_keys=["Shift"], +) +``` + +```bash CLI +# Drag the mouse along a path +kernel browsers computer drag-mouse \ + --point 100,200 \ + --point 150,220 \ + --point 200,260 \ + --button left \ + --delay 0 +``` + diff --git a/docs.json b/docs.json index a7ee85c..dff0b0e 100644 --- a/docs.json +++ b/docs.json @@ -81,7 +81,8 @@ ] } ] - } + }, + "browsers/computer-controls" ] }, { From 
85c0f701e8abdf3f22c689760ee65ce878322439 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Oct 2025 19:21:39 +0000 Subject: [PATCH 2/2] docs: update code samples from OpenAPI --- .../post-browsers-id-computer-click_mouse.mdx | 25 +++++++++++++++ .../post-browsers-id-computer-drag_mouse.mdx | 29 +++++++++++++++++ .../post-browsers-id-computer-move_mouse.mdx | 25 +++++++++++++++ .../post-browsers-id-computer-press_key.mdx | 24 ++++++++++++++ .../post-browsers-id-computer-screenshot.mdx | 31 +++++++++++++++++++ .../post-browsers-id-computer-scroll.mdx | 25 +++++++++++++++ .../post-browsers-id-computer-type.mdx | 24 ++++++++++++++ snippets/openapi/post-deployments.mdx | 11 ++++++- 8 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 snippets/openapi/post-browsers-id-computer-click_mouse.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-drag_mouse.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-move_mouse.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-press_key.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-screenshot.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-scroll.mdx create mode 100644 snippets/openapi/post-browsers-id-computer-type.mdx diff --git a/snippets/openapi/post-browsers-id-computer-click_mouse.mdx b/snippets/openapi/post-browsers-id-computer-click_mouse.mdx new file mode 100644 index 0000000..c96be43 --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-click_mouse.mdx @@ -0,0 +1,25 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.clickMouse('id', { x: 0, y: 0 }); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +client.browsers.computer.click_mouse( + id="id", + x=0, + y=0, +) +``` + diff --git 
a/snippets/openapi/post-browsers-id-computer-drag_mouse.mdx b/snippets/openapi/post-browsers-id-computer-drag_mouse.mdx new file mode 100644 index 0000000..d31c16a --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-drag_mouse.mdx @@ -0,0 +1,29 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.dragMouse('id', { + path: [ + [0, 0], + [0, 0], + ], +}); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +client.browsers.computer.drag_mouse( + id="id", + path=[[0, 0], [0, 0]], +) +``` + diff --git a/snippets/openapi/post-browsers-id-computer-move_mouse.mdx b/snippets/openapi/post-browsers-id-computer-move_mouse.mdx new file mode 100644 index 0000000..5dafecd --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-move_mouse.mdx @@ -0,0 +1,25 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.moveMouse('id', { x: 0, y: 0 }); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +client.browsers.computer.move_mouse( + id="id", + x=0, + y=0, +) +``` + diff --git a/snippets/openapi/post-browsers-id-computer-press_key.mdx b/snippets/openapi/post-browsers-id-computer-press_key.mdx new file mode 100644 index 0000000..a3e90e5 --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-press_key.mdx @@ -0,0 +1,24 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.pressKey('id', { keys: ['string'] }); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +client.browsers.computer.press_key( + id="id", + keys=["string"], +) +``` + diff --git 
a/snippets/openapi/post-browsers-id-computer-screenshot.mdx b/snippets/openapi/post-browsers-id-computer-screenshot.mdx new file mode 100644 index 0000000..3786e34 --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-screenshot.mdx @@ -0,0 +1,31 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +const response = await client.browsers.computer.captureScreenshot('id'); + +console.log(response); + +const content = await response.blob(); +console.log(content); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +response = client.browsers.computer.capture_screenshot( + id="id", +) +print(response) +content = response.read() +print(content) +``` + diff --git a/snippets/openapi/post-browsers-id-computer-scroll.mdx b/snippets/openapi/post-browsers-id-computer-scroll.mdx new file mode 100644 index 0000000..eb7b09d --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-scroll.mdx @@ -0,0 +1,25 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.scroll('id', { x: 0, y: 0 }); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) +client.browsers.computer.scroll( + id="id", + x=0, + y=0, +) +``` + diff --git a/snippets/openapi/post-browsers-id-computer-type.mdx b/snippets/openapi/post-browsers-id-computer-type.mdx new file mode 100644 index 0000000..f6448b3 --- /dev/null +++ b/snippets/openapi/post-browsers-id-computer-type.mdx @@ -0,0 +1,24 @@ + +```typescript Typescript/Javascript +import Kernel from '@onkernel/sdk'; + +const client = new Kernel({ + apiKey: 'My API Key', +}); + +await client.browsers.computer.typeText('id', { text: 'text' }); +``` + + +```python Python +from kernel import Kernel + +client = Kernel( + api_key="My API Key", +) 
+client.browsers.computer.type_text( + id="id", + text="text", +) +``` + diff --git a/snippets/openapi/post-deployments.mdx b/snippets/openapi/post-deployments.mdx index 6212992..7f8ee8e 100644 --- a/snippets/openapi/post-deployments.mdx +++ b/snippets/openapi/post-deployments.mdx @@ -8,7 +8,10 @@ const client = new Kernel({ const deployment = await client.deployments.create({ entrypoint_rel_path: 'src/app.py', + env_vars: { FOO: 'bar' }, file: fs.createReadStream('path/to/file'), + region: 'aws.us-east-1a', + version: '1.0.0', }); console.log(deployment.id); @@ -23,7 +26,13 @@ client = Kernel( ) deployment = client.deployments.create( entrypoint_rel_path="src/app.py", - file=b"raw file contents", + env_vars={ + "FOO": "bar" + }, + file=b"", + force=False, + region="aws.us-east-1a", + version="1.0.0", ) print(deployment.id) ```