Skip to content

Commit f377db1

Browse files
authored
feat: add macOS screen snapshots for monitor preview (#67954) thanks @BunsDev
Co-authored-by: Val Alexander <68980965+BunsDev@users.noreply.github.com>
1 parent 0b6c39b commit f377db1

File tree

12 files changed

+321
-1
lines changed

12 files changed

+321
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Docs: https://docs.openclaw.ai
44

55
## Unreleased
66

7+
### Changes
8+
9+
- macOS/gateway: add `screen.snapshot` support for macOS app nodes, including runtime plumbing, default macOS allowlisting, and docs for monitor preview flows. (#67954) Thanks @BunsDev.
10+
711
### Fixes
812

913
- Onboarding/non-interactive: preserve existing gateway auth tokens during re-onboard so active local gateway clients are not disconnected by an implicit token rotation. (#67821) Thanks @BKF-Gitty.

apps/macos/Sources/OpenClaw/NodeMode/MacNodeModeCoordinator.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ final class MacNodeModeCoordinator {
146146
OpenClawCanvasA2UICommand.push.rawValue,
147147
OpenClawCanvasA2UICommand.pushJSONL.rawValue,
148148
OpenClawCanvasA2UICommand.reset.rawValue,
149+
MacNodeScreenCommand.snapshot.rawValue,
149150
MacNodeScreenCommand.record.rawValue,
150151
OpenClawSystemCommand.notify.rawValue,
151152
OpenClawSystemCommand.which.rawValue,

apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntime.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ actor MacNodeRuntime {
6363
return try await self.handleCameraInvoke(req)
6464
case OpenClawLocationCommand.get.rawValue:
6565
return try await self.handleLocationInvoke(req)
66+
case MacNodeScreenCommand.snapshot.rawValue:
67+
return try await self.handleScreenSnapshotInvoke(req)
6668
case MacNodeScreenCommand.record.rawValue:
6769
return try await self.handleScreenRecordInvoke(req)
6870
case OpenClawSystemCommand.run.rawValue:
@@ -352,6 +354,34 @@ actor MacNodeRuntime {
352354
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
353355
}
354356

357+
/// Handles a `screen.snapshot` invoke: captures one display through the main-actor
/// services and returns the encoded image as a base64 JSON payload.
///
/// Parameters that fail to decode are replaced by a default
/// `MacNodeScreenSnapshotParams()` (every field `nil`) instead of failing the request.
///
/// - Parameter req: The invoke request; `paramsJSON` optionally carries the snapshot options.
/// - Returns: A successful response whose payload holds the image and its metadata.
/// - Throws: Whatever `snapshotScreen` or `encodePayload` throws.
private func handleScreenSnapshotInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse {
    /// JSON shape of a successful snapshot response.
    struct ScreenSnapshotPayload: Encodable {
        var format: String
        var base64: String
        var width: Int
        var height: Int
        var screenIndex: Int?
        var capturedAtMs: Int64
    }

    let decoded = try? Self.decodeParams(MacNodeScreenSnapshotParams.self, from: req.paramsJSON)
    let request = decoded ?? MacNodeScreenSnapshotParams()
    let capture = await self.mainActorServices()

    // The timestamp is taken immediately before the capture starts, not after it completes.
    let startedAtMs = Int64(Date().timeIntervalSince1970 * 1000)
    let snapshot = try await capture.snapshotScreen(
        screenIndex: request.screenIndex,
        maxWidth: request.maxWidth,
        quality: request.quality,
        format: request.format)

    // `screenIndex` echoes the caller's request (possibly nil), not the resolved display.
    let body = ScreenSnapshotPayload(
        format: snapshot.format.rawValue,
        base64: snapshot.data.base64EncodedString(),
        width: snapshot.width,
        height: snapshot.height,
        screenIndex: request.screenIndex,
        capturedAtMs: startedAtMs)
    let payloadJSON = try Self.encodePayload(body)
    return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payloadJSON)
}
384+
355385
private func mainActorServices() async -> any MacNodeRuntimeMainActorServices {
356386
if let cachedMainActorServices { return cachedMainActorServices }
357387
let services = await self.makeMainActorServices()

apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntimeMainActorServices.swift

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ import OpenClawKit
44

55
@MainActor
66
protocol MacNodeRuntimeMainActorServices: Sendable {
7+
func snapshotScreen(
8+
screenIndex: Int?,
9+
maxWidth: Int?,
10+
quality: Double?,
11+
format: OpenClawScreenSnapshotFormat?) async throws
12+
-> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
13+
714
func recordScreen(
815
screenIndex: Int?,
916
durationMs: Int?,
@@ -21,9 +28,24 @@ protocol MacNodeRuntimeMainActorServices: Sendable {
2128

2229
@MainActor
2330
final class LiveMacNodeRuntimeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
31+
private let screenSnapshotter = ScreenSnapshotService()
2432
private let screenRecorder = ScreenRecordService()
2533
private let locationService = MacNodeLocationService()
2634

35+
/// Captures a still image of a display by forwarding every argument unchanged to
/// `screenSnapshotter`.
///
/// - Returns: The encoded image bytes, the format used, and the image dimensions.
/// - Throws: Any error raised by `screenSnapshotter.snapshot`.
func snapshotScreen(
    screenIndex: Int?,
    maxWidth: Int?,
    quality: Double?,
    format: OpenClawScreenSnapshotFormat?) async throws
    -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
{
    let snapshot = try await self.screenSnapshotter.snapshot(
        screenIndex: screenIndex,
        maxWidth: maxWidth,
        quality: quality,
        format: format)
    return snapshot
}
48+
2749
func recordScreen(
2850
screenIndex: Int?,
2951
durationMs: Int?,

apps/macos/Sources/OpenClaw/NodeMode/MacNodeScreenCommands.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
import Foundation
2+
import OpenClawKit
23

34
/// Screen-related commands of the macOS node; each raw value is the command string.
enum MacNodeScreenCommand: String, Codable {
    /// Screen snapshot command.
    case snapshot = "screen.snapshot"

    /// Screen recording command.
    case record = "screen.record"
}
68

9+
/// Optional parameters accepted by the `screen.snapshot` command.
///
/// Every field may be omitted; a `nil` field leaves the choice to the snapshot service.
struct MacNodeScreenSnapshotParams: Codable, Equatable {
    /// Index of the display to capture.
    var screenIndex: Int?

    /// Maximum width of the returned image.
    var maxWidth: Int?

    /// Requested compression quality.
    var quality: Double?

    /// Requested image encoding.
    var format: OpenClawScreenSnapshotFormat?
}
15+
716
struct MacNodeScreenRecordParams: Codable, Equatable {
817
var screenIndex: Int?
918
var durationMs: Int?
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import AppKit
2+
import Foundation
3+
import OpenClawKit
4+
@preconcurrency import ScreenCaptureKit
5+
6+
@MainActor
7+
final class ScreenSnapshotService {
8+
enum ScreenSnapshotError: LocalizedError {
9+
case noDisplays
10+
case invalidScreenIndex(Int)
11+
case captureFailed(String)
12+
case encodeFailed(String)
13+
14+
var errorDescription: String? {
15+
switch self {
16+
case .noDisplays:
17+
"No displays available for screen snapshot"
18+
case let .invalidScreenIndex(idx):
19+
"Invalid screen index \(idx)"
20+
case let .captureFailed(message):
21+
message
22+
case let .encodeFailed(message):
23+
message
24+
}
25+
}
26+
}
27+
28+
func snapshot(
29+
screenIndex: Int?,
30+
maxWidth: Int?,
31+
quality: Double?,
32+
format: OpenClawScreenSnapshotFormat?) async throws
33+
-> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
34+
{
35+
let format = format ?? .jpeg
36+
let normalized = Self.normalize(maxWidth: maxWidth, quality: quality, format: format)
37+
38+
let content = try await SCShareableContent.current
39+
let displays = content.displays.sorted { $0.displayID < $1.displayID }
40+
guard !displays.isEmpty else {
41+
throw ScreenSnapshotError.noDisplays
42+
}
43+
44+
let idx = screenIndex ?? 0
45+
guard idx >= 0, idx < displays.count else {
46+
throw ScreenSnapshotError.invalidScreenIndex(idx)
47+
}
48+
let display = displays[idx]
49+
50+
let filter = SCContentFilter(display: display, excludingWindows: [])
51+
let config = SCStreamConfiguration()
52+
let targetSize = Self.targetSize(
53+
width: display.width,
54+
height: display.height,
55+
maxWidth: normalized.maxWidth)
56+
config.width = targetSize.width
57+
config.height = targetSize.height
58+
config.showsCursor = true
59+
60+
let cgImage: CGImage
61+
do {
62+
cgImage = try await SCScreenshotManager.captureImage(
63+
contentFilter: filter,
64+
configuration: config)
65+
} catch {
66+
throw ScreenSnapshotError.captureFailed(error.localizedDescription)
67+
}
68+
69+
let bitmap = NSBitmapImageRep(cgImage: cgImage)
70+
let data: Data
71+
switch format {
72+
case .png:
73+
guard let encoded = bitmap.representation(using: .png, properties: [:]) else {
74+
throw ScreenSnapshotError.encodeFailed("png encode failed")
75+
}
76+
data = encoded
77+
case .jpeg:
78+
guard let encoded = bitmap.representation(
79+
using: .jpeg,
80+
properties: [.compressionFactor: normalized.quality])
81+
else {
82+
throw ScreenSnapshotError.encodeFailed("jpeg encode failed")
83+
}
84+
data = encoded
85+
}
86+
87+
return (data: data, format: format, width: cgImage.width, height: cgImage.height)
88+
}
89+
90+
private static func normalize(
91+
maxWidth: Int?,
92+
quality: Double?,
93+
format: OpenClawScreenSnapshotFormat)
94+
-> (maxWidth: Int, quality: Double)
95+
{
96+
let resolvedMaxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? (format == .png ? 900 : 1600)
97+
let resolvedQuality = min(1.0, max(0.05, quality ?? 0.72))
98+
return (maxWidth: resolvedMaxWidth, quality: resolvedQuality)
99+
}
100+
101+
private static func targetSize(width: Int, height: Int, maxWidth: Int) -> (width: Int, height: Int) {
102+
guard width > 0, height > 0, width > maxWidth else {
103+
return (width: width, height: height)
104+
}
105+
let scale = Double(maxWidth) / Double(width)
106+
let targetHeight = max(1, Int((Double(height) * scale).rounded()))
107+
return (width: maxWidth, height: targetHeight)
108+
}
109+
}

apps/macos/Tests/OpenClawIPCTests/MacNodeRuntimeTests.swift

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,19 @@ struct MacNodeRuntimeTests {
7878
@Test func `handle invoke screen record uses injected services`() async throws {
7979
@MainActor
8080
final class FakeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
81+
/// Stub required by the protocol: returns fixed bytes and a fixed 640x360 size.
/// Only `format` influences the result.
func snapshotScreen(
    screenIndex: Int?,
    maxWidth: Int?,
    quality: Double?,
    format: OpenClawScreenSnapshotFormat?) async throws
    -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
{
    _ = (screenIndex, maxWidth, quality)
    let stubBytes = Data("snapshot".utf8)
    return (data: stubBytes, format: format ?? .jpeg, width: 640, height: 360)
}
93+
8194
func recordScreen(
8295
screenIndex: Int?,
8396
durationMs: Int?,
@@ -127,6 +140,94 @@ struct MacNodeRuntimeTests {
127140
#expect(!payload.base64.isEmpty)
128141
}
129142

143+
/// Verifies that a `screen.snapshot` invoke is routed to the injected main-actor
/// services with the decoded parameters, and that the response payload mirrors the
/// values those services return.
@Test func `handle invoke screen snapshot uses injected services`() async throws {
    @MainActor
    final class FakeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
        /// Wall-clock time (ms since epoch) at which `snapshotScreen` was entered;
        /// stays `nil` if the runtime never calls it.
        var snapshotCalledAtMs: Int64?

        func snapshotScreen(
            screenIndex: Int?,
            maxWidth: Int?,
            quality: Double?,
            format: OpenClawScreenSnapshotFormat?) async throws
            -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
        {
            self.snapshotCalledAtMs = Int64(Date().timeIntervalSince1970 * 1000)
            // The runtime must pass the request parameters through untouched.
            #expect(screenIndex == 0)
            #expect(maxWidth == 800)
            #expect(quality == 0.5)
            return (Data("ok".utf8), format ?? .jpeg, 800, 450)
        }

        func recordScreen(
            screenIndex: Int?,
            durationMs: Int?,
            fps: Double?,
            includeAudio: Bool?,
            outPath: String?) async throws -> (path: String, hasAudio: Bool)
        {
            let url = FileManager().temporaryDirectory
                .appendingPathComponent("openclaw-test-screen-record-\(UUID().uuidString).mp4")
            try Data("ok".utf8).write(to: url)
            return (path: url.path, hasAudio: false)
        }

        func locationAuthorizationStatus() -> CLAuthorizationStatus {
            .authorizedAlways
        }

        func locationAccuracyAuthorization() -> CLAccuracyAuthorization {
            .fullAccuracy
        }

        func currentLocation(
            desiredAccuracy: OpenClawLocationAccuracy,
            maxAgeMs: Int?,
            timeoutMs: Int?) async throws -> CLLocation
        {
            _ = desiredAccuracy
            _ = maxAgeMs
            _ = timeoutMs
            return CLLocation(latitude: 0, longitude: 0)
        }
    }

    let services = await MainActor.run { FakeMainActorServices() }
    let runtime = MacNodeRuntime(makeMainActorServices: { services })

    let params = MacNodeScreenSnapshotParams(
        screenIndex: 0,
        maxWidth: 800,
        quality: 0.5,
        format: .jpeg)
    let json = try String(data: JSONEncoder().encode(params), encoding: .utf8)
    let response = await runtime.handleInvoke(
        BridgeInvokeRequest(
            id: "req-screen-snapshot",
            command: MacNodeScreenCommand.snapshot.rawValue,
            paramsJSON: json))
    #expect(response.ok == true)
    let payloadJSON = try #require(response.payloadJSON)

    struct Payload: Decodable {
        var format: String
        var base64: String
        var width: Int
        var height: Int
        var capturedAtMs: Int64
    }

    let payload = try JSONDecoder().decode(Payload.self, from: Data(payloadJSON.utf8))
    #expect(payload.format == "jpeg")
    #expect(payload.base64 == Data("ok".utf8).base64EncodedString())
    #expect(payload.width == 800)
    #expect(payload.height == 450)
    #expect(payload.capturedAtMs > 0)

    // `#require` fails the test cleanly if the fake was never invoked; a force-unwrap
    // here would crash the whole test process instead of reporting a failure.
    let recordedAtMs = await MainActor.run { services.snapshotCalledAtMs }
    let snapshotCalledAtMs = try #require(recordedAtMs)
    // The payload timestamp is expected to be taken before the services are called.
    #expect(payload.capturedAtMs <= snapshotCalledAtMs)
}
230+
130231
@Test func `handle invoke browser proxy uses injected request`() async {
131232
let runtime = MacNodeRuntime(browserProxyRequest: { paramsJSON in
132233
#expect(paramsJSON?.contains("/tabs") == true)

apps/shared/OpenClawKit/Sources/OpenClawKit/ScreenCommands.swift

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,34 @@
11
import Foundation
22

33
/// Screen-related commands; each raw value is the command string.
public enum OpenClawScreenCommand: String, Codable, Sendable {
    /// Screen snapshot command.
    case snapshot = "screen.snapshot"

    /// Screen recording command.
    case record = "screen.record"
}
67

8+
/// Image encodings available for screen snapshots; the raw value is the case name.
public enum OpenClawScreenSnapshotFormat: String, Codable, Sendable {
    /// JPEG encoding.
    case jpeg

    /// PNG encoding.
    case png
}
12+
13+
/// Optional parameters for a `screen.snapshot` request.
///
/// Every field may be left `nil`, in which case the choice is left to the receiver.
public struct OpenClawScreenSnapshotParams: Codable, Sendable, Equatable {
    /// Index of the display to capture.
    public var screenIndex: Int?

    /// Maximum width of the returned image.
    public var maxWidth: Int?

    /// Requested compression quality.
    public var quality: Double?

    /// Requested image encoding.
    public var format: OpenClawScreenSnapshotFormat?

    /// Creates a parameter set; every argument defaults to `nil`.
    public init(
        screenIndex: Int? = nil,
        maxWidth: Int? = nil,
        quality: Double? = nil,
        format: OpenClawScreenSnapshotFormat? = nil)
    {
        self.screenIndex = screenIndex
        self.maxWidth = maxWidth
        self.quality = quality
        self.format = format
    }
}
31+
732
public struct OpenClawScreenRecordParams: Codable, Sendable, Equatable {
833
public var screenIndex: Int?
934
public var durationMs: Int?

docs/platforms/macos.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ The macOS app presents itself as a node. Common commands:
5555

5656
- Canvas: `canvas.present`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.*`
5757
- Camera: `camera.snap`, `camera.clip`
58-
- Screen: `screen.record`
58+
- Screen: `screen.snapshot`, `screen.record`
5959
- System: `system.run`, `system.notify`
6060

6161
The node reports a `permissions` map so agents can decide what’s allowed.

0 commit comments

Comments
 (0)