livekit · theomonnom · Feb 23, 2026 · Feb 18, 2026 · Feb 22, 2026 · Feb 22, 2026
diff --git a/livekit-agents/livekit/agents/llm/_provider_format/anthropic.py b/livekit-agents/livekit/agents/llm/_provider_format/anthropic.py
@@ -61,11 +61,18 @@ def to_chat_ctx(
                 }
             )
         elif msg.type == "function_call_output":
+            result_content: list[Any] | str = msg.output
+            try:
+                parsed = json.loads(msg.output)
+                if isinstance(parsed, list):
+                    result_content = parsed
+            except (json.JSONDecodeError, TypeError):
+                pass
             content.append(
                 {
                     "tool_use_id": msg.call_id,
                     "type": "tool_result",
-                    "content": msg.output,
+                    "content": result_content,
                     "is_error": msg.is_error,
                 }
             )
@@ -131,4 +138,5 @@ def to_fnc_ctx(tool_ctx: llm.ToolContext) -> list[dict[str, Any]]:
                     "input_schema": info.raw_schema.get("parameters", {}),
                 }
             )
+
     return schemas
diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/__init__.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/__init__.py
@@ -17,14 +17,19 @@
 See https://docs.livekit.io/agents/integrations/llm/anthropic/ for more information.
 """
 
+from .computer_tool import ComputerTool
 from .llm import LLM, LLMStream
 from .log import logger
 from .models import ChatModels
+from .tools import AnthropicTool, ComputerUse
 from .version import __version__
 
 __all__ = [
     "LLM",
     "LLMStream",
+    "AnthropicTool",
+    "ComputerTool",
+    "ComputerUse",
     "ChatModels",
     "logger",
     "__version__",

diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/computer_tool.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/computer_tool.py
@@ -0,0 +1,148 @@
+"""ComputerTool — Anthropic computer_use Toolset backed by browser PageActions."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import logging
+from typing import TYPE_CHECKING, Any
+
+from livekit import rtc
+from livekit.agents import llm
+
+from .tools import ComputerUse
+
+if TYPE_CHECKING:
+    from livekit.plugins.browser import PageActions  # type: ignore[import-untyped]
+
+logger = logging.getLogger(__name__)
+
+_POST_ACTION_DELAY = 0.3
+
+
+class ComputerTool(llm.Toolset):
+    """Anthropic computer_use tool backed by browser PageActions.
+
+    Usage::
+
+        from livekit.plugins.browser import PageActions
+
+        actions = PageActions(page=page)
+        tool = ComputerTool(actions=actions, width=1280, height=720)
+    """
+
+    def __init__(
+        self,
+        *,
+        actions: PageActions,
+        width: int = 1280,
+        height: int = 720,
+    ) -> None:
+        super().__init__(id="computer")
+        self._actions = actions
+        self._provider_tool = ComputerUse(
+            display_width_px=width,
+            display_height_px=height,
+        )
+
+    @property
+    def tools(self) -> list[llm.Tool]:
+        return [self._provider_tool]
+
+    async def execute(self, action: str, **kwargs: Any) -> list[dict[str, Any]]:
+        """Dispatch an Anthropic computer_use action and return screenshot content."""
+        actions = self._actions
+
+        match action:
+            case "screenshot":
+                pass
+            case "left_click":
+                x, y = _require_coordinate(kwargs)
+                await actions.left_click(x, y, modifiers=kwargs.get("text"))
+            case "right_click":
+                x, y = _require_coordinate(kwargs)
+                await actions.right_click(x, y)
+            case "double_click":
+                x, y = _require_coordinate(kwargs)
+                await actions.double_click(x, y)
+            case "triple_click":
+                x, y = _require_coordinate(kwargs)
+                await actions.triple_click(x, y)
+            case "middle_click":
+                x, y = _require_coordinate(kwargs)
+                await actions.middle_click(x, y)
+            case "mouse_move":
+                x, y = _require_coordinate(kwargs)
+                await actions.mouse_move(x, y)
+            case "left_click_drag":
+                sx, sy = _require_coordinate(kwargs, key="start_coordinate")
+                ex, ey = _require_coordinate(kwargs)
+                await actions.left_click_drag(start_x=sx, start_y=sy, end_x=ex, end_y=ey)
+            case "left_mouse_down":
+                x, y = _require_coordinate(kwargs)
+                await actions.left_mouse_down(x, y)
+            case "left_mouse_up":
+                x, y = _require_coordinate(kwargs)
+                await actions.left_mouse_up(x, y)
+            case "scroll":
+                x, y = _require_coordinate(kwargs)
+                await actions.scroll(
+                    x,
+                    y,
+                    direction=kwargs.get("scroll_direction", "down"),
+                    amount=int(kwargs.get("scroll_amount", 3)),
+                )
+            case "type":
+                await actions.type_text(_require(kwargs, "text"))
+            case "key":
+                await actions.key(_require(kwargs, "text"))
+            case "hold_key":
+                await actions.hold_key(
+                    _require(kwargs, "text"),
+                    duration=float(kwargs.get("duration", 0.5)),
+                )
+            case "wait":
+                await actions.wait()
+            case _:
+                raise ValueError(f"Unknown computer_use action: {action!r}")
+
+        await asyncio.sleep(_POST_ACTION_DELAY)
+
+        frame = actions.last_frame
+        if frame is None:
+            return [{"type": "text", "text": "(no frame available yet)"}]
+        return _screenshot_content(frame)
+
+    def aclose(self) -> None:
+        self._actions.aclose()
+
+
+def _require(kwargs: dict[str, Any], key: str) -> Any:
+    """Extract a required argument, raising ValueError if missing."""
+    if key not in kwargs:
+        raise ValueError(f"Missing required argument: {key!r}")
+    return kwargs[key]
+
+
+def _require_coordinate(kwargs: dict[str, Any], *, key: str = "coordinate") -> tuple[int, int]:
+    """Extract and unpack a coordinate pair from Anthropic's [x, y] format."""
+    coord = _require(kwargs, key)
+    return int(coord[0]), int(coord[1])
+
+
+def _screenshot_content(frame: rtc.VideoFrame) -> list[dict[str, Any]]:
+    """Build Anthropic tool_result content blocks with a screenshot."""
+    from livekit.agents.utils.images import EncodeOptions, encode
+
+    png_bytes = encode(frame, EncodeOptions(format="PNG"))
+    b64 = base64.b64encode(png_bytes).decode("utf-8")
+    return [
+        {
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/png",
+                "data": b64,
+            },
+        }
+    ]
diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py
@@ -151,8 +151,21 @@ def chat(
 
         extra["max_tokens"] = self._opts.max_tokens if is_given(self._opts.max_tokens) else 1024
 
+        beta_flag: str | None = None
         if tools:
-            extra["tools"] = llm.ToolContext(tools).parse_function_tools("anthropic")
+            from .tools import AnthropicTool
+
+            tool_ctx = llm.ToolContext(tools)
+            tool_schemas = tool_ctx.parse_function_tools("anthropic")
+
+            for tool in tool_ctx.provider_tools:
+                if isinstance(tool, AnthropicTool):
+                    tool_schemas.append(tool.to_dict())
+                    if tool.beta_flag:
+                        beta_flag = tool.beta_flag
+
+            extra["tools"] = tool_schemas
+
             tool_choice = (
                 cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
             )
@@ -209,17 +222,27 @@ def chat(
                     content[-1]["cache_control"] = CACHE_CONTROL_EPHEMERAL  # type: ignore
                     break
 
-        stream = self._client.messages.create(
-            messages=messages,
-            model=self._opts.model,
-            stream=True,
-            timeout=conn_options.timeout,
-            **extra,
-        )
+        if beta_flag:
+            stream = self._client.beta.messages.create(
+                betas=[beta_flag],
+                messages=messages,  # type: ignore[arg-type]
+                model=self._opts.model,
+                stream=True,
+                timeout=conn_options.timeout,
+                **extra,
+            )
+        else:
+            stream = self._client.messages.create(
+                messages=messages,
+                model=self._opts.model,
+                stream=True,
+                timeout=conn_options.timeout,
+                **extra,
+            )
 
         return LLMStream(
             self,
-            anthropic_stream=stream,
+            anthropic_stream=stream,  # type: ignore[arg-type]
             chat_ctx=chat_ctx,
             tools=tools or [],
             conn_options=conn_options,

diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/models.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/models.py
@@ -10,6 +10,8 @@
     "claude-3-5-haiku-20241022",
     "claude-3-7-sonnet-20250219",
     "claude-sonnet-4-20250514",
+    "claude-sonnet-4-6",
     "claude-opus-4-20250514",
     "claude-opus-4-1-20250805",
+    "claude-opus-4-6",
 ]
diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/tools.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/tools.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any
+
+from livekit.agents import ProviderTool
+
+
+class AnthropicTool(ProviderTool, ABC):
+    """Abstract base for provider tools that serialize themselves to a dict."""
+
+    # Subclasses must return the dict that describes the tool.
+    @abstractmethod
+    def to_dict(self) -> dict[str, Any]: ...
+
+    @property
+    def beta_flag(self) -> str | None:
+        """Beta flag associated with this tool; the base implementation has none."""
+        return None
+
+
+# Maps a computer tool version string to the beta flag that
+# ``ComputerUse.beta_flag`` reports for it; unlisted versions yield ``None``.
+_TOOL_VERSION_BETA_FLAGS: dict[str, str] = {
+    "computer_20251124": "computer-use-2025-11-24",
+    "computer_20250124": "computer-use-2025-01-24",
+}
+
+
+@dataclass
+class ComputerUse(AnthropicTool):
+    display_width_px: int = 1280
+    display_height_px: int = 720
+    display_number: int = 1
+    tool_version: str = "computer_20251124"
+
+    def __post_init__(self) -> None:
+        super().__init__(id="computer")
+
+    @property
+    def beta_flag(self) -> str | None:
+        return _TOOL_VERSION_BETA_FLAGS.get(self.tool_version)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "type": self.tool_version,
+            "name": "computer",
+            "display_width_px": self.display_width_px,
+            "display_height_px": self.display_height_px,
+            "display_number": self.display_number,
+        }
diff --git a/livekit-plugins/livekit-plugins-browser/livekit/plugins/browser/__init__.py b/livekit-plugins/livekit-plugins-browser/livekit/plugins/browser/__init__.py
@@ -13,15 +13,19 @@
     PaintData,
 )
 
+from .browser_agent import BrowserAgent
 from .log import logger
+from .page_actions import PageActions
 from .session import BrowserSession
 from .version import __version__
 
 __all__ = [
     "AudioData",
+    "BrowserAgent",
     "BrowserContext",
     "BrowserPage",
     "BrowserSession",
+    "PageActions",
     "PaintData",
 ]