In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

from hud import HUDClient
from hud.adapters.claude.adapter import ClaudeAdapter
from agent.claude import ClaudeAgent

from anthropic import Anthropic

In [2]:
# HUD client, authenticated with the key stored in the HUD_API_KEY environment variable
client = HUDClient(api_key=os.environ.get("HUD_API_KEY"))

# Anthropic API client; it is handed to the Claude Computer Use agent later on
anthropic = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# adapter used to rescale screenshots and to convert the agent's actions
# into the environment's action space
cua_adapter = ClaudeAdapter()

In [3]:
# load OSWorld environment
gym = await client.load_gym(id="OSWorld-Ubuntu")

# load OSWorld evalset
evalset = await client.load_evalset(id="OSWorld-Ubuntu")

# create a run that will host all evaluations
run = await client.create_run(name="Claude-test-OSWorld", gym=gym, evalset=evalset)

# fetch all task ids from the run
tasks = await run.fetch_task_ids()
print(f"Total tasks in OSWorld: {len(tasks)}")

Total tasks in OSWorld: 368


In [4]:
# It may take around ~1 minute to initialize the OSWorld environment and reset to a task

# make a HUD environment
env = await run.make()
await env.wait_for_ready()

# reset to a task with an observation (screenshot and text)
obs = await env.reset(task_id=tasks[1])
print(f"Task description: {obs.text}")

Task description: Can you make my computer bring back the last tab I shut down?


In [5]:
# agent loop
agent = ClaudeAgent(anthropic)

for i in range(8):
    # rescale screenshot to Claude's resolution
    screenshot = cua_adapter.rescale(obs.screenshot)

    # agent's next action
    done, response = await agent.predict(screenshot, obs.text)
    if done:
        env.final_response = str(response)
        break

    # convert to HUD action space
    actions = cua_adapter.adapt_list([response])
    print(f"Agent's action: {response}")

    # step the environment forward
    obs, reward, terminated, info = await env.step(actions)

    # drop out if terminated
    if terminated:
        break
    print(f"Step {i+1} completed")


Agent's action: {'action': 'key', 'text': 'ctrl+shift+t'}
Step 1 completed


In [6]:
# evaluate environment state
result = await env.evaluate()
print(f"Evaluation result: {result}")

# close environment
await env.close()

Evaluation result: 1.0


In [7]:
# fetch the analytics for the run created earlier and print them
analytics = await run.get_analytics()
print(analytics)

Run: Claude-test-OSWorld (ID: de4927e8-f196-49cd-8330-39af16ad9112)
Created: 2025-03-22 21:53:01
------------------------------------------------------------
Progress: 1/1 tasks completed (
            100.0% completion rate)

Status Distribution:
completed : ██████████████████████████████████████████████████ 1 (100.0%)

Average Score: 1.00
Score:  1.00/1.00
