In [1]:
from hud import gym
from hud.utils import stream
from hud.task import Task

In [2]:
# One-step browser task: the setup entry is a ("goto", url) pair and the
# evaluate entry is a ("contains_text", "capybara") pair.
task = Task(
    gym="hud-browser",
    prompt="Insert the text 'capybara' into the google search bar",
    setup=("goto", "https://www.google.com"),
    evaluate=("contains_text", "capybara"),
)

In [None]:
# Create and set up environment, takes around 20 seconds
env = await gym.make(task)
urls = await env.get_urls()

# Stream the live view
stream(urls["live_url"])

In [None]:
from hud.agent import ClaudeAgent, ClaudeAdapter, OperatorAgent, OperatorAdapter

# Define a new agent each time to reset the message history
# Make sure to define the environment variable OPENAI_API_KEY
agent = OperatorAgent(adapter=OperatorAdapter())

# Initial observation
obs, _ = await env.reset()
print(f"Initial observation complete")

# Agent loop
for i in range(8):
    print(f"========= Step {i+1} =========")

    # Use the agent to predict an action
    action, done = await agent.predict(obs)
    print(f"Agent's action (CLA): {action}")

    if done:
        break
    
    # Step the environment with the action
    obs, reward, terminated, info = await env.step(action)

    if terminated:
        break


In [None]:
result = await env.evaluate()

In [7]:
# Close the environment now that the run is finished
await env.close()