In [1]:
from hud import gym
from hud.task import Task

### Browser Task Example

In [53]:
# Simple browser task with a single step.
# `setup` and `evaluate` are (function name, argument) pairs; presumably the
# setup step runs before the agent acts and the evaluate step scores the run
# -- confirm against the HUD docs.
# The URL is fully qualified (scheme included): a bare "google.com" is not a
# valid absolute URL for browser navigation, and the response task further
# down already uses the "https://www.google.com" form.
task = Task(
    prompt="Insert the text 'capybara' into the search bar",
    gym="hud-browser",
    setup=("goto", "https://www.google.com"),
    evaluate=("contains_text", "capybara"),
)

In [None]:
# Create and set up the environment for the task defined above (Google);
# this takes around 20 seconds.
env = await gym.make(task)
# NOTE(review): presumably shows a live view of the browser session in the
# notebook -- confirm against the HUD docs.
await env.stream()

In [None]:
from hud.agent import ClaudeAgent, OperatorAgent, LangchainAgent
from langchain_openai import ChatOpenAI

# Define a new agent each time to reset the message history
# Make sure to define the environment variable OPENAI_API_KEY
# Use Claude if you don't have access to Operator
agent = LangchainAgent(ChatOpenAI(model="gpt-4o"))
# Initial observation
obs, _ = await env.reset()
print(f"Initial observation complete")

# Agent loop
for i in range(50):
    print(f"========= Step {i + 1} =========")

    # Use the agent to predict an action
    action, done = await agent.predict(obs)
    print(f"Agent's action (CLA): {action}")

    # Step the environment with the action
    obs, reward, terminated, info = await env.step(action)  # TODO RESPONSE AGENT

    if terminated or done:
        break

In [None]:
# Evaluate the environment after the agent loop; the awaited result is shown
# as the cell output. Presumably this applies the task's `evaluate` check
# ("contains_text") -- confirm against the HUD docs.
await env.evaluate()

In [6]:
# Close the environment once finished with it (presumably releases the
# provisioned browser session -- confirm against the HUD docs).
await env.close()

### Response Task Example

In [None]:
# Single-step response task: the evaluation looks for "Paris" in the response
# ("response_includes") instead of checking page content.
task = Task(
    gym="hud-browser",
    prompt="What is the capital of France?",
    evaluate=("response_includes", "Paris"),
    setup=("goto", "https://www.google.com"),
)

In [None]:
# Create and set up the environment for the response task (Google);
# this takes around 20 seconds.
env = await gym.make(task)
# NOTE(review): presumably shows a live view of the browser session in the
# notebook -- confirm against the HUD docs.
await env.stream()

In [None]:
from hud.agent import ClaudeAgent, ClaudeAdapter, OperatorAgent, OperatorAdapter

# Define a new agent each time to reset the message history
# Make sure to define the environment variable OPENAI_API_KEY
agent = OperatorAgent(adapter=OperatorAdapter())

# Initial observation
obs, _ = await env.reset()
print(f"Initial observation complete")

# Agent loop
for i in range(8):
    print(f"========= Step {i + 1} =========")

    # Use the agent to predict an action
    action, done = await agent.predict(obs)
    print(f"Agent's action (CLA): {action}")

    # Step the environment with the action
    # In this case, the final action will have to submit a response
    obs, reward, terminated, info = await env.step(action)

    if done or terminated:
        break

In [None]:
# Inspect the final response stored on the environment; as the last expression
# in the cell, its value is displayed as the cell output.
env.final_response

In [None]:
await env.evaluate()

### Response Task without provisioning CUA environments

In [16]:
# Response task on the "qa" gym: no setup step, only a prompt and an
# evaluation that looks for "Paris" in the response.
task = Task(
    gym="qa",
    prompt="What is the capital of France?",
    evaluate=("response_includes", "Paris"),
)

In [None]:
# Create the environment for the qa task defined above
env = await gym.make(task)

In [None]:
from hud import Response

# Submit the answer directly as a single Response action -- no agent is used
# in this example. The awaited step result is displayed as the cell output.
await env.step([Response(text="Paris")])

In [None]:
# Evaluate the qa environment after the response has been submitted; the
# awaited result is shown as the cell output. Presumably this applies the
# task's `evaluate` check ("response_includes") -- confirm against the HUD docs.
await env.evaluate()