In [1]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
import simulated_web_agent

if "DISPLAY" in os.environ:
    del os.environ["DISPLAY"]


In [None]:
from pathlib import Path

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("start-maximized")
options.add_argument("--remote-debugging-port=9222")
driver = webdriver.Chrome(options=options)

The chromedriver version (127.0.6533.88) detected in PATH at /opt/homebrew/bin/chromedriver might not be compatible with the detected chrome version (128.0.6613.84); currently, chromedriver 128.0.6613.84 is recommended for chrome 128.*, so it is advised to delete the driver in PATH and retry


In [3]:
driver.get("http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/")
driver.implicitly_wait(0)  # seconds

In [12]:
print(driver.execute_script("1"))

None


In [None]:
import dominate.tags


clickables = {}
inputs = {}


def set_attribute(element: Element, attribute, value):
    driver.execute_script(
        "arguments[0].setAttribute(arguments[1], arguments[2]);",
        element,
        attribute,
        value,
    )


def register_clickable(element: Element, name: str):
    clickables[name] = element
    set_attribute(element, "data-clickable-id", name)


def register_input(element: Element, name: str):
    inputs[name] = element
    set_attribute(element, "data-input-id", name)


def get_text(element):
    elementText = element.text  # sometime NOT work
    if not elementText:
        elementText = element.get_attribute("innerText")
    if not elementText:
        elementText = element.get_attribute("textContent")
    return element.get_attribute("innerText")


actions = {}


def process(element: Element, recipe, parent_name=""):
    elementText = ""
    if "text_selector" in recipe:
        text_element = element.find_element(By.CSS_SELECTOR, recipe["text_selector"])
        elementText = get_text(text_element)
    else:
        elementText = get_text(element)
    if "add_text" in recipe and recipe["add_text"]:
        if not elementText:
            elementText = get_text(element)
    if "text_format" in recipe and recipe["text_format"]:
        elementText = recipe["text_format"].format(elementText)

    tag_name = element.tag_name
    if "tag_name" in recipe:
        tag_name = recipe["tag_name"]
    if tag_name in dominate.tags.underscored_classes:
        node = getattr(dominate.tags, tag_name + "_")(
            elementText if "add_text" in recipe and recipe["add_text"] else ""
        )
    else:
        node = getattr(dominate.tags, tag_name)(
            elementText if "add_text" in recipe and recipe["add_text"] else ""
        )

    if "name" in recipe and recipe["name"]:
        if recipe["name"] == "from_text":
            if elementText:
                element_name = elementText.lower().replace(" ", "_")
                for special_char in "[]{}()<>.:;|!@#$%^&*+-=,?/\\\"'":
                    element_name = element_name.replace(special_char, "")
                node["name"] = (parent_name + "." if parent_name else "") + element_name
                parent_name = node["name"]
            else:
                raise Exception("name from_text must have add_text")
        else:
            node["name"] = (parent_name + "." if parent_name else "") + recipe["name"]
        parent_name = node["name"]
    if "clickable" in recipe and recipe["clickable"]:
        if "name" not in recipe:
            raise Exception("clickable element must have a name")
        register_clickable(element, node["name"])
    for key in ["alt", "src", "href", "title", "type", "value"]:
        value = element.get_dom_attribute(key)
        if value:
            node[key] = value
    for key in ["class", "id"]:
        if key in recipe and recipe[key]:
            node[key] = recipe[key]
    # if 'radio' in recipe and recipe['radio']:
    #     if element.get_attribute('checked'):
    #         node.text += ' (selected)'
    if tag_name == "input":
        input_type = element.get_attribute("type")
        if input_type == "radio":
            if element.get_attribute("checked"):
                node.text += " (selected)"
            assert "clickable" in recipe and recipe["clickable"]
        elif input_type == "text":
            node["value"] = element.get_attribute("value")
            register_input(element, node["name"])
    if "children" in recipe and recipe["children"]:
        with node:
            for child in recipe["children"]:
                if "direct_child" in child and child["direct_child"]:
                    selector = ":scope > " + child["selector"]
                else:
                    selector = child["selector"]
                elements = element.find_elements(By.CSS_SELECTOR, selector)
                for child_element in elements:
                    process(child_element, child, parent_name)
    return node


In [None]:
print(process(driver.find_element(By.CSS_SELECTOR, recipe["selector"]), recipe))

In [None]:
clickables["header.search_box.search_button"].click()

In [None]:
print(
    get_text(
        driver.find_element(By.CSS_SELECTOR, ".header.content").find_element(
            By.CSS_SELECTOR, "button.action.search"
        )
    )
)

In [None]:
driver.current_url

In [None]:
title = (
    driver.find_element(By.CSS_SELECTOR, "html")
    .find_element(By.CSS_SELECTOR, "head")
    .find_element(By.CSS_SELECTOR, "title")
)

In [None]:
title.get_attribute("innerHTML")

In [None]:
type(title)

In [None]:
driver.execute_script(JS_BUILD_CSS_SELECTOR, title)

In [None]:
btn = driver.find_element(
    By.CSS_SELECTOR,
    "#maincontent > div.columns > div > div:nth-child(3) > div > div.block-content > div.products-grid.grid > ol > li:nth-child(3) > div > div > div.product-item-inner > div > div.actions-primary > form > button",
)


In [None]:
btn.setProperty("data-clickable-id", "123")

In [None]:
btn.get_attribute("data-clickable-id")

In [None]:
btn.get_attribute("src")

In [None]:
title = dominate.tags.title("title")

In [None]:
title.render(pretty=False)

In [None]:
title.text += " (selected)"

In [None]:
with title:
    dominate.tags.div("test")

In [1]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv
from simulated_web_agent.executor.env import Browser
import simulated_web_agent

if "DISPLAY" in os.environ:
    del os.environ["DISPLAY"]

logging.basicConfig()
# env: SeleniumEnv = gym.make(
#     "SeleniumEnv-v0",
#     start_url="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/",
#     pretty=False,
#     headless=True,
# )
# observation0, info = env.reset()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
observation, _, _, _, _ = env.step(
    '{"type": "type", "name": "header.search_box.search_input", "text": "test"}'
)
observation

html > body html > body
html > body > div[name='header'] html > body > div[name='header']
html > body > div[name='header'] > form[name='header.search_box'] html > body > div[name='header'] > form[name='header.search_box']
html > body > div[name='header'] > form[name='header.search_box'] > input[name='header.search_box.search_input'] html > body > div[name='header'] > form[name='header.search_box'] > input[name='header.search_box.search_input']


  logger.warn(f"{pre} is not within the observation space.")


{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<input name="header.search_box.search_input" type="text" value="test">',
 'clickables': ['header.search_box.search_button',
  'header.minicart.view_cart',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.view_product',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.add_to_cart',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.view_product',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.add_to_cart',
  'product_showcases._belle_of_the_ball_princess_sprinkle_mix_wedding_colorful_sprinkles_cake_cupcake_cookie_sprinkles_ice_cream_candy_sprinkles_yellow_gold_red_royal_red_rose_icing_flowers_decorating_sprinkles_8oz_.view_product',
  'product_

In [4]:
observation

{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<input name="header.search_box.search_input" type="text" value="test">',
 'clickables': ['header.search_box.search_button',
  'header.minicart.view_cart',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.view_product',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.add_to_cart',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.view_product',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.add_to_cart',
  'product_showcases._belle_of_the_ball_princess_sprinkle_mix_wedding_colorful_sprinkles_cake_cupcake_cookie_sprinkles_ice_cream_candy_sprinkles_yellow_gold_red_royal_red_rose_icing_flowers_decorating_sprinkles_8oz_.view_product',
  'product_

In [None]:
observation

{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<html><head><title>One Stop Market</title></head><body><div name="header"><form name="header.search_box"><input name="header.search_box.search_input" type="text" value="test"><button name="header.search_box.search_button" title="Search" type="submit"> Search </button></form><div name="header.minicart"><a href="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/checkout/cart/" name="header.minicart.view_cart">Go to cart<span> </span></a></div></div><div name="product_showcases">Product Showcases<div class="product-item-info" name="product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_"><img alt="Image" src="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08PCSHBXY.0.jpg"><div><span>Rating: 20%</span><a href="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/pre-baked-ginge

In [None]:
env.browser.type_and_submit("header.search_box.search_input", "rain coat")

In [None]:
env.step(
    '{"type": "type_and_submit", "name": "header.search_box.search_input", "text": "waterproof coat"}'
)

In [3]:
html = dominate.tags.html()
with html:
    card = dominate.tags.div("card", cls="product-list")
    title = dominate.tags.title("title")
    with card:
        for i in range(10):
            with dominate.tags.div("card", cls="product-item-info"):
                title = dominate.tags.title("title")
                title.text = "test"
                img = dominate.tags.img()
                img.attributes["src"] = f"https://via.placeholder.com/150x150?text={i}"
                price = dominate.tags.div("price")
                price.text = "$100"
                rating = dominate.tags.div("rating")
                rating.text = "4.5"
                rating.attributes["class"] = "fa fa-star"
html2 = dominate.tags.html()
with html2:
    card2 = dominate.tags.div("card", cls="product-list")
    title = dominate.tags.title("title2")
    with card2:
        for i in range(10):
            with dominate.tags.div("card", cls="product-item-info"):
                title = dominate.tags.title("title")
                title.text = "test1"
                img = dominate.tags.img()
                img.attributes["src"] = f"https://via.placeholder.com/150x150?text={i}"
                price = dominate.tags.div("price")
                price.text = "$100"
                rating = dominate.tags.div("rating")
                rating.text = "4.5"
                rating.attributes["class"] = "fa fa-star"

In [None]:
print(node_to_selector(tree_diff(html, html2)[0]))
print(node_to_selector(tree_diff(html, html2)[1]))

In [4]:
html.children[1]

<dominate.tags.title at 12b08ec10: 0 attributes, 1 child>

In [6]:
ul = dominate.tags.ul()
with ul:
    for i in range(10):
        li = dominate.tags.li(f"test{i}")

In [11]:
node_to_selector(ul.children[0])

'ul > li'

In [10]:
def node_to_selector(node: dominate.tags.html_tag):
    selector = getattr(node, "tag_name", type(node).__name__)
    if selector[-1] == "_":
        selector = selector[:-1]
    if "id" in node.attributes:
        selector += f"#{node['id']}"
    if "class" in node.attributes:
        for _cls in node["class"].split(" "):
            selector += f".{_cls}"
    if "name" in node.attributes:
        selector += f"[name='{node['name']}']"
    if node.parent is None:
        return selector
    return node_to_selector(node.parent) + " > " + selector

In [14]:
ul.children[0].__dict__

{'attributes': {},
 'children': ['test0'],
 'parent': <dominate.tags.ul at 12b45f970: 0 attributes, 10 children>,
 'is_inline': False,
 'is_pretty': True,
 '_ctx': frame(tag=<dominate.tags.ul at 12b45f970: 0 attributes, 10 children>, items=[<dominate.tags.li at 12b45f2e0: 0 attributes, 1 child>, <dominate.tags.li at 12b45f400: 0 attributes, 1 child>, <dominate.tags.li at 12b45f730: 0 attributes, 1 child>, <dominate.tags.li at 12b475670: 0 attributes, 1 child>, <dominate.tags.li at 12b475400: 0 attributes, 1 child>, <dominate.tags.li at 12b475d60: 0 attributes, 1 child>, <dominate.tags.li at 12b475970: 0 attributes, 1 child>, <dominate.tags.li at 12b475100: 0 attributes, 1 child>, <dominate.tags.li at 12b2b9ee0: 0 attributes, 1 child>, <dominate.tags.li at 12b37ff10: 0 attributes, 1 child>], used=set())}

In [None]:
node_to_selector(card.children[1])

In [3]:
driver.get("http://flights.google.com/")
driver.implicitly_wait(0)  # seconds


In [25]:
driver.find_element(
    By.CSS_SELECTOR,
    "#i23 > div.e5F5td.BGeFcf > div > div > div.cQnuXe.k0gFV > div > div > input",
).click()

In [1]:
ActionChains(driver).send_keys("boston").send_keys(Keys.ENTER).perform()

NameError: name 'ActionChains' is not defined

In [2]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
import simulated_web_agent

if "DISPLAY" in os.environ:
    del os.environ["DISPLAY"]


In [5]:
# ow10 > div.cQnuXe.k0gFV > div > div > input
browser = Browser("http://flights.google.com/", headless=False, recipes=recipes)

In [4]:
recipes = [
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.f8Ucw > div > div.Eo39gc",
        "match_text": "Flights",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {
                        "selector": "div.SS6Dqf.POQx1c",
                        "children": [
                            {"selector": "h1", "add_text": True},
                            {
                                "selector": "div.TQYpgc.gInvKb > div > div",
                                "name": "trip_type",
                                "children": [
                                    {
                                        "selector": "div:nth-child(1)",
                                        "add_text": True,
                                        "text_format": "Current trip type: {}",
                                    },
                                    {
                                        "selector": "ul",
                                        "children": [
                                            {
                                                "selector": "li:not(:last-child)",
                                                "add_text": True,
                                                "clickable": True,
                                                "name": "from_text",
                                                "before_hook": "document.querySelector('div.VfPpkd-O1htCb.VfPpkd-O1htCb-OWXEXe-MFS4be.VfPpkd-O1htCb-OWXEXe-SfQLQb-M1Soyc-Bz112c.VfPpkd-O1htCb-OWXEXe-di8rgd-V67aGc.hqBSCb.RnXJS.PnyZyf.JDygMb.PtTbbc > div').click()",
                                            }
                                        ],
                                    },
                                ],
                            },
                            # todo: add fare type
                            {
                                "selector": "div.JQrP8b.PLrkBc > div > div > div",
                                "name": "fare_type",
                                "children": [
                                    {
                                        "selector": "div:nth-child(1)",
                                        "add_text": True,
                                        "text_format": "Current fare type: {}",
                                    }
                                ],
                            },
                            {
                                "selector": "#i23",
                                "name": "city_picker",
                                "children": [
                                    {
                                        "selector": "input[aria-label='Where from?'][aria-expanded='false']",
                                        "name": "from_city",
                                    },
                                    {
                                        "selector": "input[placeholder='Where to?'][aria-expanded='false']",
                                        "name": "to_city",
                                    },
                                ],
                            },
                            {
                                "selector": "div.bgJkKe.K0Tsu div.cQnuXe.k0gFV",
                                "name": "date_picker",
                                "children": [
                                    {
                                        "selector": "input[aria-label='Departure']",
                                        "name": "departure_date",
                                        "after_hook": "document.body.click()",
                                    },
                                    {
                                        "selector": "input[aria-label='Return']",
                                        "name": "return_date",
                                        "after_hook": "document.body.click()",
                                    },
                                ],
                            },
                            {
                                "selector": "button.VfPpkd-LgbsSe.VfPpkd-LgbsSe-OWXEXe-k8QpJ.VfPpkd-LgbsSe-OWXEXe-Bz112c-M1Soyc.nCP5yc.AjY5Oe.LQeN7.TUT4y.zlyfOd",
                                "name": "search_button",
                                "clickable": True,
                                "add_text": True,
                                "override_attr": {
                                    "disabled": "return arguments[0].innerText !== 'Search'"
                                },
                            },
                        ],
                    }
                ],
            },
        ],
    },
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.PSZ8D.EA71Tc > div.FXkZv > div:nth-child(5) > h3",
        "match_text": "Best departing options",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {
                        "selector": "div[jsname='IWWDBc']",
                        "children": [
                            {"selector": "h3", "add_text": True, "direct_child": True},
                            {
                                "selector": "ul.Rk10dc",
                                "name": "best_departure_options",
                                "children": [
                                    {
                                        "selector": "li:not([data-ved])",
                                        "add_text": True,
                                        "clickable": True,
                                        "name": "from_nth_child",
                                    }
                                ],
                            },
                        ],
                    },
                    {
                        "selector": "div[jsname='YdtKid']",
                        "children": [
                            {"selector": "h3", "add_text": True, "direct_child": True},
                            {
                                "selector": "ul.Rk10dc",
                                "name": "other_departure_options",
                                "children": [
                                    {
                                        "selector": "li:not([data-ved])",
                                        "add_text": True,
                                        "clickable": True,
                                        "name": "from_nth_child",
                                    }
                                ],
                            },
                        ],
                    },
                ],
            },
        ],
    },
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.PSZ8D.EA71Tc > div.FXkZv > div:nth-child(4) > h3",
        "match_text": "Returning flights",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {"selector": "h3", "add_text": True, "direct_child": True},
                    {
                        "selector": "ul.Rk10dc",
                        "name": "returning_options",
                        "children": [
                            {
                                "selector": "li:not([data-ved])",
                                "add_text": True,
                                "clickable": True,
                                "name": "from_nth_child",
                            }
                        ],
                    },
                ],
            },
        ],
    },
]

In [6]:
browser.recipes = recipes
print(browser.observe()["page"].render(pretty=True))

<html>
  <head>
    <title>Google Flights - Find Cheap Flight Options &amp; Track Prices</title>
  </head>
  <body>
    <div aria-label="Flight" role="search">
      <h1>Flight search</h1>
      <div name="trip_type">
        <div role="combobox">Current trip type: Round trip</div>
        <ul aria-label="Select your ticket type." role="listbox">
          <li aria-selected="true" name="trip_type.round_trip" role="option">Round trip</li>
          <li aria-selected="false" name="trip_type.one_way" role="option">One way</li>
        </ul>
      </div>
      <div name="fare_type">
        <div role="combobox">Current fare type: Economy</div>
      </div>
      <div name="city_picker">
        <input aria-label="Where from?" name="city_picker.from_city" role="combobox" type="text" value="Boston">
        <input aria-label="Where to? " name="city_picker.to_city" role="combobox" type="text" value="">
      </div>
      <div name="date_picker">
        <input aria-label="Departure" name="dat

In [5]:
browser.type_and_submit("date_picker.departure_date", "sep 24 2024")
browser.observe()
browser.type_and_submit("date_picker.return_date", "oct 24 2024")
browser.observe()
browser.type_and_submit("city_picker.from_city", "boston")
browser.type_and_submit("city_picker.to_city", "new york")

In [12]:
browser.click("search_button")

In [8]:
browser.type_and_submit("city_picker.to_city", "new york")


In [16]:
browser.driver.execute_script(
    """
console.log(arguments[0].getBoundingClientRect());
rect = arguments[0].getBoundingClientRect();
div = document.createElement('div');
div.style.position = 'fixed';
div.style.top = rect.top + 'px';
div.style.left = rect.left + 'px';
div.style.width = rect.width + 'px';
div.style.height = rect.height + 'px';
div.style.border = '3px solid #79ccd7';
div.style.outline_offset = '3px';
div.style.zIndex = '10000';
document.body.appendChild(div);
document.highlightedElement = div;

""",
    browser.clickables["trip_type.round_trip"],
)


In [28]:
browser.driver.execute_script(
    "console.log(arguments)", browser.inputs["city_picker.to_city"]
)

In [28]:
browser.click("returning_options.0")

In [7]:
env = gym.make(
    "SeleniumEnv-v0",
    start_url = "https://www.google.com/flights",
    headless=False,
    pretty=True,
    recipes=recipes,
)
env.reset()
env.step(
    json.dumps(
        [
            # {
            #     "type": "type",
            #     "name": "city_picker.to_city",
            #     "text": "New York",
            #     "description": "Typing 'New York' as the arrival city in the destination input field.",
            # },
            {
                "type": "type",
                "name": "date_picker.departure_date",
                "text": "10/10/2024",
                "description": "Typing '10/10/2024' as the departure date in the departure date input field.",
            },
            {
                "type": "type",
                "name": "date_picker.return_date",
                "text": "10/15/2024",
                "description": "Typing '10/15/2024' as the return date in the return date input field.",
            },
        ]
    )
)

  logger.deprecation(


In [3]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
from simulated_web_agent.agent.gpt import chat
import simulated_web_agent

if "DISPLAY" in os.environ:
    del os.environ["DISPLAY"]

logging.basicConfig()
loggers = [
    logging.getLogger(name)
    for name in logging.root.manager.loggerDict
    if name.startswith("simulated_web_agent")
]
for logger in loggers:
    logger.setLevel(logging.INFO)

In [66]:
nav = {
    "selector": "#nav-search-bar-form",
    "children": [
        {
            "selector": "input#twotabsearchtextbox",
            "name": "search_input",
        },
        {
            "selector": "#nav-search-submit-button",
            "clickable": True,
            "name": "search_button",
        },
    ],
}
refinement_option = [
    {
        "selector": "span.a-size-base.a-color-base.puis-bold-weight-text",
        "add_text": True,
        "class": "refinement-title",
    },
    {
        "selector": "span.a-declarative > span > li",
        "add_text": True,
        "name": "from_text",
        "clickable": True,
        "children": [{"selector": "input[type='checkbox']"}],
    },
]
recipes = [
    {
        "match": "/",
        "match_method": "url",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [nav],
            },
        ],
    },
    {
        "match": "/s",
        "match_method": "url",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    nav,
                    {
                        "selector": "div.s-main-slot.s-result-list.s-search-results",
                        "name": "search_results",
                        "children": [
                            {
                                "selector": 'div[data-component-type="s-search-result"]',
                                "text_selector": "span.a-size-base-plus.a-color-base.a-text-normal",
                                "name": "from_text",
                                "children": [
                                    {
                                        "selector": "div[data-cy='title-recipe'] a",
                                        "add_text": True,
                                        "class": "product-name",
                                        "clickable": True,
                                        "name": "view_product",
                                    },
                                    {
                                        "selector": "div[data-cy='reviews-block']",
                                        "class": "product-review",
                                        "children": [
                                            {
                                                # .a-icon-alt
                                                "selector": "span.a-icon-alt",
                                                "add_text": True,
                                                "class": "product-rating",
                                            },
                                            # document.querySelector('[data-component-type="s-search-result"]').querySelector(".a-size-base.s-underline-text")
                                            {
                                                "selector": "span.a-size-base.s-underline-text",
                                                "add_text": True,
                                                "text_format": "{} reviews",
                                                "class": "product-rating-count",
                                            },
                                        ],
                                    },
                                    {
                                        # offscreen
                                        "selector": "div[data-cy='price-recipe']",
                                        "class": "product-price",
                                        "children": [
                                            {
                                                "selector": "a.a-link-normal > span.a-price > span.a-offscreen",
                                                "add_text": True,
                                            },
                                        ],
                                    },
                                    {
                                        "selector": "div[data-cy='delivery-recipe']",
                                        "add_text": True,
                                        "class": "product-delivery",
                                    },
                                ],
                            }
                        ],
                    },
                    {
                        "selector": "#s-refinements",
                        "name": "refinements",
                        "children": [
                            {
                                "selector": "#primeRefinements",
                                "name": "prime_refinements",
                                "children": refinement_option,
                            },
                            {
                                "selector": "#deliveryRefinements",
                                "name": "delivery_refinements",
                                "children": refinement_option,
                            },
                            {
                                "selector": "#deliveryRelatedProgramsRefinements",
                                "name": "delivery_programs_refinements",
                                "children": refinement_option,
                            },
                            {
                                "selector": "#climatePledgeFriendlyRefinements",
                                "name": "climate_pledge_friendly_refinements",
                                "children": refinement_option,
                            },
                            {
                                "selector": "#departments",
                                "name": "departments",
                                "children": [
                                    {
                                        "selector": "li a",
                                        "add_text": True,
                                        "name": "from_text",
                                        "clickable": True,
                                    }
                                ],
                            },
                            {
                                "selector": "#reviewsRefinements",
                                "name": "reviews_refinements",
                                "children": [
                                    {
                                        "selector": "li a",
                                        "add_text": True,
                                        "name": "from_text",
                                        "clickable": True,
                                    }
                                ],
                            },
                            # brandsRefinements
                            {
                                "selector": "#brandsRefinements",
                                "name": "brands_refinements",
                                "children": refinement_option,
                            },
                        ],
                    },
                    {
                        "selector": "span.s-pagination-strip",
                        "children": [
                            {
                                "selector": ".s-pagination-item",
                                "add_text": True,
                                "name": "from_text",
                                "clickable": True,
                            }
                        ],
                    },
                ],
            },
        ],
    },
    {"match": "#add-to-cart-button",
     "match_text": "",
     "terminate": "return true",
     "terminate_callback": "return true",
     "selector": "html",},
]


In [4]:
browser.recipes = recipes
print(browser.observe()["page"].render(pretty=True))

NameError: name 'recipes' is not defined

In [65]:
# browser.type_and_submit("search_input", "rain coat")
# browser.click("refinements.departments._any_department_")
# browser.click("refinements.departments._womens_clothing_")
browser.click(
    "search_results.anyoo_hood_rain_poncho_waterproof_lightweight_raincoat_for_men_women_adult_with_pocket_for_hiking_camping_outdoor_activities.view_product"
)


INFO:simulated_web_agent.executor.env:highlight end


init


INFO:simulated_web_agent.executor.env:sleep end


In [57]:
def solve_captcha(browser: Browser):
    image = browser.driver.find_element(
        By.CSS_SELECTOR,
        "body > div > div.a-row.a-spacing-double-large > div.a-section > div > div > form > div.a-row.a-spacing-large > div > div > div.a-row.a-text-center > img",
    ).get_attribute("src")
    resp = chat(
        [
            {
                "role": "system",
                "content": "You are an OCR expert designed to solve CAPTCHAs. You will respond in a single JSON format: {'text': 'The text in the image'}",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What’s in this image?"},
                    {"type": "image_url", "image_url": {"url": image}},
                ],
            },
        ],
        response_format={"type": "json_object"},
    )
    text = json.loads(resp.choices[0].message.content)["text"]
    input_element = browser.driver.find_element(By.CSS_SELECTOR, "#captchacharacters")
    # input_element.send_keys(text)
    # input_element.send_keys(Keys.ENTER)
    for keys in text:
        input_element.send_keys(keys)
        time.sleep(0.2)
    input_element.send_keys(Keys.ENTER)
    time.sleep(1)
    return
# ow10 > div.cQnuXe.k0gFV > div > div > input
browser = Browser("http://amazon.com/", headless=False, recipes=recipes)


In [58]:
solve_captcha(browser)


In [5]:
browser.observe()

INFO:simulated_web_agent.executor.env:OBSERVING


{'page': <dominate.tags.html at 107ddbfa0: 0 attributes, 3 children>,
 'diff_selector': '',
 'url': 'https://www.amazon.com/',
 'clickables': [],
 'inputs': [],
 'ended': False}

In [6]:
browser.driver.current_url

'https://www.amazon.com/'

In [10]:
urllib.parse.urlparse(browser.driver.current_url).path


'/'

In [1]:
import aioboto3, json
from dotenv import load_dotenv

load_dotenv()
from simulated_web_agent.agent.gpt import load_prompt

In [3]:
# Use the ListFoundationModels API to show the models that are available in your region.
import aioboto3

# Create an &BR; client in the &region-us-east-1; Region.
# client = aioboto3.client(service_name="bedrock-runtime", region_name="us-east-1")

# bedrock.list_foundation_models()
prompt = load_prompt("perceive")


In [12]:


session = aioboto3.Session()
async with session.client(
    "bedrock-runtime", region_name="us-east-1"
) as client:

# Invoke Claude 3 with the text prompt
    model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"

    # try:
    response = await client.invoke_model(
        modelId=model_id,
        body=json.dumps(
            {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 5000,
                "system": prompt,
                "messages": [
                    
                ],
            }
        ),
    )

    # Process and print the response
    result = json.loads(await response.get("body").read())

In [13]:
result

{'id': 'msg_bdrk_018TMEmHvF9nkTTegRSPvEmK',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-5-sonnet-20240620',
 'content': [{'type': 'text',
   'text': '{\n    "observations": [\n        "There is a header section containing a search box. The search box consists of an input field with no visible placeholder text and a \'Search\' button next to it.",\n        "Below the header, there\'s a section titled \'Product Showcases\' containing multiple product listings.",\n        "The first product displayed is \'Belle Of The Ball Princess Sprinkle Mix\'. It features an image of the product, a rating of 63%, and a link to 12 reviews. The full product name includes details like \'Wedding Colorful Sprinkles, Cake Cupcake Cookie Sprinkles, Ice Cream Candy Sprinkles, Yellow Gold Red Royal Red Rose Icing Flowers Decorating Sprinkles, 8OZ\'. The price is listed as $23.50.",\n        "The second product is \'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz.

In [24]:
# Invoke Claude 3 with the text prompt
model_id = 'cohere.embed-english-v3'

# try:
response = client.invoke_model(
    modelId=model_id,
    body=json.dumps(
        {
            "texts": ["random string", "another random string", "hello world"],
            "input_type": "search_document",
            "truncate": "START",
        }
    ),
)

# Process and print the response
result = json.loads(response.get("body").read())


In [27]:
import numpy as np
d = np.array(result["embeddings"])

In [29]:
d.shape

(3, 1024)

In [33]:
(d[0] * d[1]).sum()

np.float64(0.8797620779780901)

In [34]:
(d[1] * d[2]).sum()

np.float64(0.35646261280626723)

In [11]:
import json
from simulated_web_agent.agent.gpt import load_prompt, chat
from simulated_web_agent.agent.agent import Agent
import simulated_web_agent.agent.context
from pathlib import Path

In [15]:
env_trace = open(
    "/Users/yuxuanlu/code/simulated_web_agent/runs/2024-09-20_14:34:55_8edd/env_trace.txt"
).readlines()[1]
env_trace = json.loads(env_trace)
simulated_web_agent.agent.context.run_path = Path(
    "/Users/yuxuanlu/code/simulated_web_agent/runs/2024-09-20_14:34:55_8edd"
)

In [6]:
agent = Agent("", "")

In [16]:
result = await agent.perceive(env_trace)

In [19]:
agent.format_memories(agent.memory.memories)

["timestamp: 0; kind: observation; importance: N/A, content: The page displays a search results page for the query 'Columbia jacket'. At the top, there is a search form with an input field for entering the search query and a 'Go' button to submit the search.",
 'timestamp: 0; kind: observation; importance: N/A, content: Below the search form, there is a list of search results for Columbia jackets. Each result is displayed as a product card with an image, product name, and price information.',
 "timestamp: 0; kind: observation; importance: N/A, content: The product cards include options to view the product details, such as 'View product' buttons.",
 'timestamp: 0; kind: observation; importance: N/A, content: On the left side of the page, there are various refinement options to filter the search results, including categories, brands, and customer review ratings.',
 'timestamp: 0; kind: observation; importance: N/A, content: At the bottom of the page, there are pagination controls to navi

In [1]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
from simulated_web_agent.agent.gpt import chat
from simulated_web_agent.main.main import solve_captcha
from simulated_web_agent.agent import context
import simulated_web_agent
from pathlib import Path

context.run_path.set(Path("/Users/yuxuanlu/code/simulated_web_agent/temp_run"))

if "DISPLAY" in os.environ:
    del os.environ["DISPLAY"]

logging.basicConfig()
loggers = [
    logging.getLogger(name)
    for name in logging.root.manager.loggerDict
    if name.startswith("simulated_web_agent")
]
for logger in loggers:
    logger.setLevel(logging.INFO)
from simulated_web_agent.executor.amazon_recipes import recipes
browser = Browser("http://amazon.com/", headless=False, recipes=recipes)
solve_captcha(browser)
browser.driver.current_url




{"text": "CBBHAL"}
{"text": "UMUXMH"}
{"text": "MFULCR"}


'https://www.amazon.com/'

In [2]:
env = browser.observe()

INFO:simulated_web_agent.executor.env:OBSERVING


In [3]:
browser.type_and_submit("search_input", "women's jacket")

INFO:simulated_web_agent.executor.env:highlight end


init


INFO:simulated_web_agent.executor.env:sleep end
INFO:simulated_web_agent.executor.env:highlight end


init


INFO:simulated_web_agent.executor.env:sleep end


In [4]:
env = browser.observe()


INFO:simulated_web_agent.executor.env:OBSERVING


In [5]:
browser.click(
    "search_results.2_pack_reusable_rain_ponchos_for_adults_hooded_raincoats_for_women_men_with_drawstring.view_product"
)


INFO:simulated_web_agent.executor.env:highlight end


init


INFO:simulated_web_agent.executor.env:sleep end


In [6]:
browser.recipes = recipes

In [9]:
from bs4 import BeautifulSoup

print(BeautifulSoup(browser.observe()["page"], "html.parser").prettify())


INFO:simulated_web_agent.executor.env:OBSERVING


<html>
 <head>
  <title>
   Amazon.com : rain coat
  </title>
 </head>
 <body>
  <form role="search">
   <input aria-label="Search Amazon" name="search_input" type="text" value="rain coat"/>
   <input name="search_button" type="submit" value="Go"/>
  </form>
  <div name="refinements">
   <div name="refinements.popular_shopping_ideas">
    <span class="refinement-title">
     Popular Shopping Ideas
    </span>
    <li name="refinements.popular_shopping_ideas.long_sleeve" role="checkbox">
     Long Sleeve
    </li>
    <li name="refinements.popular_shopping_ideas.waterproof" role="checkbox">
     Waterproof
    </li>
    <li name="refinements.popular_shopping_ideas.fleece" role="checkbox">
     Fleece
    </li>
    <li name="refinements.popular_shopping_ideas.travel" role="checkbox">
     Travel
    </li>
   </div>
   <div name="refinements.eligible_for_free_shipping">
    <span class="refinement-title">
     Eligible for Free Shipping
    </span>
    <li aria-label="Free Shipping by Ama

In [10]:
browser.click("add_to_cart.buy_now")

INFO:simulated_web_agent.executor.env:highlight end


init


INFO:simulated_web_agent.executor.env:sleep end


before_hook
before_hook result {'asin': 'B0C6LND9Z7', 'options': {'Color': 'White', 'Size': 'One Size'}, 'price': '$9\n.\n99', 'title': 'OFFITECTURE 2-Pack Reusable Rain Ponchos for Adults - Hooded Raincoats for Women/Men with Drawstring'}


In [11]:
browser.driver.execute_script("""
const title = document.querySelector("#title").innerText
const price = document.querySelector("#apex_desktop_newAccordionRow #corePriceDisplay_desktop_feature_div span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay").innerText
const options = Array.from(document.querySelectorAll("#twister div.a-row:has(label.a-form-label):has(span.selection)")).map(a => ({label: a.querySelector("label.a-form-label").innerText, value: a.querySelector("span.selection").innerText}))
const options_dict = {}
for (const option of options) {
    options_dict[option["label"].replace(": ", "")] = option["value"]
}
const asin = document.querySelector("input#ASIN").value
return {title, price, options: options_dict, asin}
""")

JavascriptException: Message: javascript error: Cannot read properties of null (reading 'innerText')
  (Session info: chrome=129.0.6668.90)
Stacktrace:
0   chromedriver                        0x0000000104913998 cxxbridge1$str$ptr + 1887096
1   chromedriver                        0x000000010490be00 cxxbridge1$str$ptr + 1855456
2   chromedriver                        0x0000000104510be0 cxxbridge1$string$len + 89508
3   chromedriver                        0x0000000104515c90 cxxbridge1$string$len + 110164
4   chromedriver                        0x0000000104517680 cxxbridge1$string$len + 116804
5   chromedriver                        0x000000010458f0cc cxxbridge1$string$len + 606864
6   chromedriver                        0x000000010458e4a4 cxxbridge1$string$len + 603752
7   chromedriver                        0x0000000104549a08 cxxbridge1$string$len + 322508
8   chromedriver                        0x000000010454a66c cxxbridge1$string$len + 325680
9   chromedriver                        0x00000001048da058 cxxbridge1$str$ptr + 1651256
10  chromedriver                        0x00000001048de98c cxxbridge1$str$ptr + 1669996
11  chromedriver                        0x00000001048bf1ec cxxbridge1$str$ptr + 1541068
12  chromedriver                        0x00000001048df25c cxxbridge1$str$ptr + 1672252
13  chromedriver                        0x00000001048b0800 cxxbridge1$str$ptr + 1481184
14  chromedriver                        0x00000001048fd0f8 cxxbridge1$str$ptr + 1794776
15  chromedriver                        0x00000001048fd274 cxxbridge1$str$ptr + 1795156
16  chromedriver                        0x000000010490ba9c cxxbridge1$str$ptr + 1854588
17  libsystem_pthread.dylib             0x000000018ac6df94 _pthread_start + 136
18  libsystem_pthread.dylib             0x000000018ac68d34 thread_start + 8


In [8]:
browser.back()

In [None]:
browser.inputs_recipes

{'add_to_cart.buy_now': {'add_text': True,
  'before_hook': '\nconst title = document.querySelector("#title").innerText\nconst price = document.querySelector("#apex_desktop_newAccordionRow #corePriceDisplay_desktop_feature_div span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay").innerText\nconst options = Array.from(document.querySelectorAll("#twister div.a-row:has(label.a-form-label):has(span.selection)")).map(a => ({label: a.querySelector("label.a-form-label").innerText, value: a.querySelector("span.selection").innerText}))\nconst options_dict = {}\nfor (const option of options) {\n    options_dict[option["label"].replace(": ", "")] = option["value"]\n}\nconst asin = document.querySelector("input#ASIN").value\nconsole.log({title, price, options: options_dict, asin})\nreturn {title, price, options: options_dict, asin}\n',
  'class': 'product-buy-now',
  'clickable': True,
  'name': 'buy_now',
  'selector': '#buy-now-button'},
 'search_button': {'clickable': True,
 

In [12]:
from simulated_web_agent.agent import context
context.browser_context.get()

{'asin': 'B0C6LND9Z7',
 'options': {'Color': 'White', 'Size': 'One Size'},
 'price': '$9\n.\n99',
 'title': 'OFFITECTURE 2-Pack Reusable Rain Ponchos for Adults - Hooded Raincoats for Women/Men with Drawstring'}

In [13]:
env = browser.observe()

INFO:simulated_web_agent.executor.env:OBSERVING


In [14]:
env

{'page': 'TERMINATE',
 'diff_selector': '',
 'url': 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=amazon_checkout_us&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2Fgp%2Fcheckoutportal%2Fenter-checkout.html%3Fie%3DUTF8%26asin%3DB0C6LND9Z7%26buyNow%3D1%26cartCustomerID%3D0%26fromSignIn%3D1%26isGift%3D0%26offeringID%3DMpyDgXRVMLKIDAE131BACLh8pFejBJQ2ouOWxAi6WuPDulAflaepLyvkrKTejj73T07Y0hyz%25252F%25252F5kYx4FibtESoD9ewIxopu6qPuPveTyCTjx8dky92%25252BKpRcaSlE3pWG1OmcMC7cWCesHz4qbISV1H6sB0ce%25252B%25252FPTZooSbU9O5T6PrxVtYuaGfVg1GTZfRWe%25252FN%26purchaseInputs%3DHASH%25280x7f0e01bccc00%2529%26quantity%3D1%26sessionID%3D132-9885804-0941