In [2]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
import simulated_web_agent

# Make sure DISPLAY is not set for this process; nothing happens when it is
# already absent.
os.environ.pop("DISPLAY", None)


In [None]:
from pathlib import Path

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("start-maximized")
options.add_argument("--remote-debugging-port=9222")
driver = webdriver.Chrome(options=options)

The chromedriver version (127.0.6533.88) detected in PATH at /opt/homebrew/bin/chromedriver might not be compatible with the detected chrome version (128.0.6613.84); currently, chromedriver 128.0.6613.84 is recommended for chrome 128.*, so it is advised to delete the driver in PATH and retry


In [3]:
driver.get("http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/")
driver.implicitly_wait(0)  # seconds

In [12]:
print(driver.execute_script("1"))

None


In [None]:
import dominate.tags


clickables = {}
inputs = {}


def set_attribute(element: "Element", attribute, value):
    """Set an HTML attribute on a live page element through JavaScript.

    Args:
        element: Selenium element whose DOM node should be modified.
        attribute: Name of the attribute to set.
        value: Value handed to ``setAttribute`` for that attribute.

    The script is executed on ``element.parent`` (the WebDriver session the
    element was located with) instead of a notebook-global ``driver``, so the
    helper works for elements of any session and does not depend on which
    cells happen to have been run.
    """
    element.parent.execute_script(
        "arguments[0].setAttribute(arguments[1], arguments[2]);",
        element,
        attribute,
        value,
    )


def register_clickable(element: Element, name: str):
    """Remember ``element`` as a clickable called ``name`` and tag its DOM node.

    The element is stored in the module-level ``clickables`` mapping first;
    afterwards ``name`` is written to the node's ``data-clickable-id``
    attribute.
    """
    clickables.update({name: element})
    set_attribute(element=element, attribute="data-clickable-id", value=name)


def register_input(element: Element, name: str):
    """Remember ``element`` as an input called ``name`` and tag its DOM node.

    The element is stored in the module-level ``inputs`` mapping first;
    afterwards ``name`` is written to the node's ``data-input-id`` attribute.
    """
    inputs.update({name: element})
    set_attribute(element=element, attribute="data-input-id", value=name)


def get_text(element):
    """Return the text of ``element``, or ``""`` when no source yields any.

    Sources are tried in order and the first non-empty one wins:

    1. the ``innerText`` attribute,
    2. Selenium's ``element.text`` (which sometimes comes back empty),
    3. the ``textContent`` attribute.

    ``innerText`` is preferred; the other two are fallbacks for the case
    where it is empty or missing.

    Args:
        element: Object exposing ``.text`` and ``.get_attribute(name)``.

    Returns:
        The first non-empty text found, otherwise the empty string.
    """
    text = element.get_attribute("innerText")
    if not text:
        text = element.text
    if not text:
        text = element.get_attribute("textContent")
    # Normalise "nothing found" (None or "") to an empty string for callers.
    return text or ""


actions = {}


def process(element: Element, recipe, parent_name=""):
    """Convert a live Selenium element into a dominate node as described by ``recipe``.

    Recipe keys read by this function:

    * ``text_selector``: CSS selector (relative to ``element``) of the node
      whose text should be used instead of the element's own text.
    * ``add_text``: when truthy, the extracted text is passed to the dominate
      tag constructor; otherwise an empty string is passed.
    * ``text_format``: ``str.format`` template applied to the extracted text.
    * ``tag_name``: overrides the tag taken from ``element.tag_name``.
    * ``name``: logical name stored in the node's ``name`` attribute, prefixed
      with ``parent_name`` and a dot when ``parent_name`` is non-empty. The
      special value ``"from_text"`` derives the name from the extracted text.
    * ``clickable``: when truthy, the element is registered through
      ``register_clickable`` under the node's name.
    * ``class`` / ``id``: copied from the recipe onto the node when truthy.
    * ``children``: list of child recipes; each needs ``selector`` and may set
      ``direct_child`` to restrict matching to ``:scope > selector``.

    Args:
        element: Selenium element to convert.
        recipe: Mapping with the keys described above.
        parent_name: Dotted name of the nearest named ancestor, ``""`` if none.

    Returns:
        The dominate node built for ``element``.

    Raises:
        Exception: if ``name`` is ``"from_text"`` but no text was extracted, or
            if ``clickable`` is set while the recipe has no ``name`` key.
        AssertionError: if a radio ``input`` is processed with a recipe that is
            not marked ``clickable``.
    """
    elementText = ""
    if "text_selector" in recipe:
        text_element = element.find_element(By.CSS_SELECTOR, recipe["text_selector"])
        elementText = get_text(text_element)
    else:
        elementText = get_text(element)
    # Retry on the element itself when nothing was found; this only differs
    # from the lookup above when ``text_selector`` was used.
    if "add_text" in recipe and recipe["add_text"]:
        if not elementText:
            elementText = get_text(element)
    if "text_format" in recipe and recipe["text_format"]:
        elementText = recipe["text_format"].format(elementText)

    tag_name = element.tag_name
    if "tag_name" in recipe:
        tag_name = recipe["tag_name"]
    # Tags listed in ``dominate.tags.underscored_classes`` are looked up with a
    # trailing underscore appended to the tag name.
    if tag_name in dominate.tags.underscored_classes:
        node = getattr(dominate.tags, tag_name + "_")(
            elementText if "add_text" in recipe and recipe["add_text"] else ""
        )
    else:
        node = getattr(dominate.tags, tag_name)(
            elementText if "add_text" in recipe and recipe["add_text"] else ""
        )

    if "name" in recipe and recipe["name"]:
        if recipe["name"] == "from_text":
            if elementText:
                # Derive the name from the text: lower-case it, turn spaces
                # into underscores, then drop the listed punctuation.
                element_name = elementText.lower().replace(" ", "_")
                for special_char in "[]{}()<>.:;|!@#$%^&*+-=,?/\\\"'":
                    element_name = element_name.replace(special_char, "")
                node["name"] = (parent_name + "." if parent_name else "") + element_name
                parent_name = node["name"]
            else:
                # Raised whenever no text could be extracted, whether or not
                # ``add_text`` is set in the recipe.
                raise Exception("name from_text must have add_text")
        else:
            node["name"] = (parent_name + "." if parent_name else "") + recipe["name"]
        # Children of a named node are prefixed with this node's full name.
        parent_name = node["name"]
    if "clickable" in recipe and recipe["clickable"]:
        if "name" not in recipe:
            raise Exception("clickable element must have a name")
        register_clickable(element, node["name"])
    # Copy these DOM attributes onto the node when they have a truthy value.
    for key in ["alt", "src", "href", "title", "type", "value"]:
        value = element.get_dom_attribute(key)
        if value:
            node[key] = value
    # ``class`` and ``id`` come from the recipe, not from the live element.
    for key in ["class", "id"]:
        if key in recipe and recipe[key]:
            node[key] = recipe[key]
    # if 'radio' in recipe and recipe['radio']:
    #     if element.get_attribute('checked'):
    #         node.text += ' (selected)'
    if tag_name == "input":
        input_type = element.get_attribute("type")
        if input_type == "radio":
            if element.get_attribute("checked"):
                # NOTE(review): assumes the dominate node already has a ``text``
                # attribute that can be appended to -- TODO confirm.
                node.text += " (selected)"
            assert "clickable" in recipe and recipe["clickable"]
        elif input_type == "text":
            # Expose the current field value and make the field addressable
            # by name; this reads ``node["name"]``, so the recipe is expected
            # to have produced a name above.
            node["value"] = element.get_attribute("value")
            register_input(element, node["name"])
    if "children" in recipe and recipe["children"]:
        # The recursive results are not added explicitly; presumably dominate
        # attaches tags created inside ``with node:`` to ``node`` -- confirm.
        with node:
            for child in recipe["children"]:
                if "direct_child" in child and child["direct_child"]:
                    selector = ":scope > " + child["selector"]
                else:
                    selector = child["selector"]
                elements = element.find_elements(By.CSS_SELECTOR, selector)
                for child_element in elements:
                    process(child_element, child, parent_name)
    return node


In [None]:
print(process(driver.find_element(By.CSS_SELECTOR, recipe["selector"]), recipe))

In [None]:
clickables["header.search_box.search_button"].click()

In [None]:
print(
    get_text(
        driver.find_element(By.CSS_SELECTOR, ".header.content").find_element(
            By.CSS_SELECTOR, "button.action.search"
        )
    )
)

In [None]:
driver.current_url

In [None]:
title = (
    driver.find_element(By.CSS_SELECTOR, "html")
    .find_element(By.CSS_SELECTOR, "head")
    .find_element(By.CSS_SELECTOR, "title")
)

In [None]:
title.get_attribute("innerHTML")

In [None]:
type(title)

In [None]:
driver.execute_script(JS_BUILD_CSS_SELECTOR, title)

In [None]:
btn = driver.find_element(
    By.CSS_SELECTOR,
    "#maincontent > div.columns > div > div:nth-child(3) > div > div.block-content > div.products-grid.grid > ol > li:nth-child(3) > div > div > div.product-item-inner > div > div.actions-primary > form > button",
)


In [None]:
# Selenium's WebElement has no setProperty(); write the attribute through the
# JavaScript-based set_attribute helper so the get_attribute check can read it.
set_attribute(btn, "data-clickable-id", "123")

In [None]:
btn.get_attribute("data-clickable-id")

In [None]:
btn.get_attribute("src")

In [None]:
title = dominate.tags.title("title")

In [None]:
title.render(pretty=False)

In [None]:
title.text += " (selected)"

In [None]:
with title:
    dominate.tags.div("test")

In [1]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv
from simulated_web_agent.executor.env import Browser
import simulated_web_agent

# Make sure DISPLAY is not set for this process; nothing happens when it is
# already absent.
os.environ.pop("DISPLAY", None)

logging.basicConfig()
# env: SeleniumEnv = gym.make(
#     "SeleniumEnv-v0",
#     start_url="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/",
#     pretty=False,
#     headless=True,
# )
# observation0, info = env.reset()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
observation, _, _, _, _ = env.step(
    '{"type": "type", "name": "header.search_box.search_input", "text": "test"}'
)
observation

html > body html > body
html > body > div[name='header'] html > body > div[name='header']
html > body > div[name='header'] > form[name='header.search_box'] html > body > div[name='header'] > form[name='header.search_box']
html > body > div[name='header'] > form[name='header.search_box'] > input[name='header.search_box.search_input'] html > body > div[name='header'] > form[name='header.search_box'] > input[name='header.search_box.search_input']


  logger.warn(f"{pre} is not within the observation space.")


{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<input name="header.search_box.search_input" type="text" value="test">',
 'clickables': ['header.search_box.search_button',
  'header.minicart.view_cart',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.view_product',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.add_to_cart',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.view_product',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.add_to_cart',
  'product_showcases._belle_of_the_ball_princess_sprinkle_mix_wedding_colorful_sprinkles_cake_cupcake_cookie_sprinkles_ice_cream_candy_sprinkles_yellow_gold_red_royal_red_rose_icing_flowers_decorating_sprinkles_8oz_.view_product',
  'product_

In [4]:
observation

{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<input name="header.search_box.search_input" type="text" value="test">',
 'clickables': ['header.search_box.search_button',
  'header.minicart.view_cart',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.view_product',
  'product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_.add_to_cart',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.view_product',
  'product_showcases._v8_energy_healthy_energy_drink_steady_energy_from_black_and_green_tea_pomegranate_blueberry_8_ounce_can_pack_of_24_.add_to_cart',
  'product_showcases._belle_of_the_ball_princess_sprinkle_mix_wedding_colorful_sprinkles_cake_cupcake_cookie_sprinkles_ice_cream_candy_sprinkles_yellow_gold_red_royal_red_rose_icing_flowers_decorating_sprinkles_8oz_.view_product',
  'product_

In [None]:
observation

{'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
 'page': '<html><head><title>One Stop Market</title></head><body><div name="header"><form name="header.search_box"><input name="header.search_box.search_input" type="text" value="test"><button name="header.search_box.search_button" title="Search" type="submit"> Search </button></form><div name="header.minicart"><a href="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/checkout/cart/" name="header.minicart.view_cart">Go to cart<span> </span></a></div></div><div name="product_showcases">Product Showcases<div class="product-item-info" name="product_showcases._prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz_"><img alt="Image" src="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08PCSHBXY.0.jpg"><div><span>Rating: 20%</span><a href="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/pre-baked-ginge

In [None]:
env.browser.type_and_submit("header.search_box.search_input", "rain coat")

In [None]:
env.step(
    '{"type": "type_and_submit", "name": "header.search_box.search_input", "text": "waterproof coat"}'
)

In [3]:
# Build two near-identical dominate trees (``html`` and ``html2``) to use as
# inputs for tree_diff / node_to_selector experiments. They differ in the
# top-level title text ("title" vs "title2") and in the ``.text`` value
# assigned to each per-item title ("test" vs "test1").
html = dominate.tags.html()
with html:
    # Tags created inside a ``with`` block end up as children of that tag.
    card = dominate.tags.div("card", cls="product-list")
    title = dominate.tags.title("title")
    with card:
        # Ten product-like items, each with a title, image, price and rating.
        for i in range(10):
            with dominate.tags.div("card", cls="product-item-info"):
                title = dominate.tags.title("title")
                # NOTE(review): ``.text`` is set as a plain attribute here;
                # whether dominate uses it when rendering is not shown -- confirm.
                title.text = "test"
                img = dominate.tags.img()
                img.attributes["src"] = f"https://via.placeholder.com/150x150?text={i}"
                price = dominate.tags.div("price")
                price.text = "$100"
                rating = dominate.tags.div("rating")
                rating.text = "4.5"
                rating.attributes["class"] = "fa fa-star"
# Second tree: same shape, different title strings (see note at the top).
html2 = dominate.tags.html()
with html2:
    card2 = dominate.tags.div("card", cls="product-list")
    title = dominate.tags.title("title2")
    with card2:
        for i in range(10):
            with dominate.tags.div("card", cls="product-item-info"):
                title = dominate.tags.title("title")
                title.text = "test1"
                img = dominate.tags.img()
                img.attributes["src"] = f"https://via.placeholder.com/150x150?text={i}"
                price = dominate.tags.div("price")
                price.text = "$100"
                rating = dominate.tags.div("rating")
                rating.text = "4.5"
                rating.attributes["class"] = "fa fa-star"

In [None]:
print(node_to_selector(tree_diff(html, html2)[0]))
print(node_to_selector(tree_diff(html, html2)[1]))

In [4]:
html.children[1]

<dominate.tags.title at 12b08ec10: 0 attributes, 1 child>

In [6]:
ul = dominate.tags.ul()
with ul:
    for i in range(10):
        li = dominate.tags.li(f"test{i}")

In [11]:
node_to_selector(ul.children[0])

'ul > li'

In [10]:
def node_to_selector(node: "dominate.tags.html_tag"):
    """Build a CSS selector path from the root of the tree down to ``node``.

    Every level contributes ``tag#id.class1.class2[name='...']``, where each
    part is only emitted when the corresponding attribute is present, and the
    levels are joined with ``" > "``.

    The tag is ``node.tag_name`` when that attribute exists, otherwise the
    name of the node's class; in both cases one trailing underscore is
    removed. The ``class`` attribute is split on any run of whitespace so
    repeated, leading or trailing spaces never produce empty ``.`` segments
    (which would make the selector invalid).

    Args:
        node: Tree node exposing ``attributes``, ``parent`` and ``node[key]``.

    Returns:
        The selector string for ``node`` including all of its ancestors.
    """
    selector = getattr(node, "tag_name", type(node).__name__)
    if selector.endswith("_"):
        selector = selector[:-1]
    if "id" in node.attributes:
        selector += f"#{node['id']}"
    if "class" in node.attributes:
        # split() without arguments skips empty tokens from extra whitespace.
        selector += "".join(f".{class_name}" for class_name in node["class"].split())
    if "name" in node.attributes:
        selector += f"[name='{node['name']}']"
    if node.parent is None:
        return selector
    return node_to_selector(node.parent) + " > " + selector

In [14]:
ul.children[0].__dict__

{'attributes': {},
 'children': ['test0'],
 'parent': <dominate.tags.ul at 12b45f970: 0 attributes, 10 children>,
 'is_inline': False,
 'is_pretty': True,
 '_ctx': frame(tag=<dominate.tags.ul at 12b45f970: 0 attributes, 10 children>, items=[<dominate.tags.li at 12b45f2e0: 0 attributes, 1 child>, <dominate.tags.li at 12b45f400: 0 attributes, 1 child>, <dominate.tags.li at 12b45f730: 0 attributes, 1 child>, <dominate.tags.li at 12b475670: 0 attributes, 1 child>, <dominate.tags.li at 12b475400: 0 attributes, 1 child>, <dominate.tags.li at 12b475d60: 0 attributes, 1 child>, <dominate.tags.li at 12b475970: 0 attributes, 1 child>, <dominate.tags.li at 12b475100: 0 attributes, 1 child>, <dominate.tags.li at 12b2b9ee0: 0 attributes, 1 child>, <dominate.tags.li at 12b37ff10: 0 attributes, 1 child>], used=set())}

In [None]:
node_to_selector(card.children[1])

In [3]:
driver.get("http://flights.google.com/")
driver.implicitly_wait(0)  # seconds


In [25]:
driver.find_element(
    By.CSS_SELECTOR,
    "#i23 > div.e5F5td.BGeFcf > div > div > div.cQnuXe.k0gFV > div > div > input",
).click()

In [1]:
ActionChains(driver).send_keys("boston").send_keys(Keys.ENTER).perform()

NameError: name 'ActionChains' is not defined

In [1]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv, Browser
import simulated_web_agent

# Make sure DISPLAY is not set for this process; nothing happens when it is
# already absent.
os.environ.pop("DISPLAY", None)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# ow10 > div.cQnuXe.k0gFV > div > div > input
browser = Browser("http://flights.google.com/", headless=False, recipes=recipes)

In [2]:
recipes = [
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.f8Ucw > div > div.Eo39gc",
        "match_text": "Flights",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {
                        "selector": "div.SS6Dqf.POQx1c",
                        "children": [
                            {"selector": "h1", "add_text": True},
                            {
                                "selector": "div.TQYpgc.gInvKb > div > div",
                                "name": "trip_type",
                                "children": [
                                    {
                                        "selector": "div:nth-child(1)",
                                        "add_text": True,
                                        "text_format": "Current trip type: {}",
                                    },
                                    {
                                        "selector": "ul",
                                        "children": [
                                            {
                                                "selector": "li:not(:last-child)",
                                                "add_text": True,
                                                "clickable": True,
                                                "name": "from_text",
                                                "before_hook": "document.querySelector('div.VfPpkd-O1htCb.VfPpkd-O1htCb-OWXEXe-MFS4be.VfPpkd-O1htCb-OWXEXe-SfQLQb-M1Soyc-Bz112c.VfPpkd-O1htCb-OWXEXe-di8rgd-V67aGc.hqBSCb.RnXJS.PnyZyf.JDygMb.PtTbbc > div').click()",
                                            }
                                        ],
                                    },
                                ],
                            },
                            # todo: add fare type
                            {
                                "selector": "div.JQrP8b.PLrkBc > div > div > div",
                                "name": "fare_type",
                                "children": [
                                    {
                                        "selector": "div:nth-child(1)",
                                        "add_text": True,
                                        "text_format": "Current fare type: {}",
                                    }
                                ],
                            },
                            {
                                "selector": "#i23",
                                "name": "city_picker",
                                "children": [
                                    {
                                        "selector": "input[aria-label='Where from?'][aria-expanded='false']",
                                        "name": "from_city",
                                    },
                                    {
                                        "selector": "input[placeholder='Where to?'][aria-expanded='false']",
                                        "name": "to_city",
                                    },
                                ],
                            },
                            {
                                "selector": "div.bgJkKe.K0Tsu div.cQnuXe.k0gFV",
                                "name": "date_picker",
                                "children": [
                                    {
                                        "selector": "input[aria-label='Departure']",
                                        "name": "departure_date",
                                        "after_hook": "document.body.click()",
                                    },
                                    {
                                        "selector": "input[aria-label='Return']",
                                        "name": "return_date",
                                        "after_hook": "document.body.click()",
                                    },
                                ],
                            },
                            {
                                "selector": "button.VfPpkd-LgbsSe.VfPpkd-LgbsSe-OWXEXe-k8QpJ.VfPpkd-LgbsSe-OWXEXe-Bz112c-M1Soyc.nCP5yc.AjY5Oe.LQeN7.TUT4y.zlyfOd",
                                "name": "search_button",
                                "clickable": True,
                                "add_text": True,
                                "override_attr": {
                                    "disabled": "return arguments[0].innerText !== 'Search'"
                                },
                            },
                        ],
                    }
                ],
            },
        ],
    },
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.PSZ8D.EA71Tc > div.FXkZv > div:nth-child(5) > h3",
        "match_text": "Best departing options",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {
                        "selector": "div[jsname='IWWDBc']",
                        "children": [
                            {"selector": "h3", "add_text": True, "direct_child": True},
                            {
                                "selector": "ul.Rk10dc",
                                "name": "best_departure_options",
                                "children": [
                                    {
                                        "selector": "li:not([data-ved])",
                                        "add_text": True,
                                        "clickable": True,
                                        "name": "from_nth_child",
                                    }
                                ],
                            },
                        ],
                    },
                    {
                        "selector": "div[jsname='YdtKid']",
                        "children": [
                            {"selector": "h3", "add_text": True, "direct_child": True},
                            {
                                "selector": "ul.Rk10dc",
                                "name": "other_departure_options",
                                "children": [
                                    {
                                        "selector": "li:not([data-ved])",
                                        "add_text": True,
                                        "clickable": True,
                                        "name": "from_nth_child",
                                    }
                                ],
                            },
                        ],
                    },
                ],
            },
        ],
    },
    {
        "match": "#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div:nth-child(2) > c-wiz > div.cKvRXe > c-wiz > div.PSZ8D.EA71Tc > div.FXkZv > div:nth-child(4) > h3",
        "match_text": "Returning flights",
        "selector": "html",
        "children": [
            {"selector": "head", "children": [{"selector": "title", "add_text": True}]},
            {
                "selector": "body",
                "children": [
                    {"selector": "h3", "add_text": True, "direct_child": True},
                    {
                        "selector": "ul.Rk10dc",
                        "name": "returning_options",
                        "children": [
                            {
                                "selector": "li:not([data-ved])",
                                "add_text": True,
                                "clickable": True,
                                "name": "from_nth_child",
                            }
                        ],
                    },
                ],
            },
        ],
    },
]

In [4]:
browser.recipes = recipes
print(browser.observe()["page"].render(pretty=True))

<html>
  <head>
    <title>Google Flights - Find Cheap Flight Options &amp; Track Prices</title>
  </head>
  <body>
    <div aria-label="Flight" role="search">
      <h1>Flight search</h1>
      <div name="trip_type">
        <div role="combobox">Current trip type: Round trip</div>
        <ul aria-label="Select your ticket type." role="listbox">
          <li aria-selected="true" name="trip_type.round_trip" role="option">Round trip</li>
          <li aria-selected="false" name="trip_type.one_way" role="option">One way</li>
        </ul>
      </div>
      <div name="fare_type">
        <div role="combobox">Current fare type: Economy</div>
      </div>
      <div name="city_picker">
        <input aria-label="Where from?" name="city_picker.from_city" role="combobox" type="text" value="Boston">
        <input aria-label="Where to? " name="city_picker.to_city" role="combobox" type="text" value="">
      </div>
      <div name="date_picker">
        <input aria-label="Departure" name="dat

In [5]:
browser.type_and_submit("date_picker.departure_date", "sep 24 2024")
browser.observe()
browser.type_and_submit("date_picker.return_date", "oct 24 2024")
browser.observe()
browser.type_and_submit("city_picker.from_city", "boston")
browser.type_and_submit("city_picker.to_city", "new york")

In [12]:
browser.click("search_button")

In [8]:
browser.type_and_submit("city_picker.to_city", "new york")


In [16]:
# Overlay a fixed-position bordered <div> on top of the target element's
# bounding rectangle and keep a reference to it on
# ``document.highlightedElement``.
# ``rect`` and ``div`` are declared with ``const`` so they stay local to the
# injected script instead of leaking onto ``window``, and the style property
# is spelled ``outlineOffset`` (``outline_offset`` is not a style property,
# so assigning it had no effect).
browser.driver.execute_script(
    """
const rect = arguments[0].getBoundingClientRect();
console.log(rect);
const div = document.createElement('div');
div.style.position = 'fixed';
div.style.top = rect.top + 'px';
div.style.left = rect.left + 'px';
div.style.width = rect.width + 'px';
div.style.height = rect.height + 'px';
div.style.border = '3px solid #79ccd7';
div.style.outlineOffset = '3px';
div.style.zIndex = '10000';
document.body.appendChild(div);
document.highlightedElement = div;

""",
    browser.clickables["trip_type.round_trip"],
)


In [28]:
browser.driver.execute_script(
    "console.log(arguments)", browser.inputs["city_picker.to_city"]
)

In [28]:
browser.click("returning_options.0")

In [7]:
env = gym.make(
    "SeleniumEnv-v0",
    start_url = "https://www.google.com/flights",
    headless=False,
    pretty=True,
    recipes=recipes,
)
env.reset()
env.step(
    json.dumps(
        [
            # {
            #     "type": "type",
            #     "name": "city_picker.to_city",
            #     "text": "New York",
            #     "description": "Typing 'New York' as the arrival city in the destination input field.",
            # },
            {
                "type": "type",
                "name": "date_picker.departure_date",
                "text": "10/10/2024",
                "description": "Typing '10/10/2024' as the departure date in the departure date input field.",
            },
            {
                "type": "type",
                "name": "date_picker.return_date",
                "text": "10/15/2024",
                "description": "Typing '10/15/2024' as the return date in the return date input field.",
            },
        ]
    )
)

  logger.deprecation(
