In [1]:

from dotenv import load_dotenv

load_dotenv()  # take environment variables

import os
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import dominate
from selenium.webdriver.remote.webelement import WebElement as Element
import json
import logging
import time

import gymnasium as gym
from simulated_web_agent.executor.env import SeleniumEnv
import simulated_web_agent
# Remove DISPLAY from the environment when it is set; a missing key is ignored.
# NOTE(review): presumably done so the browser does not try to use an X display — confirm.
os.environ.pop('DISPLAY', None)


  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from pathlib import Path

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build the Chrome options from a flat list of command-line flags, then start the browser.
options = Options()
for chrome_flag in ("start-maximized", "--remote-debugging-port=9222"):
    options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=options)

In [3]:
# Navigate the browser session to the site under test.
driver.get("http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/")
# Implicit wait of zero: element lookups are not retried, so a selector that
# matches nothing fails immediately instead of polling.
driver.implicitly_wait(0) # seconds

In [24]:
import dominate.tags


# name -> WebElement, filled by register_clickable().
clickables = {}
# name -> WebElement, filled by register_input().
inputs = {}

def set_attribute(element: "Element", attribute, value):
    """Set an HTML attribute on a live DOM element by running JavaScript.

    The script is executed on the driver that owns ``element``
    (``element.parent``), so the function does not depend on a notebook-global
    ``driver`` variable being defined.

    Args:
        element: The Selenium element to modify.
        attribute: Name of the attribute to set.
        value: Value to assign to the attribute.
    """
    element.parent.execute_script(
        "arguments[0].setAttribute(arguments[1], arguments[2]);",
        element,
        attribute,
        value,
    )

def register_clickable(element: Element, name: str):
    """Record ``element`` as clickable under ``name`` and tag it in the DOM.

    The element is stored in the module-level ``clickables`` mapping first;
    afterwards its ``data-clickable-id`` attribute is set to ``name``.
    """
    clickables.update({name: element})
    set_attribute(element, "data-clickable-id", name)
    
def register_input(element: Element, name: str):
    """Record ``element`` as an input under ``name`` and tag it in the DOM.

    The element is stored in the module-level ``inputs`` mapping first;
    afterwards its ``data-input-id`` attribute is set to ``name``.
    """
    inputs.update({name: element})
    set_attribute(element, "data-input-id", name)

def get_text(element):
    """Return the text of ``element``, falling back through several sources.

    Sources are tried in order and the first non-empty one wins:

    1. ``element.text``
    2. the ``innerText`` attribute
    3. the ``textContent`` attribute

    Args:
        element: An object exposing ``.text`` and ``.get_attribute(name)``
            (a Selenium ``WebElement`` in this notebook).

    Returns:
        The first non-empty text found; if every source is empty, whatever the
        ``textContent`` lookup returned (possibly ``''`` or ``None``).
    """
    elementText = element.text  # can come back empty, hence the fallbacks below
    if not elementText:
        elementText = element.get_attribute("innerText")
    if not elementText:
        elementText = element.get_attribute("textContent")
    # Return the resolved value; previously the fallbacks were computed and
    # then discarded in favour of an unconditional innerText lookup.
    return elementText


# NOTE(review): not referenced by any other visible cell — presumably a placeholder; confirm before relying on it.
actions = {}

def process(element: Element, recipe, parent_name = ''):
    """Convert a live DOM element into a simplified dominate node, guided by ``recipe``.

    Recipe keys read by this function:

    * ``text_selector`` – CSS selector (relative to ``element``) of the
      descendant whose text is used instead of the element's own text.
    * ``add_text`` – when truthy, the resolved text becomes the node's content.
    * ``text_format`` – ``str.format`` template applied to the resolved text.
    * ``tag_name`` – overrides the tag used for the generated node.
    * ``name`` – node name; prefixed with ``parent_name`` and a dot. The
      special value ``'from_text'`` derives the name from the resolved text.
    * ``clickable`` – when truthy, the element is registered via
      ``register_clickable`` under the node's name.
    * ``class`` / ``id`` – copied from the recipe onto the node.
    * ``children`` – list of child recipes, each with a ``selector`` and an
      optional ``direct_child`` flag.

    Args:
        element: The Selenium element to convert.
        recipe: Mapping describing how to convert ``element`` (see above).
        parent_name: Dotted name of the closest named ancestor, or ``''``.

    Returns:
        The dominate node built for ``element``. Child nodes are created inside
        ``with node:`` and their return values are discarded, so their
        attachment relies on dominate's context-manager behaviour.

    Raises:
        Exception: if ``name`` is ``'from_text'`` but no text could be
            resolved, or if a clickable element or text input has no name.
        AssertionError: if a radio input is not marked clickable.
    """
    wants_text = bool(recipe.get('add_text'))

    # Resolve the text, optionally from a dedicated descendant.
    if 'text_selector' in recipe:
        text_element = element.find_element(By.CSS_SELECTOR, recipe['text_selector'])
        elementText = get_text(text_element)
    else:
        elementText = get_text(element)
    if wants_text and not elementText:
        # The dedicated descendant yielded nothing; fall back to the element itself.
        elementText = get_text(element)
    if recipe.get('text_format'):
        elementText = recipe['text_format'].format(elementText)

    # Build the node. Tags listed in dominate.tags.underscored_classes are
    # looked up with a trailing underscore.
    tag_name = element.tag_name
    if 'tag_name' in recipe:
        tag_name = recipe['tag_name']
    tag_class_name = tag_name + '_' if tag_name in dominate.tags.underscored_classes else tag_name
    node = getattr(dominate.tags, tag_class_name)(elementText if wants_text else '')

    # Assign the dotted name and make it the prefix for descendants.
    name = recipe.get('name')
    if name:
        prefix = parent_name + "." if parent_name else ''
        if name == 'from_text':
            if not elementText:
                # The requirement is non-empty text, not the add_text flag.
                raise Exception("name 'from_text' requires non-empty element text")
            special_chars = "[]{}()<>.:;|!@#$%^&*+-=,?/\\\"'"
            element_name = elementText.lower().replace(' ', '_')
            element_name = element_name.translate(str.maketrans('', '', special_chars))
            node['name'] = prefix + element_name
        else:
            node['name'] = prefix + name
        parent_name = node['name']

    if recipe.get('clickable'):
        # An empty name would otherwise fail later with an opaque attribute error.
        if not name:
            raise Exception('clickable element must have a name')
        register_clickable(element, node['name'])

    # Copy a fixed set of DOM attributes when the element has them.
    for key in ['alt', 'src', 'href', 'title', 'type', 'value']:
        value = element.get_dom_attribute(key)
        if value:
            node[key] = value
    # class / id come from the recipe, not from the page.
    for key in ['class', 'id']:
        if recipe.get(key):
            node[key] = recipe[key]

    if tag_name == 'input':
        input_type = element.get_attribute('type')
        if input_type == 'radio':
            if element.get_attribute('checked'):
                # NOTE(review): relies on the node exposing a mutable `text` attribute — confirm against dominate.
                node.text += ' (selected)'
            assert recipe.get('clickable'), 'radio input must be clickable'
        elif input_type == 'text':
            if not name:
                raise Exception('text input element must have a name')
            node['value'] = element.get_attribute('value')
            register_input(element, node['name'])

    if recipe.get('children'):
        with node:
            for child in recipe['children']:
                selector = child['selector']
                if child.get('direct_child'):
                    # Restrict the match to direct children of this element.
                    selector = ':scope > ' + selector
                for child_element in element.find_elements(By.CSS_SELECTOR, selector):
                    process(child_element, child, parent_name)
    return node


In [32]:
# Declarative description of which parts of the page process() should keep.
# Each entry selects elements by CSS selector and may nest child entries.
recipe = {
    'selector': 'html',
    'clickable': False,
    'children': [
        {
            'selector': 'head',
            'name': '',
            'children': [
                {
                    'selector': 'title',
                    'add_text': True,
                }
            ]
        },
        {
            'selector': 'body',
            'children': [
                # Disabled: nested navigation-menu recipe, kept for reference.
                # {
                #     'selector': 'nav',
                #     'name': 'nav',
                #     'children': [
                #         {
                #             'selector': 'ul',
                #             'action': 'strip_add_children',
                #             'direct_child': True,
                #             'children': [
                #                 {
                #                     'selector': 'li',
                #                     'direct_child': True,
                #                     'add_text': True,
                #                     'text_selector': 'a',
                #                     'clickable': True,
                #                     'children': [
                #                         {
                #                             'selector': 'ul',
                #                             'direct_child': True,
                #                             'children': [
                #                                 {
                #                                     'selector': 'li',
                #                                     'add_text': True,
                #                                     'direct_child': True,
                #                                     'text_selector': 'a',
                #                                     'clickable': True,
                #                                     'children': [
                #                                         {
                #                                             'selector': 'ul',
                #                                             'direct_child': True,
                #                                             'children': [
                #                                                 {
                #                                                     'selector': 'li',
                #                                                     'add_text': True,
                #                                                     'direct_child': True,
                #                                                     'text_selector': 'a',
                #                                                     'clickable': True,
                #                                                 }
                #                                             ]
                #                                         }
                #                                     ]
                #                                 }
                #                             ]
                #                         }
                #                     ]
                #                 }
                #             ]
                #         }
                #     ]
                # },
                {
                    'selector': '.header.content',
                    'name': 'header',
                    'children': [
                        {
                            'selector': '#search_mini_form',
                            'name': 'search_box',
                            'children': [
                                {
                                    'selector': 'input#search',
                                    'name': 'search_input',
                                },
                                {
                                    'selector': 'button.action.search',
                                    'name': 'search_button',
                                    'add_text': True,
                                    'clickable': True,
                                }
                            ]
                        }
                    ]
                },
                {
                    'selector': '#maincontent > div.columns > div > div:nth-child(3)',
                    'add_text': True,
                    'text_selector': 'div > div.block-title > strong',
                    'children': [
                        {
                            'selector': 'div.product-item-info',
                            'class': 'product-item-info',
                            # Name is derived from the product title text.
                            'name': 'from_text',
                            'text_selector': 'div.product-item-details strong.product-item-name',
                            'children': [
                                {
                                    'selector': 'img',
                                },
                                {
                                    'selector': 'div.product-item-details',
                                    'children': [
                                        {
                                            'selector': 'div.rating-summary > div > span > span',
                                            'add_text': True,
                                            'clickable': True,
                                            'text_format': 'Rating: {}',
                                            # A second 'name' key ('rating') used to precede this one;
                                            # Python kept only the last, so the effective name is unchanged.
                                            'name': 'view_reviews'
                                        },
                                        {
                                            'selector': '.product-item-name',
                                            'add_text': True,
                                            'clickable': True,
                                            'name': 'view_product'
                                        },
                                        {
                                            'selector': '.price-box',
                                            'add_text': True,
                                        },
                                        {
                                            'selector': '.actions-primary',
                                            'add_text': True,
                                            'clickable': True,
                                            # NOTE(review): 'add_to_card' looks like a typo for 'add_to_cart';
                                            # left as-is because the name is used as a runtime identifier.
                                            'name': 'add_to_card',
                                            'tag_name': 'button'
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ]
}

In [36]:
# Resolve the recipe's root selector, convert the page, and print the resulting node.
root_element = driver.find_element(By.CSS_SELECTOR, recipe['selector'])
print(process(root_element, recipe))

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"div > div.block-title > strong"}
  (Session info: chrome-headless-shell=127.0.6533.88); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
#0 0x55d5f928c6ba <unknown>
#1 0x55d5f8f5c730 <unknown>
#2 0x55d5f8fabd9b <unknown>
#3 0x55d5f8fac081 <unknown>
#4 0x55d5f8fa0826 <unknown>
#5 0x55d5f8fcea7d <unknown>
#6 0x55d5f8fa0719 <unknown>
#7 0x55d5f8fcec1e <unknown>
#8 0x55d5f8fed1ca <unknown>
#9 0x55d5f8fce7f3 <unknown>
#10 0x55d5f8f9eec9 <unknown>
#11 0x55d5f8f9f91e <unknown>
#12 0x55d5f92529eb <unknown>
#13 0x55d5f9256972 <unknown>
#14 0x55d5f923fe15 <unknown>
#15 0x55d5f9257502 <unknown>
#16 0x55d5f9224d2f <unknown>
#17 0x55d5f927b578 <unknown>
#18 0x55d5f927b750 <unknown>
#19 0x55d5f928b48c <unknown>
#20 0x7f4193c78ded <unknown>
#21 0x7f4193cfc0dc <unknown>


In [34]:
# Click the element stored under this dotted name in the clickables registry
# (entries are added by register_clickable(), which process() calls).
clickables["header.search_box.search_button"].click()

In [19]:
# Look up the search button inside the header block and print its text.
header_element = driver.find_element(By.CSS_SELECTOR, '.header.content')
search_button = header_element.find_element(By.CSS_SELECTOR, 'button.action.search')
print(get_text(search_button))

Search


In [35]:
# Display the URL the browser is currently on.
driver.current_url

'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=shirt'

In [136]:
# Reach the <title> element through nested lookups: html -> head -> title.
title = driver.find_element(By.CSS_SELECTOR, 'html').find_element(By.CSS_SELECTOR, 'head').find_element(By.CSS_SELECTOR, 'title')

In [63]:
# Read the title element's innerHTML.
title.get_attribute('innerHTML')

'One Stop Market'

In [64]:
# Check which class the lookup returned.
type(title)

selenium.webdriver.remote.webelement.WebElement

In [65]:
# NOTE(review): JS_BUILD_CSS_SELECTOR is not defined in any visible cell — presumably it came
# from a cell that was removed or lives elsewhere; confirm, otherwise this raises NameError on a fresh kernel.
driver.execute_script(JS_BUILD_CSS_SELECTOR, title)

'head > title'

In [77]:
# Locate a button through a full CSS path (third <li> of the product grid, inside its actions-primary form).
btn = driver.find_element(By.CSS_SELECTOR, '#maincontent > div.columns > div > div:nth-child(3) > div > div.block-content > div.products-grid.grid > ol > li:nth-child(3) > div > div > div.product-item-inner > div > div.actions-primary > form > button')


In [79]:
# WebElement has no setProperty(); write the DOM attribute through the set_attribute() helper instead.
set_attribute(btn, 'data-clickable-id', '123')

In [83]:
# Read the data-clickable-id attribute back from the button.
btn.get_attribute('data-clickable-id')

'123'

In [92]:
# Query an attribute on the button; this cell has no recorded output.
btn.get_attribute('src')

In [37]:
# Scratch: build a dominate <title> tag whose content is the string 'title'.
# Note this rebinds `title`, which other cells use for a Selenium element.
title = dominate.tags.title('title')

In [41]:
# Render the tag to an HTML string without pretty-printing.
title.render(pretty=False)

'<title>title<div>test</div></title>'

In [111]:
# Scratch: append a marker to the object's `text` attribute.
# NOTE(review): `title` is rebound across cells (Selenium element vs dominate tag); confirm which one this ran against.
title.text += ' (selected)'

In [39]:
# Tags created inside the `with` block are added as children of `title`
# (the rendered output of this notebook shows the <div> nested in <title>).
with title:
    dominate.tags.div('test')

In [2]:
# Configure the root logger with default settings.
logging.basicConfig()
# Create the "SeleniumEnv-v0" gym environment with the site's root URL as start_url.
env: SeleniumEnv = gym.make(
    "SeleniumEnv-v0",
    start_url="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/",
    pretty=True,
    headless=False,
)
# reset() yields the first observation together with an info value.
observation, info = env.reset()

  logger.deprecation(


/


  logger.warn(f"{pre} is not within the observation space.")


In [3]:
# Call the environment's browser helper directly with an input name and the text 'test'.
# NOTE(review): presumably types the text into the named input and submits — confirm in SeleniumEnv.
env.browser.type_and_submit('header.search_box.search_input', 'test')

  logger.warn(


In [6]:
# Same browser helper call as before, this time with the text 'rain coat'.
env.browser.type_and_submit('header.search_box.search_input', 'rain coat')

In [3]:
# Drive the environment through step(), passing the action as a JSON string
# with "type", "name" and "text" fields.
env.step('{"type": "type_and_submit", "name": "header.search_box.search_input", "text": "waterproof coat"}')

/


  logger.warn(f"{pre} is not within the observation space.")


({'url': 'http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/',
  'page': '<html>\n  <head>\n    <title>One Stop Market</title>\n  </head>\n  <body>\n    <div name="header">\n      <form name="header.search_box">\n        <input name="header.search_box.search_input" type="text" value="waterproof coat">\n        <button name="header.search_box.search_button" title="Search" type="submit">Search</button>\n      </form>\n      <div name="header.minicart">\n        <a href="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/checkout/cart/" name="header.minicart.view_cart">Go to cart\n          <span>\n            \n            </span>\n        </a>\n      </div>\n    </div>\n    <div name="product_showcases">Product Showcases\n      <div class="product-item-info" name="product_showcases.prebaked_gingerbread_house_kit_value_pack_17_oz_pack_of_2_total_34_oz">\n        <img alt="Image" src="http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770/media/catalog/product/c