In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time
import copy
import json
import openai
from dotenv import load_dotenv

from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from method.models import generate_random_values, generate_static_values, generate_llm_values
from method.ours.utils import create_driver, get_xpath, interact_with_input
from method.ours.history import HistoryTable
from method.ours.feedback import get_global_feedback

In [3]:
# Load environment variables (presumably from a local .env file) and hand the
# OpenAI key to the client library.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")  # os.getenv returns None when the variable is unset

# Global Variables
HEADLESS = False  # passed to create_driver() when the browser session is created
TEXT_EMBEDDING_METHOD = 'ADA' # ['ADA', 'WORD2VEC', 'SPACY']
GRAPH_EMBEDDING_METHOD = 'NODE2VEC' # ['NODE2VEC', 'GCN']
# NOTE(review): neither embedding setting is read by any cell visible in this
# notebook - confirm whether they are still needed.

# Page opened by get_to_form(); other target sites are kept below for reference.
URL = 'https://www.babycenter.com/pregnancy-weight-gain-estimator'
# 'https://www.budget.com/en/home'
# 'https://www.yelp.com/'
# 'https://www.aircanada.com/ca/en/aco/home.html'

In [8]:
# Selects the value generator ('static', 'random' or 'llm') and how
# get_element() locates fields: 'llm' looks them up by id, anything else by XPath.
METHOD = 'llm'

In [5]:
def get_to_form(driver, url=None):
    """Navigate ``driver`` to the page that hosts the form under test.

    Args:
        driver: Browser driver object; only its ``get(url)`` method is used.
        url: Address to open. When omitted, the notebook-level ``URL``
            constant is used (looked up at call time, so re-running the
            configuration cell takes effect immediately).

    Returns:
        None.
    """
    driver.get(URL if url is None else url)

    # Site-specific follow-up steps, all currently disabled. They are kept as
    # real comments (not bare string literals) so they are clearly inert.
    #
    # AC - Multi-city
    # time.sleep(2)
    # WebDriverWait(driver, 5).until(
    #     EC.presence_of_element_located((By.ID, 'bkmgFlights_tripTypeSelector_M'))
    # ).click()
    #
    # AC - My Bookings
    # time.sleep(2)
    # WebDriverWait(driver, 5).until(
    #     EC.presence_of_element_located((By.ID, 'bkmg-tab-button-mngBook'))
    # ).click()
    #
    # Budget - Reservation
    # WebDriverWait(driver, 5).until(
    #     EC.presence_of_element_located((By.ID, 'PicLoc_value'))
    # ).click()

In [9]:
def find_form():
    """Return the element that wraps the form under test.

    The element is looked up on the notebook-level ``driver`` through a
    hard-coded absolute XPath.
    """
    form_location = (
        '//BODY/DIV[1]/DIV[3]/DIV[1]/DIV[5]/DIV[1]/DIV[1]/ARTICLE[1]/DIV[2]/DIV[1]/DIV[1]/DIV[2]'
    )
    return driver.find_element(By.XPATH, form_location)


def find_button():
    """Return the last ``<button>`` found inside the form element.

    Raises:
        IndexError: if the form contains no ``<button>`` at all.
    """
    # Alternative kept from the original for forms without a button: return None
    buttons = find_form().find_elements(By.TAG_NAME, 'button')
    return buttons[-1]

In [11]:
# Create the browser session (HEADLESS is forwarded to create_driver) and open
# the target page. `driver` is read as a global by find_form() and get_element().
driver = create_driver(HEADLESS)
get_to_form(driver)

In [12]:
# Markup of the page before any interaction; the test loop passes it to
# get_global_feedback() as the "before" state.
html = driver.find_element(By.TAG_NAME, 'html').get_attribute('outerHTML')
form = find_form()
form_xpath = get_xpath(driver, form)

# Every <input>, <textarea> and <select> under the form, leaving out fields
# whose type is 'hidden' or whose `hidden` attribute reads 'true'.
inputs = [
    field
    for field in form.find_elements(
        By.XPATH,
        f'{form_xpath}//input | {form_xpath}//textarea | {form_xpath}//select'
    )
    if not (
        field.get_attribute('type') == 'hidden'
        or field.get_attribute('hidden') == 'true'
    )
]

In [13]:
# Table that collects one record per form submission (see history_table.add in
# the test loop); it is created from the page URL and the form's XPath.
history_table = HistoryTable(
    url=URL,
    xpath=form_xpath
)

In [14]:
# Generate candidate field values with the strategy named by METHOD.
values = None
if METHOD == 'static':
    values = generate_static_values(driver, inputs)
elif METHOD == 'random':
    values = generate_random_values(driver, inputs)
elif METHOD == 'llm':
    # The LLM generator receives only the form's markup and the API key.
    values = generate_llm_values(form.get_attribute('outerHTML'), openai.api_key)
else:
    # Previously any unknown METHOD fell through to the LLM call, while
    # get_element() treats every METHOD other than 'llm' as XPath-based, so the
    # two cells disagreed on how to resolve the returned keys. Fail fast instead.
    raise ValueError(
        f"Unknown METHOD {METHOD!r}; expected 'static', 'random' or 'llm'"
    )

 [0m Prompt: [
  {
    "role": "system",
    "content": "\nYour task is to generate testing values for a web form. When given a web form, you need to generate values that can pass and fail when tried on the form. Your response should be in the following format:\n{\n[field_identifier]: {\npassing: <a base passing value for the field>,\nfailing: <a list of failing values for the field>\n}\n}\nYour response must be parsable by Python's json.loads function. Use \"id\" attribute to refer to the fields.\n"
  },
  {
    "role": "user",
    "content": "<div class=\"blueRectangle\"><div class=\"formFields\"><div class=\"weightInput mui-style-p0tzpz esqtzx80\"><label class=\"mui-style-gvm0ej esqtzx81\">Your pre-pregnancy weight</label><div class=\"false mui-style-wqurjs esqtzx82\"><div class=\"MuiFormControl-root MuiTextField-root weightValueInput mui-style-i44wyl\"><div class=\"MuiInputBase-root MuiOutlinedInput-root MuiInputBase-colorPrimary MuiInputBase-formControl mui-style-13cxp6d\"><input

 [0m Response: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n\"left-prePregnancyWeight\": {\n\"passing\": \"150\",\n\"failing\": [\"\", \"abc\", \"-150\", \"0\"]\n},\n\"left-weightNow\": {\n\"passing\": \"160\",\n\"failing\": [\"\", \"abc\", \"-160\", \"0\"]\n},\n\"left-height\": {\n\"passing\": \"5\",\n\"failing\": [\"\", \"abc\", \"-5\", \"0\"]\n},\n\"right-height\": {\n\"passing\": \"6\",\n\"failing\": [\"\", \"abc\", \"-6\", \"0\"]\n},\n\"pregnancyWeek\": {\n\"passing\": \"20\",\n\"failing\": [\"\", \"abc\", \"-20\", \"0\"]\n}\n}",
        "role": "assistant"
      }
    }
  ],
  "created": 1689882677,
  "id": "chatcmpl-7eTy959H4PxKF20FJSccid6PkMGmE",
  "model": "gpt-4-0613",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 144,
    "prompt_tokens": 2516,
    "total_tokens": 2660
  }
}
 [0m Total running cost: $0.084


In [15]:
# Inspect the generated values. The test loop expects, per field identifier,
# a 'passing' value and a list of 'failing' values.
values

{'left-prePregnancyWeight': {'passing': '150',
  'failing': ['', 'abc', '-150', '0']},
 'left-weightNow': {'passing': '160', 'failing': ['', 'abc', '-160', '0']},
 'left-height': {'passing': '5', 'failing': ['', 'abc', '-5', '0']},
 'right-height': {'passing': '6', 'failing': ['', 'abc', '-6', '0']},
 'pregnancyWeek': {'passing': '20', 'failing': ['', 'abc', '-20', '0']}}

In [15]:
def get_element(element_id):
    """Look up one form field on the notebook-level ``driver``.

    ``element_id`` is treated as an element id when ``METHOD`` is ``'llm'``
    and as an XPath expression for every other method.
    """
    locator = By.ID if METHOD == 'llm' else By.XPATH
    return driver.find_element(locator, element_id)


def send_values(elemend_ids, values):
    """Fill each listed field with its entry from ``values``.

    Args:
        elemend_ids: Iterable of field identifiers accepted by
            ``get_element`` (parameter name kept as-is for compatibility).
        values: Mapping from field identifier to the value to enter.

    Radio buttons, checkboxes and ``<button>`` elements are skipped. Filling
    is best-effort: a failure reported by ``interact_with_input`` is printed
    and the remaining fields are still processed.

    Raises:
        KeyError: if an identifier has no entry in ``values``.
    """
    for element_id in elemend_ids:
        element = get_element(element_id)
        value = values[element_id]

        # Elements without a `type` attribute are treated as plain text fields.
        element_type = element.get_attribute('type') or 'text'
        if element_type in ('radio', 'checkbox') or element.tag_name == 'button':
            continue

        try:
            interact_with_input(element, value)
        except Exception as error:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt and
            # SystemExit still stop a long-running notebook loop.
            print(f'send_values: could not fill {element_id!r}: {error!r}')

        
def submit_form(form, attempts=3, delay=0.5):
    """Trigger the form's submit button, repeating a few times.

    Args:
        form: Unused; the button is located again through ``find_button`` on
            every call. Kept so existing callers keep working.
        attempts: Maximum number of times the button is triggered.
        delay: Seconds to wait after each successful trigger.

    The loop stops at the first exception raised while triggering the button
    (presumably because the page changed after a successful submit - confirm).
    """
    # Earlier ways of locating / preparing the submit control, kept for reference:
    # submit = driver.find_element(By.XPATH, f'{form_xpath}//*[@type="submit"]')
    # driver.find_element(By.TAG_NAME, 'body').click()
    submit = find_button()

    for _ in range(attempts):
        try:
            interact_with_input(submit, True)
            time.sleep(delay)
        except Exception:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt and
            # SystemExit are not silently absorbed here.
            break

In [22]:
# Baseline submission: every field carries its 'passing' value.
passing_set = {
    field_id: field_values['passing']
    for field_id, field_values in values.items()
}

# For each field, submit the baseline once per failing value with only that
# field replaced, and record what the page shows afterwards.
for element_id, element_values in values.items():
    # Reload first so the field's type is read from a fresh page.
    get_to_form(driver)
    time.sleep(1)

    element = get_element(element_id)
    element_type = element.get_attribute('type') or 'text'

    # Radio buttons, checkboxes and submit controls are not mutated.
    if element_type not in ('radio', 'checkbox', 'submit'):
        for failing_value in element_values['failing']:
            # Errors raised during a trial propagate; a try/except that only
            # printed them was disabled in the original cell.
            get_to_form(driver)
            time.sleep(1)

            passing_copy = {**passing_set, element_id: failing_value}
            send_values(values.keys(), passing_copy)

            time.sleep(1)

            submit_form(form)

            new_html = driver.find_element(By.TAG_NAME, 'body').get_attribute('outerHTML')
            global_feedback = get_global_feedback(html, new_html, remove_form_children=False)

            history_table.add(
                passing_copy,
                'base',
                global_feedback,
                driver.current_url
            )

  self.table = self.table.append({
  self.table = self.table.append({
  self.table = self.table.append({


KeyboardInterrupt: 

In [19]:
# Show the recorded submissions (columns: values, variation_type, feedback, new_url).
history_table.table

Unnamed: 0,values,variation_type,feedback,new_url
0,"{""search_description"": """", ""search_location"": ...",base,Filters$$$$$$$$$$SuggestedOpen Now 6:30 PMFeat...,https://www.yelp.com/search?find_desc=&find_lo...
1,"{""search_description"": ""Pizza"", ""search_locati...",base,Filters$$$$$$$$$$SuggestedOpen Now 6:30 PMRese...,https://www.yelp.com/search?find_desc=Pizza&fi...
