In [2]:
%load_ext autoreload
%autoreload 2

import os
import time
import openai
from dotenv import load_dotenv

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs

from method.ours.utils import create_driver, get_xpath, get_application_context
from method.ours.parse import parse_form
from method.ours.prompts import get_form_context
from method.ours.history import HistoryTable
from method.ours.constraints import Invalid
from method.ours.generation import (
    generate_constraints_for_input_groups,
    generate_value_for_input_group,
    generate_values_for_input_groups,
    fill_form_with_value_table,
    submit_form
)
from method.ours.feedback import (
    get_local_feedback,
    get_global_feedback
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read environment configuration and hand the OpenAI key to the client.
# os.getenv returns None when OPENAI_API_KEY is not set, so a missing key is
# not reported here.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Global Variables
# HEADLESS is passed to create_driver(); TEXT_EMBEDDING_METHOD is passed to
# parse_form(). GRAPH_EMBEDDING_METHOD is not referenced by any other cell
# visible in this notebook.
HEADLESS = False
TEXT_EMBEDDING_METHOD = 'ADA'
GRAPH_EMBEDDING_METHOD = 'NODE2VEC'

# Page under test: opened by get_to_form() and recorded in the HistoryTable.
URL = 'https://www.macys.com/'
# Other targets that have been used; assign one of them to URL to switch.
#URL = 'https://seatgeek.ca/'
# 'https://www.stubhub.ca/'
# 'https://app.invoicing.co/#/register'
# 'http://localhost:3000/default-channel/en-US/account/register/'
# 'http://localhost:9000/dashboard/discounts/sales/add'
# 'http://localhost:9000/dashboard/customers/add'
# 'http://localhost:8080/'
# 'https://www.budget.com/en/home'
# 'https://www.thetrainline.com/en-us'
# 'https://www.mbta.com/'
# 'https://resy.com/'
# 'https://www.yelp.com/'
# 'https://www.aa.com/homePage.do'
# 'https://www.jetblue.com/'
# 'https://www.united.com/en/us'
# 'https://www.aircanada.com/ca/en/aco/home.html'


def get_to_form(driver):
    """Open ``URL`` in ``driver`` so the form under test is reachable.

    Only the page load is active. The triple-quoted blocks inside the ``try``
    are disabled, site-specific navigation recipes (login, menu clicks, tab
    selection) kept for reference; they are plain string expressions and do
    not execute.

    Args:
        driver: Browser driver exposing ``get(url)``; in this notebook it is
            the object returned by ``create_driver``.

    Returns:
        None. Navigation is best effort: ordinary errors are reported on
        stdout and swallowed so the notebook can continue.
    """
    try:
        driver.get(URL)

        '''
        try:
            time.sleep(2)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((
                    By.XPATH,
                    '//BODY/DIV[1]/MAIN[1]/DIV[1]/DIV[1]/DIV[1]/FORM[1]/DIV[2]/DIV[1]/INPUT[1]')
                )
            ).send_keys('admin@example.com')
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((
                    By.XPATH,
                    '//BODY/DIV[1]/MAIN[1]/DIV[1]/DIV[1]/DIV[1]/FORM[1]/DIV[4]/DIV[1]/DIV[1]/INPUT[1]')
                )
            ).send_keys('admin')
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((
                    By.XPATH,
                    '//BODY/DIV[1]/MAIN[1]/DIV[1]/DIV[1]/DIV[1]/FORM[1]/DIV[5]/BUTTON[1]')
                )
            ).click()
        except:
            pass
        
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH,
                '//BODY/DIV[1]/MAIN[1]/DIV[1]/DIV[2]/MAIN[1]/FORM[1]/DIV[1]/DIV[2]/DIV[4]/DIV[2]/LABEL[1]/SPAN[1]/SPAN[1]/INPUT[1]')
            )
        ).click()
        '''

        '''
        # Pet Clinic - Add Owner
        time.sleep(2)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH,
                '//BODY/APP-ROOT[1]/DIV[1]/NAV[1]/DIV[1]/UL[1]/LI[2]')
            )
        ).click()
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH,
                '//BODY/APP-ROOT[1]/DIV[1]/NAV[1]/DIV[1]/UL[1]/LI[2]/UL[1]/LI[2]')
            )
        ).click()
        '''

        '''
        # Budget - Reservation
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'PicLoc_value'))
        ).click()
        '''

        '''
        # MBTA
        time.sleep(2)
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, '//BODY/DIV[1]/DIV[2]/MAIN[1]/DIV[1]/DIV[1]/DIV[1]/DIV[2]/A[1]'))
        ).click()
        '''

        '''
        # AC - My Bookings
        time.sleep(2)
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'bkmg-tab-button-mngBook'))
        ).click()
        '''

        '''
        # AC - Multi-city
        time.sleep(2)
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'bkmgFlights_tripTypeSelector_M'))
        ).click()
        '''

    except Exception as exc:
        # `Exception` instead of a bare `except` so that KeyboardInterrupt and
        # SystemExit still stop the cell. Only the first line of the message
        # is printed because driver errors can carry a multi-line stack trace.
        first_line = str(exc).partition('\n')[0]
        print(f'could not reach form ({type(exc).__name__}): {first_line}')


def find_form():
    """Wait (up to 10 seconds) for the element used as the form and return it.

    Reads the notebook-level ``driver``; that variable must be assigned before
    this function is called.

    NOTE(review): the locator ends at ``FORM[1]/DIV[1]``, i.e. a DIV inside
    the form rather than the FORM element itself - confirm this is intended.
    """
    # Earlier locator, kept for reference:
    # '//BODY/DIV[1]/HEADER[1]/DIV[1]/DIV[1]/DIV[2]/DIV[1]/FORM[1]'
    form_locator = (
        By.XPATH,
        '//BODY/HEADER[1]/DIV[2]/DIV[1]/DIV[1]/SECTION[2]/DIV[2]/FORM[1]/DIV[1]',
    )
    waiter = WebDriverWait(driver, 10)
    return waiter.until(EC.presence_of_element_located(form_locator))



def find_button():
    """Return the element handed to ``submit_form`` as the explicit submit control.

    NOTE(review): the XPath below starts with ``//`` and is the same path that
    ``find_form`` waits for, so this appears to resolve to the form container
    itself rather than to a button. The recorded output of the submit cell
    shows the click on that element being intercepted by an overlay - confirm
    the intended submit locator.
    """
    # Alternatives tried previously:
    #   driver.find_elements(By.TAG_NAME, 'button')[10]
    #   None
    submit_xpath = '//BODY/HEADER[1]/DIV[2]/DIV[1]/DIV[1]/SECTION[2]/DIV[2]/FORM[1]/DIV[1]'
    container = find_form()
    return container.find_element(By.XPATH, submit_xpath)

In [4]:
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

def load_model_llama(model_type, use_triton=False):
    """Load the tokenizer and GPTQ model for one of the supported Llama-2 chat repos.

    Args:
        model_type: ``'70B'`` or ``'13B'``.
        use_triton: Forwarded unchanged to ``AutoGPTQForCausalLM.from_quantized``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        ValueError: If ``model_type`` is neither ``'70B'`` nor ``'13B'``.
    """
    # (model_type, repository, weight-file basename)
    supported = (
        ('70B', "TheBloke/Llama-2-70B-chat-GPTQ", "gptq_model-4bit--1g"),
        ('13B', "TheBloke/Llama-2-13B-chat-GPTQ", "gptq_model-4bit-128g"),
    )
    for size, model_name_or_path, model_basename in supported:
        if model_type == size:
            break
    else:
        raise ValueError('Invalid model_type. Choose either "70B" or "13B".')

    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        use_fast=True,
        device_map="auto",
    )

    # model_basename is selected above but currently not forwarded:
    # the `model_basename=model_basename` argument is disabled.
    quantized_options = dict(
        revision="main",
        inject_fused_attention=False,
        use_safetensors=True,
        trust_remote_code=False,
        use_triton=use_triton,
        quantize_config=None,
    )
    model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, **quantized_options)
    return tokenizer, model


# Load the tokenizer and model for the 70B variant via load_model_llama();
# both objects are passed to the constraint/value generation calls below.
tokenizer, model = load_model_llama("70B")

skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.                                                                                   


In [5]:
# Create the browser driver (HEADLESS comes from the config cell) and open URL.
# find_form() reads this notebook-level `driver`, so this cell must run first.
driver = create_driver(HEADLESS)
get_to_form(driver)

In [6]:
# Element that the rest of the notebook treats as the form under test.
form = find_form()

In [7]:
# History of submissions for this form, keyed by the page URL and the form's
# XPath as computed by get_xpath().
history_table = HistoryTable(
    url=URL,
    xpath=get_xpath(driver, form)
)

In [8]:
# Best-effort reset: empty every <input> under the form before generating values.
for element in form.find_elements(By.TAG_NAME, 'input'):
    try:
        element.clear()
    except Exception:
        # Some inputs cannot be cleared (e.g. ones that are not editable);
        # skip those. `Exception` rather than a bare `except` so that
        # interrupting the kernel still stops the loop.
        pass

# Processing

In [9]:
# Snapshot of <body> taken before the form is filled; it is compared with the
# post-submit snapshot when computing global feedback further down.
html = driver.find_element(By.TAG_NAME, 'body').get_attribute('outerHTML')
# parse_form returns the relation graph and the input groups of the form;
# input_groups drives constraint generation, value generation, filling and
# submission in the cells below.
relation_graph, input_groups = parse_form(
    driver,
    form,
    TEXT_EMBEDDING_METHOD=TEXT_EMBEDDING_METHOD
)

Computing transition probabilities: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 3496.22it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 102400.00it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 99015.68it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 104596.11it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 103614.23it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.04it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


# Generation

In [10]:
# Application context is optional prompt input: fall back to an empty string
# when it cannot be extracted, but say so instead of failing silently.
try:
    app_context = get_application_context(driver)
except Exception as exc:
    # `Exception` rather than a bare `except` so a kernel interrupt still stops the cell.
    print(f'get_application_context failed ({type(exc).__name__}); using empty app_context')
    app_context = ''

In [11]:
# Display the context string that will be passed to the generation calls.
app_context

''

In [12]:
# Ask the model for constraints for every input group; the result is the
# value table that the following cells print, extend with values, and use to
# fill the form.
# NOTE(review): the recorded output of this cell contains a TypeError
# traceback raised inside method/ours/constraints/utils.py (split_functions
# received None) - investigate there, not in this cell.
value_table = generate_constraints_for_input_groups(
   input_groups,
   app_context=app_context,
   model=model,
   tokenizer=tokenizer,
)

# Earlier call form, kept for reference (uses `constraint_ablation`, which is
# not defined anywhere in the visible notebook):
# value_table = generate_constraints_for_input_groups(
#     model, 
#     tokenizer,
#     input_groups,
#     ablation_inclusion=constraint_ablation
# )



generated constraints: <s> [INST] <<SYS>>
    Instructions:
Your task is to generate a set of constraints for web form fields. Your decisions must be made independently without seeking user assistance or additional information. If there are multiple ways to express constraints, use the least number of constraints to describe them. Only generate the constraints and refrain from explaining your answers. Only generate constraints for the input field in question, not those in the relevant information section. You must choose your constraints in the format of our modified version of the Jest library in JavaScript. The list of functions in this modified format are:
1. toBeEqual(value) # the input field value is exactly equal to the given value
2. toHaveLengthCondition(condition, value) # the length of the input field value matches the given condition
3. toBeTruthy() # the input field value is truthy and not empty (not false, 0, '', null, undefined, or NaN)
4. toHaveCompareCondition(condition

Traceback (most recent call last):
  File "/home/webtesting/webform-testing/method/ours/generation/constraints.py", line 182, in generate_constraints_for_input_groups
    value_table = generate_constraints_for_input_group(
  File "/home/webtesting/webform-testing/method/ours/generation/constraints.py", line 148, in generate_constraints_for_input_group
    field_name, constraints = generate_constraints_from_string(generated_constraints)
  File "/home/webtesting/webform-testing/method/ours/constraints/utils.py", line 25, in generate_constraints_from_string
    splitted = split_functions(constraint_string)
  File "/home/webtesting/webform-testing/method/ours/constraints/utils.py", line 9, in split_functions
    for char in function_string:
TypeError: 'NoneType' object is not iterable


In [13]:
# Show the value table produced by constraint generation.
value_table.print()

In [14]:
# Generate values for every input group from the existing value table.
# The name `value_table` is rebound to the returned table, so re-running this
# cell feeds its own previous output back in.
value_table = generate_values_for_input_groups(
    input_groups,
    value_table,
    app_context=app_context,
    model=model,
    tokenizer=tokenizer,
)

In [15]:
# Fill the form in the live browser session using the generated value table.
fill_form_with_value_table(driver, value_table, input_groups)

In [16]:
# Submit the form, using the element from find_button() as the explicit submit control.
# NOTE(review): the recorded output is an "element click intercepted" message:
# the element returned by find_button() (div#clearable_globalSearchInputField)
# was covered by a `tinymask` overlay, so the click did not reach it.
submit_form(driver, input_groups=input_groups, explicit_submit=find_button())

Message: element click intercepted: Element <div id="clearable_globalSearchInputField" style="position: relative; white-space: nowrap;" xpath="//BODY/HEADER[1]/DIV[2]/DIV[1]/DIV[1]/SECTION[2]/DIV[2]/FORM[1]/DIV[1]" x_start="562" x_end="1270" y_start="75" y_end="115">...</div> is not clickable at point (915, 95). Other element would receive the click: <div id="tinymask" style="width: 1831px; opacity: 0.8; display: block; height: 100vh; position: fixed;"></div>
  (Session info: chrome=115.0.5790.110)
Stacktrace:
#0 0x55928c272613 <unknown>
#1 0x55928bf9e537 <unknown>
#2 0x55928bfe2af2 <unknown>
#3 0x55928bfe1044 <unknown>
#4 0x55928bfdef68 <unknown>
#5 0x55928bfde0ad <unknown>
#6 0x55928bfd31a9 <unknown>
#7 0x55928bffda32 <unknown>
#8 0x55928bfd2bc6 <unknown>
#9 0x55928bffdbfe <unknown>
#10 0x55928c015d09 <unknown>
#11 0x55928bffd803 <unknown>
#12 0x55928bfd168b <unknown>
#13 0x55928bfd242e <unknown>
#14 0x55928c233c28 <unknown>
#15 0x55928c237af7 <unknown>
#16 0x55928c24201c <unknown>
#

In [17]:
# Snapshot <body> again after the submit attempt and compute global feedback
# from the pre-fill snapshot (`html`) and this one.
new_html = driver.find_element(By.TAG_NAME, 'body').get_attribute('outerHTML')
global_feedback = get_global_feedback(html, new_html, remove_form_children=False)

In [18]:
# Record this run in the history table: the submitted values, the label
# 'base', the global feedback, and the URL the browser ended up on.
# NOTE(review): the recorded output is "AttributeError: 'DataFrame' object has
# no attribute 'append'". This cell does not call DataFrame.append itself, so
# the call presumably lives in HistoryTable.add - DataFrame.append was removed
# in pandas 2.0; confirm and switch that code to pd.concat.
history_table.add(
    value_table.get_values_dict(),
    'base',
    global_feedback,
    driver.current_url
)

AttributeError: 'DataFrame' object has no attribute 'append'