In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `method.ours` package) are picked up before each cell
# runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time
import openai
from dotenv import load_dotenv

from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup as bs

from method.ours.utils import create_driver
from method.ours.parse import (
    initial_parse,
    parse_after_feedback,
)

In [3]:
# Load variables from a .env file (if any) into the environment, then hand the
# OpenAI key to the client library. The key is never hard-coded in the notebook.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Notebook-wide settings
HEADLESS = False  # forwarded to create_driver()
TEXT_EMBEDDING_METHOD = 'ADA'  # one of: 'ADA', 'WORD2VEC', 'SPACY'
# one of: 'NODE2VEC', 'GCN' (not referenced by the cells visible in this notebook)
GRAPH_EMBEDDING_METHOD = 'NODE2VEC'

In [4]:
# Start a browser session (HEADLESS is False here, so presumably a visible
# window) and open the Air Canada home page whose flight-booking form is
# analysed in the rest of the notebook.
# NOTE: this loads the live site, so every downstream result depends on the
# page's markup at the time of the run.
driver = create_driver(HEADLESS)
driver.get('https://www.aircanada.com/ca/en/aco/home.html')

In [5]:
# Position of the target <form> in the list returned by
# driver.find_elements(By.TAG_NAME, 'form'). This is page-specific: earlier
# experiments (presumably on other pages) used index 48, or looked the form up
# directly with driver.find_element(By.ID, 'register').
FORM_INDEX = 1

page_forms = driver.find_elements(By.TAG_NAME, 'form')
if len(page_forms) <= FORM_INDEX:
    # Same exception type a bare `[FORM_INDEX]` would raise, but with a message
    # that says what was expected and what was actually found on the page.
    raise IndexError(
        f"expected at least {FORM_INDEX + 1} <form> elements on the page, "
        f"found {len(page_forms)} (has the page finished loading?)"
    )
form = page_forms[FORM_INDEX]

# Processing

In [6]:
# Parse the selected form into a relation graph plus a list of input groups,
# using TEXT_EMBEDDING_METHOD for text embeddings (presumably of the labels and
# text nodes; confirm in method.ours.parse).
# Slow: the recorded run of this cell reported a running time of about 42 s.
relation_graph, input_groups = initial_parse(driver, form, TEXT_EMBEDDING_METHOD)

Computing transition probabilities:   0%|          | 0/340 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 579.10it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 600.93it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 537.71it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 596.71it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:08<00:00,  4.10it/s]
  return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


running time: 42.11640810966492


# Generation

In [8]:
from method.ours.constraints import (
    ConstraintFactory,
    create_constraint_generation_prompt,
    generate_constraints_with_llm
)
from method.ours.values import (
    create_value_generation_prompt,
    generate_values_with_llm,
)

In [9]:
# Pick a single input group to walk through the generation pipeline.
# In the recorded run, index 8 is the "Return" date input (see the element
# echoed below); the index comes from the parse result, so re-check it whenever
# the page or the parser changes.
input_group = input_groups[8]
# Bare expression: displays the underlying element of the chosen group.
input_group.node.element

<input aria-describedby="bkmgFlights_travelDates_1DatepickerSrHelperText" aria-disabled="false" aria-invalid="false" aria-label="Return" autocomplete="off" autocorrect="off" class="abc-form-element-input abc-form-element-main text-transform-none ng-untouched ng-pristine ng-valid" id="bkmgFlights_travelDates_1-formfield-2" name="bkmgFlights_travelDates_1-formfield-2" placeholder="DD/MM" spellcheck="false" type="text" x_end="1093" x_start="978" xpath="//BODY/AC-WEB-APP[1]/DIV[1]/MAIN[1]/DIV[1]/AC-ACOHOME-PAGE[1]/DIV[1]/DIV[1]/AC-BOOKING-MAGNET[1]/DIV[1]/DIV[1]/DIV[1]/DIV[2]/AC-BKMG-FLIGHTS-TAB[1]/DIV[1]/FORM[1]/FIELDSET[1]/DIV[1]/DIV[1]/DIV[1]/ABC-DATE-PICKER[1]/DIV[1]/DIV[1]/ABC-INPUT[2]/ABC-FORM-ELEMENT-CONTAINER[1]/DIV[1]/DIV[1]/DIV[1]/DIV[1]/DIV[1]/INPUT[1]" y_end="678" y_start="628"/>

In [10]:
# Build the user prompt for constraint generation and print it for inspection.
# Per the printed output it contains the input's tag, its label, related text
# tags, and the most relevant neighbouring fields in order of relevance.
constraint_user_prompt = create_constraint_generation_prompt(input_group)
print(constraint_user_prompt)

input: <input id="bkmgFlights_travelDates_1-formfield-2" name="bkmgFlights_travelDates_1-formfield-2" type="text"/>
with label: Return
with the following relevant text tags:
<span>Return</span>

    The relevant information available in the form are (in order of relevance):
    1.
input: <input id="bkmgFlights_travelDates_1-formfield-1" name="bkmgFlights_travelDates_1-formfield-1" type="text"/>
with label: Departure
2.
input: <input id="bkmgFlights_destination_trip_1" name="bkmgFlights_destination_trip_1" type="text"/>
with label: To
3.
input: <input id="bkmgFlights_tripTypeSelector_M" name="tripType" type="radio"/>
with label: Multi-city/Stopover


In [17]:
# Ask the LLM for constraints on the chosen field. The result is a string (it is
# printed and split on newlines by later cells). The call also logs the full
# prompt, which is what appears in this cell's output.
generated_constraints = generate_constraints_with_llm(
    constraint_user_prompt,
    openai_api_key=openai.api_key
)

 [0m Prompt: [
  {
    "role": "system",
    "content": "Your task is to generate a set of constraints for form fields. Your decisions must be made independently without seeking user assistance or additional information.\n\nThe list of the constraints and their signatures is as follows:\n\n1. toBe(value) # the input field value exactly matches the given value\n2. toHaveLengthCondition(condition, value) # the length of the input field value matches the given condition\n3. toBeTruthy() # the input field value is truthy and not empty (not false, 0, '', null, undefined, or NaN)\n4. toHaveCompareCondition(condition, value) # the input field value has the given condition to the given value\n5. toContainSubstr(value) # the input field value contains a specific string\n6. toContainChar(value) # the input field value contains a specific character\n7. toBeAlpha() # the input field value is alphabetic\n8. toBeNumeric() # the input field value is numeric\n9. toBeAlphaNumeric() # the input value c

In [18]:
# Inspect the raw constraint expression returned by the LLM: an
# `expect(field(...))` line followed by one chained `.constraint(...)` per line.
print(generated_constraints)

expect(field('bkmgFlights_travelDates_1-formfield-2'))
.toBeTruthy()
.toMatch(/^\d{2}\/\d{2}\/\d{4}$/)
.not.toBe(field('bkmgFlights_travelDates_1-formfield-1'))
.toHaveCompareCondition('>', field('bkmgFlights_travelDates_1-formfield-1'))


In [27]:
# The LLM output is an `expect(field(...))` header followed by chained calls,
# each on its own line and starting with '.'. Splitting on '\n.' therefore
# yields the header first (discarded) and then one chunk per constraint, with
# the leading dot removed.
constraint_chunks = generated_constraints.split('\n.')[1:]
constraints = [ConstraintFactory.create(chunk) for chunk in constraint_chunks]

In [28]:
# Build the user prompt for value generation and print it for inspection.
# Per the printed output it repeats the field description and appends a
# plain-language rendering of each parsed constraint.
value_user_prompt = create_value_generation_prompt(
    input_group,
    constraints
)
print(value_user_prompt)

input: <input id="bkmgFlights_travelDates_1-formfield-2" name="bkmgFlights_travelDates_1-formfield-2" type="text"/>
with label: Return
with the following relevant text tags:
<span>Return</span>

    The relevant information available in the form are (in order of relevance):
    1.
input: <input id="bkmgFlights_travelDates_1-formfield-1" name="bkmgFlights_travelDates_1-formfield-1" type="text"/>
with label: Departure
2.
input: <input id="bkmgFlights_destination_trip_1" name="bkmgFlights_destination_trip_1" type="text"/>
with label: To
3.
input: <input id="bkmgFlights_tripTypeSelector_M" name="tripType" type="radio"/>
with label: Multi-city/Stopover

    generate values based on the following constraints:
    input field should be non-empty
input field should match /^\d{2}\/\d{2}\/\d{4}$/ regex pattern
input field should not be field('bkmgFlights_travelDates_1-formfield-1')
input field should be greater than field('bkmgFlights_travelDates_1-formfield-1')


In [29]:
# Ask the LLM for candidate values that satisfy the constraints. The system
# prompt shown in the logged output requests five distinct values formatted as
# an array parseable with json.loads.
generated_values = generate_values_with_llm(
    value_user_prompt,
    openai_api_key=openai.api_key
)

 [0m Prompt: [
  {
    "role": "system",
    "content": "Your task is to generate a set of values for a form field based on the form field information and a set of constraints on the field. Your decisions must always be made independently without seeking user assistance or additional information.\nFor each user prompt, you need to generate five distinct values that satisfy the constraints while keeping in mind the nature of the input from the available information.\nOnly generate the values and don't explain your answers.\nGenerate the values in a Python array. We must be able to parse your generation with json.loads.\nOnly generate values for the inputs in question, and not the ones in the relevant information section."
  },
  {
    "role": "user",
    "content": "We are generating values for the following input field:\n            input: <input id=\"bkmgFlights_travelDates_1-formfield-2\" name=\"bkmgFlights_travelDates_1-formfield-2\" type=\"text\"/>\nwith label: Return\nwith the fo

In [30]:
# Inspect the generated values.
# NOTE(review): in the recorded run these are dates with a four-digit year,
# while the field's own placeholder is "DD/MM"; the prompt printed earlier does
# not include the placeholder attribute. Worth confirming whether the format
# hint is being dropped before the LLM sees the field.
print(generated_values)

["02/20/2023", "03/15/2023", "04/10/2023", "05/05/2023", "06/01/2023"]


# Feedback

In [8]:
# Look the form up again (same lookup as the initial `form`) before re-parsing.
# Presumably the page was interacted with in between and the earlier element
# reference no longer reflects the current DOM; confirm.
new_form = driver.find_elements(By.TAG_NAME, 'form')[1]

In [9]:
# Re-parse the form, passing the previous relation_graph along. The recorded
# output of this cell includes a dict with 'added' and 'removed' entries; the
# added entries include validation messages such as
# "Please select valid travel dates for this trip."
new_relation_graph, new_input_groups = parse_after_feedback(driver, new_form, relation_graph, TEXT_EMBEDDING_METHOD)

Computing transition probabilities:   0%|          | 0/344 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 630.89it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 641.05it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 610.43it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 636.99it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 38/38 [00:09<00:00,  4.02it/s]


{'added': [<span-wrap> Round-trip </span-wrap> at y: (572, 592), x: (249, 319), <span-wrap> One-way </span-wrap> at y: (572, 592), x: (364, 421), <span-wrap> Multi-city/Stopover </span-wrap> at y: (572, 592), x: (467, 591), <span-wrap> Book with points </span-wrap> at y: (574, 594), x: (682, 905), <div>Please select a valid destination for this trip.</div> at y: (685, 701), x: (536, 781), <div>Please select valid travel dates for this trip.</div> at y: (685, 701), x: (833, 1071), <span-wrap> Enter the date day and month in this format: DD/MM, or use the 'Show Calendar' button to open the calendar and select your date from there. </span-wrap> at y: (705, 706), x: (832, 833), <span-wrap> Add promotion code </span-wrap> at y: (743, 763), x: (200, 379), <span-wrap> Search flights </span-wrap> at y: (742, 762), x: (1202, 1306)], 'removed': [<span-wrap> Enter the date day and month in this format: DD/MM, or use the 'Show Calendar' button to open the calendar and select your date from there. 

In [10]:
# Display the edges of the updated relation graph. In the recorded output each
# edge has a type (e.g. FOR), a weight, and the two nodes it connects.
new_relation_graph.edges()

[edge FOR weight 1 from <label> Round-trip </label> at y: (572, 592), x: (249, 319) to <input>value is R</input> at y: (567, 597), x: (219, 319),
 edge FOR weight 1 from <input>value is R</input> at y: (567, 597), x: (219, 319) to <label> Round-trip </label> at y: (572, 592), x: (249, 319),
 edge FOR weight 1 from <label> One-way </label> at y: (572, 592), x: (364, 421) to <input>value is O</input> at y: (567, 597), x: (334, 421),
 edge FOR weight 1 from <input>value is O</input> at y: (567, 597), x: (334, 421) to <label> One-way </label> at y: (572, 592), x: (364, 421),
 edge FOR weight 1 from <label> Multi-city/Stopover </label> at y: (572, 592), x: (467, 591) to <input>value is M</input> at y: (567, 597), x: (437, 591),
 edge FOR weight 1 from <input>value is M</input> at y: (567, 597), x: (437, 591) to <label> Multi-city/Stopover </label> at y: (572, 592), x: (467, 591),
 edge FOR weight 1 from <label> Book with points </label> at y: (574, 594), x: (682, 905) to <input></input> at 