In [1]:
%load_ext autoreload
%autoreload 2


import os
import openai
from dotenv import load_dotenv

from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup as bs


from method.ours.utils import create_driver, embed_properties_into_html
from method.ours.preprocessing import get_processable_nodes
from method.ours.graph_embedding import create_node2vec_model
from method.ours.relation_graph import create_relation_graph
from method.ours.node_linking import create_base_links
from method.ours.similarity import add_similarity_scores_to_graph
from method.ours.edge_pruning import (
    prune_relation_graph_extra_edges,
    prune_low_score_uncertain_edges
)

In [2]:
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Global Variables
HEADLESS = False
TEXT_EMBEDDING_METHOD = 'ADA' # ['ADA', 'WORD2VEC', 'SPACY']
GRAPH_EMBEDDING_METHOD = 'NODE2VEC' # ['NODE2VEC', 'GCN']

In [3]:
driver = create_driver(HEADLESS)
driver.get('https://www.aircanada.com/ca/en/aco/home.html')

In [4]:
# form = driver.find_elements(By.TAG_NAME, 'form')[48]
# form = driver.find_element(By.ID, 'register')
form = driver.find_elements(By.TAG_NAME, 'form')[1]
form = embed_properties_into_html(driver, form)

form_doc = bs(form.get_attribute('outerHTML'), 'html.parser')

In [5]:
form_processable_nodes = get_processable_nodes(form_doc)
node2vec_model = create_node2vec_model(form_doc)

Computing transition probabilities:   0%|          | 0/340 [00:00<?, ?it/s]

Generating walks (CPU: 1):   0%|          | 0/50 [00:00<?, ?it/s]Generating walks (CPU: 2):   0%|          | 0/50 [00:00<?, ?it/s]Generating walks (CPU: 3):   0%|          | 0/50 [00:00<?, ?it/s]Generating walks (CPU: 4):   0%|          | 0/50 [00:00<?, ?it/s]Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 885.48it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 804.77it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 851.34it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 935.65it/s]


In [6]:
relation_graph = create_relation_graph(form_processable_nodes, TEXT_EMBEDDING_METHOD)

relation_graph = create_base_links(relation_graph)

relation_graph = add_similarity_scores_to_graph(node2vec_model, relation_graph)

relation_graph = prune_relation_graph_extra_edges(relation_graph)
relation_graph = prune_low_score_uncertain_edges(relation_graph, factor=0.5)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:10<00:00,  3.59it/s]

'edge NLEFT from <input>value is O</input> at y: (567, 597), x: (334, 421) to <input>value is R</input> at y: (567, 597), x: (219, 319)'
'edge NLEFT from <input>value is M</input> at y: (567, 597), x: (437, 591) to <input>value is O</input> at y: (567, 597), x: (334, 421)'
'edge NLEFT from <button></button> at y: (643, 678), x: (488, 523) to <input></input> at y: (628, 678), x: (198, 466)'
'edge NLEFT from <input></input> at y: (628, 678), x: (536, 804) to <button></button> at y: (643, 678), x: (488, 523)'
'edge NLEFT from <button></button> at y: (628, 680), x: (1143, 1338) to <button></button> at y: (628, 680), x: (1103, 1123)'
'edge NLEFT from <span> 1 Adult </span> at y: (650, 672), x: (1143, 1196) to <span>YVR</span> at y: (650, 672), x: (281, 309)'
'edge NLEFT from <input>DD/MM</input> at y: (628, 678), x: (833, 958) to <label>To</label> at y: (628, 678), x: (536, 554)'
'edge NLEFT from <span>Passenger(s)</span> at y: (623, 673), x: (1143, 1237) to <label>From</label> at y: (622, 


  return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


In [23]:
input_groups = create_input_groups(relation_graph)
input_groups = prune_low_score_group_relations(input_groups, node2vec_model, factor=0.5)

In [50]:
print(str(input_groups[12]))

input:
<button aria-busy="false" aria-disabled="false" aria-labelledby="bkmgFlights_findButtonContent" class="abc-button abc-button-height-default abc-button-theme-blue abc-button-type-primary abc-dir-ltr user-select-none abc-button-has-inset-loader ng-star-inserted" data-analytics-track="home magnet" data-analytics-val="bookingmagnet-flight-find&gt;cash" id="bkmgFlights_findButton" type="submit" x_end="1338" x_start="1170" xpath="//BODY/AC-WEB-APP[1]/DIV[1]/MAIN[1]/DIV[1]/AC-ACOHOME-PAGE[1]/DIV[1]/DIV[1]/AC-BOOKING-MAGNET[1]/DIV[1]/DIV[1]/DIV[1]/DIV[2]/AC-BKMG-FLIGHTS-TAB[1]/DIV[1]/FORM[1]/FIELDSET[1]/DIV[1]/DIV[2]/ABC-BUTTON[1]/BUTTON[1]" y_end="754" y_start="702"><span class="abc-button-content" id="bkmgFlights_findButtonContent" x_end="1306" x_start="1202" xpath="//BODY/AC-WEB-APP[1]/DIV[1]/MAIN[1]/DIV[1]/AC-ACOHOME-PAGE[1]/DIV[1]/DIV[1]/AC-BOOKING-MAGNET[1]/DIV[1]/DIV[1]/DIV[1]/DIV[2]/AC-BKMG-FLIGHTS-TAB[1]/DIV[1]/FORM[1]/FIELDSET[1]/DIV[1]/DIV[2]/ABC-BUTTON[1]/BUTTON[1]/SPAN[1]" 

# Create Global Connections

In [96]:
messages = [
    {
        'role': 'system',
        'content': '''
        Your task is to generate a set of assertions for form fields. The list of the assertions and their signatures is as following:

        1. toBeEqual(value) # for any input type
        2. toHaveLengthCondition(condition, value) # for textual inputs
        3. toBeTruthy() # for boolean inputs
        4. toHaveCondition(condition, value) # for numeric or date inputs
        5. toBeEmpty() # for any input type
        6. toMatch(regexPattern) # for textual inputs

        generate conditions in the as the sample:

        # sample
        expect(field('username'))
        .toHaveLengthCondition('>', 8)
        .toHaveLengthCondition('<', 50)
        .not.toBeEmpty()
        # end of sample

        if there are multiple inputs that should have the relation with each other, you can use a format such as:

        # sample
        expect(field('password'))
        .toBe(field('confirm password'))
        # end of sample

        Only generate the assertions and nothing else. Only generate assertions for the inputs in question, and not the ones in the relevant information section.
        '''
    },
    {
        'role': 'user',
        'content': """
            We are filling the following field in the form:
            <input id="register_password" aria-required="true" type="password" class="ant-input css-dfjnss">

            The relevant information available in the form are (in order of relevance):
            1. <label for="register_password" class="ant-form-item-required" title="Password">Password</label>
            2. <input id="register_confirm" aria-required="true" type="password" class="ant-input css-dfjnss">
            3. <label for="register_confirm" class="ant-form-item-required" title="Confirm Password">Confirm Password</label>
        """
    }
]

In [97]:
response = openai.ChatCompletion.create(
    # model='gpt-3.5-turbo',
    model='gpt-4',
    messages=messages,
    temperature=0.5,
)

In [98]:
response_text = response.choices[0].message.content
print(response_text)

expect(field('register_password'))
.toHaveLengthCondition('>', 8)
.toHaveLengthCondition('<', 50)
.not.toBeEmpty()
.toMatch(/^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d]{8,50}$/)

expect(field('register_password'))
.toBe(field('register_confirm'))
