In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are reloaded before each cell execution.
%load_ext autoreload
%autoreload 2

In [12]:
import os
import openai
from dotenv import load_dotenv

from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup as bs

from method.ours import (
    create_driver,
    embed_properties,
    get_processable_nodes,
    create_relations_graph,
    create_2d_span_ordered_dict,
    add_for_links,
    add_parent_child_links,
    add_left_right_links,
    add_top_bottom_links,
    create_node2vec_model,
    add_weight_to_graph,
    cutoff_low_score_edges,
)

In [5]:
# Load variables from a .env file (python-dotenv) into the process
# environment, then hand the OpenAI key to the client library.
# The key is None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [6]:
# ---- Notebook-wide configuration ----

# Passed to create_driver() when the browser session is started.
HEADLESS = False

# Passed to create_relations_graph().
# Options listed by the author: 'ADA', 'WORD2VEC', 'SPACY'.
TEXT_EMBEDDING_METHOD = "ADA"

# Options listed by the author: 'NODE2VEC', 'GCN'.
# NOTE(review): not referenced by any cell visible in this notebook section.
GRAPH_EMBEDDING_METHOD = "NODE2VEC"

In [7]:
# Start a browser session (headless or not, per HEADLESS) and navigate to
# the page whose forms are analysed in the following cells.
target_url = 'https://ant.design/components/form'
driver = create_driver(HEADLESS)
driver.get(target_url)

In [13]:
# Position of the target <form> among all <form> elements on the page.
# NOTE(review): this index is tied to the page's current layout — re-check
# it whenever the page content changes.
FORM_INDEX = 48

# Collect every <form> first so an out-of-range index produces a clear
# message instead of a bare "list index out of range".
page_forms = driver.find_elements(By.TAG_NAME, 'form')
if FORM_INDEX >= len(page_forms):
    raise IndexError(
        f"FORM_INDEX={FORM_INDEX} is out of range: only {len(page_forms)} "
        "<form> elements were found on the page (has it finished loading?)"
    )

# embed_properties() receives the driver and the selected element; its
# result is what the rest of the notebook refers to as `form`.
form = embed_properties(driver, page_forms[FORM_INDEX])

# Parse the form's outer HTML with BeautifulSoup's built-in 'html.parser'.
form_doc = bs(form.get_attribute('outerHTML'), 'html.parser')

In [14]:
# Extract from the parsed form document the nodes that the relation graph
# is built from in the next cell.
# NOTE(review): the selection criteria live in method.ours.get_processable_nodes
# and are not visible here.
form_processable_nodes = get_processable_nodes(form_doc)

In [15]:
# Build the relation graph from the processable nodes, using the text
# embedding method chosen in the configuration cell.
# NOTE(review): with 'ADA' this presumably calls the OpenAI API — confirm.
relation_graph = create_relations_graph(form_processable_nodes, TEXT_EMBEDDING_METHOD)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:03<00:00,  3.65it/s]


In [16]:
# Derive the 2D span structure from the relation graph; it is consumed by
# the parent/child, left/right and top/bottom link passes.
spans_2d = create_2d_span_ordered_dict(relation_graph)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 47974.07it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 35246.25it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 23912.79it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 31936.32it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 10791.52it/s]


In [17]:
# Link passes, applied in a fixed order. Each pass takes the current graph
# and its return value is rebound to `relation_graph`.
# NOTE(review): re-running this cell feeds the already-linked graph back in.
relation_graph = add_for_links(relation_graph)

# The remaining passes share one signature: (spans_2d, relation_graph).
for link_pass in (
    add_parent_child_links,
    add_left_right_links,
    add_top_bottom_links,
):
    relation_graph = link_pass(spans_2d, relation_graph)

In [18]:
# Build the node2vec model used to weight the relation graph's edges.
# NOTE(review): the model is created from form_doc (the parsed HTML), not
# from relation_graph — confirm this is intended.
# NOTE(review): no random seed is set in the visible cells; confirm whether
# create_node2vec_model seeds its walks, otherwise results may vary per run.
model = create_node2vec_model(form_doc)

Computing transition probabilities:   0%|          | 0/59 [00:00<?, ?it/s]

Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 1490.35it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 1437.86it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 1396.61it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 1446.05it/s]


In [19]:
# Assign weights to the graph's edges using the node2vec model, then drop
# low-scoring edges. The saved output reports a mean, a standard deviation
# and a cutoff equal to mean + standard deviation.
# NOTE(review): the saved output also contains a stray cosine-similarity
# source line, which looks like a runtime warning (possibly a zero-norm
# vector) — confirm and handle in the helper.
relation_graph = add_weight_to_graph(model, relation_graph)
relation_graph = cutoff_low_score_edges(relation_graph)

  return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 71.85it/s]

Mean: 0.5838302100659439
Standard Deviation: 0.21880087042511534
Cutoff: 0.8026310804910592





In [20]:
# Display the edges remaining after the cutoff as this cell's output.
relation_graph.edges()

[edge CHILD weight 0 from <input></input> at y: (8274, 8304), x: (618, 803) to <span>Select province</span> at y: (8274, 8304), x: (618, 803),
 edge CHILD weight 0 from <input></input> at y: (8274, 8304), x: (618, 803) to <path></path> at y: (8286, 8292), x: (793, 803),
 edge CHILD weight 0 from <button>Submit</button> at y: (8385, 8417), x: (606, 684) to <span>Submit</span> at y: (8390, 8412), x: (622, 668),
 edge NRIGHT weight 0.8722547598939875 from <label>Username</label> at y: (8217, 8249), x: (526, 606) to <input>Please input</input> at y: (8217, 8249), x: (606, 766),
 edge NLEFT weight 0.8722547598939875 from <input>Please input</input> at y: (8217, 8249), x: (606, 766) to <label>Username</label> at y: (8217, 8249), x: (526, 606),
 edge NRIGHT weight 0.9001543410086947 from <input>Please input</input> at y: (8217, 8249), x: (606, 766) to <a>Need Help?</a> at y: (8225, 8242), x: (774, 849),
 edge NLEFT weight 0.9001543410086947 from <a>Need Help?</a> at y: (8225, 8242), x: (774, 