In [None]:
import os
import json
from PIL import Image
import random
import gzip
import shutil
from tqdm import tqdm


In [None]:
# from huggingface_hub import snapshot_download
# snapshot_download(repo_id="biglab/webui-val", repo_type="dataset", local_dir="./data/webui-val", local_dir_use_symlinks=False)


In [None]:
# Root folder of the extracted WebUI validation split. The WEBUI_DATA_FOLDER
# environment variable overrides the default so the notebook can run on
# another machine without editing this cell.
DATA_FOLDER = os.environ.get(
    "WEBUI_DATA_FOLDER",
    "/Users/thanh/Desktop/experiments/locofy/assessment_2/data/webui-val/val_split_webui",
)

# Only sub-directories are sample folders; stray files (e.g. .DS_Store) must not
# end up in `ids`, because a sample id is later drawn at random from this list.
ids = sorted(
    entry
    for entry in os.listdir(DATA_FOLDER)
    if os.path.isdir(os.path.join(DATA_FOLDER, entry))
)

# Decompress every .gz file found directly inside each sample folder, then
# delete the archive.
for id in tqdm(ids):
    id_path = os.path.join(DATA_FOLDER, id)
    for gz_file in os.listdir(id_path):
        if not gz_file.endswith(".gz"):
            continue
        gz_file_path = os.path.join(id_path, gz_file)
        out_path = gz_file_path[:-3]  # strip the ".gz" suffix

        if not os.path.isfile(out_path):
            # Write to a temporary name first and rename afterwards, so an
            # interrupted run never leaves a truncated file under the final
            # name (which would be mistaken for a finished extraction and
            # cause the archive to be deleted on the next run).
            tmp_path = out_path + ".part"
            with gzip.open(gz_file_path, "rb") as f_in, open(tmp_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.replace(tmp_path, out_path)

        # The archive is only removed once the decompressed file exists.
        os.remove(gz_file_path)
        

In [None]:
# Pick one sample folder to inspect.
# Only directories are valid samples, so stray files in DATA_FOLDER are
# filtered out before drawing. Sorting makes the pick depend only on the seed,
# not on the arbitrary order returned by os.listdir.
SAMPLE_SEED = None  # set to an int to make the pick reproducible across runs
sample_dirs = sorted(
    entry for entry in ids if os.path.isdir(os.path.join(DATA_FOLDER, entry))
)
# With no seed, fall back to the module-level generator (original behaviour).
sample_rng = random if SAMPLE_SEED is None else random.Random(SAMPLE_SEED)
id = sample_rng.choice(sample_dirs)

In [None]:
# Locate the 1280x720 screenshot of the selected sample and display it.
screenshot_rel_path = "{}/default_1280-720-screenshot.webp".format(id)
image_path = os.path.join(DATA_FOLDER, screenshot_rel_path)
Image.open(image_path)

In [None]:
# Path template for the per-sample JSON files: first slot is the sample id,
# second slot is the file kind (axtree, bb, box, style, viewport).
# Built from DATA_FOLDER so the data location is defined in one place only.
name = os.path.join(DATA_FOLDER, "{}/default_1280-720-{}.json")


def _load_sample_json(kind):
    """Load the `kind` JSON file of the currently selected sample `id`.

    The file is opened in a `with` block so the handle is closed as soon as
    parsing finishes, and is read as UTF-8 (the standard JSON encoding).
    """
    with open(name.format(id, kind), "r", encoding="utf-8") as fp:
        return json.load(fp)


axtree = _load_sample_json("axtree")
bb = _load_sample_json("bb")
box = _load_sample_json("box")
style = _load_sample_json("style")
viewport = _load_sample_json("viewport")

In [None]:
# Keep the keys of `viewport` whose value is truthy
# (presumably: DOM nodes flagged as visible in the viewport -- confirm against the dataset docs).
kept_backend_dom_node_ids = []
for viewport_key, viewport_flag in viewport.items():
    if viewport_flag:
        kept_backend_dom_node_ids.append(viewport_key)
len(kept_backend_dom_node_ids)

In [None]:
import numpy as np
import cv2

# Draw the axis-aligned bounding box of every kept DOM node on the screenshot.
np_image = np.array(Image.open(image_path))

boxes = []
for dom_id in kept_backend_dom_node_ids:
    # "content" is a list of points with "x"/"y" entries; coordinates are
    # truncated to int before taking the extremes.
    x_coords = [int(corner["x"]) for corner in box[dom_id]["content"]]
    y_coords = [int(corner["y"]) for corner in box[dom_id]["content"]]

    top_left = (min(x_coords), min(y_coords))
    bottom_right = (max(x_coords), max(y_coords))

    # 2-pixel outline in colour (0, 255, 0), drawn in place on np_image
    cv2.rectangle(np_image, top_left, bottom_right, (0, 255, 0), 2)
    boxes.append([top_left[0], top_left[1], bottom_right[0], bottom_right[1]])

Image.fromarray(np_image)

In [None]:
# Build a directed graph over the whole accessibility tree (edges go from
# parent to child), then prune it so that only nodes whose backend DOM node id
# is in kept_backend_dom_node_ids remain. Whenever a node is pruned, each of its
# parents is connected directly to each of its children, so the ancestor /
# descendant relations between the kept nodes are preserved.
import networkx as nx

kept_backend_dom_node_ids = set(kept_backend_dom_node_ids)

# make sure that all kept backend dom node ids are in the axtree
axtree_dom_ids = {
    str(node["backendDOMNodeId"])
    for node in axtree["nodes"]
    if node.get("backendDOMNodeId") is not None
}
missing_dom_ids = kept_backend_dom_node_ids - axtree_dom_ids
assert not missing_dom_ids, "kept DOM node ids missing from axtree: {}".format(sorted(missing_dom_ids))

# build the whole graph with all nodes; every id is stored as a string
graph = nx.DiGraph()
for node in axtree["nodes"]:
    node_id = str(node["nodeId"])
    # kept as a real list of ids (not its string repr) so it can be iterated
    child_ids = [str(child_id) for child_id in node.get("childIds", [])]
    graph.add_node(
        node_id,
        backendDOMNodeId=str(node.get("backendDOMNodeId", "")),
        parentId=str(node.get("parentId", "")),
        childIds=child_ids,
    )
    for child_id in child_ids:
        graph.add_edge(node_id, child_id)

# the root is the first node without a parent; cast to str to match graph keys
root_node_id = [str(node["nodeId"]) for node in axtree["nodes"] if node.get("parentId") is None][0]

# Nodes to prune: everything except the root whose DOM id is not kept.
# Nodes that only appear as a child id (no attributes) are pruned as well.
removing_nodes = [
    node_id
    for node_id, attrs in graph.nodes(data=True)
    if node_id != root_node_id
    and attrs.get("backendDOMNodeId") not in kept_backend_dom_node_ids
]

# Prune one node at a time, using the *current* neighbours in the graph.
# This makes chains of pruned nodes collapse correctly: the bypass edges added
# for one pruned node are themselves bypassed when the next one is pruned.
new_edges = []  # bypass edges that were added, kept for inspection
for node_id in removing_nodes:
    parent_ids = list(graph.predecessors(node_id))
    child_ids = list(graph.successors(node_id))
    for parent_id in parent_ids:
        for child_id in child_ids:
            graph.add_edge(parent_id, child_id)
            new_edges.append((parent_id, child_id))
    graph.remove_node(node_id)

# The root is always dropped (even if its DOM id is kept) and its children are
# not reconnected, so the result may be a forest.
graph.remove_node(root_node_id)

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import pydot
from networkx.drawing.nx_pydot import graphviz_layout

# Lay out the pruned graph with Graphviz "dot" and draw it with node labels
# on a 10x10 inch figure at 60 dpi.
draw_options = {"with_labels": True, "font_weight": "bold"}

pos = graphviz_layout(graph, prog="dot")
fig = plt.figure(num=1, figsize=(10, 10), dpi=60)
nx.draw(graph, pos=pos, **draw_options)

plt.show()


In [None]:
# dom_id = graph.nodes["7457"]["backendDOMNodeId"]
# box[dom_id]["content"]

In [None]:
# Re-key the pruned accessibility graph by backend DOM node id.
# Map: accessibility node id -> value of its "backendDOMNodeId" attribute.
node_id_to_dom_id = {
    ax_node_id: attrs["backendDOMNodeId"]
    for ax_node_id, attrs in graph.nodes(data=True)
}

# New directed graph with the same structure, but with DOM ids as node keys.
backend_dom_tree = nx.DiGraph()
backend_dom_tree.add_nodes_from(node_id_to_dom_id.values())
backend_dom_tree.add_edges_from(
    (node_id_to_dom_id[src], node_id_to_dom_id[dst]) for src, dst in graph.edges()
)

# Draw it with the Graphviz "dot" layout on a 10x10 inch figure at 60 dpi.
pos = graphviz_layout(backend_dom_tree, prog="dot")
fig = plt.figure(num=1, figsize=(10, 10), dpi=60)
nx.draw(backend_dom_tree, pos=pos, with_labels=True, font_weight="bold")

plt.show()

In [None]:
# Collect the per-node information needed downstream.
# Stored for every node: its id, its bounding box and whether its CSS
# "position" is "absolute". (z-index is not extracted here yet.)
# The edges are taken as-is from backend_dom_tree.
np_image = np.array(Image.open(image_path))

nodes = []  # one dictionary per node
for dom_id in backend_dom_tree.nodes():
    # axis-aligned box around the "content" points, coordinates truncated to int
    x_coords = [int(corner["x"]) for corner in box[dom_id]["content"]]
    y_coords = [int(corner["y"]) for corner in box[dom_id]["content"]]
    x1, y1 = min(x_coords), min(y_coords)
    x2, y2 = max(x_coords), max(y_coords)

    is_absolute = style[dom_id]["position"] == "absolute"

    node_record = {
        "id": dom_id,
        "box": [x1, y1, x2, y2],
        "is_absolute": is_absolute,
    }
    nodes.append(node_record)

    # only absolutely positioned nodes are outlined on the screenshot
    if is_absolute:
        cv2.rectangle(np_image, (x1, y1), (x2, y2), (255, 0, 0), 2)

edges = list(backend_dom_tree.edges())
Image.fromarray(np_image)