In [1]:
import os
import glob
import time
import json
from tqdm.notebook import tqdm

from bs4 import BeautifulSoup as bs

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

from webdriver_manager.chrome import ChromeDriverManager

In [2]:
def get_xpath(element):
    """Build an XPath string for ``element`` by running JavaScript in the page.

    The script walks from the element up through its parent nodes until it
    reaches ``document.body``. Each level contributes a ``/TAG[n]`` step,
    where ``n`` is one plus the number of preceding element siblings that
    share the node's ``tagName``. The body's ``tagName`` forms the first step
    and the whole path is prefixed with ``//``.

    Relies on the notebook-level ``driver`` being defined before this is
    called.

    Args:
        element: handle passed to the script as ``arguments[0]``
            (presumably a Selenium WebElement located under ``<body>`` --
            the script dereferences ``parentNode`` without a null check).

    Returns:
        The value ``driver.execute_script`` returns for the script, i.e. the
        ``'//' + path`` string built in the browser.
    """
    # NOTE(review): the id-based shortcut is commented out inside the script,
    # so the trailing ``path.startsWith('id(')`` branch looks like a leftover
    # that cannot trigger with the current getPathTo -- confirm before removing.
    xpath_script = """
    function getPathTo(element) {
        // if (element.id !== '')
        //     return 'id(\"'+element.id+'\")';
        if (element === document.body)
            return element.tagName;
        var ix= 0;
        var siblings= element.parentNode.childNodes;
        for (var i= 0; i<siblings.length; i++) {
            var sibling= siblings[i];
            if (sibling===element)
                return getPathTo(element.parentNode) + '/' + element.tagName + '[' + (ix + 1) + ']';
            if (sibling.nodeType===1 && sibling.tagName===element.tagName)
                ix++;
        }
    }
    const path = getPathTo(arguments[0]);
    if (path.startsWith('id(')) {
        return path;
    }
    return '//' + path;
    """
    # The element is handed to the script as arguments[0].
    return driver.execute_script(xpath_script, element)

In [3]:
def create_application_directory(app_name):
    """Create the output directory for an application if it does not exist.

    Args:
        app_name: path of the directory to create; missing intermediate
            directories are created as well.

    Returns:
        True if this call created the directory, False if something already
        exists at that path.
    """
    try:
        # Create directly instead of checking first: this leaves no window
        # between the existence check and the creation.
        os.makedirs(app_name)
    except FileExistsError:
        return False
    return True


def get_next_index(directory):
    """Return the next free numeric entry index inside ``directory``.

    Entries are expected to be named ``<index>.<ext>`` (for example
    ``3.json``). Names whose part before the first ``.`` does not parse as an
    integer (such as ``.DS_Store`` or ``notes.txt``) are ignored instead of
    raising.

    Args:
        directory: path of an existing directory to scan.

    Returns:
        1 when the directory holds no numerically named entries, otherwise
        the largest index found plus one.
    """
    indices = []
    # List the directory once so the emptiness check and the maximum are
    # computed from the same snapshot.
    for name in os.listdir(directory):
        try:
            indices.append(int(name.split('.')[0]))
        except ValueError:
            # Not an "<index>.<ext>" entry; skip it.
            continue
    if not indices:
        return 1
    return max(indices) + 1


def take_full_screenshot(element, path, settle_seconds=0.5):
    """Scroll ``element`` into view and save a screenshot of it to ``path``.

    Only an element-level screenshot is taken. An earlier approach that
    resized the browser window to the element's height is not used, so the
    window size is left untouched.

    Relies on the notebook-level ``driver`` being defined before this is
    called.

    Args:
        element: element to capture; it must provide ``screenshot(path)``
            (presumably a Selenium WebElement).
        path: destination file path for the image.
        settle_seconds: pause after scrolling, in seconds, before the
            capture. Defaults to 0.5.

    Returns:
        Whatever ``element.screenshot(path)`` returns (Selenium documents
        this as True on success and False on an I/O error).
    """
    # Bring the element into the viewport before capturing it.
    driver.execute_script("return arguments[0].scrollIntoView();", element)
    # Give the page a moment to finish scrolling / rendering.
    time.sleep(settle_seconds)
    return element.screenshot(path)


def add_entry_to_directory(element, app_name):
    """Store one captured element as a numbered entry under ``app_name``.

    Three files sharing the same index are written into the directory:

    * ``<index>.json`` -- the current page URL and the element's XPath,
    * ``<index>.html`` -- the element's prettified ``outerHTML``,
    * ``<index>.png``  -- a screenshot of the element.

    The directory is created when missing and the index comes from
    ``get_next_index``. Relies on the notebook-level ``driver``.

    Args:
        element: element to record (presumably a Selenium WebElement).
        app_name: path of the directory the entry is written to.
    """
    create_application_directory(app_name)
    index = get_next_index(app_name)

    # Collect everything that needs the browser before opening any file, so
    # a failing lookup does not leave an empty, index-consuming file behind.
    element_html = element.get_attribute('outerHTML')
    pretty_html = bs(element_html, 'html.parser').prettify()
    metadata = {
        'url': driver.current_url,
        'form_xpath': get_xpath(element)
    }

    # Explicit UTF-8 keeps the output independent of the platform default
    # encoding; page markup frequently contains non-ASCII characters.
    with open(os.path.join(app_name, f'{index}.json'), 'w', encoding='utf-8') as f:
        json.dump(metadata, f)

    with open(os.path.join(app_name, f'{index}.html'), 'w', encoding='utf-8') as f:
        f.write(pretty_html)

    take_full_screenshot(element, os.path.join(app_name, f'{index}.png'))

In [4]:
# Options for the Chrome session. The headless / fixed-window-size arguments
# below are left disabled.
chrome_options = Options()
# chrome_options.add_argument("--headless")
# chrome_options.add_argument("--window-size=3072x1920");

# Notebook-level browser handle; the helper functions read ``driver`` as a
# global. ChromeDriverManager().install() supplies the value passed to
# Service (presumably the path of a matching chromedriver binary).
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

In [5]:
# Page to open, and the directory name later passed to
# add_entry_to_directory as the location for captured entries.
URL = "https://ant.design/"
APP_NAME = 'antd'
# Open the page in the browser session.
driver.get(URL)

In [6]:
# Maximize the browser window before the forms are located and captured.
driver.maximize_window()

In [8]:
# Collect every <form> element on the currently loaded page.
forms = driver.find_elements(By.TAG_NAME, 'form')

print('forms:', len(forms))

# Save each form as a numbered entry under APP_NAME, reporting progress with
# a 1-based counter.
for i, form in enumerate(forms):
    print('form', i + 1)
    try:
        add_entry_to_directory(form, APP_NAME)
    except Exception as e:
        # Best-effort capture: print the error and continue with the next form.
        print(e)

forms: 89
form 1
form 2
form 3
form 4
form 5
form 6
form 7
form 8
form 9
form 10
form 11
form 12
form 13
form 14
form 15
form 16
form 17
form 18
form 19
form 20
form 21
form 22
form 23
form 24
form 25
form 26
form 27
form 28
form 29
form 30
form 31
form 32
form 33
form 34
form 35
form 36
form 37
form 38
form 39
form 40
form 41
form 42
form 43
form 44
form 45
form 46
form 47
form 48
form 49
form 50
form 51
form 52
form 53
form 54
form 55
form 56
form 57
form 58
form 59
form 60
form 61
form 62
form 63
form 64
form 65
form 66
form 67
form 68
form 69
form 70
form 71
form 72
form 73
form 74
form 75
form 76
form 77
form 78
form 79
form 80
form 81
form 82
form 83
form 84
form 85
form 86
form 87
form 88
form 89
