In [1]:
import csv
import re
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Functions

## Common

In [2]:
# Command-line argument handed to Chrome by get_driver(); it sets the
# user-agent string the browser reports.
user_agent = (
    "user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
    "83.0.4103.97 Safari/537.36"
)


def get_driver():
    """Create and return a Chrome WebDriver.

    The browser is started with the module-level ``user_agent`` argument and
    ``start-maximized``. The chromedriver binary path is obtained from
    ``ChromeDriverManager().install()``.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    chrome_options = Options()
    for argument in (user_agent, "start-maximized"):
        chrome_options.add_argument(argument)

    chromedriver_path = ChromeDriverManager().install()
    chrome_service = Service(chromedriver_path)
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

In [3]:
def get_label_sibling(web_element):
    """Return the cleaned text of the nearest preceding sibling that has any.

    Walks backwards through the element's preceding siblings, one at a time,
    until one yields a non-empty label. A label is the sibling's
    ``innerText`` with every character other than ASCII letters, digits and
    spaces removed, then stripped and lower-cased.

    Args:
        web_element: element whose preceding siblings are searched.

    Returns:
        The first non-empty cleaned label found.

    Raises:
        Whatever ``find_element`` raises once there is no further preceding
        sibling; the loop has no other exit when no sibling carries text.
    """
    label = ""
    while label == "":
        # "[1]" on the reverse axis selects the *nearest* preceding sibling.
        # Without it find_element returns the first match in document order,
        # i.e. the farthest sibling, so the walk cannot advance step by step.
        web_element = web_element.find_element(By.XPATH, "preceding-sibling::*[1]")
        # innerText can be missing (None); treat that as "no text here".
        raw_text = web_element.get_attribute("innerText") or ""
        label = re.sub(r'[^A-Za-z0-9 ]+', '', raw_text).strip().lower()
    return label

In [4]:
def get_label_parent(web_element):
    """Return the cleaned ``innerText`` of the element's parent.

    The text keeps only ASCII letters, digits and spaces, and is then
    stripped of surrounding whitespace and lower-cased.

    Args:
        web_element: element whose parent is inspected.

    Returns:
        The cleaned label (possibly an empty string).
    """
    parent = web_element.find_element(By.XPATH, "parent::*")
    raw_text = parent.get_attribute("innerText")
    without_symbols = re.compile(r'[^A-Za-z0-9 ]+').sub('', raw_text)
    return without_symbols.strip().lower()

In [53]:
def get_label_parent_sibling(web_element):
    """Return a label taken from the sibling just before the element's parent.

    The parent's nearest preceding sibling is located. When that sibling has
    fewer than two descendants its own ``innerText`` is used; otherwise its
    descendants are scanned in order and the first one with non-empty text
    wins. Text is cleaned by dropping every character other than ASCII
    letters, digits and spaces, then stripping and lower-casing.

    Args:
        web_element: element whose parent's preceding sibling is inspected.

    Returns:
        The cleaned label, or ``""`` when no candidate carries any text.

    Raises:
        Whatever ``find_element`` raises when the parent or its preceding
        sibling does not exist.
    """
    parent = web_element.find_element(By.XPATH, "parent::*")
    # "[1]" selects the nearest preceding sibling; the bare axis would make
    # find_element return the first match in document order (the farthest).
    sibling = parent.find_element(By.XPATH, "preceding-sibling::*[1]")
    descendants = sibling.find_elements(By.XPATH, ".//*")

    candidates = [sibling] if len(descendants) < 2 else descendants
    for candidate in candidates:
        try:
            raw_text = candidate.get_attribute("innerText") or ""
        except Exception:
            # Best effort: a candidate that cannot be read is skipped.
            continue
        label = re.sub(r'[^A-Za-z0-9 ]+', '', raw_text).strip().lower()
        if label != "":
            return label

    return ""

In [6]:
def get_label(web_element):
    """Find a label for a form element, trying three lookups in order.

    The lookups are ``get_label_sibling``, ``get_label_parent`` and
    ``get_label_parent_sibling``. The first one that returns something other
    than an empty string wins. A lookup that raises is treated as "no label
    from this strategy" and the next one is tried.

    Args:
        web_element: the form element to label.

    Returns:
        The first non-empty label, or ``""`` when every lookup fails or
        comes back empty.
    """
    strategies = (get_label_sibling, get_label_parent, get_label_parent_sibling)
    for strategy in strategies:
        try:
            label = strategy(web_element)
        except Exception:
            # Deliberately best-effort, but only for ordinary errors:
            # KeyboardInterrupt / SystemExit must still stop the notebook.
            continue
        if label != "":
            return label
    return ""

In [7]:
def highlight(driver, element, color='yellow'):
    """Mark an element visually by overwriting its inline ``style`` attribute.

    Runs a JavaScript snippet through ``driver.execute_script`` that sets the
    element's style to a ``color`` background with a 2px solid red border.

    Args:
        driver: object exposing ``execute_script``.
        element: element passed to the script as ``arguments[0]``.
        color: CSS background value; defaults to ``'yellow'``.
    """
    inline_css = f"background: {color}; border: 2px solid red;"
    script = f"arguments[0].setAttribute('style', '{inline_css}');"
    driver.execute_script(script, element)

In [8]:
def unhighlight(driver, element):
    """Clear an element's inline ``style`` attribute.

    Runs a JavaScript snippet through ``driver.execute_script`` that sets the
    element's ``style`` attribute to an empty string.

    Args:
        driver: object exposing ``execute_script``.
        element: element passed to the script as ``arguments[0]``.
    """
    clear_style_script = "arguments[0].setAttribute('style','')"
    driver.execute_script(clear_style_script, element)

## Input

In [34]:
def get_input_elements(driver):
    """Return the required email/password/text ``<input>`` elements.

    An input counts as required when its ``aria-required`` or ``required``
    attribute is set to something other than an empty value or the literal
    string ``"false"`` (``aria-required="false"`` is a non-empty string but
    explicitly means "not required").

    Args:
        driver: object exposing ``find_elements``.

    Returns:
        A new list with the required inputs, in the order they were found.
    """
    def is_flagged(elem, attribute):
        value = elem.get_attribute(attribute)
        return bool(value) and str(value).strip().lower() != "false"

    candidates = driver.find_elements(
        By.XPATH, "//input[@type='email' or @type='password' or @type='text']"
    )
    # Build a new list instead of removing from the one being iterated:
    # removing during iteration skips the element after each removal, which
    # let optional inputs slip through.
    return [
        elem
        for elem in candidates
        if is_flagged(elem, "aria-required") or is_flagged(elem, "required")
    ]

## Select

In [10]:
def get_select_elements(driver):
    """Return every ``<select>`` element found by ``driver``.

    Args:
        driver: object exposing ``find_elements``.

    Returns:
        Whatever ``driver.find_elements`` returns for the XPath ``//select``.
    """
    return driver.find_elements(By.XPATH, "//select")

In [21]:
def get_options(web_element):
    """Return the lower-cased ``innerText`` of each option of a dropdown.

    Args:
        web_element: the element wrapped in ``Select`` to read its options.

    Returns:
        A list of strings, one per option, in the order ``Select.options``
        yields them.
    """
    dropdown = Select(web_element)
    return [choice.get_attribute("innerText").lower() for choice in dropdown.options]

# Driver method

In [19]:
def _load_saved_answers(info_path):
    """Read ``question,answer`` rows from the CSV at ``info_path`` into a dict."""
    saved = {}
    with open(info_path, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines and one-cell rows carry no answer; indexing them
            # would raise IndexError.
            if len(row) >= 2:
                saved[row[0]] = row[1]
    return saved


def _ask_option_index(question, options_dict):
    """Prompt for an option index; return it as int, or None if not a number."""
    try:
        return int(input(f"{question}: {options_dict}"))
    except ValueError:
        return None


def autofill(info_path, url, driver):
    """Open ``url`` and fill its required text inputs and its dropdowns.

    Saved answers are read from the CSV at ``info_path`` (first column is the
    question label, second the answer). For each field the label is found
    with ``get_label``; fields without a label are skipped. Unknown questions
    are asked interactively through ``input()`` and the answers collected are
    appended to the same CSV at the end.

    Highlight colors show progress: ``'pink'`` while a field is being
    handled, the default highlight once it was filled, ``'Salmon'`` when
    filling it raised an error.

    Dropdown handling:
        * a label containing ``'legally authorized'`` uses the saved
          ``'authorization'`` answer, one containing ``'sponsorship'`` uses
          ``'sponsorship'``;
        * when the answer is unknown or is not one of the options, the user
          is shown the ``{option text: index}`` mapping and asked for an
          index; ``-1`` or a non-numeric reply skips the dropdown.

    Args:
        info_path: path of the CSV file with saved answers (read, then
            appended to).
        url: page to open with ``driver.get``.
        driver: Selenium WebDriver used for all page interaction.

    Returns:
        None.
    """
    info = _load_saved_answers(info_path)
    new_info = {}

    driver.get(url)
    try:
        # Presumably for forms embedded in an iframe: enter the first iframe,
        # but leave it again when it contains no fillable inputs.
        driver.switch_to.frame(driver.find_element(By.TAG_NAME, "iframe"))
        if len(get_input_elements(driver)) == 0:
            driver.switch_to.default_content()
    except Exception:
        # No iframe, or it could not be entered: keep the current document.
        pass

    # text input
    for elem in get_input_elements(driver):
        question = get_label(elem)
        if question == "":
            continue
        highlight(driver, elem, 'pink')
        if question in info:
            answer = info[question]
        else:
            answer = input(f"{question}: ")
            new_info[question] = answer

        try:
            elem.send_keys(answer)
            highlight(driver, elem)
        except Exception:
            highlight(driver, elem, 'Salmon')

    # drop down
    for elem in get_select_elements(driver):
        question = get_label(elem)
        if question == "":
            continue
        highlight(driver, elem, 'pink')
        # Option text -> position; a repeated text keeps its last position.
        options_dict = {text: position for position, text in enumerate(get_options(elem))}

        # `answer` is assigned on every path so a value from the previous
        # dropdown can never leak into this one.
        is_new_question = False
        if 'legally authorized' in question:
            answer = info.get('authorization')
        elif 'sponsorship' in question:
            answer = info.get('sponsorship')
        elif question in info:
            answer = info[question]
        else:
            answer = None
            is_new_question = True

        if answer in options_dict:
            index = options_dict[answer]
        else:
            index = _ask_option_index(question, options_dict)
            if index is None or index == -1:
                continue
            if is_new_question:
                # Remember the chosen option text only when the index maps
                # to a known option.
                chosen = next(
                    (text for text, position in options_dict.items() if position == index),
                    None,
                )
                if chosen is not None:
                    new_info[question] = chosen

        try:
            Select(elem).select_by_index(index)
            highlight(driver, elem)
        except Exception:
            highlight(driver, elem, 'Salmon')

    with open(info_path, 'a+', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        for question, answer in new_info.items():
            writer.writerow([question, answer])

# Testing

https://careers-milbank.icims.com/jobs/1224/legal-assistant-entry-level--ny/candidate?from=login&eem=MHNWjSX2GnEPAL1EYlzDnhNYUrV8ph1lZUlhT%252BM1qIxQ%252BoOTlVd2vx3bSRWvVu1o&code=93d3f900bc8ef3186b3dc3984743817a2157bd7fd5173096c218561a33b36140&ga=a43fcaf627f8cb3a5793c18b381b90eea4860392d3bb4cece643741a6e4d4362  
https://www.guidepoint.com/job/6009703002/?gh_jid=6009703002  
https://boards.greenhouse.io/anaplan/jobs/6143350002#application  

In [30]:
links = ["https://careers-milbank.icims.com/jobs/1224/legal-assistant-entry-level--ny/candidate?from=login&eem=MHNWjSX2GnEPAL1EYlzDnhNYUrV8ph1lZUlhT%252BM1qIxQ%252BoOTlVd2vx3bSRWvVu1o&code=93d3f900bc8ef3186b3dc3984743817a2157bd7fd5173096c218561a33b36140&ga=a43fcaf627f8cb3a5793c18b381b90eea4860392d3bb4cece643741a6e4d4362&mobile=false&width=1020&height=500&bga=true&needsRedirect=false&jan1offset=-300&jun1offset=-240",
        "https://www.guidepoint.com/job/6009703002/?gh_jid=6009703002",
        "https://boards.greenhouse.io/anaplan/jobs/5990560002#application",
        'https://jobs.lever.co/hive/4ee4ed80-13a9-4205-9d58-5a06ee884543/apply']

In [29]:
# Start the Chrome session; the same driver object is passed to autofill().
driver = get_driver()




In [55]:
# Fill the form at links[3], reading saved answers from (and appending new
# ones to) info.csv.
autofill('info.csv', links[3], driver)