In [1]:
import csv

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import chromedriver_binary

def open_browser():
    """
    Launch a maximized Chrome window configured to hide the usual
    tell-tale signs of browser automation.

    Returns:
        The newly created Chrome driver.
    """
    chrome_opts = webdriver.ChromeOptions()

    # command-line switches: maximize the window and turn off the Blink
    # feature that advertises automation control
    for switch in ("start-maximized", '--disable-blink-features=AutomationControlled'):
        chrome_opts.add_argument(switch)

    # experimental options that remove the remaining automation markers
    experimental = {
        "excludeSwitches": ["enable-automation"],
        'useAutomationExtension': False,
    }
    for option_name, option_value in experimental.items():
        chrome_opts.add_experimental_option(option_name, option_value)

    # start Chrome with the assembled options
    return webdriver.Chrome(options=chrome_opts)

In [2]:
# Start the automated Chrome session and navigate to the candidate listing page
listing_url = 'https://www.misgrosi.ch/wunsch-grosi-suchen/listegrosis/'
driver = open_browser()
driver.get(listing_url)

In [3]:
def load_more_candidate(driver):
    """
    Click the "load more" link once so that additional candidates are shown.

    The link may not be present yet, so we wait (up to 20 seconds) for it
    to become clickable instead of clicking blindly. If it never becomes
    clickable within that time, the wait raises and the error propagates
    to the caller.
    """
    load_more_locator = (By.CSS_SELECTOR, "a.load_more_resumes.btn.btn-default")
    button_is_clickable = EC.element_to_be_clickable(load_more_locator)

    # `until` only ever returns a truthy value (it raises on timeout),
    # so the result can be clicked directly
    WebDriverWait(driver, 20).until(button_is_clickable).click()

In [4]:
# Load all candidates: keep clicking "load more" until the button no longer
# becomes clickable.
while True:
    try:
        load_more_candidate(driver)
    except TimeoutException:
        # The button did not become clickable within the wait window:
        # treat this as "everything is loaded" and stop.
        break
    except WebDriverException as exc:
        # Any other Selenium failure (e.g. stale or intercepted click) also
        # ends the loop, but is reported instead of being silently swallowed.
        # Non-Selenium errors (KeyboardInterrupt, NameError, ...) propagate.
        print('Stopped loading more candidates early:', exc)
        break

In [5]:
# Collect every candidate entry currently rendered on the page
def find_cands(driver):
    """
    Return the elements matched by the ``ul.resumes li`` CSS selector,
    i.e. the candidates loaded in the browser so far.
    """
    return driver.find_elements(By.CSS_SELECTOR, 'ul.resumes li')

In [1]:
def _strip_country_suffix(location, suffix=', Schweiz'):
    """Return `location` without a trailing `suffix` (exact match only)."""
    location = location.strip()
    # NOTE: str.strip(', Schweiz') would remove any of those *characters*
    # from both ends (turning "8000 Zürich" into "8000 Zür"), so the suffix
    # is removed explicitly instead.
    if suffix and location.endswith(suffix):
        location = location[:-len(suffix)]
    return location.strip()


# Get all information of one candidate
def get_cand_info(cand, driver):
    """
    Extract name, intro, location and role from one candidate element.

    Args:
        cand: element for a single candidate; must support
            ``find_element(by, value)`` returning objects with a ``.text``.
        driver: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(info, row)`` where ``info`` is a dict with the keys
        'Name', 'Intro', 'Location' and 'Role', and ``row`` is a list of
        the same values in that order.

    Raises:
        Whatever ``cand.find_element`` raises when a field is missing.
    """
    raw_location = cand.find_element(By.CLASS_NAME, "candidate-location-column").text

    info = {
        'Name': cand.find_element(By.TAG_NAME, "h3").text,
        'Intro': cand.find_element(By.CLASS_NAME, "candidate-title").text,
        'Location': _strip_country_suffix(raw_location),
        'Role': cand.find_element(By.CLASS_NAME, "resume-category").text,
    }
    print(info)

    # A concrete list, not a live dict view, so the row stays stable even
    # if `info` is modified later.
    row = list(info.values())

    return info, row

In [2]:
# Write data into csv file
def write_to_csv(data, path='../data/misgrosi_candidate.csv'):
    """
    Write the candidate rows to a CSV file, preceded by a header row.

    Args:
        data: iterable of rows; each row is an iterable of the values for
            'Name', 'Intro', 'Location' and 'Role', in that order.
        path: destination file; defaults to the original output location.
            An existing file is overwritten.

    Raises:
        OSError: if the file cannot be opened for writing (for example when
            the target directory does not exist).
    """
    header = ['Name', 'Intro', 'Location', 'Role']

    # newline='' is required by the csv module: without it, every row is
    # followed by an extra blank line on Windows. 'utf-8-sig' writes a BOM
    # at the start of the file.
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(data)

In [8]:
# Get all candidate information and write to csv
cands = find_cands(driver)
data = []
for cand in cands:
    print(cand.text)
    info, row = get_cand_info(cand, driver)
    if row:
        data.append(row)

# Write the file once, after all candidates are collected, instead of
# rewriting the whole file on every loop iteration. Nothing is written when
# no candidates were found, so an earlier export is not replaced by an
# empty one.
if data:
    write_to_csv(data)

Nani Moni
Einfach ein lässiges Grosi sy
4900 Langenthal, Schweiz
Tages-Grosi
{'Name': 'Nani Moni', 'Intro': 'Einfach ein lässiges Grosi sy', 'Location': '4900 Langenthal', 'Role': 'Tages-Grosi'}
Jida
Betreue Kinder aus allen Kulturen
4051 Basel, Schweiz
Tages-Grosi
{'Name': 'Jida', 'Intro': 'Betreue Kinder aus allen Kulturen', 'Location': '4051 Basel', 'Role': 'Tages-Grosi'}
Phoebe-Grosi
Bin da offen und neugierig
8105 Watt, Schweiz
Tages-Grosi
{'Name': 'Phoebe-Grosi', 'Intro': 'Bin da offen und neugierig', 'Location': '8105 Watt', 'Role': 'Tages-Grosi'}
Wunsch-Omi Elvira
Jungebliebene, aktive mitten im Leben stehende Grosi/Omi sucht sinnvolle Beschäftigung
9533 Kirchberg, Schweiz
Tages-Grosi
{'Name': 'Wunsch-Omi Elvira', 'Intro': 'Jungebliebene, aktive mitten im Leben stehende Grosi/Omi sucht sinnvolle Beschäftigung', 'Location': '9533 Kirchberg', 'Role': 'Tages-Grosi'}
Grosi claudia
Bin aufgestellt und kinder lieb
3363 Oberönz, Schweiz
Tages-Grosi
{'Name': 'Grosi claudia', 'Intro': '