In [1]:
import csv

# importing chromedriver_binary is kept for its import-time effect (see the package docs)
import chromedriver_binary
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.relative_locator import locate_with
from selenium.webdriver.support.ui import WebDriverWait

def open_browser():
    """
    Start a maximised Chrome session configured to hide the usual automation hints.

    Returns the ``webdriver.Chrome`` instance created with those options.
    """
    chrome_options = webdriver.ChromeOptions()

    # command-line switches: maximise the window, then turn off the Blink
    # feature that advertises automation control
    for switch in ("start-maximized", '--disable-blink-features=AutomationControlled'):
        chrome_options.add_argument(switch)

    # experimental options: drop the "enable-automation" switch and the
    # automation extension
    experimental_settings = {
        "excludeSwitches": ["enable-automation"],
        'useAutomationExtension': False,
    }
    for setting_name, setting_value in experimental_settings.items():
        chrome_options.add_experimental_option(setting_name, setting_value)

    # hand the prepared options straight to the new browser instance
    return webdriver.Chrome(options=chrome_options)

def find_jobs(driver):
    """
    Collect every job card currently present in the page held by ``driver``.

    Returns whatever ``driver.find_elements`` yields for the job-card selector.
    """
    job_card_selector = 'div.row.job-item.ng-star-inserted'
    return driver.find_elements(By.CSS_SELECTOR, job_card_selector)

def load_more_job(driver, timeout=20):
    """
    Click the "view more jobs" button once it is ready.

    Parameters
    ----------
    driver : Selenium WebDriver currently showing the job listing.
    timeout : maximum number of seconds to wait for the button
        (defaults to 20, the value that used to be hard-coded).

    Raises
    ------
    Whatever ``WebDriverWait.until`` raises when the button never becomes
    clickable within ``timeout`` (Selenium's ``TimeoutException``). Callers
    rely on that exception to detect that there is nothing left to load.
    """
    more_button_locator = (
        By.CSS_SELECTOR,
        "button.btn.btn-outline-dark.btn-lg.px-5.mx-auto.ng-star-inserted",
    )

    # until() hands back the element the condition resolved to, so the click
    # targets exactly the element that was waited for; looking it up a second
    # time would leave a window for the page to re-render in between.
    more_button = WebDriverWait(driver, timeout=timeout).until(
        EC.element_to_be_clickable(more_button_locator)
    )
    more_button.click()
        
def get_brief_description(job):
    """
    Extract the summary fields shown on one job card.

    Parameters
    ----------
    job : element exposing ``find_element(by, selector)`` and ``.text``.

    Returns
    -------
    dict with the keys ``title``, ``profile description``, ``time``, ``loc``,
    ``total_salary`` and ``per_hour_salary`` when every lookup succeeds.
    If any lookup fails, the raw ``job.text`` string is returned instead, so
    callers must be prepared for either type.
    """
    # output key -> CSS selector, in the order the keys appear in the result
    field_selectors = {
        'title': 'div.col-12.col-lg h5',
        'profile description': 'p.text-truncate.job-profile-description',
        'time': 'p.text-muted',
        'loc': 'p.text-muted.font-weight-bold',
        'total_salary': 'div.text-primary.ng-star-inserted h5',
        'per_hour_salary': 'div.text-primary.ng-star-inserted span',
    }
    try:
        description = {
            field: job.find_element(By.CSS_SELECTOR, selector).text
            for field, selector in field_selectors.items()
        }
    except Exception:
        # Best-effort fallback: if any field is missing, return whatever text
        # the card has. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        description = job.text
    return description

def _parse_chf_amount(text):
    """Convert a price label such as "CHF 3’240.00" or "CHF 30.00/h" to a float."""
    # drop thousands separators first, then the currency / per-hour markers
    cleaned = text.replace('’', '').replace("'", '')
    for marker in ('CHF', '/h'):
        cleaned = cleaned.replace(marker, '')
    return float(cleaned)


def _split_location(location):
    """Split a "City 1234" label into (city, last-four-characters post code)."""
    location = location.strip()
    return location[:-4].strip(), location[-4:]


def get_job_info(job, driver, job_id=None):
    """
    Extract the details of the job page currently loaded in ``driver``.

    Parameters
    ----------
    job : the listing element the page was opened from. It is not read by
        this function and is kept only for interface compatibility.
    driver : Selenium WebDriver whose active window shows the job page.
    job_id : value stored under ``'id'``. When omitted, the notebook-level
        counter ``i`` is used if it exists (previous behaviour), else ``None``.

    Returns
    -------
    (description, row) where ``description`` is a dict of the extracted
    fields and ``row`` is the list of its values in insertion order, or
    ``(None, None)`` when any part of the page cannot be read or parsed.
    """
    if job_id is None:
        # Backward compatibility: existing callers keep the running counter
        # in a module/notebook global named ``i`` instead of passing it in.
        job_id = globals().get('i')

    description = {'id': job_id}
    try:
        description['Job role'] = driver.find_element(By.CSS_SELECTOR, 'div.pl-md-4.text-left h5').text
        detail_list = [d.text for d in driver.find_elements(By.CSS_SELECTOR, 'div.pl-md-4.text-left p')]
        name_lst = [n.text for n in driver.find_elements(By.CSS_SELECTOR, 'div.my-4.ng-star-inserted h6')]

        # ``c`` is the index of the next unread paragraph in detail_list: the
        # paragraphs are consumed in page order, and a section only occupies
        # paragraphs when its heading is present.
        # NOTE(review): this assumes paragraphs always follow heading order - confirm against the page markup.
        c = 0
        if "Experience level" in name_lst:
            description['Experience Level'] = detail_list[0].strip()
            description['Experience detail'] = detail_list[1]
            c += 2
        else:
            description['Experience Level'] = None
            description['Experience detail'] = None

        if "Job description" in name_lst:
            description['Job description'] = detail_list[c]
            c += 1
        else:
            description['Job description'] = None

        if 'Industry' in name_lst:
            description['Industry'] = detail_list[c]
            c += 1
        else:
            description['Industry'] = None

        # the paragraph after the optional sections is treated as the location
        description['City'], description['Post code'] = _split_location(detail_list[c])

        description['Pay total'] = _parse_chf_amount(
            driver.find_element(By.CSS_SELECTOR, 'div.text-primary.ng-star-inserted h5').text
        )
        description['Pay per hour'] = _parse_chf_amount(
            driver.find_element(By.CSS_SELECTOR, 'div.text-primary.ng-star-inserted span').text
        )

        # label is split on " · " into the start date and an "<n> shift(s)" part
        start_shift = driver.find_element(By.CSS_SELECTOR, 'div.my-4 p').text.split(" · ")
        description['Start date'] = start_shift[0]
        description['Shift'] = int(start_shift[1].split()[0])
    except Exception:
        # Best-effort scraping: an unreadable page yields no record rather
        # than aborting the whole run. KeyboardInterrupt still propagates.
        return None, None

    # a concrete list (not a live dict view) so the row cannot change later
    return description, list(description.values())

def write_to_csv(data, path='coople.csv'):
    """
    Write the scraped rows to a CSV file, replacing any existing file.

    Parameters
    ----------
    data : iterable of rows; each row is an iterable of cell values in the
        same order as the header below.
    path : destination file (defaults to ``'coople.csv'``, as before).
    """
    header = ['Rank', 'Job role', 'Experience Level', 'Experience detail', 'Job description', 'Industry', 'City', 'Post Code', 'Pay total', 'Pay per hour', 'Start date', 'Shift']
    # newline='' is what the csv module requires for files it writes to;
    # without it, platforms that translate "\n" end up with an extra "\r"
    # per row (blank lines between records on Windows).
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(data)
    

In [2]:
# NOTE(review): `time` is not referenced in any of the cells visible here.
import time
# Start a Chrome session via open_browser() and keep it in the notebook-level
# `driver`, which the following cells use.
driver = open_browser()
# Navigate to the job listing page that the later cells scrape.
driver.get('https://www.coople.com/ch/jobs/en/')


The chromedriver version (115.0.5790.170) detected in PATH at /Users/guan/opt/anaconda3/lib/python3.9/site-packages/chromedriver_binary/chromedriver might not be compatible with the detected chrome version (116.0.5845.96); currently, chromedriver 116.0.5845.96 is recommended for chrome 116.*, so it is advised to delete the driver in PATH and retry


In [3]:
# Load all jobs: keep clicking the "view more" button until it stops appearing.
while True:
    try:
        load_more_job(driver)
    except TimeoutException:
        # Expected way out: the wait inside load_more_job timed out, i.e. the
        # button did not show up again.
        break
    except WebDriverException as exc:
        # Any other browser-side failure also ends the loop (best effort, as
        # before), but it is reported instead of being silently mistaken for
        # "everything is loaded". Ctrl-C and programming errors now propagate.
        print(f"Stopped loading more jobs early: {exc!r}")
        break

In [4]:
jobs = find_jobs(driver)
# Remember the listing tab explicitly; the order of driver.window_handles is
# not something to rely on for getting back to it.
main_window_handle = driver.current_window_handle
data = []

try:
    # Iterate through each job listing and open it in a new tab.
    # `i` is deliberately a notebook-level counter: get_job_info reads it to
    # fill the 'id' field.
    for i, job in enumerate(jobs, start=1):
        job_link = job.find_element(By.TAG_NAME, "a")
        url = job_link.get_attribute("href")

        # Open a new tab, switch to it, and load the job page there
        driver.switch_to.new_window('tab')
        driver.get(url)

        # Extract information from the new tab
        description, row = get_job_info(job, driver)
        if row:
            data.append(row)
        # The whole file is rewritten after every job - presumably a
        # checkpoint so that partial results exist if the run is interrupted.
        write_to_csv(data)
        print(description)

        # Close the job tab and return to the listing tab
        driver.close()
        driver.switch_to.window(main_window_handle)
finally:
    # Close the browser even when a job page fails mid-loop, so a crashed run
    # does not leave a Chrome process behind.
    driver.quit()


{'id': 1, 'Job role': 'Koch/Köchin', 'Experience Level': 'Gelernt', 'Experience detail': 'Personen mit eidgenössischem Fähigkeitsausweis (EFZ) als Koch/Köchin, einer mindestens dreijährigen beruflichen Grundbildung in der auszuübenden Tätigkeit oder Personen mit einer Attestlehre (EBA) und mindestens drei Jahren Berufspraxis in der auszuübenden Tätigkeit.', 'Job description': 'Sie kochen für unsere Kundschaft im \n\n1 Tagessmenu \nSalatbuffet\nPizza und à la Carte\nDessert\n\nWir haben eine offene Küche wo ihnen der Kunde auch zuschauen kann.\nDa 2 weitere Köche bei uns arbeiten, können die Schichten noch geändert werden.\nWir suchen aber für minium 3 Arbeitstage pro Woche einen Koch.\nFür weitere Auskünfte können sie uns gerne kontaktieren Tel. 032 312 7030 Gerhard Aebi', 'Industry': 'Gastronomie / Hotel', 'City': 'Gampelen', 'Post code': '3236', 'Pay total': 3240.0, 'Pay per hour': 30.0, 'Start date': '17 Aug 2023', 'Shift': 12}
{'id': 2, 'Job role': 'Kassenmitarbeiter/in', 'Experien

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=116.0.5845.96)
Stacktrace:
0   chromedriver                        0x0000000104725af8 chromedriver + 4987640
1   chromedriver                        0x000000010471ceb3 chromedriver + 4951731
2   chromedriver                        0x00000001042d08d7 chromedriver + 444631
3   chromedriver                        0x00000001042a3681 chromedriver + 259713
4   chromedriver                        0x0000000104341eaf chromedriver + 908975
5   chromedriver                        0x0000000104348a64 chromedriver + 936548
6   chromedriver                        0x000000010433cad8 chromedriver + 887512
7   chromedriver                        0x00000001043089b9 chromedriver + 674233
8   chromedriver                        0x0000000104309b9e chromedriver + 678814
9   chromedriver                        0x00000001046e1dc9 chromedriver + 4709833
10  chromedriver                        0x00000001046e6de4 chromedriver + 4730340
11  chromedriver                        0x00000001046edc99 chromedriver + 4758681
12  chromedriver                        0x00000001046e7b3a chromedriver + 4733754
13  chromedriver                        0x00000001046bb35c chromedriver + 4551516
14  chromedriver                        0x0000000104705908 chromedriver + 4856072
15  chromedriver                        0x0000000104705a87 chromedriver + 4856455
16  chromedriver                        0x0000000104715def chromedriver + 4922863
17  libsystem_pthread.dylib             0x00007ff8185431d3 _pthread_start + 125
18  libsystem_pthread.dylib             0x00007ff81853ebd3 thread_start + 15
