In [64]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import json

In [63]:
def get_job_links(url):
    """Collect job-posting links from every page of a paginated job listing.

    Opens ``url`` in Firefox, rejects the cookie banner (element id
    ``onetrust-reject-all-handler``) if it appears, then gathers the job
    anchors of the current page and clicks the enabled 'Next' control until
    none is left.

    Args:
        url: Address of the first listing page.

    Returns:
        list: Job URLs in first-seen order, without duplicates. If an error
        occurs while paginating, the links gathered so far are returned.
    """
    job_selector = "a[href*='/careers/jobs/job?jobid=']"
    next_xpath = "//div[contains(text(), 'Next') and not(contains(@class, 'disabled'))]"
    # Reading every href in one script call avoids holding element references
    # that could go stale while the page re-renders.
    hrefs_script = (
        "return Array.from(document.querySelectorAll(arguments[0]))"
        ".map(function (a) { return a.href; });"
    )

    job_links = []
    seen_links = set()

    # Setup the Firefox WebDriver
    driver = webdriver.Firefox()
    try:
        driver.get(url)

        # Handle cookie consent once, up front, instead of paying the 5 s
        # timeout again on every page.
        try:
            cookie_reject_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//*[@id='onetrust-reject-all-handler']"))
            )
            cookie_reject_button.click()
        except TimeoutException:
            # No banner was shown within 5 s; nothing to dismiss.
            pass
        except Exception as e:
            # Best effort: report the problem but keep scraping.
            print("Could not dismiss cookie banner:", e)

        while True:
            try:
                # Re-query the DOM for job links after each navigation
                WebDriverWait(driver, 10).until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, job_selector))
                )

                page_links = driver.execute_script(hrefs_script, job_selector)
                for href in page_links:
                    # Skip empty hrefs and links already collected.
                    if href and href not in seen_links:
                        seen_links.add(href)
                        job_links.append(href)

                print(f"Scraping page: {driver.current_url}")  # Log the current URL

                # Find the 'Next' button by its text, ignoring disabled ones
                next_buttons = driver.find_elements(By.XPATH, next_xpath)
                if not next_buttons:
                    print(f"No 'Next' button found. Ending pagination at: {driver.current_url}")
                    break

                next_button = next_buttons[-1]
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                driver.execute_script("window.scrollBy(0, -150);")  # Adjust if needed
                driver.execute_script("arguments[0].click();", next_button)

                # Waiting for the anchors to be *present* returns immediately,
                # because the previous page's anchors are still in the DOM.
                # Wait until the set of job links actually differs instead.
                def links_changed(d, previous=page_links):
                    current = d.execute_script(hrefs_script, job_selector)
                    return bool(current) and current != previous

                WebDriverWait(driver, 10).until(
                    links_changed,
                    message="job links did not change after clicking 'Next'",
                )

            except Exception as e:
                print("Error occurred on page:", driver.current_url)
                print("Error details:", e)
                break
    finally:
        # Always close the browser, even if loading the first page fails.
        driver.quit()

    print(f"Total number of job links collected: {len(job_links)}")
    return job_links

# Starting URL: first page of the careers job listing
starting_url = 'https://www.mondelezinternational.com/careers/jobs/'
all_job_links = get_job_links(starting_url)
# Show only a short preview; the complete list stays available in
# `all_job_links` and its size is already reported by get_job_links.
print(all_job_links[:10])

Scraping page: https://www.mondelezinternational.com/careers/jobs/
Scraping page: https://www.mondelezinternational.com/careers/jobs/
Scraping page: https://www.mondelezinternational.com/careers/jobs/
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=4
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=5
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=6
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=7
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=8
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=9
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=10
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=11
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&page=12
Scraping page: https://www.mondelezinternational.com/careers/jobs/?term=&pag

# Scraping Each Link

In [66]:
def get_job_details(job_links, limit=5):
    """Scrape the description and label/value details of job-posting pages.

    Args:
        job_links: Iterable of job-page URLs.
        limit: Maximum number of links to visit, taken from the start of
            ``job_links``. ``None`` visits all of them.

    Returns:
        list: One dict per successfully scraped page with the keys ``"url"``,
        ``"description"`` (text of the ``<p>``/``<li>`` elements inside
        ``.jobDetailLeftContentWrap`` joined by spaces) and ``"details"``
        (label -> value dict). Pages that time out or raise are reported via
        ``print`` and skipped.
    """
    selected_links = list(job_links)[:limit]  # Process only the first 'limit' links
    if not selected_links:
        # Nothing to scrape: do not launch a browser at all.
        return []

    job_details = []

    # Setup the Firefox WebDriver
    driver = webdriver.Firefox()
    try:
        for link in selected_links:
            try:
                driver.get(link)
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ".jobDetailLeftContentWrap"))
                )

                # Extract the job description; read each element's text once,
                # since every `.text` access is a round-trip to the browser.
                description_elements = driver.find_elements(
                    By.CSS_SELECTOR, ".jobDetailLeftContentWrap p, .jobDetailLeftContentWrap li"
                )
                element_texts = (elem.text for elem in description_elements)
                description_text = ' '.join(text for text in element_texts if text)

                # Extract label/value pairs from containers holding at least two <p> elements.
                # NOTE(review): "div > div" matches every nested div on the page and
                # find_elements returns all descendant <p> tags, so outer wrappers may
                # contribute pairs too -- confirm against the page markup.
                detail_containers = driver.find_elements(By.CSS_SELECTOR, "div > div")
                details = {}
                for container in detail_containers:
                    p_elements = container.find_elements(By.TAG_NAME, 'p')
                    if len(p_elements) < 2:
                        continue
                    label = p_elements[0].text
                    if not label:
                        # A blank label would only produce a meaningless '' key.
                        continue
                    details[label] = p_elements[1].text

                job_details.append({
                    "url": link,
                    "description": description_text,
                    "details": details
                })

            except TimeoutException:
                print(f"Timeout while trying to load page: {link}")
            except Exception as e:
                print(f"Error occurred on page: {link}")
                print("Error details:", e)
    finally:
        # Close the browser even if the loop is interrupted (e.g. KeyboardInterrupt).
        driver.quit()

    return job_details

# Scrape a small sample of the collected links (the first five) and show the raw records
job_details = get_job_details(all_job_links, limit=5)
print(job_details)

[{'url': 'https://www.mondelezinternational.com/careers/jobs/job?jobid=R-99927&jobtitle=Nabisco%20Sales%20Coverage%20Representative', 'description': "Join our Mission to Lead the Future of Snacking. Are you ready to make it happen at Mondelēz International? Nabisco Senior Sales Service Merchandiser Become one of our Nabisco Senior Sales Service Merchandiser by fulfilling the merchandising needs of our grocery-related customers through stocking the shelves and building and maintaining displays and partnering up with nationally recognized customers such as Walmart, Kroger, Target, Albertsons  and more. Become an ambassador of world-famous brands like Oreo, Ritz, belVita, Chips Ahoy, Triscuit among other delicious industry-leading snacks. In this position you will maximize sales, conduct effective planning & order-writing, as well as supporting and completing activities such as merchandising & shelf-pricing by acting as a sales expert to retail stores on a variety of territories covering 

In [67]:
# Convert the list to JSON format with indentation for readability.
# ensure_ascii=False keeps non-ASCII characters (e.g. the "ē" in "Mondelēz")
# readable instead of emitting \uXXXX escapes.
json_job_details = json.dumps(job_details, indent=4, ensure_ascii=False)

# Print the JSON formatted data
print(json_job_details)

[
    {
        "url": "https://www.mondelezinternational.com/careers/jobs/job?jobid=R-99927&jobtitle=Nabisco%20Sales%20Coverage%20Representative",
        "description": "Join our Mission to Lead the Future of Snacking. Are you ready to make it happen at Mondel\u0113z International? Nabisco Senior Sales Service Merchandiser Become one of our Nabisco Senior Sales Service Merchandiser by fulfilling the merchandising needs of our grocery-related customers through stocking the shelves and building and maintaining displays and partnering up with nationally recognized customers such as Walmart, Kroger, Target, Albertsons  and more. Become an ambassador of world-famous brands like Oreo, Ritz, belVita, Chips Ahoy, Triscuit among other delicious industry-leading snacks. In this position you will maximize sales, conduct effective planning & order-writing, as well as supporting and completing activities such as merchandising & shelf-pricing by acting as a sales expert to retail stores on a varie