In [None]:
pip install selenium

In [None]:
pip install webdriver-manager

In [None]:
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains

from webdriver_manager.chrome import ChromeDriverManager

import utilities.text_utils as text_utils
import utilities.html_utils as html_utils

def scrape_page(driver):
    """Print the summary and detail fields of every job offer on the loaded page.

    Each offer in the result list is clicked so that its details are loaded
    into the detail container. The container is then shifted upwards in fixed
    steps, and after every step each detail field is read again and merged
    into the text collected so far with ``text_utils.join_without_overlap``.

    Args:
        driver: Selenium WebDriver that has already navigated to the search
            results page.

    Raises:
        selenium.common.exceptions.TimeoutException: If the job list or the
            detail container is not present within 10 seconds.
    """
    # Selector for the offers in the result list
    sel_job_list = '#jsJobResContent .jobItem'

    # Selectors of the summary fields inside each list item.
    # Insertion order is also the order in which the fields are printed.
    item_selectors = {
        'title': '.jobResultsTitle',
        'salary': '.jobResultsSalary',
        'loc': '.jobResultsLoc',
        'type': '.jobResultsType',
    }

    # Selector of the job detail container
    sel_job_detail_container = "#JobDetailContainer .jsCustomScrollContainer"

    # Selectors of the fields inside the job detail container.
    # Insertion order is the order in which the fields are read and printed.
    detail_selectors = {
        'skills': '#md_skills',
        'duration': '#md_duration',
        'start_date': '#md_start_date',
        'rate': '#md_rate',
        'recruiter': '#md_recruiter',
        'ref': '#md_ref',
        'posted_date': '#md_posted_date',
        'permalink': '#md_permalink',
    }

    # Number of pixels the detail container is shifted per scroll step
    scroll_step = 100

    # Find all job offer items in the list
    job_items = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_job_list))
    )
    print(f"Job_tems found: {len(job_items)}")

    for index, job_item in enumerate(job_items):
        print(f"**Job Index: {index + 1}")

        # Click on the job item to load its details into the detail container
        ActionChains(driver).move_to_element(job_item).click(job_item).perform()
        # Fixed pause to give the detail container time to refresh
        time.sleep(2)

        # Extract the summary fields from the list item itself
        summary = {
            field: html_utils.find_element_or_none(job_item, selector)
            for field, selector in item_selectors.items()
        }

        # Every detail field starts empty and is extended while scrolling
        details = dict.fromkeys(detail_selectors, "")

        # Select the job detail section
        job_detail_container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, sel_job_detail_container))
        )

        # Get the scrollable height of the detail container
        scroll_height = driver.execute_script("return arguments[0].scrollHeight;", job_detail_container)
        print(f"scroll_height: {scroll_height}")

        # NOTE(review): per the original author's note, Selenium only returns the
        # text in the visible area, so the container is moved step by step and
        # the partial reads are stitched together - confirm this is still needed.
        current_scroll_position = 0
        while current_scroll_position < scroll_height:
            driver.execute_script(f"arguments[0].style.top = '-{current_scroll_position}px';", job_detail_container)
            current_scroll_position += scroll_step
            # Wait for some time to allow content to load (adjust as needed)
            time.sleep(1)

            for field, selector in detail_selectors.items():
                fragment = html_utils.find_element_or_none(job_detail_container, selector)
                details[field] = text_utils.join_without_overlap(details[field], fragment)

        # One "name:value" line per field; unlike before, "rate" is now included
        summary_text = "".join(f"{field}:{value}\n" for field, value in summary.items())
        details_text = "".join(f"{field}:{value}\n" for field, value in details.items())
        print(f"{index + 1}-Item\n{summary_text}")
        print(f"{index + 1}-Details\n{details_text}")


def scrape_job_offers(url):
    """Open *url* in a new Chrome session, scrape its job offers and close the browser.

    The ChromeDriver binary is resolved through ``ChromeDriverManager().install()``.
    The browser is closed even when loading the page or scraping it raises.

    Args:
        url: Address of the job search results page to open.
    """
    # Setup Chrome options
    chrome_options = Options()
    # Uncomment the next line to run Chrome in headless mode.
    # chrome_options.add_argument("--headless")

    # Setup Selenium with ChromeDriver
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        # Navigation happens inside the try block so that a failed page load
        # cannot leave the browser process running.
        driver.get(url)
        scrape_page(driver)
    finally:
        # Pause kept from the original code before the window is closed
        # (presumably so the final page can be inspected - TODO confirm).
        time.sleep(10)
        # Close the browser
        driver.quit()

if __name__ == "__main__":
    # Script entry point: scrape the saved job search behind the URL below.
    #
    # Search criteria noted by the author for that saved search (they are not
    # encoded in this file; re-check them on the site if the search is edited):
    #   Keywords:   python "machine learning" engineer
    #   Industry:   All
    #   Job title:  python engineer
    #   Location:   London (only show jobs with remote working)
    #   Category:   none
    #   Salary:     All
    #   Within:     7 days
    #   Job type:   Any
    #   Advertiser: Any
    #   Distance:   5 miles
    url = "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=2981D9787360CBB0C8BE"

    scrape_job_offers(url)
    print("Ended Scrapping")
