In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time
import datetime

In [16]:
def scrape_job_description(url):
    driver.get(url)
    try:
        job_description_elem = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "job_description"))
        )
        job_description = job_description_elem.text.strip()
    except:
        job_description = None
    return job_description



def scrape_listings(page_source, processed_links):
    soup = BeautifulSoup(page_source, 'html.parser')
    job_listings = soup.find_all('li', class_='job_listing')
    listings_data = []
    for listing in job_listings:
        job_position_elem = listing.find('h3')
        job_position = job_position_elem.text.strip() if job_position_elem else None

        company_elem = listing.find('div', class_='company')
        company = company_elem.strong.text.strip() if company_elem else None

        location_elem = listing.find('div', class_='location')
        location = location_elem.text.strip() if location_elem else None

        job_type_elem = listing.find('li', class_='job-type')
        job_type = job_type_elem.text.strip() if job_type_elem else None

        posting_date_elem = listing.find('li', class_='date')
        posting_date = posting_date_elem.time['datetime'] if posting_date_elem else None

        job_link_elem = listing.find('a', href=True)
        job_link = job_link_elem['href'] if job_link_elem else None

        if job_position and company and location and job_type and posting_date:
            listing_data = {
                'Position': job_position,
                'Company': company,
                'Location': location,
                'Job Type': job_type,
                'Posting Date': posting_date,
                'Job Link': job_link
            }
            listings_data.append(listing_data)
    return listings_data


driver = webdriver.Chrome() 
driver.get("https://careersmw.com/")


start_date = datetime.datetime(2024, 3, 1)


end_date = datetime.datetime(2024, 3, 3)

new_listings = []
processed_links = set()

while True:  
    try:
        load_more_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "load_more_jobs"))
        )
        load_more_button.click()
        time.sleep(2)  
        page_source = driver.page_source
        scraped_listings = scrape_listings(page_source, processed_links)
        
        if not scraped_listings:
            print("No more listings found.")
            break
        
        for listing in scraped_listings:
            posting_date = datetime.datetime.strptime(listing['Posting Date'], "%Y-%m-%d")
            if posting_date > end_date:
                continue
            if posting_date < start_date:
                print("Reached the specified timeframe.")
                break
            new_listings.append(listing)
        
        print("Processed links:", processed_links) 
        print("Total listings:", len(new_listings))  

        
        if posting_date < start_date:
            print("Reached the specified timeframe.")
            break

    except Exception as e:
        print("An error occurred:", e)
        break  

if new_listings:
    df = pd.DataFrame(new_listings)
    df.to_csv('jobsearch_listings.csv', index=False)
    
    descriptions = []
    for job_link in df['Job Link']:  
        description = scrape_job_description(job_link)
        descriptions.append(description)
    
    df['Job Description'] = descriptions
   
    df.to_csv('jobsearch_listings_with_descriptions.csv', index=False)
    print("Job descriptions scraped successfully.")
else:
    print("No new listings found.")

driver.quit()

Processed links: set()
Total listings: 85
Processed links: set()
Total listings: 220
Processed links: set()
Total listings: 359
An error occurred: Message: 



WebDriverException: Message: disconnected: not connected to DevTools
  (failed to check if window was closed: disconnected: not connected to DevTools)
  (Session info: chrome=124.0.6367.119)
Stacktrace:
	GetHandleVerifier [0x00007FF7075C1522+60802]
	(No symbol) [0x00007FF70753AC22]
	(No symbol) [0x00007FF7073F7CE4]
	(No symbol) [0x00007FF7073DFA14]
	(No symbol) [0x00007FF7073DF8E0]
	(No symbol) [0x00007FF7073FA291]
	(No symbol) [0x00007FF7074898F9]
	(No symbol) [0x00007FF70746A923]
	(No symbol) [0x00007FF707438FEC]
	(No symbol) [0x00007FF707439C21]
	GetHandleVerifier [0x00007FF7078C41BD+3217949]
	GetHandleVerifier [0x00007FF707906157+3488183]
	GetHandleVerifier [0x00007FF7078FF0DF+3459391]
	GetHandleVerifier [0x00007FF70767B8E6+823622]
	(No symbol) [0x00007FF707545FBF]
	(No symbol) [0x00007FF707540EE4]
	(No symbol) [0x00007FF707541072]
	(No symbol) [0x00007FF7075318C4]
	BaseThreadInitThunk [0x00007FFD943D7344+20]
	RtlUserThreadStart [0x00007FFD952626B1+33]
