# In [None]:
import datetime
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def scrape_job_description(url):
    """Open a job posting and return the text of its description element.

    Navigates the module-level ``driver`` to ``url`` and waits up to
    10 seconds for an element with class ``job_description`` to be present.

    Args:
        url: Address of the job posting page to load.

    Returns:
        The description text with surrounding whitespace stripped, or
        ``None`` if the element did not appear in time or could not be read.
    """
    driver.get(url)
    try:
        description_elem = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "job_description"))
        )
        return description_elem.text.strip()
    except WebDriverException:
        # Covers TimeoutException from the wait and stale-element errors when
        # reading the text. A bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and make the scrape impossible to stop.
        return None


def _text_or_none(elem):
    """Return the stripped text of a parsed element, or None if it is missing."""
    return elem.text.strip() if elem is not None else None


def scrape_listings(page_source, processed_links):
    """Extract job listings not seen before from a listings page.

    Every ``<li class="job_listing">`` is inspected. A listing is skipped when
    its title is one of the two fixed non-job titles below, when its company,
    location, job type, posting date or link is missing, or when its link is
    already in ``processed_links``.

    Args:
        page_source: HTML of the listings page.
        processed_links: Set of job links already collected. Mutated in
            place: the link of every returned listing is added to it.

    Returns:
        A list of dicts with the keys 'Position', 'Company', 'Location',
        'Job Type', 'Posting Date' and 'Job Link'. 'Position' may be None
        when the listing has no ``<h3>``.
    """
    # Entries with these titles are skipped rather than treated as job adverts.
    skipped_titles = (
        "How to Search For a Job Advert",
        "Copycats of Our Site  (Dont Support Laziness)",
    )

    soup = BeautifulSoup(page_source, 'html.parser')
    listings_data = []
    for listing in soup.find_all('li', class_='job_listing'):
        job_position = _text_or_none(listing.find('h3'))
        if job_position in skipped_titles:
            continue

        company_elem = listing.find('div', class_='company')
        company_name_elem = company_elem.strong if company_elem is not None else None
        company = _text_or_none(company_name_elem)
        if company is None:
            continue

        location = _text_or_none(listing.find('div', class_='location'))
        if location is None:
            continue

        job_type = _text_or_none(listing.find('li', class_='job-type'))
        if job_type is None:
            continue

        # The date lives in the `datetime` attribute of a <time> tag inside
        # <li class="date">. Either the tag or the attribute may be absent,
        # so look each up defensively instead of indexing straight through.
        date_elem = listing.find('li', class_='date')
        time_elem = date_elem.find('time') if date_elem is not None else None
        posting_date = time_elem.get('datetime') if time_elem is not None else None
        if posting_date is None:
            continue

        # A listing without a link cannot be de-duplicated or visited later,
        # so skip it instead of recording None as a processed link.
        link_elem = listing.find('a', href=True)
        if link_elem is None:
            continue
        job_link = link_elem['href']
        if job_link in processed_links:
            continue

        listings_data.append({
            'Position': job_position,
            'Company': company,
            'Location': location,
            'Job Type': job_type,
            'Posting Date': posting_date,
            'Job Link': job_link,
        })
        processed_links.add(job_link)
    return listings_data


# Scrape listings posted between start_date and end_date (inclusive), save
# them to CSV, then visit each job link to add its description.
driver = webdriver.Chrome()
try:
    driver.get("https://jobsearchmalawi.com/")

    start_date = datetime.datetime(2024, 3, 1)
    end_date = datetime.datetime(2024, 5, 13)

    new_listings = []
    processed_links = set()
    # Set once a listing older than start_date is seen; stops further paging.
    reached_start = False

    while not reached_start:
        try:
            load_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "load_more_jobs"))
            )
            load_more_button.click()
            time.sleep(2)  # give the newly requested listings time to render

            # The page source holds every listing loaded so far;
            # processed_links filters out the ones already collected.
            scraped_listings = scrape_listings(driver.page_source, processed_links)

            if not scraped_listings:
                print("No more listings found.")
                break

            for listing in scraped_listings:
                posting_date = datetime.datetime.strptime(listing['Posting Date'], "%Y-%m-%d")
                if posting_date > end_date:
                    continue
                if posting_date < start_date:
                    # NOTE(review): stopping here presumes the site lists jobs
                    # newest-first - confirm against the live page.
                    print("Reached the specified timeframe.")
                    reached_start = True
                    break
                new_listings.append(listing)

            print("Processed links:", processed_links)
            print("Total listings:", len(new_listings))

        except Exception as e:
            # Best-effort paging: on any failure (e.g. the button never
            # becomes clickable) keep what has been collected so far.
            print("An error occurred:", e)
            break

    if new_listings:
        df = pd.DataFrame(new_listings)
        # Save the listings first so they survive a failure while fetching
        # the individual descriptions.
        df.to_csv('jobsearch_listings.csv', index=False)

        df['Job Description'] = [
            scrape_job_description(job_link) for job_link in df['Job Link']
        ]

        df.to_csv('jobsearch_listings_with_descriptions.csv', index=False)
        print("Job descriptions scraped successfully.")
    else:
        print("No new listings found.")
finally:
    # Always close the browser, even if scraping or CSV writing fails.
    driver.quit()
