# Wuzzuf web scraping script

Open Wuzzuf and navigate to the engineering job postings page.

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import json
import csv
import re



# Launch Chrome and open the Wuzzuf listing of jobs in Egypt.
browser = webdriver.Chrome()
browser.get("https://wuzzuf.net/jobs/egypt")

# Wait (up to 10 seconds) for the search box to become usable instead of
# relying on a fixed sleep, which fails whenever the page loads slowly.
searchBox = WebDriverWait(browser, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="app"]/div/div[1]/main/div[2]/div[1]/form/div/input')
    )
)
searchBox.clear()
# Type the search query into the search box.
searchBox.send_keys("engineering")
time.sleep(2)
# Press Enter to submit the search.
searchBox.send_keys(Keys.ENTER)

# Give the results page time to load BEFORE inspecting it; checking
# page_source right after pressing Enter would look at the previous page.
time.sleep(5)

# Explicit check rather than `assert`, which is removed under `python -O`.
if "No results found." in browser.page_source:
    # Do not leave an orphaned browser window behind when aborting.
    browser.quit()
    raise RuntimeError("Wuzzuf returned no results for the search query.")

# Accumulator for the scraped postings: one dictionary per job.
jobs_data = []

# Identifier assigned to the next scraped job, and the number of the
# results page currently being scraped; both start at 1.
job_id, page = 1, 1

# Negated character class: it matches every character that is NOT a letter,
# a digit, whitespace, "-" or "+", so substituting its matches with ""
# keeps only those wanted characters.
regex = r"[^a-zA-Z\d\s\-\+]"
def _optional_text(card, by, selector, default="Not specified"):
    """Return the text of the first element inside *card* matching the locator.

    Args:
        card: The job-card element to search within.
        by: Locator strategy (a ``By`` constant).
        selector: Locator value for that strategy.
        default: Value returned when the card has no matching element.

    Returns:
        The element's text, or *default* if the element is absent.
    """
    try:
        return card.find_element(by, selector).text
    except NoSuchElementException:
        # Only "field absent" is an expected condition; any other error is
        # left to the caller so it is reported instead of silently hidden.
        return default


def _parse_job_card(card, job_id, cleanup_pattern):
    """Extract one job posting from *card*, print it, and return its record.

    Args:
        card: The job-card element to read.
        job_id: Identifier to store in the record.
        cleanup_pattern: Compiled pattern whose matches are stripped from the
            skills and years-of-experience strings.

    Returns:
        A dict with the same keys the CSV/JSON writers expect.

    Raises:
        NoSuchElementException: If the title, company or location is missing.
    """
    title_link = card.find_element(By.CSS_SELECTOR, "h2 a.css-o171kl")
    # Read the title once; every `.text` access is a round trip to the browser.
    job_title = title_link.text
    company_name = card.find_element(By.CLASS_NAME, "css-ipsyv7").text
    location = card.find_element(By.CLASS_NAME, "css-16x61xq").text

    # The first link of the experience block holds the experience level.
    exp_block = card.find_elements(By.CSS_SELECTOR, "div.css-1rhj4yg div a.css-o171kl")
    exp_level = exp_block[0].text if exp_block else "N/A"

    years_exp = _optional_text(card, By.CSS_SELECTOR, "div.css-1rhj4yg div:nth-child(2) span")
    # The same slot can hold unrelated text; accept it only if it mentions "yrs".
    if "yrs" not in years_exp.lower():
        years_exp = "Not specified"

    posting_date = _optional_text(
        card, By.XPATH, ".//span[contains(@class,'css-16x61xq')]/following-sibling::div"
    )
    # Job type: full time, part time, ...
    job_type = _optional_text(card, By.CLASS_NAME, "css-uc9rga")
    # Job location type: on-site, hybrid, remote.
    job_location_type = _optional_text(card, By.CLASS_NAME, "css-uofntu")

    # find_elements returns an empty list when nothing matches, so no
    # fallback is needed and the result is always safe to iterate.
    skill_texts = [element.text for element in card.find_elements(By.CLASS_NAME, "css-5x9pm1")]

    # The application link is the href of the job title link.
    application_link = title_link.get_attribute("href") or "Not specified"

    print(f"Title: {job_title}")
    print(f"Company: {company_name}")
    print(f"Location: {location}")
    print(f"Experience Level: {exp_level}")
    print(f"Years of Experience: {years_exp}")
    print(f"Posting Date: {posting_date}")
    print("Required Skills: ")
    for skill_text in skill_texts:
        print(f"   {skill_text}")
    print(f"Job Type: {job_type}, {job_location_type}")
    print(f"Application Link: {application_link}")
    print("-" * 60)

    return {
        "Job Id": job_id,
        "Job Title": job_title,
        "Company Name": company_name,
        "Location": location,
        "Experience Level": exp_level,
        # Removing the bullet separator leaves padding behind; strip it.
        "Years of Experience": cleanup_pattern.sub("", years_exp).strip(),
        "Posting Date": posting_date,
        "Required Skills": [cleanup_pattern.sub("", text).strip() for text in skill_texts],
        "Job Type": job_type,
        "Application Link": application_link,
    }


# Compile the clean-up pattern once instead of on every substitution.
cleanup_pattern = re.compile(regex)
# One wait object serves every page; it polls for up to 10 seconds.
wait = WebDriverWait(browser, 10)

try:
    while True:
        print(f"Scraping page {page}")
        try:
            # Block until at least one job card is present in the DOM.
            job_cards = wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "css-ghe2tq"))
            )

            for index in range(len(job_cards)):
                # Re-fetch the cards on every iteration to work with a fresh
                # reference (presumably to avoid stale elements if the page
                # re-renders - TODO confirm this is still required).
                current_cards = browser.find_elements(By.CLASS_NAME, "css-ghe2tq")
                if index >= len(current_cards):
                    # The page now shows fewer cards than when the loop started.
                    break
                try:
                    record = _parse_job_card(current_cards[index], job_id, cleanup_pattern)
                except Exception as e:
                    # Per-card boundary: report the failure and keep scraping.
                    print("Error Parsing job card", e)
                else:
                    jobs_data.append(record)
                    job_id += 1

            # Move to the next page after processing all job cards.
            try:
                page_links = browser.find_elements(
                    By.XPATH, "//button[contains(@class,'css-wq4g8g')]//a"
                )

                if not page_links:
                    print("No pagination, the result is contained in 1 page only.")
                    break

                # The SVG path data of the first button's icon identifies
                # which pagination button it is.
                arrow = page_links[0].find_element(By.TAG_NAME, "path").get_attribute("d")

                if len(page_links) == 1:
                    # A single button is either "next" (first page) or
                    # "back" (last page).
                    if arrow.startswith("M14.787"):  # left arrow -> back button
                        print("This is the back button")
                        print("Last Page is Reached.")
                        print("Ending Scraping...")
                        break
                    next_url = page_links[0].get_attribute("href")
                else:
                    # Several buttons: a leading double-back button means the
                    # last page; otherwise the final button is "next".
                    if arrow.startswith("M12"):
                        print("Double back button.")
                        print("Last Page is Reached.")
                        print("Ending Scraping...")
                        break
                    next_url = page_links[-1].get_attribute("href")

                browser.get(next_url)
                page += 1

            except NoSuchElementException:
                print("⚠ Next button not found, stopping pagination.")
                print("Ending Scraping...")
                break

        except TimeoutException:
            print("Timeout - moving to next page or stopping.")
            print("Ending Scraping...")
            break
finally:
    # Always release the browser, even when scraping fails unexpectedly.
    browser.quit()


# Write the scraped jobs to a pretty-printed JSON file.
# The context manager closes the file even if serialisation or writing fails.
with open("jobs_json.json", "w", encoding="utf-8") as json_file:
    json.dump(jobs_data, json_file, indent=2)

# Write the scraped jobs to a CSV file, one row per job.
fieldNames = ['Job Id', "Job Title", "Company Name", "Location", "Experience Level", "Years of Experience", "Posting Date", "Required Skills", "Job Type", "Application Link"]
# `with` guarantees the file is flushed and closed; an explicit UTF-8
# encoding keeps non-ASCII job text from failing on platforms whose default
# encoding is not UTF-8. newline='' lets the csv module control line endings.
with open("jobs_csv.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldNames)
    writer.writeheader()
    # NOTE: "Required Skills" is a list, so csv writes it as its str() form.
    writer.writerows(jobs_data)


Scraping page 1
Title: Senior Facilities Engineer
Company: Confidential -
Location: New Capital, Cairo, Egypt
Experience Level: Experienced
Years of Experience: · 5 - 7 Yrs of Exp
Posting Date: 10 days ago
Required Skills: 
   Facilities Engineering
   Facility Management
Job Type: Full Time, On-site
Application Link: https://wuzzuf.net/jobs/p/ipnk1fkfvybq-senior-facilities-engineer-midar-for-investment-and-urban-development-cairo-egypt
------------------------------------------------------------
Title: Senior Facility Manager
Company: Confidential -
Location: New Cairo, Cairo, Egypt
Experience Level: Manager
Years of Experience: · 7+ Yrs of Exp
Posting Date: 5 days ago
Required Skills: 
   Facilities Management
   · Vendor Management
   · Health and Safety Compliance
   · Project Management
   · Budgeting and Cost Control
Job Type: Full Time, On-site
Application Link: https://wuzzuf.net/jobs/p/9yn8ndgmw8he-senior-facility-manager-efs-facilities-services-cairo-egypt
-------------------

Extract the following job information:
    Job title,
    Company name,
    Job location,
    Required experience level,
    Job posting date,
    Required skills/qualifications,
    Job type (full-time, part-time, etc.),
    Application link.