<h1>Web-scrape Jobs details from Wuzzuf</h1>

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import urllib.parse

<h3>To Format the url based on the query and the page number:</h3>

In [2]:
def generate_url(query, page_number):
    params = {'a': 'hpd', 'q': query, 'start': page_number}
    return "https://wuzzuf.net/search/jobs/?{}".format(urllib.parse.urlencode(params))

<h3>Extract the country from the location array: </h3>

In [3]:
def extract_country(location, country):
    for i in range(0, len(location)):
        # extrat the country from the location
        coun = location[i].split(',')[-1]
        country.append(coun)
        # to remove the country from the location
        location[i] = location[i].partition(coun)[0]
        # to remove the "," at the last of the location
        location[i] = location[i][:len(location[i])-1]

<h3>To clean the data: </h3>

In [14]:
def clean_data(data):
    data.strip()
    start = 0
    end = len(data) -1 
    while start >= 0 and start < len(data) and not data[start].isalpha():
        start += 1
    while end >= 0 and end < len(data) and not data[end].isalpha():
        end -= 1
    if start < 0 or start >= len(data) or end < 0 or end >= len(data) or len(data[start: end+1]) == 0:
        return "N/A"
    return data[start: end+1]

<h3>Web Scrap the job page:</h3>

In [15]:
from selenium import webdriver
driver = webdriver.PhantomJS(executable_path='./phantomjs.exe') #selenium for PhantomJS
def web_scrap_job_page(query):
    url = "https://wuzzuf.net{}".format(query)
    #url = "https://wuzzuf.net/jobs/p/5NFxSMKMH5K0-SeniorMid-Senior-Deep-Learning-Engineer-Cairo-Egypt?o=1&l=sp&t=sj&a=machine%20learning|search-v3|hpb|spbg"


    driver.get(url)
    soup = BeautifulSoup(driver.page_source)

    description = [] # css-1uobp1k --> ul --> li
    requirments = [] # css-1t5f0fr --> ul --> li

    # Extract the description
    div = soup.find(class_="css-1uobp1k")
    parent = None
    if div:
        parent = div.find('ul')
    
    if parent:
        for li in parent.find_all("li"):
            description.append(clean_data(li.text))

    # Extract the requirments
    div = soup.find(class_="css-1t5f0fr")
    if div:
        parent = div.find('ul')
    
    if parent:
        for li in parent.find_all("li"):
            requirments.append(clean_data(li.text))


    dic = {
        "Description":description,
        "Requirments": requirments
    }
    
    return dic

<h3>The Web-Srape function:</h3>

In [20]:
def web_scrap(query):
    job_title = [] # css-o171kl
    company = [] # css-17s97q8
    location = [] # css-5wys0k
    type_of_employment = [] # css-n2jc4m
    description = []
    country = []
    experience_needed = []
    requirments = []
    page = 0
    while page < 10:
        response = requests.get(generate_url(query, page))

        soup = BeautifulSoup(response.content, "html5lib")

        #Store all the jobs in the jobs array
        jobs = []
        for s in soup.find_all(class_="css-1gatmva e1v1l3u10"):
            jobs += s

        if len(jobs) == 0:
            break

        for i in range(1, len(jobs)):
            # find the job title
            job_desc = jobs[i].find_all(class_="css-o171kl")

            job_title.append(clean_data(job_desc[0].text))
            desc_req = web_scrap_job_page(job_desc[0]['href'])
            requirments.append(desc_req["Requirments"])
            description.append(desc_req["Description"])
            experience_needed.append(job_desc[1].text)


            # find all the details
            comp = jobs[i].find_all(class_="css-17s97q8")
            loc = jobs[i].find_all(class_="css-5wys0k")
            type_emp = jobs[i].find_all(class_="css-n2jc4m")

            # find all employment types
            all_types = []
            for j in range(0, len(type_emp)):
                if j == len(type_emp)-1:
                    all_types.append(clean_data(type_emp[j].span.text))
                else:
                    all_types.append(clean_data(type_emp[j].span.text))

            # append all the details
            company.append(clean_data(comp[0].text))
            location.append(clean_data(loc[0].text))
            type_of_employment.append(all_types)

        page += 1
    extract_country(location, country)
    dic = {
        "Job Title": job_title,
        "Company Name: ": company,
        "Location": location,
        "Country": country,
        "Employment Type": type_of_employment,
        "Experience Needed": experience_needed,
        "Description":description,
        "Requirments": requirments
    }
    return dic

In [7]:
#examples of jobs
dic_machine_learning = web_scrap("machine learning")

In [8]:
df_machine_learning = pd.DataFrame(dic_machine_learning)
df_machine_learning.head()

Unnamed: 0,Job Title,Company Name:,Location,Country,Employment Type,Experience Needed,Description,Requirments
0,Senior/Mid Senior Deep Learning Engineer,Confidential,"Mokattam, Cairo",Egypt,[Full Time],Experienced,"[Reviewing, replicating, and mapping research ...",[Completed Master’s degree or higher in Artifi...
1,ML Engineer - Recommendation System,TensorGraph,"Maadi, Cairo",Egypt,"[Part Time, Freelance / Project]",Entry Level,[Build and Improve the existing recommendation...,"[BA in Computer Science and similar fields, ha..."
2,Internship - Graduates,Trufla,"Heliopolis, Cairo",Egypt,[Internship],Entry Level,[],[]
3,Data Scientist,Confidential,Riyadh,Saudi Arabia,[Full Time],Experienced,"[Selecting features, building, and optimizing ...",[Strong applied mathematical and statistical s...
4,Senior Data Scientist,BBI-Consultancy,"Nasr City, Cairo",Egypt,[Full Time],Experienced,[Identify valuable data sources and automate c...,"[BSc/BA in Computer Science, Engineering or re..."


In [9]:
df_machine_learning.shape

(42, 8)

In [12]:
df_machine_learning.to_csv("machine_learning_jobs.csv")

In [21]:
dic_data_analysis = web_scrap("data analysis")

In [22]:
df_data_analysis = pd.DataFrame(dic_data_analysis)
df_data_analysis

Unnamed: 0,Job Title,Company Name:,Location,Country,Employment Type,Experience Needed,Description,Requirments
0,Senior Data Analyst - Cairo,Confidential,Cairo,Egypt,[Full Time],Experienced,[Responsible for preparing and analyzing data ...,[To have good knowledge of Tableau and ERISITE...
1,Data Analysis Instructor (Excel - Power BI,EpsilonAI,"Nasr City, Cairo",Egypt,"[Full Time, Part Time, Freelance / Project]",Entry Level,"[As a Data Analysis Instructor, you will guide...",[Exceptional communication and presentation sk...
2,Data Analyst,Hands of Hope Physical Therapy & Wellness,"Maadi, Cairo",Egypt,[Full Time],Experienced,"[Track, collect, and interpret data, then anal...",[Essential experience in one or more of the da...
3,Data Analyst,Gila Electric,"New Cairo, Cairo",Egypt,[Full Time],Experienced,"[Interpret data, analyze results using statist...",[Bachelor’s degree from an accredited universi...
4,Senior Data Scientist,Fixed Solutions,"Sheraton, Cairo",Egypt,[Full Time],Experienced,[Design and Develop analytical insights based ...,[Required Years of experience: 5+ years of exp...
...,...,...,...,...,...,...,...,...
145,Accountant,BTL Industries Egypt,"Sheraton, Cairo",Egypt,[Full Time],Experienced,[Reviews and processes routine accounting data...,"[Male or Female, Presentable, BSc in Accountin..."
146,Internal Auditor,Confidential,"New Cairo, Cairo",Egypt,[Full Time],Experienced,[Perform and control the full audit cycle incl...,[Proven working experience as Internal Auditor...
147,"Financial Planning, Budgeting & Reporting Team...",Confidential,"New Cairo, Cairo",Egypt,[Full Time],Manager,[Supervises the day-to-day operations of Fina...,"[Bachelor’s degree in Finance, Professional ce..."
148,AR Accountant,Elshennawy Group,Cairo,Egypt,[Full Time],Experienced,[Review AR invoices for data processing and en...,"[BSc/Ba in accounting, finance or relevant fie..."


In [23]:
df_data_analysis.shape

(150, 8)

In [24]:
df_data_analysis.to_csv("data_analysis_jobs.csv")

In [25]:
dic_software_testing = web_scrap("software testing")

In [26]:
df_software_testing = pd.DataFrame(dic_software_testing)
df_software_testing.head()

Unnamed: 0,Job Title,Company Name:,Location,Country,Employment Type,Experience Needed,Description,Requirments
0,Software Testing Engineer,realme,"New Cairo, Cairo",Egypt,[Full Time],Entry Level,[Follow testing schedule and plan according to...,"[Males only, Excellent in English (spoken-writ..."
1,Senior Software Test Engineer,Glamera,"Nasr City, Cairo",Egypt,[Full Time],Experienced,"[Design test strategies, specifications, and t...","[Years of experience in Software Testing, Degr..."
2,Software Testing Engineer,DMS,"Heliopolis, Cairo",Egypt,[Full Time],Entry Level,[Reviewing and analyzing system specifications...,[BSC degree of computer science / Engineering ...
3,Trevipay| Senior Software Testing Engineer - E...,Kalasko,"Hurghada, Red Sea",Egypt,[Full Time],Experienced,[Lead tester on small engineering team for bus...,[years of experience in QA Testing and/or Test...
4,Software Quality Testing Engineer,Anspire Agency,"New Cairo, Cairo",Egypt,[Full Time],Entry Level,[Attend the Analyst orientation session (Kick-...,"[Attention to detail, Experience in writing cl..."


In [27]:
df_software_testing.shape

(150, 8)

In [28]:
df_software_testing.to_csv("software_testing_jobs.csv")

In [29]:
merged_df = pd.concat([df_machine_learning, df_data_analysis, df_software_testing])

In [30]:
merged_df.to_csv("All_jobs.csv")