## Data Collection
- Scrape job postings data from jobstreet (PH)
- Include columns such as "Job Name", "Company Name", "Location", "Date Posted", "Job Specialization", "Job Type", and "Job Link"
- Convert the "Date Posted" column from time data to date format using datetime library
- Automate the web scraper to collect data up to the last available job postings and save it in a CSV file

In [106]:
import csv
import requests
from bs4 import BeautifulSoup

import re
import datetime
current_time = datetime.datetime.now()


In [107]:
# datetime conversation to date only
def convert_datetime(dt_posted):
    if dt_posted == 'now':
        return datetime.datetime.now().strftime("%Y-%m-%d")
    
    hours = 0
    minutes = 0
    days = 0
    
    match = re.match(r"(\d+)h ago", dt_posted)
    if match:
        hours = int(match.group(1))
    
    match = re.match(r"(\d+)m ago", dt_posted)
    if match:
        minutes = int(match.group(1))
    
    match = re.match(r"(\d+)d ago", dt_posted)
    if match:
        days = int(match.group(1))
    
    date = datetime.datetime.now() - datetime.timedelta(days=days, hours=hours, minutes=minutes)
    return date.strftime("%Y-%m-%d")

In [108]:
main_url = "https://www.jobstreet.com.ph/en/job-search/job-vacancy.php?pg={page}"
max_page = 1815 # maximum pages to scrape

In [113]:
# Create a new CSV file and write the headers as the first row
with open('jobs.csv', mode='w', newline='', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(['Job Name', 'Company Name', 'Location', 'Date Posted', 'Job Specialization', 'Job Type', 'Job Link'])
    
    # Iterate over each page and extract the job information
    for page in range(1, max_page+1):
        url = main_url.format(page=page)
        response = requests.get(url)
        
        soup = BeautifulSoup(response.content, 'html.parser')
    
        for job in soup.find_all('div', class_='z1s6m00 _1hbhsw67i _1hbhsw66e _1hbhsw69q _1hbhsw68m _1hbhsw6n _1hbhsw65a _1hbhsw6ga _1hbhsw6fy'):
            try:
                job_name = job.find('div', class_='z1s6m00 l3gun70 l3gun74 l3gun72').get_text()
                company_name = job.find('span', class_='z1s6m00 _17dyj7u1 _1hbhsw64y _1hbhsw60 _1hbhsw6r').get_text()
                location = job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i3 y44q7i21 y44q7ih').get_text()
                date_posted = convert_datetime(job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i1 y44q7i22 y44q7ih').get_text())
                
                ddt = soup.find_all("div", class_="z1s6m00 _1hbhsw66u _1hbhsw67y _1hbhsw69q _1hbhsw68m rqoqz2")
                job_specialization = ddt.find("dt", text="Job Specializations").find_next_sibling("dd").text.strip()
                job_type = ddt.find("dt", text="Job Type").find_next_sibling("dd").text.strip()
                
                job_link = "https://www.jobstreet.com.ph" + job.find('a', class_ = 'jdlu994 jdlu996 jdlu999 y44q7i2 z1s6m00 z1s6m0f _1hbhsw6h')['href']

                # Write the extracted information to the CSV file
                writer.writerow([job_name, company_name, location, date_posted, job_specialization, job_type, job_link])
            except Exception as e:
                print(f"Failed to extract job information: {e}")

Failed to extract job information: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
Failed to extract job information: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
Failed to extract job information: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
Failed to extract job information: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
Failed to extract job information: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
Faile

In [87]:
url = 'https://www.jobstreet.com.ph/en/job-search/job-vacancy.php'

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

for job in soup.find_all('div', class_='z1s6m00 _1hbhsw69y _1hbhsw68u _1hbhsw67e _1hbhsw67q'):
    print(job.find('div', class_='z1s6m00 l3gun70 l3gun74 l3gun72').text.strip())
    print(job.find('span', class_='z1s6m00 _17dyj7u1 _1hbhsw64y _1hbhsw60 _1hbhsw6r').text.strip())
    print(job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i3 y44q7i21 y44q7ih').text.strip())
    print(convert_datetime(job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i1 y44q7i22 y44q7ih').text.strip()))
    print("https://www.jobstreet.com.ph" + job.find('a', class_ = 'jdlu994 jdlu996 jdlu999 y44q7i2 z1s6m00 z1s6m0f _1hbhsw6h')['href'])

Admin Assistant
F1RST ONE EDUCATION MIGRATION LEARNING HUB
Taguig City
2023-04-17
https://www.jobstreet.com.ph/en/job/admin-assistant-12951989?jobId=jobstreet-ph-job-12951989&sectionRank=1&token=0~ff368c50-dc84-45a2-8c22-003befb32740&fr=SRP%20Job%20Listing
-----------
IT ADMINISTRATOR
Nobile Inc.
Caloocan City
2023-04-17
https://www.jobstreet.com.ph/en/job/it-administrator-12952904?jobId=jobstreet-ph-job-12952904&sectionRank=2&token=0~ff368c50-dc84-45a2-8c22-003befb32740&fr=SRP%20Job%20Listing
-----------
OFFICE ASSISTANT
Iridium Technologies, Inc.
National Capital Reg
2023-04-17
https://www.jobstreet.com.ph/en/job/office-assistant-12951631?jobId=jobstreet-ph-job-12951631&sectionRank=3&token=0~ff368c50-dc84-45a2-8c22-003befb32740&fr=SRP%20Job%20Listing
-----------
OFFICE STAFF
Company Confidential
National Capital Reg
2023-04-17
https://www.jobstreet.com.ph/en/job/office-staff-12953738?jobId=jobstreet-ph-job-12953738&sectionRank=4&token=0~ff368c50-dc84-45a2-8c22-003befb32740&fr=SRP%20J

In [130]:
url = 'https://www.jobstreet.com.ph/en/job-search/job-vacancy.php'

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

for job in soup.find('div', {'class':'z1s6m00', 'data-automation': 'jobListing'}):
    print(job.get_text())
    print('----')

Admin AssistantF1RST ONE EDUCATION MIGRATION LEARNING HUBTaguig City₱28K - 40,000 monthlyExperience in an international company7h agoJob SpecializationsAdmin/Human Resources / Clerical/Administrative SupportJob TypeFull-Time
----
IT ADMINISTRATORNobile Inc.Caloocan CityOperational in the Philippines for 30 yearsISO certified company for 20 yearsLocated inside the Special Economic Zone in Caloocan5h agoJob SpecializationsComputer/Information Technology / IT-Network/Sys/DB AdminJob TypeFull-Time
----
OFFICE ASSISTANTIridium Technologies, Inc.National Capital Reg₱20K - 25,000 monthly9h agoJob SpecializationsAdmin/Human Resources / Clerical/Administrative SupportJob TypeFull-Time
----
OFFICE STAFFCompany ConfidentialNational Capital Reg3h agoJob SpecializationsServices / Customer ServiceJob TypeFull-Time
----
No Experience Needed | Fresh Graduates | Customer Service Support | BGCCitigroup Business Process Solutions Pte. Ltd.National Capital Reg₱26K - 33,000 monthlyOver-the-Phone Processing

In [143]:
url = 'https://www.jobstreet.com.ph/en/job-search/job-vacancy.php'

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

for job in soup.find('div', {'class':'z1s6m00', 'data-automation': 'jobListing'}):
    jn = job.find_all('h1', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i3 y44q7i21 y44q7ii')
    text = [j.get_text() for j in jn]
    print(text)
    
    cn = job.find_all('a', {'class': '_6xa4xb0 z1s6m00 z1s6m0f rqoqz4', 'data-automation': 'jobCardCompanyLink'})
    #cn = job.find_all('span', class_='z1s6m00 _17dyj7u1 _1hbhsw64y _1hbhsw60 _1hbhsw6r')
    ttext = [c.get_text() for c in cn]
    print(ttext)
    
    loc = job.find_all('a', {'class': '_6xa4xb0 z1s6m00 z1s6m0f rqoqz4', 'data-automation':'jobCardLocationLink'})
    tttext = [l.get_text() for l in loc]
    print(tttext)
    
    print('----')

['Admin Assistant']
['F1RST ONE EDUCATION MIGRATION LEARNING HUB']
['Taguig City']
----
['IT ADMINISTRATOR']
['Nobile Inc.']
['Caloocan City']
----
['OFFICE ASSISTANT']
['Iridium Technologies, Inc.']
['National Capital Reg']
----
['OFFICE STAFF']
[]
['National Capital Reg']
----
['No Experience Needed | Fresh Graduates | Customer Service Support | BGC']
['Citigroup Business Process Solutions Pte. Ltd.']
['National Capital Reg']
----
['No Experience Needed | Fresh Graduates | Customer Service Support |Earn 26k-33k']
['Citigroup Business Process Solutions Pte. Ltd.']
['National Capital Reg']
----
['Reservation Associate']
['City Garden Hotel Makati']
['National Capital Reg']
----
['BANK TELLER - Davao and Zamboanga']
['Philippine National Bank (PNB)']
['Davao del Sur (Davao City)', 'Zamboanga']
----
['NON-VOICE/Voice Agent (BACK OFFICE) | No Work Experience Required (Up to 29k* PAY) - Makati']
['Foundever™']
['National Capital Reg']
----
['Customer Service Representative']
['Royal Cargo 

In [165]:
url = 'https://www.jobstreet.com.ph/en/job-search/job-vacancy.php'

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

for job in soup.find('div', {'class':'z1s6m00', 'data-automation': 'jobListing'}):
    job_name = job.find('h1', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i3 y44q7i21 y44q7ii').get_text()
    print(job_name)
    
    company_name = job.find('a', {'class': '_6xa4xb0 z1s6m00 z1s6m0f rqoqz4', 'data-automation': 'jobCardCompanyLink'})
    if company_name is None:
        company_name = "Confidential"
        print(company_name)
    else:
        print(company_name.get_text())
    
    location = job.find('a', {'class': '_6xa4xb0 z1s6m00 z1s6m0f rqoqz4', 'data-automation':'jobCardLocationLink'}).get_text()
    print(location)
    
    date_posted = convert_datetime(job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i1 y44q7i22 y44q7ih').text.strip())
    print(date_posted)
    
    job_link = "https://www.jobstreet.com.ph" + job.find('a', class_ = 'jdlu994 jdlu996 jdlu999 y44q7i2 z1s6m00 z1s6m0f _1hbhsw6h')['href']
    print(job_link)
    
    job_specialization = job.find("dt", text="Job Specializations").find_next_sibling("dd").text.strip()
    
    print(job_specialization)
    
    job_type = job.find("dt", text="Job Type").find_next_sibling("dd").text.strip()
    print(job_type)
    
    print('----')

Admin Assistant
F1RST ONE EDUCATION MIGRATION LEARNING HUB
Taguig City
2023-04-17
https://www.jobstreet.com.ph/en/job/admin-assistant-12951989?jobId=jobstreet-ph-job-12951989&sectionRank=1&token=0~ed4579b3-e448-4b09-89d6-667a926ff5a5&fr=SRP%20Job%20Listing
Admin/Human Resources
Full-Time
----
IT ADMINISTRATOR
Nobile Inc.
Caloocan City
2023-04-17
https://www.jobstreet.com.ph/en/job/it-administrator-12952904?jobId=jobstreet-ph-job-12952904&sectionRank=2&token=0~ed4579b3-e448-4b09-89d6-667a926ff5a5&fr=SRP%20Job%20Listing
Computer/Information Technology
Full-Time
----
OFFICE ASSISTANT
Iridium Technologies, Inc.
National Capital Reg
2023-04-17
https://www.jobstreet.com.ph/en/job/office-assistant-12951631?jobId=jobstreet-ph-job-12951631&sectionRank=3&token=0~ed4579b3-e448-4b09-89d6-667a926ff5a5&fr=SRP%20Job%20Listing
Admin/Human Resources
Full-Time
----
OFFICE STAFF
Confidential
National Capital Reg
2023-04-17
https://www.jobstreet.com.ph/en/job/office-staff-12953738?jobId=jobstreet-ph-job-1

  job_specialization = job.find("dt", text="Job Specializations").find_next_sibling("dd").text.strip()
  job_type = job.find("dt", text="Job Type").find_next_sibling("dd").text.strip()
