# Job Hunting Aggregator

Scrape all jobs from: https://ph.indeed.com/?r=us, https://ph.jobstreet.com/, https://www.linkedin.com/

In [26]:
from bs4 import BeautifulSoup
from seleniumbase import SB
import pandas as pd

In [27]:
def get_url (indeed_url, job_title, location, job_type):
    template = '{}/jobs?q={}&l={}&sc={}'
    url = template.format(indeed_url,job_title, location, job_type)
    return url

In [28]:
df = pd.DataFrame({'Job Title': [''],  'Company Name': [''], 'Location': [''], 'Salary Range': [''], 'Summary': [''], 'Link': ['']})

In [29]:
indeed_url = "https://ph.indeed.com"
title = 'python'
location = 'philippines'
filter = '0kf%3Aattr%287EQCZ%29%3B'

with SB(uc_cdp=True, incognito=True) as sb:
    while True:  # Loop until the text is found
        url = get_url(indeed_url, title, location, filter)
        sb.open(url)
        sb.uc_gui_click_captcha()
        sb.sleep(20) # Adjust values depending on load time

        try:
            sb.assert_text("Find jobs", "button.yosegi-InlineWhatWhere-primaryButton[type='submit']")
            print("Text Found")
            break  # Exit loop if text is found
        except Exception:
            print("Text not found. Retrying...")
            continue  # Retry from sb.open(url)

    raw_html = sb.get_page_source()
    soup = BeautifulSoup(raw_html, 'lxml')
    cards = soup.find_all('div', 'job_seen_beacon')
    
    # Testing for single record

    for card in cards:
        td_tag = card.table.tbody.tr.td

        # Job Title and URL
        try:
            job_div = td_tag.find('div', {'class','css-pt3vth e37uo190'}).h2.a
            job_title = job_div.span.get('title')
        except AttributeError:
            job_title = None
        try:
            job_url = indeed_url + job_div.get('href')
        except Exception:
            job_url = None

        # Company Name
        try:
            company_div = td_tag.find('div', {'class','company_location css-i375s1 e37uo190'})
            company_name = company_div.find('div', {'class','css-1afmp4o e37uo190'}).span.text.strip()
        except AttributeError:
            company_name = None

        # Company Location
        try:
            company_location = company_div.find('div', {'class','css-1restlb eu4oa1w0'}).text.strip()
        except Exception:
            company_location = None

        # Salary Range
        try:
            metadata_div = td_tag.find('div', {'class','jobMetaDataGroup css-qspwa8 eu4oa1w0'}).ul
            salary_range = metadata_div.find('li', {'class','css-u74ql7 eu4oa1w0'}).div.div.text.strip()
        except AttributeError:
            salary_range = None

        # Summary
        try:
            summary = card.find('div',{'class', 'underShelfFooter'}).div.div.ul.li.text.strip()
        except AttributeError:
            summary = None

        new_data = pd.DataFrame({
            'Job Title': [job_title],
            'Company Name': [company_name],
            'Location': [company_location],
            'Salary Range': [salary_range],
            'Summary': [summary],
            'Link': [job_url]
        })
        df = pd.concat([df, new_data], ignore_index=True)

        # print(job_title)
        # print(job_url)
        # print(company_name)
        # print(company_location)
        # print(salary_range)
        # print(summary)

Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text not found. Retrying...
Text Found


In [30]:
df

Unnamed: 0,Job Title,Company Name,Location,Salary Range,Summary,Link
0,,,,,,
1,Junior Developer - Angeles City (Fresh Grads W...,SiteHost,Angeles 2009 P03,"PHP 30,000 - PHP 150,000 a month",,https://ph.indeed.com/rc/clk?jk=a89947b47c8950...
2,Jr./Sr. Data Analyst (We Provide Training) - F...,Kooapps Philippines Corporation,Philippines,,,https://ph.indeed.com/rc/clk?jk=2e0c06bcebf076...
3,Data Analyst,S.P. Madrid & Associates,Parañaque,,,https://ph.indeed.com/rc/clk?jk=b5a74714ca649c...
4,Robotics Intern,Planate Management Group,Olongapo,,,https://ph.indeed.com/rc/clk?jk=e8c4581419bb38...
5,Software Design Engineer I,NEC Telecom Software Philippines,Taguig,,,https://ph.indeed.com/rc/clk?jk=ed86f468fa80b3...
6,Application Developer,INTEGRATED SECURITY AND AUTOMATION INC.,Mandaluyong Central Post Office 1550 P00,"PHP 22,000 - PHP 25,000 a month",,https://ph.indeed.com/rc/clk?jk=04b1c6cb809ced...
7,Virtual Assistant: Associate Developer,Playbook,Remote in Manila,"PHP 19,000 - PHP 22,000 a month",,https://ph.indeed.com/rc/clk?jk=447cef87a02e7b...
8,Data Analyst | Data Engineer | Pioneer Team,iOPEX Phil,Taguig,,,https://ph.indeed.com/rc/clk?jk=ddc8c35cc34b1c...
9,Business/ Data Analyst,GlobalQuest,Mandaluyong,,,https://ph.indeed.com/rc/clk?jk=f54613445b9fc0...


In [36]:
pd.set_option('display.max_colwidth', None)
df["Link"]

0                                                                                                                                                                                                                                                                                                                                           
1                                             https://ph.indeed.com/rc/clk?jk=a89947b47c89507b&bb=0ZBxulDyEK7K1xz2HU4miOPsoEAgcx7ugB-uTccMav6tHiLxrbDD8hgfnaSDTQ9lYkh22Nn_AHhBUFv66IojdCNJB4DzTc5z1IT4OVO7CtGvY5-0EmZ3NlsG0iTWXTbxwTXjxx23hzA%3D&xkcb=SoB-67M3y5jEXGy01x0LbzkdCdPP&fccid=cf163e7740719533&cmp=SiteHost&ti=New+Graduate&vjs=3
2               https://ph.indeed.com/rc/clk?jk=2e0c06bcebf076d4&bb=0ZBxulDyEK7K1xz2HU4miI-Y4Qt6zCOEKl7E5TD87Eue9Ntc5bhYJKJpEfkrQcZ5UQZ9ZUh8cYf9C0cYfe2dnFxGXSErEbkbD48k8qpUXfnC_CKxTpwBPRW9VH_CrD1-sN3_iSEPysY%3D&xkcb=SoDK67M3y5jEXGy01x0KbzkdCdPP&fccid=90310e4cdefed57e&cmp=Kooapps-Philippines-Corporation&ti=Junior+Data+Analyst&vjs=3
3