# 01 Install & Import Libraries

In [2]:
pip install playwright

Note: you may need to restart the kernel to use updated packages.


In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# 02 Data Collection (Web Scraping)

In [None]:
# Start the Chrome session; the chromedriver path comes from ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
)

# Search phrases that scrape_job_links runs against each country site
keywords = [
    "data scientist",
    "data analyst",
    "machine learning",
    "data engineer",
    "data science",
]

# Function for scraping per country
def scrape_job_links(domain, country_label):
    """Collect job-detail links from the search results page of every keyword.

    Uses the notebook-level globals ``driver`` (an open Selenium WebDriver) and
    ``keywords`` (the list of search phrases). Only the page loaded by the
    search URL is read; no pagination is followed.

    Args:
        domain: Host name of the site to search, e.g. ``"jobstreet.com.my"``.
        country_label: Label stored alongside every link found on this domain.

    Returns:
        A list of ``(href, country_label)`` tuples, in page order and
        concatenated across keywords. Duplicates are not removed here.
        A keyword whose results do not appear within 10 seconds contributes
        nothing and is reported with a printed message.
    """
    link_locator = (By.CSS_SELECTOR, "a[data-automation='jobTitle']")
    all_hrefs = []
    for keyword in keywords:
        # The site expects hyphen-separated search terms in the URL path.
        search_keyword = keyword.replace(" ", "-")
        search_url = f"https://{domain}/en/job-search/{search_keyword}-jobs/"
        print(f"\nSearching on {country_label.upper()} for: {keyword.upper()} jobs")
        driver.get(search_url)
        try:
            # Wait until at least one job-title link is present in the DOM.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(link_locator)
            )
            hrefs = []
            for el in driver.find_elements(*link_locator):
                # Read the attribute once: each get_attribute call is a
                # separate WebDriver round trip.
                href = el.get_attribute("href")
                if href:
                    hrefs.append((href, country_label))
            all_hrefs.extend(hrefs)
            print(f"  Found {len(hrefs)} job links in {country_label} for '{keyword}'")
        except TimeoutException:
            print(f"  Timeout loading results for: {keyword} in {country_label}")
    return all_hrefs

# All browser work runs inside try/finally so the Chrome session is closed
# even when an unexpected error (or a manual interrupt) stops the scrape.
try:
    # Scrape per country
    all_job_hrefs_malaysia = scrape_job_links("jobstreet.com.my", "malaysia")
    all_job_hrefs_singapore = scrape_job_links("jobstreet.com.sg", "singapore")
    all_job_hrefs_indonesia = scrape_job_links("id.jobstreet.com", "indonesia")

    # Combine the links from all countries
    all_job_hrefs = all_job_hrefs_malaysia + all_job_hrefs_singapore + all_job_hrefs_indonesia

    # Deduplicate by URL, keeping the country of the first occurrence
    unique_href_map = {}
    for href, country in all_job_hrefs:
        if href and href not in unique_href_map:
            unique_href_map[href] = country

    # Scrape the detail page of every unique job link
    scraped_jobs = []
    for i, (href, country_name) in enumerate(unique_href_map.items()):
        # Skip anything that is not an absolute http(s) URL.
        if not href.startswith("http"):
            continue

        print(f"\n[{i+1}] Navigating to: {href}")
        try:
            driver.get(href)
            # The job title heading marks the detail page as loaded.
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "h1[data-automation='job-detail-title']"))
            )

            title = driver.find_element(By.CSS_SELECTOR, "h1[data-automation='job-detail-title']").text
            company = driver.find_element(By.CSS_SELECTOR, "span[data-automation='advertiser-name']").text
            location = driver.find_element(By.CSS_SELECTOR, "span[data-automation='job-detail-location']").text
            category = driver.find_element(By.CSS_SELECTOR, "span[data-automation='job-detail-classifications']").text
            work_type = driver.find_element(By.CSS_SELECTOR, "span[data-automation='job-detail-work-type']").text
            description = driver.find_element(By.CSS_SELECTOR, "div[data-automation='jobAdDetails']").text

            scraped_jobs.append({
                "url": href,
                "country": country_name,
                "title": title.strip(),
                "company": company.strip(),
                "location": location.strip(),
                "category": category.strip(),
                "work_type": work_type.strip(),
                "description": description.strip(),
            })

            print(f"  Title: {title}")
            print(f"  Company: {company}")
            print(f"  Location: {location}")
            print(f"  Category: {category}")
            print(f"  Work Type: {work_type}")
            print(f"  Description preview: {description[:200]}...")

        except (TimeoutException, NoSuchElementException) as e:
            # A page that times out or lacks any of the fields is skipped entirely.
            print(f"  Error scraping {href}: {e}")
finally:
    driver.quit()

df_jobs = pd.DataFrame(scraped_jobs)
print("\nTotal jobs scraped:", len(df_jobs))


Searching on MALAYSIA for: DATA SCIENTIST jobs
  Found 32 job links in malaysia for 'data scientist'

Searching on MALAYSIA for: DATA ANALYST jobs
  Found 32 job links in malaysia for 'data analyst'

Searching on MALAYSIA for: MACHINE LEARNING jobs
  Found 32 job links in malaysia for 'machine learning'

Searching on MALAYSIA for: DATA ENGINEER jobs
  Found 32 job links in malaysia for 'data engineer'

Searching on MALAYSIA for: DATA SCIENCE jobs
  Found 32 job links in malaysia for 'data science'

Searching on SINGAPORE for: DATA SCIENTIST jobs
  Found 32 job links in singapore for 'data scientist'

Searching on SINGAPORE for: DATA ANALYST jobs
  Found 32 job links in singapore for 'data analyst'

Searching on SINGAPORE for: MACHINE LEARNING jobs
  Found 32 job links in singapore for 'machine learning'

Searching on SINGAPORE for: DATA ENGINEER jobs
  Found 32 job links in singapore for 'data engineer'

Searching on SINGAPORE for: DATA SCIENCE jobs
  Found 32 job links in singapore f

In [2]:
# Build the DataFrame from the list of scraped job dicts and show it as the cell output.
# NOTE(review): the scraping cell already assigns df_jobs the same way, so this
# rebuild looks redundant — confirm before removing.
# NOTE(review): the recorded output has an "Unnamed: 0" column that the scraped
# dicts do not contain; the output may come from a different run — verify.
df_jobs = pd.DataFrame(scraped_jobs)
df_jobs

Unnamed: 0,url,country,title,company,location,category,work_type,description
0,https://my.jobstreet.com/job/84998805?type=sta...,malaysia,"Data Scientist, Financial Conglomerates Superv...",Bank Negara Malaysia,Kuala Lumpur,Analysis & Reporting (Banking & Financial Serv...,Full time,ROLE PURPOSE:\nExecute the SupTech and Data An...
1,https://my.jobstreet.com/job/85371197?type=sta...,malaysia,Data Scientist,Western Digital Tech and Regional Center (M) S...,Kuala Lumpur,"Mathematics, Statistics & Information Sciences...",Full time,Job Description\nResponsibilities\nWork with d...
2,https://my.jobstreet.com/job/85548582?type=sta...,malaysia,Data Annotator,Concentrix,"Cyberjaya, Selangor",Database Development & Administration (Informa...,Full time,Job Description\nKey Responsibilities:\n Accur...
3,https://my.jobstreet.com/job/85154961?type=sta...,malaysia,Data Scientist (Artificial Intelligence),Dcap Commercial Sdn Bhd,Kuala Lumpur,"Mathematics, Statistics & Information Sciences...",Full time,"Why Join Us? \nAt DCAP, we’re dynamic, fast-gr..."
4,https://my.jobstreet.com/job/84832618?type=sta...,malaysia,Data Scientist,MSA Focus International Ltd,"Kelana Jaya, Selangor","Mathematics, Statistics & Information Sciences...",Full time,Key Responsibilities\nData Exploration & Model...
...,...,...,...,...,...,...,...,...
475,https://id.jobstreet.com/job/85011674?type=sta...,indonesia,Data Scientist,PT CRIF Lembaga Informasi Keuangan,Jakarta,"Mathematics, Statistics & Information Sciences...",Full time,Job Descriptions: \nWorking closely with the i...
476,https://id.jobstreet.com/job/83068821?type=sta...,indonesia,Data Scientist - Pricing,PT Trinusa Travelindo,Jakarta,Business/Systems Analysts (Information & Commu...,Full time,It's fun to work in a company where people tru...
477,https://id.jobstreet.com/job/85633431?type=sta...,indonesia,Data Analyst,PT. INTERINDO BARA UTAMA,Jakarta,"Mathematics, Statistics & Information Sciences...",Part time,"Tugas dan Tanggung Jawab:\nMengumpulkan, membe..."
478,https://id.jobstreet.com/job/85546452?type=sta...,indonesia,Data Analyst – Pricing Staff,PT Global Jet Cargo (J&T Cargo),"North Jakarta, Jakarta","Analysis & Reporting (Manufacturing, Transport...",Full time,"Key Responsibilities:\nCollect, process, and m..."


In [3]:
# Write the scraped jobs to a CSV file at a relative path; index=False leaves out the row index.
df_jobs.to_csv('jobstreet_data.csv', index=False)