In [1]:
# Installing packages
!pip install --upgrade pip
!pip install -U selenium



In [2]:
!pip install undetected-chromedriver



In [3]:
# Importing libraries
import time
import random
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import undetected_chromedriver as uc

In [4]:
# Initialize an empty list to store job data
job_data = []

# Initialize the browser using undetected-chromedriver
driver = uc.Chrome()

# Open the Indeed website
driver.get("https://au.indeed.com/")
print("Opened Indeed website")
time.sleep(random.uniform(2, 4))  # Wait randomly between 2 to 4 seconds for the page to load

# Enter search criteria
what_box = driver.find_element(By.ID, "text-input-what")   # "What" input box
where_box = driver.find_element(By.ID, "text-input-where")  # "Where" input box

where_box.clear()  # Clear the location input box
what_box.send_keys("Data Analyst")  # Enter the job keyword
time.sleep(random.uniform(2, 4))
where_box.send_keys("Australia")  # Enter the location
time.sleep(random.uniform(2, 4))
where_box.send_keys(Keys.RETURN)  # Simulate pressing the search button
time.sleep(random.uniform(3, 5))

# Define a function to navigate to the next page
def go_to_next_page(driver):
    try:
        next_button = driver.find_element(By.CSS_SELECTOR, "a[data-testid='pagination-page-next']")
        next_button.click()  # Click the next page button
        time.sleep(random.uniform(3, 5))  # Wait for the page to load
        return True
    except Exception as e:
        print("No more next page, the scraping ends.")
        return False

# Scrape data from multiple pages
while True:
    # Retrieve job cards on the current page
    job_cards = driver.find_elements(By.CSS_SELECTOR, "a.jcs-JobTitle")
    print(f"Found {len(job_cards)} jobs.")

    for job in job_cards:
        try:
            title = job.text  # Get the job title
            job_url = job.get_attribute("href")  # Get the job URL
            print(f"Job Title: {title}")
            print(f"Job URL: {job_url}")
            job_data.append({"Job Title": title, "Job URL": job_url})
            time.sleep(random.uniform(2, 4))  # Random delay to mimic human behavior
        except Exception as e:
            print(f"Error fetching job details: {e}")

    print("Finished scraping current page, moving to the next page...")
    print("-" * 50)

    # Navigate to the next page
    if not go_to_next_page(driver):
        break

# Close the browser
driver.quit()
print("Closed Indeed website")

# Convert the job data list to a DataFrame
df = pd.DataFrame(job_data)

# Save the results to a CSV file
df.to_csv("indeed_jobs.csv", index=False)
print("Job data saved to 'indeed_jobs.csv'.")

# Display the data
print(df)

Opened Indeed website
Found 15 jobs.
Job Title: Technical Business Analyst
Job URL: https://au.indeed.com/pagead/clk?mo=r&ad=-6NYlbfkN0CP0xCJ0Wr0sgcJ3Crie2luOxE6bx_awjXc77DzKaI3h71juiEckgUWYQHSYh3Agt9-AR6s7kAthXBF2Z1e0XsikFLiV90Qs_Z590nQXNJ0dzpDBc3ktoMHCV04HQP07nOsPMECTIyZau7IKcRSk9EBDHrSaQIuqbzUyDcx2MDp2ONGKJBKj_LAA01saxOU3UdPgiLNzAovBilcoFSTwhXrkI6g_HgyHphaMAp6gQ3EMwTvaxAug380z63EVw94-8JQhEERL-BGWgbStmOFQ44JMBtJyaE4A9L4jc9SAqvbh5dfP9jDk0-TQPfwYKIu_dbSXA3JQwDgx7vfmMZJNEA4Gy_eivuGL-vCzM4qgjcTQLjnUeKibp7j-tXlY6-CShfu_bF6_CPuc1HBlYSrT9S1-ryopXo8D7OsHzvsabE_cl9oFdpmoWdKHOB9dPC1TwZuiYMpGjI6TZYR8YJW3vVRF0B6zRMMPytXA4WUiLkDWwtZcqsS0YlHsx4vxh3ssBAXy0xOhVCxTFaf-QI3ZUwwVtePJa-LpuSvaU8VDQx6o10tCZLazJoWA9gHYkPX2mMhHqK03G1LhKPqY2DurugC36cWNMp-8KFL1jOlBvRNLdVMGzWc5LsogAIwoMmS4b4tUJnwp4uqCFfOlhX-ibO9i5ksduVMjCuGRctD4gJ2FzjNeSfnjhkAN2pbE5Ef5h-x8-OdXT6kLA9P0NgYP93RTMdHlODGb6Dkheod3063cmuspnEzqdqZNImULY0TGmigNnJlRsK0KjnPHIhXxlBRuWSlRBUmfjTj0kbnHSNtqYdISDukEcT94aZVddUAdzuAO6yMheucCxnIc-d7dPBka3xmkdQ7Je8

In [5]:
df.head()

Unnamed: 0,Job Title,Job URL
0,Technical Business Analyst,https://au.indeed.com/pagead/clk?mo=r&ad=-6NYl...
1,Senior Business Analyst - Data,https://au.indeed.com/pagead/clk?mo=r&ad=-6NYl...
2,Senior Credit Risk Analyst,https://au.indeed.com/pagead/clk?mo=r&ad=-6NYl...
3,Data & Analytics - 2025/26 Graduate Program (B...,https://au.indeed.com/rc/clk?jk=c6b3510aaf1f82...
4,Data & Analytics - 2025/26 Vacationer Program ...,https://au.indeed.com/rc/clk?jk=4c3baabf09aff9...


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1574 entries, 0 to 1573
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Job Title  1574 non-null   object
 1   Job URL    1574 non-null   object
dtypes: object(2)
memory usage: 24.7+ KB


In [7]:
df.describe()

Unnamed: 0,Job Title,Job URL
count,1574,1574
unique,155,210
top,Data Analyst,https://au.indeed.com/pagead/clk?mo=r&ad=-6NYl...
freq,188,33


In [8]:
df.to_csv("indeed_jobs.csv", index=False)