In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
from tqdm import tqdm
import urllib.parse
import undetected_chromedriver as uc


In [2]:
search_term = ['data analyst','business analyst','business intelligence analyst','financial analyst','marketing analyst',
               'data scientist',' credit analyst','operations analyst','technical analyst','project analyst']

num_pages = 20  # 20 pages × 15 jobs ≈ 300 jobs per role

# Dates posted filter
while (filter:= input("Show jobs from past how many days? (1,3,7,14): ")) not in {"1", "3", "7", "14"}:
    print("Invalid input. Try again.")


Show jobs from past how many days? (1,3,7,14):  14


In [3]:
# setting up chrome options
options = Options()

# Use your actual persistent Chrome profile path to avoid bot detection and login credentials on indeed.
options.add_argument(r'--user-data-dir=/Users/mayanksinghrawat/Library/Application\ Support/Google/Chrome')
options.add_argument(r'--profile-directory=Profile\ 2')
driver = uc.Chrome(options=options)
wait = WebDriverWait(driver, 10)
time.sleep(2)


In [4]:
# Results storage
all_jobs = []

# Loop over job roles
for role in tqdm(search_term, desc='Scraping roles'):
    print(f"\nScraping role: {role}")
    print(f"Role: {role} | Type: {type(role)}")
    encoded_role = urllib.parse.quote_plus(role)

    for page in range(0,num_pages*10,10):  #Indeed paginationstart=0,10,20,...
        url = f"https://ca.indeed.com/jobs?q={encoded_role}&l=Canada&fromage=filter&start={page}"
        driver.get(url)
        time.sleep(3)

         # if a bot challenge page is detected.
        if "bot-detection" in driver.current_url or "cloudflare" in driver.page_source.lower():
            print("bot detection triggered, Verify manually")
            input("Press enter to proceed after verifying manually in the browser")
            
        job_cards = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "job_seen_beacon")))
        print(f"  Page {int(page/10)+1}: {len(job_cards)} jobs")
        time.sleep(1)
        
        #Stop if this page has less than 15 job cards (probably the last page)
        if len(job_cards) < 15:
            print("Last page reached.")
            break

        for card in job_cards:
            try:
                card.click()
                time.sleep(2)
                try:
                    title = driver.find_element(By.CLASS_NAME, "jobsearch-JobInfoHeader-title").text
                except:
                    title = None
                
                try:
                    company = driver.find_element(By.CSS_SELECTOR, 'div[data-company-name="true"] a').text
                except:
                    company = None
                
                def try_multiple_xpaths(driver, xpaths):
                    for path in xpaths:
                        try:
                            return driver.find_element(By.XPATH, path).text
                        except:
                            continue
                    return None
                
                # Usage
                location = try_multiple_xpaths(driver, [
                    "//div[@data-testid='job-location']",
                    "//div[@data-testid='inlineHeader-companyLocation']/div"])
                
                try:
                    salary_block = card.find_element(By.XPATH, "//div[@id='salaryInfoAndJobType']")
                    spans = salary_block.find_elements(By.TAG_NAME, "span")
                    
                    for span in spans:
                        text = span.text.strip()
                        if any(keyword in text for keyword in ['$', 'year', 'hour']):
                            salary = text
                        elif any(term in text.lower() for term in ['full-time', 'part-time', 'contract', 'temporary', 'intern']):
                            job_type = text
                except:
                    salary = None
                    job_type = None

                try:
                    work_location = driver.find_element(By.XPATH, "//div[@id='jobDescriptionText']//p[contains(text(), 'Work Location')]").text
                except:
                    work_location = None
                if work_location:
                    work_location = work_location.split(":")[-1].strip()

                # Try to click the "show more" button if it exists in skills section
                skills = [] #default
                try:
                    show_more = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), '+ show more')]")))
                    show_more.click()
                    time.sleep(3)  # allow time for extra skills to load
             
                    # Scrape all skill buttons
                    skill_buttons = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='js-match-insights-provider']//button")))
                    skills = [btn.text for btn in skill_buttons if btn.text.strip() != ""]
                except:
                    skills = None

                try:
                    url = driver.find_element(By.XPATH, "//div[@id='jobsearch-ViewjobPaneWrapper']//a[1]").get_attribute("href")
                except:
                    url = None
                
                

            except:
                description = "Unable to click card"
                
            
            all_jobs.append({
                "job_role_category": role,
                "job_title": title,
                "company": company,
                "location": location,
                "salary": salary,
                "job_type": job_type,
                "work_location": work_location,
                "skills_required": skills,
                "job_url": url})

print(all_jobs)

driver.quit()       

# converting list into data_frame
df = pd.DataFrame(all_jobs)

# Export to csv
df.to_csv('/Users/mayanksinghrawat/Desktop/Projects/Indeed Analysis/scrapedindeed_jobs.csv')




Scraping roles:   0%|                                    | 0/10 [00:00<?, ?it/s]


Scraping role: data analyst
Role: data analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  10%|██▍                     | 1/10 [57:02<8:33:19, 3422.19s/it]


Scraping role: business analyst
Role: business analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  20%|████▏                | 2/10 [2:31:40<10:33:12, 4749.06s/it]


Scraping role: business intelligence analyst
Role: business intelligence analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  30%|██████▎              | 3/10 [4:12:06<10:22:05, 5332.20s/it]


Scraping role: financial analyst
Role: financial analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  40%|████████▊             | 4/10 [5:15:47<7:53:33, 4735.58s/it]


Scraping role: marketing analyst
Role: marketing analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 8 jobs


Scraping roles:  50%|███████████           | 5/10 [5:26:10<4:31:03, 3252.70s/it]

Last page reached.

Scraping role: data scientist
Role: data scientist | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  60%|█████████████▏        | 6/10 [6:33:57<3:55:18, 3529.65s/it]


Scraping role:  credit analyst
Role:  credit analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  70%|█████████████▎     | 7/10 [25:50:12<20:52:47, 25055.87s/it]


Scraping role: operations analyst
Role: operations analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  80%|███████████████▏   | 8/10 [27:22:15<10:27:54, 18837.45s/it]


Scraping role: technical analyst
Role: technical analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles:  90%|██████████████████  | 9/10 [28:47:52<4:02:34, 14554.49s/it]


Scraping role: project analyst
Role: project analyst | Type: <class 'str'>
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 1: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 2: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 3: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 4: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 5: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 6: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 7: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 8: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 9: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 10: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 11: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser \


  Page 12: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 13: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 14: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 15: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 16: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 17: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 18: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 19: 15 jobs
bot detection triggered, Verify manually


Press enter to proceed after verifying manually in the browser 


  Page 20: 15 jobs


Scraping roles: 100%|█████████████████████| 10/10 [30:03:42<00:00, 10822.29s/it]

[{'job_role_category': 'data analyst', 'job_title': 'Buyer - Procurement Analyst\n- job post', 'company': 'Nexom Inc.', 'location': '5 Burks Way, Navin, MB', 'salary': '$48,000–$61,000 a year', 'job_type': '- Permanent, Full-time', 'work_location': 'In person', 'skills_required': None, 'job_url': 'https://ca.indeed.com/cmp/Nexom-Inc.?campaignid=mobvjcmp&from=mobviewjob&tk=1j1k3o39ik3dj800&fromjk=d5f984b52805f882'}, {'job_role_category': 'data analyst', 'job_title': 'Inventory Analyst\n- job post', 'company': 'Northern Mining Equipment', 'location': 'Mount Pearl, NL', 'salary': '$85,000–$95,000 a year', 'job_type': '- Full-time', 'work_location': 'In person', 'skills_required': None, 'job_url': 'https://ca.indeed.com/cmp/Northern-Mining-Equipment?campaignid=mobvjcmp&from=mobviewjob&tk=1j1k3ofhii9d580c&fromjk=d2a60474bf1c4ab4'}, {'job_role_category': 'data analyst', 'job_title': 'Power BI Developer\n- job post', 'company': 'AMPHENOL CANADA CORP', 'location': '5950 14th Avenue, Markham, O




In [11]:
df.shape

(2745, 9)

In [13]:
df.drop_duplicates(subset = "job_url", inplace = True) 

In [15]:
df.shape

(2267, 9)

In [25]:
# drop rows if job_title is missing
df = df[df['job_title'].str.strip().astype(bool)]
df.shape

(2265, 9)

In [137]:
df['job_title'] = df['job_title'].str.replace(r'\n- job post', '', regex=True).str.strip()

In [29]:
df['job_title'].head(10)

0                         Buyer - Procurement Analyst
1                                   Inventory Analyst
2                                  Power BI Developer
3    Senior ERP Technical Analyst – Cognos & Power BI
4                    BI Analytics Analyst (Architect)
5                     Sr Analyst, Actuarial - Pricing
6                                    Business Analyst
7                     Business Analyst (nCino expert)
8       Senior Business Analyst, Process & AI Systems
9             Senior Data Analyst (18 month contract)
Name: job_title, dtype: object

In [139]:
# Apply transformation
# if job title starts from word analyst: remain unchanged
# if there is no analyst word in job title: remain unchanged
# if analyst word is in the middle: remove all words after analyst

import re
df['job_title'] = df['job_title'].apply(lambda x: x if x.lower().startswith('analyst') 
    else re.sub(r'^(.*?\bAnalyst\b).*$', r'\1', x, flags=re.IGNORECASE).strip() if 'analyst' in x.lower() 
    else x
)
df['job_title'] = df['job_title'].str.capitalize()

In [141]:
df['job_title'].head(10)

0     Buyer - procurement analyst
1               Inventory analyst
2              Power bi developer
3    Senior erp technical analyst
4            Bi analytics analyst
5                      Sr analyst
6                Business analyst
7                Business analyst
8         Senior business analyst
9             Senior data analyst
Name: job_title, dtype: object

In [157]:
# transforming location column
provinces = ['AB','BC','MB','NB','NL','NS','ON','PE','QC','SK','YT','NU','NT']

# Regex to capture everything up to and including the province
pattern1 = r'^(.*?\b(?:' + '|'.join(provinces) + r')\b)'

df['location'] = df['location'].apply(lambda x: re.match(pattern1, x).group(1).strip() if re.search(pattern1, x) else x)

pattern2 = r'([A-Za-z\s]+,\s*[A-Z]{2})$'
df['location'] = df['location'].apply(lambda x: re.search(pattern2, x).group(1).strip() if re.search(pattern2, x) else x)

df['location'] = df['location'].str.title()

In [159]:
df['location'].head(10)

0          Navin, Mb
1    Mount Pearl, Nl
2        Markham, On
3         Ottawa, On
4       Edmonton, Ab
5       Waterloo, On
6             Al, Qc
7        Toronto, On
8        Calgary, Ab
9       Waterloo, On
Name: location, dtype: object

In [147]:
# transforming job_type
df['job_type'] = df['job_type'].apply(lambda x: x.lstrip('-').split(',')[0].strip() if isinstance(x, str) else 'Not specified')
df['job_type'] = df['job_type'].str.capitalize()

In [149]:
df['job_type'].head(15)

0         Permanent
1         Full-time
2         Permanent
3          Contract
4         Permanent
5         Full-time
6         Temporary
7         Temporary
8         Full-time
9         Full-time
10        Full-time
11        Full-time
12        Full-time
13        Permanent
14    Not specified
Name: job_type, dtype: object

In [151]:
# transforming work_location

df['work_location'] = df['work_location'].apply(lambda x: x.strip().split(' ')[0].strip() if isinstance(x, str) else 'Not specified')
df['work_location'] = df['work_location'].replace('In','In person')
df['work_location'] = df['work_location'].replace('Not','Not specified')

df['work_location'] = df['work_location'].str.capitalize()

In [167]:
df['work_location'].head(10)

0        In person
1        In person
2        In person
3        In person
4        In person
5    Not specified
6        In person
7    Not specified
8    Not specified
9    Not specified
Name: work_location, dtype: object

In [247]:
df.drop(columns='skills_required', inplace=True)
# importing scrapedindeed file to python
df = pd.read_excel('/Users/mayanksinghrawat/Desktop/Projects/Indeed Analysis/scrapedindeed.xlsx')

In [249]:
# transforming salary column (taking avaerage of the salary)
import numpy as np

# Define function to extract average and convert to annual
def convert_to_annual(salary_text):
    if not isinstance(salary_text, str):  # skip NaN/None values
        return np.nan

    salary_text = salary_text.lower()
    numbers = re.findall(r'\$?[\d,]+', salary_text)
    if not numbers:
        return np.nan
    salaries = [int(num.replace('$', '').replace(',', '')) for num in numbers]
    avg_salary = sum(salaries) / len(salaries)

    # Convert based on time unit
    if 'per month' in salary_text or 'a month' in salary_text:
        return avg_salary * 12
    elif 'per week' in salary_text or 'a week' in salary_text:
        return avg_salary * 52
    elif 'per hour' in salary_text or 'an hour' in salary_text or 'hourly' in salary_text:
        return avg_salary * 40 * 52  # considering 40hours per week
    elif 'per year' in salary_text or 'a year' in salary_text:
        return avg_salary

    else:
        return np.nan   # optional: handle hourly or unknowns

# Apply function to entire salary column
df['salary'] = df['salary'].apply(convert_to_annual)

In [233]:
df.shape

(2264, 8)

In [251]:
df.head(10)

Unnamed: 0,job_role_category,job_title,company,location,salary,job_type,work_location,job_url
0,data analyst,Buyer - procurement analyst,Nexom Inc.,"Navin, Mb",54500.0,Permanent,In person,https://ca.indeed.com/cmp/Nexom-Inc.?campaigni...
1,data analyst,Inventory analyst,Northern Mining Equipment,"Mount Pearl, Nl",90000.0,Full-time,In person,https://ca.indeed.com/cmp/Northern-Mining-Equi...
2,data analyst,Power bi developer,AMPHENOL CANADA CORP,"Markham, On",95000.0,Permanent,In person,https://ca.indeed.com/cmp/Amphenol?campaignid=...
3,data analyst,Senior erp technical analyst,Donna Cona,"Ottawa, On",95000.0,Contract,In person,https://ca.indeed.com/cmp/Donna-Cona?campaigni...
4,data analyst,Bi analytics analyst,The Good Samaritan Society (Lutheran Social Se...,"Edmonton, Ab",44376.5,Permanent,In person,https://ca.indeed.com/cmp/The-Good-Samaritan-S...
5,data analyst,Sr analyst,Definity Financial Corporation,"Waterloo, On",111050.0,Full-time,Not specified,https://ca.indeed.com/cmp/Definity-1?campaigni...
6,data analyst,Business analyst,APPTOZA INC.,"Al, Qc",111050.0,Temporary,In person,https://ca.indeed.com/cmp/Apptoza-Inc?campaign...
7,data analyst,Business analyst,Compest Solutions Inc,"Toronto, On",130000.0,Temporary,Not specified,https://ca.indeed.com/cmp/Compest-Solutions-In...
8,data analyst,Senior business analyst,Ripple Property Management,"Calgary, Ab",120000.0,Full-time,Not specified,https://ca.indeed.com/cmp/Ripple-Property-Mana...
9,data analyst,Senior data analyst,Definity Financial Corporation,"Waterloo, On",98000.0,Full-time,Not specified,https://ca.indeed.com/cmp/Definity-1?campaigni...


In [253]:
df.describe()

Unnamed: 0,salary
count,1717.0
mean,96509.017327
std,32624.37456
min,22589.0
25%,78234.0
50%,94275.0
75%,111050.0
max,269000.0


In [255]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2264 entries, 0 to 2263
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   job_role_category  2264 non-null   object 
 1   job_title          2264 non-null   object 
 2   company            2264 non-null   object 
 3   location           2264 non-null   object 
 4   salary             1717 non-null   float64
 5   job_type           2264 non-null   object 
 6   work_location      2264 non-null   object 
 7   job_url            2264 non-null   object 
dtypes: float64(1), object(7)
memory usage: 141.6+ KB


In [259]:
df.to_excel('/Users/mayanksinghrawat/Desktop/Projects/Indeed Analysis/cleanedindeed.xlsx', index = False)