## LinkedIn job search

In [1]:
import os, time
from selenium import webdriver
from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from urllib.parse import urlencode

from dotenv import load_dotenv
load_dotenv()

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Start a Chrome session and open the LinkedIn login page.
driver = webdriver.Chrome()
driver.get('https://www.linkedin.com/login')
# Bare expression: as the last line of the cell it displays the page title,
# which is a quick check that the login page actually loaded.
driver.title

'LinkedIn Login, Sign in | LinkedIn'

In [None]:
# Locate both credential inputs of the login form up front.
email = driver.find_element(By.ID, 'username')
password = driver.find_element(By.ID, 'password')

# Fill each input from its environment variable (EMAIL, then PASSWORD);
# a missing variable raises KeyError.
for field, env_name in ((email, 'EMAIL'), (password, 'PASSWORD')):
    field.send_keys(os.environ[env_name])

# Submit the form through the password field.
password.submit()

In [None]:
# Open the LinkedIn Jobs landing page before running any search.
# Use exactly this URL: per the author's note, other entry URLs left the
# session stuck in errors.

url = "https://www.linkedin.com/jobs/"
driver.get(url)

In [None]:
from urllib.parse import urlencode

# Query-string filters for the job search. Keyword order is preserved, so the
# generated URL lists the parameters in exactly this sequence.
params = dict(
    keywords='Data Engineer',
    location='Berlin, Germany',
    geoId='103035651',
    f_TPR='r86400',  # Past 24 hours (86400 seconds)
    f_WT='1,2,3',  # On-site=1, Remote=2, Hybrid=3
    # NOTE(review): the original note said "Entry level and Associate" but
    # three codes are listed - confirm what 4 maps to.
    f_E='2,3,4',
    # NOTE(review): the original note said "Full-time" only; presumably
    # C is a second job type (contract?) - confirm.
    f_JT='F,C',
)

base_url = 'https://www.linkedin.com/jobs/search/'
full_url = f"{base_url}?{urlencode(params)}"

# Show the final URL for inspection, then load it in the browser.
print(full_url)
driver.get(full_url)

https://www.linkedin.com/jobs/search/?keywords=Data+Engineer&location=Berlin%2C+Germany&geoId=103035651&f_TPR=r86400&f_WT=1%2C2%2C3&f_E=2%2C3%2C4&f_JT=F%2CC


In [None]:
def scroll_to_load_jobs(driver, container_locator=None, pause=2, max_scrolls=None):
    """Scroll the job-list container until its height stops growing.

    The container is scrolled to its bottom repeatedly; after each scroll the
    function sleeps for ``pause`` seconds and re-reads the container's
    ``scrollHeight``. Scrolling stops when the height is unchanged or when
    ``max_scrolls`` scrolls have been performed.

    Args:
        driver: Object providing ``find_element`` and ``execute_script``
            (a Selenium WebDriver).
        container_locator: Optional ``(by, value)`` pair identifying the
            scrollable container. Defaults to the class name
            ``'jobs-search-results-list'``. Pass a different locator when the
            default no longer matches the page.
        pause: Seconds to sleep after each scroll so new cards can render.
        max_scrolls: Optional cap on the number of scrolls. ``None`` (the
            default) keeps scrolling until the height stabilises.

    Returns:
        None. Any error is printed rather than raised, so callers can carry
        on with whatever cards are already present.
    """
    try:
        if container_locator is None:
            container_locator = (By.CLASS_NAME, 'jobs-search-results-list')
        job_list = driver.find_element(*container_locator)

        # Height of the scrollable content before the first scroll.
        last_height = driver.execute_script("return arguments[0].scrollHeight", job_list)

        scrolls_done = 0
        while max_scrolls is None or scrolls_done < max_scrolls:
            # Jump to the bottom of the container to trigger lazy loading.
            driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", job_list)
            scrolls_done += 1
            time.sleep(pause)

            new_height = driver.execute_script("return arguments[0].scrollHeight", job_list)

            # An unchanged height means the last scroll loaded nothing new.
            if new_height == last_height:
                break
            last_height = new_height

        print("Finished loading all jobs on current page")
    except Exception as e:
        # Best effort: report the problem and let the caller continue.
        print(f"Error during scrolling: {e}")

def _text_or_na(job_card, css_selector):
    """Return the stripped text of the first match for css_selector, or 'N/A'."""
    try:
        return job_card.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except Exception:
        # Best effort: a missing or unreadable field must not abort the card.
        # `Exception` (not a bare except) lets KeyboardInterrupt through.
        return 'N/A'


def _job_id_from_link(link):
    """Return the path segment that follows '/jobs/view/' in link, or 'N/A'."""
    _, marker, tail = link.partition('/jobs/view/')
    if not marker:
        return 'N/A'
    # Cut at the next path separator and drop any query string.
    return tail.split('/')[0].split('?')[0] or 'N/A'


def extract_job_data(job_card):
    """Extract data from a single job card.

    Each field is looked up independently; a field that cannot be read is
    recorded as the string ``'N/A'`` instead of failing the whole card.

    Args:
        job_card: Element exposing ``find_element(by, value)`` (a Selenium
            WebElement for one job card).

    Returns:
        dict with the keys ``title``, ``company``, ``location``, ``link`` and
        ``job_id`` (in that order), all string values.
    """
    data = {
        # NOTE(review): the recorded run prints every title as 'N/A', so this
        # selector presumably no longer matches the page - verify it.
        'title': _text_or_na(job_card, '.job-card-list__title strong'),
        'company': _text_or_na(job_card, '.artdeco-entity-lockup__subtitle'),
        'location': _text_or_na(job_card, '.artdeco-entity-lockup__caption li'),
    }

    # Job link
    try:
        link = job_card.find_element(By.CSS_SELECTOR, 'a.job-card-container__link').get_attribute('href')
    except Exception:
        link = None
    # get_attribute returns None when the anchor has no href; normalise it.
    data['link'] = link or 'N/A'

    # Job ID (from link)
    data['job_id'] = _job_id_from_link(data['link'])

    return data

def scrape_current_page(driver):
    """Collect one record per job card visible on the current results page.

    Waits up to 10 seconds for elements with class ``job-card-container``,
    runs ``extract_job_data`` on each card, and prints a numbered line per
    extracted job.

    Args:
        driver: Selenium WebDriver positioned on a job search results page.

    Returns:
        list of job dicts. On any error the message is printed and the
        records gathered so far are returned (possibly an empty list).
    """
    collected = []

    try:
        card_locator = (By.CLASS_NAME, 'job-card-container')

        # Block until at least one card is present in the DOM.
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(card_locator)
        )

        cards = driver.find_elements(*card_locator)
        print(f"Found {len(cards)} jobs on current page")

        for position, card in enumerate(cards, start=1):
            record = extract_job_data(card)
            if not record:
                # Nothing extracted for this card; its number is skipped.
                continue
            collected.append(record)
            print(f"  {position}. {record['title']} at {record['company']}")
    except Exception as e:
        print(f"Error scraping current page: {e}")

    return collected

def click_next_page(driver):
    """Click the "View next page" pagination button if it is usable.

    Args:
        driver: Selenium WebDriver positioned on a job search results page.

    Returns:
        True when the button was clicked and job cards were found afterwards;
        False when the button is missing, disabled, or any error occurred
        (the reason is printed).
    """
    try:
        # Try to find the next button
        next_button = driver.find_element(By.CSS_SELECTOR, 'button[aria-label="View next page"]')

        # A button can be disabled through the HTML `disabled` attribute
        # (reported by is_enabled()) or through a CSS class. get_attribute
        # returns None when there is no class attribute, hence the `or ''`.
        css_classes = next_button.get_attribute('class') or ''
        if not next_button.is_enabled() or 'disabled' in css_classes:
            print("Next button is disabled - no more pages")
            return False

        # Bring the button into view before clicking it.
        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(1)

        next_button.click()
        print("Clicked next page button")

        # Give the next page time to start rendering.
        time.sleep(3)

        # Wait for job cards to appear
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'job-card-container'))
        )

        return True
    except NoSuchElementException:
        print("Next button not found - reached last page")
        return False
    except Exception as e:
        print(f"Error clicking next page: {e}")
        return False

def scrape_all_pages(driver, max_pages=5):
    """Walk the paginated results and gather the jobs from every page.

    For each page: scroll the list, scrape the cards, then try to advance.
    Stops after ``max_pages`` pages or as soon as ``click_next_page`` returns
    False.

    Args:
        driver: Selenium WebDriver positioned on the first results page.
        max_pages: Maximum number of pages to scrape.

    Returns:
        list with the job dicts of all scraped pages, in page order.
    """
    divider = '=' * 60
    collected = []
    page_number = 1

    while page_number <= max_pages:
        print(f"\n{divider}")
        print(f"Scraping Page {page_number}")
        print(divider)

        # Load the lazily rendered cards, then read them.
        scroll_to_load_jobs(driver)
        collected += scrape_current_page(driver)

        print(f"\nTotal jobs scraped so far: {len(collected)}")

        # Advance; stop when there is no further page.
        if click_next_page(driver):
            page_number += 1
            continue

        print("\nNo more pages available")
        break

    return collected

# Scrape all pages (max 5 pages)
all_jobs = scrape_all_pages(driver, max_pages=5)

# Print summary
print(f"\n{'='*60}")
print("SCRAPING COMPLETE")
print(f"{'='*60}")
print(f"Total jobs found: {len(all_jobs)}")

# Optional: Save to CSV
import csv

# Check for results BEFORE opening the file: mode 'w' truncates on open, so
# an empty scrape would otherwise wipe a CSV saved by an earlier run.
if all_jobs:
    with open('linkedin_jobs.csv', 'w', newline='', encoding='utf-8') as f:
        # Column order follows the keys of the first record.
        writer = csv.DictWriter(f, fieldnames=list(all_jobs[0].keys()))
        writer.writeheader()
        writer.writerows(all_jobs)
    print("Data saved to linkedin_jobs.csv")
else:
    print("No jobs scraped - linkedin_jobs.csv was not written")


Scraping Page 1
Error during scrolling: Message: no such element: Unable to locate element: {"method":"css selector","selector":".jobs-search-results-list"}
  (Session info: chrome=142.0.7444.176); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7c758a235
	0x7ff7c72e2630
	0x7ff7c70716dd
	0x7ff7c70ca27e
	0x7ff7c70ca58c
	0x7ff7c711ed77
	0x7ff7c711baba
	0x7ff7c70bb0ed
	0x7ff7c70bbf63
	0x7ff7c75b5d60
	0x7ff7c75afe8a
	0x7ff7c75d1005
	0x7ff7c72fd71e
	0x7ff7c7304e1f
	0x7ff7c72eb7c4
	0x7ff7c72eb97f
	0x7ff7c72d18e8
	0x7ffb289be8d7
	0x7ffb2a0ec53c

Found 11 jobs on current page
  1. N/A at BWI GmbH
  2. N/A at Canonical
  3. N/A at ToysReloved
  4. N/A at OBI Group Holding
  5. N/A at kiresult
  6. N/A at Strava
  7. N/A at DKB | Deutsche Kreditbank AG
  8. N/A at Aiven
  9. N/A at SysEleven GmbH
  10. N/A at netgo
  11. N/A at A