In [1]:
import json
import sqlite3
from enum import Enum
from typing import Optional
from urllib.parse import quote_plus, urlencode, urljoin

import httpx
import yaml
from bs4 import BeautifulSoup
from pydantic import BaseModel, Field, PrivateAttr, field_validator

In [2]:
class JobCard(BaseModel):
    """One job listing: its title, company, location and URL (all required strings)."""
    # NOTE(review): scrape_job_cards builds plain dicts keyed "link" rather than
    # "url", so those dicts do not validate as JobCard as-is — confirm how this
    # model is meant to be populated.
    title: str
    company: str
    location: str
    url: str

class ExperienceLevel(str, Enum):
    """Experience levels accepted by ``Config.experience_levels``.

    Members are ``str``-valued, so each one compares equal to its lowercase
    string value. ``Config.model_post_init`` maps every member to the numeric
    code used for the ``f_E`` filter.
    """
    ENTRY_LEVEL = "entry_level"
    ASSOCIATE = "associate"
    MID_SENIOR = "mid_senior"
    DIRECTOR = "director"
    EXECUTIVE = "executive"

class Salary(str, Enum):
    """Salary brackets accepted by ``Config.salary``.

    Members are ``str``-valued (``"100k"`` ... ``"200k"``).
    ``Config.model_post_init`` maps every member to the code used for the
    ``f_SB2`` filter.
    """
    # NOTE(review): presumably each bracket is a minimum-salary threshold —
    # confirm against the search filter's actual semantics.
    S100K = "100k"
    S120K = "120k"
    S140K = "140k"
    S160K = "160k"
    S180K = "180k"
    S200K = "200k"


class Config(BaseModel):
    """Search settings for a LinkedIn job query, plus the filter codes derived from them.

    The public fields are what a user supplies (for example through a config
    file). After validation, ``model_post_init`` translates them into the
    private ``_f_*`` attributes, which hold the string codes sent as the
    ``f_E``, ``f_TPR``, ``f_WT`` and ``f_SB2`` query parameters. A private
    attribute left at ``None`` means "no such filter".
    """

    keywords: str = Field(
        ...,
        description="Keywords to search for jobs, just as you would type them in the LinkedIn search bar"
    )
    location: str = "United States"
    # Optional override for the search endpoint URL.
    base_url: Optional[str] = None
    # Optional because the default is None (no time filter); `ge=1` makes the
    # lower bound inclusive, matching the range stated in the description.
    time_since_post: Optional[int] = Field(
        None,
        ge=1,
        le=2592000,
        description="Time since post in seconds, must be between 1 and 2592000 (30 days)"
    )
    remote: bool = False
    max_results: int = 10
    experience_levels: Optional[list[ExperienceLevel]] = None
    salary: Optional[Salary] = None

    _f_E: Optional[str] = PrivateAttr(default=None)
    _f_TPR: Optional[str] = PrivateAttr(default=None)
    # Kept as a string like every other filter code.
    # NOTE(review): "1" is sent whenever `remote` is False — presumably the
    # on-site code, which would exclude other workplace types by default;
    # confirm this is intended rather than "no filter" (None).
    _f_WT: Optional[str] = PrivateAttr(default="1")
    # Must be declared: without it the attribute only exists when a salary is
    # configured, and reading it otherwise raises AttributeError.
    _f_SB2: Optional[str] = PrivateAttr(default=None)

    def model_post_init(self, __context):
        """Derive the private filter codes from the validated public fields."""
        level_map = {
            ExperienceLevel.ENTRY_LEVEL: "1",
            ExperienceLevel.ASSOCIATE: "2",
            ExperienceLevel.MID_SENIOR: "3",
            ExperienceLevel.DIRECTOR: "4",
            ExperienceLevel.EXECUTIVE: "5",
        }
        salary_map = {
            Salary.S100K: "4",
            Salary.S120K: "5",
            Salary.S140K: "6",
            Salary.S160K: "7",
            Salary.S180K: "8",
            Salary.S200K: "9",
        }

        if self.experience_levels:
            # Several levels are sent as one comma-separated value.
            self._f_E = ",".join(level_map[level] for level in self.experience_levels)
        if self.time_since_post is not None:
            self._f_TPR = f"r{self.time_since_post}"
        if self.remote:
            self._f_WT = "2"
        if self.salary is not None:
            # Index rather than .get(): a Salary member missing from the map
            # should fail loudly instead of silently dropping the filter.
            self._f_SB2 = salary_map[self.salary]

    @field_validator("keywords")
    @classmethod
    def validate_keywords(cls, v):
        """Reject empty or whitespace-only keywords; return the value unchanged.

        Raises:
            ValueError: If ``v`` contains no non-whitespace characters.
        """
        if not v or not v.strip():
            raise ValueError("Keywords cannot be empty")
        return v

In [None]:
# NOTE(review): fallback endpoint assumed from the LinkedIn-specific filters
# used below; confirm it is the page this notebook is meant to scrape.
_DEFAULT_SEARCH_URL = "https://www.linkedin.com/jobs/search"


def build_search_url(config: "Config", start=0, base_url: Optional[str] = None):
    """Build the job-search URL for one request.

    Args:
        config: Search settings. Its ``keywords`` and ``location`` fields and
            its private ``_f_E``/``_f_TPR``/``_f_WT``/``_f_SB2`` filter codes
            become query parameters.
        start: Value sent as the ``start`` query parameter (presumably the
            result offset used for paging — confirm).
        base_url: Endpoint to query. When omitted, ``config.base_url`` is used
            if the config defines a non-empty one, otherwise a built-in default.

    Returns:
        The endpoint, a ``?``, and the URL-encoded query string. Filters whose
        value is ``None`` are left out of the query.
    """
    endpoint = base_url or getattr(config, "base_url", None) or _DEFAULT_SEARCH_URL

    candidates = {
        "keywords": config.keywords,
        "location": config.location,
        "start": start,
        "f_E": config._f_E,
        "f_TPR": config._f_TPR,
        "f_WT": config._f_WT,
        # getattr: this attribute may be absent when no salary was configured.
        "f_SB2": getattr(config, "_f_SB2", None),
    }
    # urlencode() would serialise None as the literal text "None", so unset
    # filters have to be removed rather than passed through.
    query = {key: value for key, value in candidates.items() if value is not None}

    return endpoint + "?" + urlencode(query, quote_via=quote_plus)

In [4]:
def load_config(file_path: str) -> Config:
    """Read a JSON file and validate its contents as a ``Config``.

    Args:
        file_path: Path to a JSON document whose top-level object holds the
            ``Config`` fields.

    Returns:
        The validated ``Config`` instance.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        pydantic.ValidationError: If the data does not satisfy ``Config``.
    """
    # Without an explicit encoding, open() falls back to the platform default
    # and can mis-decode non-ASCII text in a UTF-8 file (e.g. on Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        config_data = json.load(file)
    return Config(**config_data)

In [5]:
# Load the search settings once; the fetch cell further down reads this
# module-level `config`.
config = load_config('./config_example.json')

In [None]:
def _card_text(card, selector):
    """Return the stripped text of the first ``selector`` match in ``card``, or None."""
    node = card.select_one(selector)
    return node.get_text(strip=True) if node is not None else None


def scrape_job_cards(html):
    """Extract job listings from a search-results HTML page.

    Args:
        html: Markup of the results page.

    Returns:
        A list of dicts with keys ``title``, ``company``, ``location`` and
        ``link``, one per ``div.job-card-container`` element that contains all
        four pieces. Cards missing any of them are skipped, so one malformed
        card does not abort the whole page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    jobs = []

    for card in soup.find_all('div', class_='job-card-container'):
        title = _card_text(card, 'h3')
        company = _card_text(card, 'h4')
        # NOTE(review): BEM-style class names normally use a double underscore
        # (`job-search-card__location`); confirm this selector against the
        # live markup.
        location = _card_text(card, '.job-search-card_location')
        anchor = card.select_one('a')
        href = anchor.get('href') if anchor is not None else None

        if title is None or company is None or location is None or not href:
            continue

        # Drop the query string, then resolve against the site root; urljoin
        # leaves an already-absolute href intact instead of double-prefixing it.
        job_link = urljoin("https://www.linkedin.com", href.split('?')[0])
        jobs.append({
            "title": title,
            "company": company,
            "location": location,
            # NOTE(review): JobCard names this field "url"; the key stays
            # "link" here so existing consumers of these dicts keep working.
            "link": job_link
        })

    return jobs

In [None]:
# Fetch one page of search results and parse the job cards out of it.
headers = {"User-Agent": "Mozilla/5.0"}
# Take the cap from the loaded config instead of repeating its default here.
max_results = config.max_results

url = build_search_url(config)
# httpx does not follow redirects unless asked to.
response = httpx.get(url, headers=headers, follow_redirects=True)
# Fail loudly on a non-2xx response instead of parsing an error page into an
# empty job list.
response.raise_for_status()

jobs = scrape_job_cards(response.text)[:max_results]