In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import re

# Indeed advanced-search results URL: all words "python" (as_and), location
# "new york" (l), radius 50, and 50 results per page (limit=50).
URL = "https://www.indeed.com/jobs?as_and=python&as_phr=&as_any=&as_not=&as_ttl=&as_cmp=&jt=all&st=&salary=&radius=50&l=new+york&fromage=any&limit=50&sort=&psf=advsrch&from=advancedsearch"

def get_last_page():
    """Return the highest page number linked from the search pagination bar.

    Fetches ``URL``, locates the ``div`` with class ``pagination`` and reads
    the numeric labels of the anchors inside it.

    Returns:
        The largest numeric page label found, or ``1`` when the page has no
        pagination bar or no numeric page links (a single page of results).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    result = requests.get(URL)
    # Fail loudly on an HTTP error instead of parsing an error page as results.
    result.raise_for_status()
    soup = BeautifulSoup(result.text, "html.parser")

    pagination = soup.find("div", {"class": "pagination"})
    if pagination is None:
        # No pagination bar at all: treat the results as a single page.
        return 1

    # Keep only purely numeric labels so navigation links (e.g. "Next") are
    # ignored wherever they appear, rather than assuming they come last.
    labels = (link.get_text(strip=True) for link in pagination.find_all("a"))
    pages = [int(label) for label in labels if label.isdigit()]

    return max(pages, default=1)

def extract_job(html):
    """Build a job record from one search-result card.

    Args:
        html: The parsed element for a single job card. It must support
            ``find(name, attrs)`` and item access for its own attributes
            (``data-jk``).

    Returns:
        A dict with the keys ``title``, ``company``, ``location``,
        ``minimum salary``, ``rating``, ``urgent hiring`` and ``link``.
        ``company``, ``minimum salary``, ``rating`` and ``urgent hiring`` are
        ``None`` when the corresponding element is absent from the card.

    Raises:
        TypeError: if the title anchor or the location element is missing
            (the lookup result ``None`` is subscripted).
    """
    title = html.find("a", {"class": "jobtitle"})["title"]

    company = html.find("span", {"class": "company"})
    if company is not None:
        # The name may be wrapped in a link; prefer the link text when present.
        company_anchor = company.find("a")
        name_node = company_anchor if company_anchor is not None else company
        # Strip in both cases so the value is free of surrounding whitespace.
        company = str(name_node.string).strip()

    location = html.find("div", {"class": "recJobLoc"})["data-rc-loc"]

    job_id = html["data-jk"]

    min_salary = html.find("span", {"class": "salaryText"})
    if min_salary is not None:
        min_salary = str(min_salary.string)

    rating = html.find("span", {"class": "ratingsContent"})
    if rating is not None:
        rating = rating.get_text(strip=True)

    urgent = html.find("td", {"class": "jobCardShelfItem urgentlyHiring"})
    if urgent is not None:
        # NOTE(review): a found "urgently hiring" marker is recorded as 0 and a
        # missing one as None; kept as-is, but confirm 0 is the intended flag.
        urgent = 0

    return {
        'title': title,
        'company': company,
        'location': location,
        'minimum salary': min_salary,
        'rating': rating,
        "urgent hiring": urgent,
        'link': f"https://www.indeed.com/viewjob?jk={job_id}"
    }

def extract_jobs(last_page, page_size=50):
    """Scrape job records from each results page.

    Args:
        last_page: Number of result pages to fetch (pages ``0`` to
            ``last_page - 1``).
        page_size: Results per page, used to compute each page's offset.
            The default matches the ``limit=50`` already present in ``URL``.

    Returns:
        A list of the dicts produced by ``extract_job``, in page order.
    """
    jobs = []
    for page in range(last_page):
        print(f"Scrapping Indeed: Page: {page}")
        # Request this page's slice of results; without an offset every
        # iteration would download the first page again.
        # NOTE(review): assumes the site paginates via a `start` result-offset
        # query parameter -- confirm against the live site.
        response = requests.get(f"{URL}&start={page * page_size}")
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
        jobs.extend(extract_job(card) for card in cards)
    return jobs

def get_indeed_jobs():
    """Find how many result pages exist, then scrape and return their jobs."""
    return extract_jobs(get_last_page())


def save_file(jobs):
    """Write the job records to ``jobs.csv`` in the current directory.

    A fixed header row is written first, followed by one row per job holding
    that job's values in dict order.

    Args:
        jobs: Iterable of dicts whose values are in the same order as the
            header columns.
    """
    header = ["title", "company", "location", "minimum salary", "rating", "urgent hiring", "link"]
    # The context manager guarantees the file is flushed and closed.
    # newline="" lets the csv module control line endings (otherwise blank
    # rows appear on Windows); UTF-8 keeps non-ASCII text writable everywhere.
    with open("jobs.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(list(job.values()) for job in jobs)

# Run the scrape once, keep the result under both names, and export it to CSV.
jobs = indeed_jobs = get_indeed_jobs()
save_file(jobs)

Scrapping Indeed: Page: 0
Scrapping Indeed: Page: 1
Scrapping Indeed: Page: 2
Scrapping Indeed: Page: 3
Scrapping Indeed: Page: 4
Scrapping Indeed: Page: 5
Scrapping Indeed: Page: 6
Scrapping Indeed: Page: 7
Scrapping Indeed: Page: 8
Scrapping Indeed: Page: 9
Scrapping Indeed: Page: 10
Scrapping Indeed: Page: 11
Scrapping Indeed: Page: 12
Scrapping Indeed: Page: 13
Scrapping Indeed: Page: 14
Scrapping Indeed: Page: 15
Scrapping Indeed: Page: 16
Scrapping Indeed: Page: 17
Scrapping Indeed: Page: 18
Scrapping Indeed: Page: 19
