Click [here](https://medium.com/@morihosseini/python-web-scraping-a-beginners-guide-to-scraping-job-listings-9d185855e7cb) to access the associated Medium article.

## Setup

In [2]:
!pip install -q requests beautifulsoup4 pandas

## Scraping a single page

In [79]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Search parameters. The per-listing values extracted further down use their
# own names, so `location` still holds the search location after the loop.
search_query = "data scientist"
location = "New York City, NY"

# Search endpoint. The query string is passed through `params` so that spaces,
# commas and any other reserved characters are URL-encoded by requests.
url = "http://www.jobinventory.com/search"


def _raw_text(node, tag, css_class):
    """Return the text of the first `tag` with class `css_class` under `node`.

    Returns an empty string when no such element exists, so a listing with a
    missing field produces a blank cell instead of an AttributeError.
    """
    found = node.find(tag, class_=css_class)
    return found.text if found is not None else ""


# Fetch the results page. The timeout keeps the cell from hanging forever on an
# unresponsive server; raise_for_status() turns an HTTP error into an explicit
# exception instead of a silently empty table.
response = requests.get(
    url,
    params={"q": search_query, "l": location},
    timeout=30,
)
response.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Every search result is an <li class="resultBlock"> element.
job_listings = soup.find_all("li", class_="resultBlock")

# Column buffers, filled in parallel (one entry per listing).
titles = []
companies = []
locations = []
descriptions = []

for job in job_listings:
    titles.append(_raw_text(job, "div", "title").strip())
    companies.append(_raw_text(job, "span", "company").strip())
    # The "state" text is split on a non-breaking-space-delimited dash and only
    # the last piece is kept as the job's location.
    job_location = _raw_text(job, "div", "state").split("\xa0-\xa0")[-1].strip()
    locations.append(job_location)
    descriptions.append(_raw_text(job, "div", "description").strip())

# Clean up the job descriptions using regular expressions
regex = re.compile(r"\s+")


def _clean_description(raw):
    """Collapse whitespace runs and keep the second " - "-separated segment.

    When the text contains no " - " separator there is no second segment, so
    the whole whitespace-collapsed text is returned instead of raising
    IndexError.
    """
    parts = regex.sub(" ", raw).split(" - ")
    return parts[1] if len(parts) > 1 else parts[0]


clean_descriptions = [_clean_description(d) for d in descriptions]

# Create a Pandas DataFrame to store the job details
df = pd.DataFrame(
    {
        "Title": titles,
        "Company": companies,
        "Location": locations,
        "Description": clean_descriptions,
    }
)

# Export the DataFrame to a CSV file
df.to_csv("job_listings.csv", index=False)

print("Scraping complete! The results are saved in 'job_listings.csv'.")

df

Scraping complete! The results are saved in 'job_listings.csv'.


Unnamed: 0,Title,Company,Location,Description
0,Lead Data Scientist,Tiro,"New York, NY",Lead Data Scientist Enigma is seekingand visua...
1,Data Scientist,Smith Hanley Associates,"New York, NY",Title: Data Scientist Location: Newengineering...
2,Data Scientist,Averity,"New York, NY",like to become a Data Scientist at a global in...
3,Data Scientist,Revelio Labs,"New York, NY",for: Revelio Labs is looking for a creative Se...
4,Lead Data Scientist,Thomas,"New York, NY",looking for a Lead Data Scientist to lead and ...
5,Data Scientist,Eliassen Group,"Jersey City, NJ",The client is seeking a Neo4j data scientist/e...
6,Sr. Data Scientist,CVS,"New York, NY","hiring for the following role in New York, NY:..."
7,Data Scientist,E-Frontiers,"New York, NY",Data Scientist The Company is aExperience in a...
8,Staff Data Scientist,Harnham,"New York, NY",Staff Data Scientist AdTech Companyimplement. ...
9,"Data Scientist, Modeling",Gro Intelligence,"New York, NY","addresses agriculture, food, and our climate o..."


## Scraping multiple pages

In [81]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Search parameters. These must stay untouched while paginating: every page
# request reuses them, so the per-listing location extracted below is stored
# under a different name (`job_location`).
search_query = "data scientist"
location = "New York City, NY"

# Construct the base URL
base_url = "http://www.jobinventory.com"

# Column buffers, filled in parallel (one entry per listing, across all pages).
titles = []
companies = []
locations = []
descriptions = []

# Upper bound on the number of result pages requested.
max_pages = 5


# Helper defined inside this cell so the cell runs on its own.
def _raw_text(node, tag, css_class):
    """Return the text of the first `tag` with class `css_class` under `node`.

    Returns an empty string when no such element exists, so a listing with a
    missing field produces a blank cell instead of an AttributeError.
    """
    found = node.find(tag, class_=css_class)
    return found.text if found is not None else ""


for page_num in range(1, max_pages + 1):
    # The query string goes through `params` so that spaces, commas and other
    # reserved characters are URL-encoded by requests.
    # NOTE(review): `start` is sent as 1, 2, 3, ... exactly as before; whether
    # the site treats it as a page index or a result offset is not verifiable
    # from this notebook - confirm against the site.
    url = f"{base_url}/search"
    response = requests.get(
        url,
        params={"q": search_query, "l": location, "start": page_num},
        timeout=30,  # do not hang forever on an unresponsive server
    )

    # An error status means there is nothing useful to parse; stop paginating
    # but keep whatever earlier pages already produced.
    if not response.ok:
        print(f"Stopping at page {page_num}: HTTP {response.status_code}")
        break

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Every search result is an <li class="resultBlock"> element.
    job_listings = soup.find_all("li", class_="resultBlock")

    # If there are no job listings on the current page, we have reached the end
    # of the results
    if not job_listings:
        break

    for job in job_listings:
        titles.append(_raw_text(job, "div", "title").strip())
        companies.append(_raw_text(job, "span", "company").strip())
        # The "state" text is split on a non-breaking-space-delimited dash and
        # only the last piece is kept as the job's location.
        job_location = (
            _raw_text(job, "div", "state").split("\xa0-\xa0")[-1].strip()
        )
        locations.append(job_location)
        descriptions.append(_raw_text(job, "div", "description").strip())

# Clean up the job descriptions using regular expressions
regex = re.compile(r"\s+")


def _clean_description(raw):
    """Collapse whitespace runs and keep the second " - "-separated segment.

    When the text contains no " - " separator there is no second segment, so
    the whole whitespace-collapsed text is returned instead of raising
    IndexError.
    """
    parts = regex.sub(" ", raw).split(" - ")
    return parts[1] if len(parts) > 1 else parts[0]


clean_descriptions = [_clean_description(d) for d in descriptions]

# Create a Pandas DataFrame to store the job details
df = pd.DataFrame(
    {
        "Title": titles,
        "Company": companies,
        "Location": locations,
        "Description": clean_descriptions,
    }
)

# Export the DataFrame to a CSV file
df.to_csv("job_listings_multiple.csv", index=False)

print("Scraping complete! Check 'job_listings_multiple.csv' for the results.")

df

Scraping complete! Check 'job_listings_multiple.csv' for the results.


Unnamed: 0,Title,Company,Location,Description
0,Lead Data Scientist,Tiro,"New York, NY",Lead Data Scientist Enigma is seekingand visua...
1,Data Scientist,Smith Hanley Associates,"New York, NY",Title: Data Scientist Location: Newengineering...
2,Data Scientist,Averity,"New York, NY",like to become a Data Scientist at a global in...
3,Data Scientist,Revelio Labs,"New York, NY",for: Revelio Labs is looking for a creative Se...
4,Lead Data Scientist,Thomas,"New York, NY",looking for a Lead Data Scientist to lead and ...
...,...,...,...,...
95,Senior Data Scientist,Wonder,"New York, NY",written and verbal) to collaborate with busine...
96,"Data Scientist, Product Experimentation",Captions,"New York, NY",", or a related discipline. * 3-5 years of prov..."
97,"Staff Data Scientist, Marketplace",CookUnity,"New York, NY",with engineering. * Provide mentorship and gui...
98,Sr. Product Data Scientist (NY),Philo,"New York, NY",streaming service. You'll be working closely w...
