In [3]:
!python -m pip install requests beautifulsoup4 pandas openpyxl

Collecting requests
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting beautifulsoup4
  Using cached beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting pandas
  Using cached pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting openpyxl
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl.metadata (37 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.6.2-py3-none-any.whl.metadata (6.6 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
Collecting soupsieve>=1.6.1 (from beautifulsoup4)
  Using cached soupsieve-2.8.1-py3-none-any.whl.metadata (4.6 kB)
Collecting typing-extensions>=4.0.0 (fro

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Root URL of the page that the following cells download and parse.
BASE_URL: str = "https://realpython.github.io/fake-jobs/"

# Reaching this line means the imports at the top of the cell all resolved.
print("Imports OK. Base URL set to:", BASE_URL)


Imports OK. Base URL set to: https://realpython.github.io/fake-jobs/


In [None]:
# Download the listing page. requests applies no timeout by default, so an
# unresponsive server would block this cell forever; cap the wait at 10 s
# (applies to both connecting and reading).
response = requests.get(BASE_URL, timeout=10)
print("Status code:", response.status_code)
# Abort on 4xx/5xx responses instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")
# soup.title is None when the document has no <title>; fall back to a
# placeholder rather than failing with AttributeError.
page_title = soup.title.text.strip() if soup.title is not None else "N/A"
print("Page title:", page_title)


Status code: 200
Page title: Fake Python


In [6]:
# Narrow the search to the element with id "ResultsContainer", then collect
# every <div class="card-content"> inside it (one per job posting).
results = soup.find(id="ResultsContainer")
job_cards = results.find_all(name="div", class_="card-content")

print("Number of job cards found:", len(job_cards))

# Preview the first card: heading, sub-heading and location paragraph,
# each printed on its own line with surrounding whitespace stripped.
first_card = job_cards[0]
preview_fields = (
    first_card.h2,
    first_card.h3,
    first_card.find("p", class_="location"),
)
for field in preview_fields:
    print(field.get_text(strip=True))


Number of job cards found: 100
Senior Python Developer
Payne, Roberts and Davis
Stewartbury, AA


In [7]:
MISSING = "N/A"  # placeholder stored for any field a card does not provide


def _text_or_missing(elem):
    """Return the whitespace-stripped text of ``elem``, or ``MISSING`` when it is ``None``."""
    return elem.get_text(strip=True) if elem is not None else MISSING


def parse_job_card(card):
    """Extract the fields of a single job posting from its card element.

    Parameters
    ----------
    card
        An element exposing ``find`` (here: one entry of ``job_cards``).

    Returns
    -------
    dict
        Keys "Job Title", "Company", "Location", "Date Posted" and
        "Apply Link". Any value that cannot be found is ``MISSING``.
    """
    date_elem = card.find("time")
    # A missing <time> tag and a <time> tag with an absent or blank
    # ``datetime`` attribute are treated alike, so the column never ends up
    # holding an empty string next to "N/A" values.
    date_posted = date_elem.get("datetime", "").strip() if date_elem is not None else ""

    # Select the anchor whose text contains "Apply" and read its target.
    apply_elem = card.find("a", string=lambda t: t and "Apply" in t)
    apply_link = apply_elem.get("href", MISSING) if apply_elem is not None else MISSING

    return {
        "Job Title": _text_or_missing(card.find("h2", class_="title")),
        "Company": _text_or_missing(card.find("h3", class_="company")),
        "Location": _text_or_missing(card.find("p", class_="location")),
        "Date Posted": date_posted or MISSING,
        "Apply Link": apply_link,
    }


# One record per card, in page order.
jobs_data = [parse_job_card(card) for card in job_cards]

len(jobs_data), jobs_data[0]


(100,
 {'Job Title': 'Senior Python Developer',
  'Company': 'Payne, Roberts and Davis',
  'Location': 'Stewartbury, AA',
  'Date Posted': '2021-04-08',
  'Apply Link': 'https://realpython.github.io/fake-jobs/jobs/senior-python-developer-0.html'})

In [8]:
# Turn the list of per-job dicts into a table: one row per job, one column
# per dict key.
df = pd.DataFrame(data=jobs_data)
print(df.shape)  # (rows, columns)
df.head(n=5)


(100, 5)


Unnamed: 0,Job Title,Company,Location,Date Posted,Apply Link
0,Senior Python Developer,"Payne, Roberts and Davis","Stewartbury, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/se...
1,Energy engineer,Vasquez-Davidson,"Christopherville, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/en...
2,Legal executive,"Jackson, Chambers and Levy","Port Ericaburgh, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/le...
3,Fitness centre manager,Savage-Bradley,"East Seanview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/fi...
4,Product manager,Ramirez Inc,"North Jamieview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/pr...


In [9]:
# Destination workbook, written relative to the notebook's working directory.
output_file = "job_postings.xlsx"

# Write the table without the index column; the writer is closed (and the
# file saved) when the with-block exits.
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    df.to_excel(writer, index=False)

print("Saved to:", output_file)


Saved to: job_postings.xlsx
