In [None]:
import json
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scheme and host that every department listing URL is built from.
BASE_URL = "https://catalog.unc.edu"
# Department codes to scrape; each is lower-cased when placed in the URL path.
DEPT_CODES = ["COMP", "BIOL"]
# One listing URL per department code, in the form <BASE_URL>/courses/<code>/.
# The trailing empty segment in the join produces the final slash.
DEPT_URLS = [
    "/".join((BASE_URL, "courses", dept.lower(), ""))
    for dept in DEPT_CODES
]

def extract_requisites_from_department(dept_url, timeout=30):
    """Scrape one department listing page and collect requisites text per course.

    Args:
        dept_url: URL of the department course-listing page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.

    Returns:
        A dict mapping each course code (the text of the first ``<strong>``
        in the course header, stripped, with trailing periods removed) to
        the stripped text of that course's requisites span. Courses that
        have no requisites span are omitted.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(dept_url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page and
    # silently returning an empty mapping.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    course_reqs = {}
    for block in soup.find_all("div", class_="courseblock"):
        # A multi-word class_ string is matched against the exact value of
        # the class attribute, so the class order here is significant.
        header = block.find("div", class_="cols noindent")
        if header is None:
            continue

        title = header.find("strong")
        if title is None:
            continue

        course_id = title.text.strip().rstrip(".")
        # Only headers whose text contains a space are treated as course
        # codes; anything else is skipped.
        if " " not in course_id:
            continue

        # Find the requisites block
        req_span = block.find(
            "span", class_="text detail-requisites margin--default"
        )
        if req_span:
            course_reqs[course_id] = req_span.text.strip()

    return course_reqs

# Extract requisites from every configured department page. Results are
# merged into one mapping; if two pages yield the same course code, the
# entry from the later page replaces the earlier one.
all_reqs = {}
for url in DEPT_URLS:
    dept_reqs = extract_requisites_from_department(url)
    all_reqs.update(dept_reqs)

# Save to JSON. The output directory is created first so that a missing
# "output" directory does not abort the run with FileNotFoundError.
output_path = Path("output/requisites.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(all_reqs, f, indent=2, ensure_ascii=False)

print("✅ Saved to requisites.json")


✅ Saved to requisites.json
