In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time

# Input CSV listing the catalogue pages to scrape; every row is read through
# its "course_name" and "course_url" columns.
URLS_FILE = "C:/Users/Think/uci_courses.csv"
# Destination CSV for the scraped course records.
OUTPUT_FILE = "all_courses.csv"

# Collect (course_name, course_url) pairs in file order.
course_links = []
# The utf-8-sig codec drops a leading BOM, if any, so the first header name
# is read without a stray prefix.
with open(URLS_FILE, newline='', encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    course_links.extend(
        (row["course_name"], row["course_url"]) for row in reader
    )

print(f"读取到 {len(course_links)} 个课程目录链接")

# One entry per scraped field:
# (output key, CSS selector inside a course block, separator inserted between
# nested text fragments when the element's text is flattened).
_COURSE_FIELDS = (
    ("code", "span.detail-code", ""),
    ("title", "span.detail-title", ""),
    ("units", "span.detail-hours_html", ""),
    ("description", "div.courseblockextra", " "),
    ("prerequisites", "span.detail-prereqs", " "),
    ("repeatability", "span.detail-repeatability", " "),
    ("gened", "span.detail-gened", " "),
)


def _extract_text(block, selector, separator):
    """Return the stripped text of the first *selector* match in *block*, or ""."""
    tag = block.select_one(selector)
    return tag.get_text(separator, strip=True) if tag else ""


def scrape_course_page(course_name, url):
    """Fetch one catalogue page and return the courses found on it.

    Args:
        course_name: Label stored in the "course_name" field of every
            returned record and used in progress messages.
        url: Address of the page to download.

    Returns:
        A list with one dict per ``div.courseblock`` element, holding the
        keys "course_name", "code", "title", "units", "description",
        "prerequisites", "repeatability" and "gened". A field whose element
        is absent from the block is stored as "". An empty list is returned
        when the page cannot be fetched.
    """
    print(f"正在抓取 {course_name} ({url}) ...")
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        # Only request failures (connection errors, timeouts, HTTP error
        # statuses, malformed URLs) are treated as "page unavailable";
        # unrelated programming errors are allowed to surface.
        print(f"无法访问 {url} ：{e}")
        return []

    soup = BeautifulSoup(resp.text, "html.parser")
    courses = []

    for block in soup.select("div.courseblock"):
        record = {"course_name": course_name}
        for key, selector, separator in _COURSE_FIELDS:
            record[key] = _extract_text(block, selector, separator)
        courses.append(record)

    print(f" {course_name} 抓取完成，共 {len(courses)} 门课程")
    return courses

# Scrape every catalogue page in turn and pool the resulting records.
all_courses = []
for name, url in course_links:
    all_courses += scrape_course_page(name, url)
    time.sleep(1)  # pause between consecutive requests

# Column order of the output CSV; matches the keys of each scraped record.
keys = [
    "course_name",
    "code",
    "title",
    "units",
    "description",
    "prerequisites",
    "repeatability",
    "gened",
]
with open(OUTPUT_FILE, "w", newline='', encoding="utf-8-sig") as f:
    writer = csv.DictWriter(f, fieldnames=keys)
    writer.writeheader()
    for record in all_courses:
        writer.writerow(record)

print(f"\n所有课程抓取完毕，共 {len(all_courses)} 条记录，已保存到 {OUTPUT_FILE}")
