In [1]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Maps a semester name to (offset added to the year, two-digit suffix) used to
# build the `p_term` query value. Fall is filed under the following year's number.
_TERM_PARTS = {
    "Fall": (1, "10"),
    "Spring": (0, "20"),
    "Summer": (0, "30"),
}


def _term_code(YEAR, SEM):
    """Return the `p_term` code for a semester, e.g. (2024, "Fall") -> "202510"."""
    try:
        year_offset, suffix = _TERM_PARTS[SEM]
    except KeyError:
        raise ValueError(
            f"Unknown semester {SEM!r}; expected one of {sorted(_TERM_PARTS)}"
        ) from None
    return f"{int(YEAR) + year_offset}{suffix}"


def _parse_course_row(course):
    """Extract one course's fields from its <tr> element, in CSV column order."""
    crn_cell = course.find("td", class_="cls-crn")
    crn_link = crn_cell.find("a")["href"]
    crn = crn_cell.text
    # Keep only the first two space-separated tokens of the course cell.
    course_code = " ".join(course.find("td", class_="cls-crs").text.split(" ")[:2])
    title = course.find("td", class_="cls-ttl").text
    instructors = "; ".join(
        div.text for div in course.find("td", class_="cls-ins").find_all("div")
    )
    meeting_block = course.find("td", class_="cls-mtg").find("div", class_="mtg-clas")
    # A row without a "mtg-clas" block gets empty meeting text instead of
    # crashing on None (presumably courses with no scheduled meeting — confirm).
    meeting_divs = meeting_block.find_all("div") if meeting_block is not None else []
    meetings = "\n".join(div.text for div in meeting_divs)
    credit_text = course.find("td", class_="cls-crd").text
    return [crn, crn_link, course_code, title, instructors, meetings, credit_text]


def scrape_sem_to_csv(YEAR, SEM, timeout=120):
    """Scrape one semester of the Rice course catalog and save it as a CSV.

    Downloads the course listing for the given term from courses.rice.edu,
    parses every row of the results table and writes the result to
    ``./schedule/{SEM} {YEAR}.csv`` (the directory is created if missing).
    Author's note: works from Summer 2004 onwards.

    Parameters
    ----------
    YEAR : int
        Year of the semester as used in the output file name.
    SEM : str
        One of "Fall", "Spring" or "Summer".
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Raises
    ------
    ValueError
        If ``SEM`` is not a recognised semester name.
    requests.RequestException
        If the request fails, times out or returns an HTTP error status.
    RuntimeError
        If the response does not contain the course results table.
    """
    term = _term_code(YEAR, SEM)
    URL = f"https://courses.rice.edu/courses/!SWKSCAT.cat?p_action=QUERY&p_term={term}&p_ptrm=&p_crn=&p_onebar=%2B&p_mode=OR&p_subj_cd=&p_subj=&p_dept=&p_school=&p_spon_coll=&p_df=&p_insm=&p_submit=&as_ffc_field="
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("table", class_="table-condensed")
    if table is None:
        raise RuntimeError(
            f"No course table found in the response for {SEM} {YEAR} (term {term})"
        )
    # The first <tr> is skipped, as in the original (treated as the header row).
    data = [_parse_course_row(course) for course in table.find_all("tr")[1:]]
    df = pd.DataFrame(data, columns=['CRN', 'URL', 'Course', 'Title', 'Instructors', 'Course Meeting', 'Credits'])
    print(f"Scraped {len(df)} courses for the {SEM} {YEAR} semester")
    os.makedirs("./schedule", exist_ok=True)
    df.to_csv(f"./schedule/{SEM} {YEAR}.csv", index=False)

In [3]:
# Scrape the three 2024 terms in order, pausing 30 seconds between requests
# (no pause before the first one or after the last one).
for position, semester in enumerate(("Fall", "Spring", "Summer")):
    if position > 0:
        time.sleep(30)
    scrape_sem_to_csv(2024, semester)

Scraped 4539 courses for the Fall 2024 semester
Scraped 4285 courses for the Spring 2024 semester
Scraped 1239 courses for the Summer 2024 semester


In [None]:
# Seconds to wait before every request. It grows by BACKOFF_STEP after each
# failure and is never reset, so later terms keep the slower pace.
t = 20
BACKOFF_STEP = 20
# Stop retrying a term after this many failures instead of looping forever.
MAX_ATTEMPTS = 10

for yr in range(2021, 2022):
    for sem in ("Fall", "Spring", "Summer"):
        for attempt in range(1, MAX_ATTEMPTS + 1):
            # Wait before every attempt, including the first, to throttle requests.
            time.sleep(t)
            try:
                scrape_sem_to_csv(yr, sem)
            except Exception as err:
                # `Exception` (not a bare except) lets KeyboardInterrupt through,
                # so interrupting the kernel actually stops the cell.
                print(f"Attempt {attempt}/{MAX_ATTEMPTS} for {sem} {yr} failed: {err!r}")
                if attempt == MAX_ATTEMPTS:
                    raise
                t += BACKOFF_STEP
            else:
                break