In [1]:
import json
import os
import re
from datetime import datetime

import mysql.connector
import requests
from mysql.connector import IntegrityError


In [2]:
def insert_course(course_info: dict, course_json: dict):
    if course_info is None: return

    connection = mysql.connector.connect(
        host="coms-309-029.class.las.iastate.edu",
        port=3306,
        user="root",
        password="5b36fb66f5d37314",
        database="Scheduler"
    )
    cursor = connection.cursor()

    sql = (
        "INSERT INTO Course (program_identifier, num, display_name, description, credits, is_variable_credit, is_graded)" +
        "VALUES (%s, %s, %s, %s, %s, %s, %s)"
    )
    values = (
        course_info["program_identifier"],
        course_info["num"],
        course_info["displayName"],
        course_info["description"].encode("latin-1").decode("utf-8"),
        course_info["credits"],
        course_info["is_variable_credit"],
        course_info["is_graded"]
    )
    try:
        cursor.execute(sql, values)
        connection.commit()
    except IntegrityError:
        print(f"Duplicate entry: {course_info['program_identifier']} {course_info['num']}")

    year = 2024
    season = 1
    for sec in get_sections(course_json):
        try:
            cursor.callproc("addSection", (
                course_info["program_identifier"],
                course_info["num"],
                sec["section"],
                year,
                season,
                sec["is_online"],
                sec["start_time"],
                sec["end_time"],
                sec["location"],
                sec["instructor"]
            ))
            connection.commit()
        except IntegrityError:
            print(f"Duplicate entry: {sec['section']}")

    connection.commit()
    cursor.close()
    connection.close()

In [7]:
def scrape_course(program: str, num: int, sem: int = 3, timeout: float = 30.0) -> tuple[dict, dict] | None:
    """Look up one course on classes.iastate.edu and scrape its catalog entry.

    Args:
        program: Department code sent as ``selectedDepartment`` (e.g. ``"COM S"``).
        num: Course number sent as ``courseNumber``.
        sem: Term code sent as both ``defSem`` and ``selectedTerm``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        ``(course_info, course_data)`` where ``course_info`` holds the fields
        ``insert_course`` expects and ``course_data`` is the raw JSON record of
        the first search hit; or ``None`` when the search has no hits, the
        course is experimental (class number ends in ``X``), or a regex fails
        to match.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    url = "https://classes.iastate.edu/app/rest/courses/preferences"
    headers: dict = {"Content-Type": "application/json"}
    payload: str = json.dumps({
        "defSem": sem,
        "selectedTerm": sem,
        "selectedDepartment": program,
        "courseNumber": num
    })

    response: dict = requests.post(url, headers=headers, data=payload, timeout=timeout).json()
    if len(response["response"]) == 0:
        return None
    course_data: dict = response["response"][0]

    # Don't want experimental courses. Checked before the catalog fetch so no
    # request is wasted on them; endswith() is also safe on an empty string.
    if course_data["classNumber"].endswith("X"):
        return None

    catalog_data: str = requests.get(course_data["catalogUrl"], timeout=timeout).text

    # Responses of 55 characters or fewer are treated as having no catalog
    # entry (presumably an empty <courseinfo> document -- TODO confirm).
    has_catalog_entry = len(catalog_data) > 55

    try:
        # re.search returns None on no match, so .group raises AttributeError,
        # which is reported to the caller as "no course".
        return {
            "program_identifier": course_data["deptCode"],
            "num": int(re.search(r"([0-9]{1,3})[a-zA-Z]{,3}", course_data["classNumber"]).group(1)),
            "displayName": re.search(
                r"&#160;[0-9]{1,3}[A-Za-z]{,3}:(.+)</strong>", catalog_data, re.DOTALL
            ).group(1).strip() if has_catalog_entry else None,
            "description": re.search(
                r"><br />(.+)</p>", catalog_data, re.DOTALL
            ).group(1).strip() if has_catalog_entry else None,
            "credits": int(course_data["creditLow"]),
            "is_variable_credit": course_data["creditType"] == 'V',
            "is_graded": course_data["gradeType"] == ""
        }, course_data
    except AttributeError:
        return None

def convert_time(s: str):
    """Convert an ``HH:MM:SS.fff`` clock string into whole minutes since midnight.

    Seconds and fractional seconds are parsed but do not affect the result.
    """
    minutes_per_hour = 60
    parsed = datetime.strptime(s, "%H:%M:%S.%f")
    return minutes_per_hour * parsed.hour + parsed.minute


def get_sections(course_json: dict) -> list[dict]:
    """Flatten the ``sections`` of a raw course record into rows for ``addSection``.

    Args:
        course_json: Raw course record; its ``"sections"`` list is read.

    Returns:
        One dict per section with keys ``section``, ``is_online``,
        ``start_time``, ``end_time``, ``location`` and ``instructor``. Blank or
        missing values become ``None``. ``is_online`` is ``None`` when the
        delivery type is blank. Times are minutes since midnight and are
        ``None`` for online sections or when the source has no time.

    Only the first entry of each section's ``sectionTimes`` is used; a section
    with no ``sectionTimes`` yields a row without time, location or instructor
    instead of raising ``IndexError``.
    """
    sections: list[dict] = []
    for section_data in course_json.get("sections") or []:
        delivery = section_data.get("deliveryTypeDisplay") or ""
        is_online = None if delivery == "" else "online" in delivery

        meeting_times = section_data.get("sectionTimes") or []
        section_time = meeting_times[0] if meeting_times else {}

        # `or ""` keeps JSON nulls from being rendered as the text "None".
        building = section_time.get("buildingName") or ""
        room = section_time.get("roomNum") or ""
        location = f"{building} {room}".strip()
        section = (section_data.get("sectionID") or "").strip()
        instructor = (section_time.get("instrName") or "").strip()

        start_raw = section_time.get("startTime")
        stop_raw = section_time.get("stopTime")

        sections.append({
            "section": None if section == "" else section,
            "is_online": is_online,
            "start_time": None if is_online or not start_raw else convert_time(start_raw),
            "end_time": None if is_online or not stop_raw else convert_time(stop_raw),
            "location": None if location == "" else location,
            "instructor": None if instructor == "" else instructor
        })

    return sections

In [4]:
# Smoke test on a single known course. scrape_course returns None for a
# missing or experimental course, and unpacking None with `*` raises
# TypeError, so check the result before inserting.
course = scrape_course("COM S", 309)
if course is None:
    print("COM S 309 doesn't exist")
else:
    insert_course(*course)

Duplicate entry: COM S 309
Duplicate entry: 1
Duplicate entry: 2
Duplicate entry: 3


In [17]:
# Bulk import: try every course number from 100 through 699 in one department
# and insert each course that the scraper finds.
dept = "DS"
for n in range(100, 700):
    scraped = scrape_course(dept, n, sem=3)

    if scraped is not None:
        print(f"Inserting {dept} {n}")
        insert_course(*scraped)
    else:
        print(f"{dept} {n} doesn't exist")


DS 100 doesn't exist
DS 101 doesn't exist
DS 102 doesn't exist
DS 103 doesn't exist
DS 104 doesn't exist
DS 105 doesn't exist
DS 106 doesn't exist
DS 107 doesn't exist
DS 108 doesn't exist
DS 109 doesn't exist
DS 110 doesn't exist
DS 111 doesn't exist
DS 112 doesn't exist
DS 113 doesn't exist
DS 114 doesn't exist
DS 115 doesn't exist
DS 116 doesn't exist
DS 117 doesn't exist
DS 118 doesn't exist
DS 119 doesn't exist
DS 120 doesn't exist
DS 121 doesn't exist
DS 122 doesn't exist
DS 123 doesn't exist
DS 124 doesn't exist
DS 125 doesn't exist
DS 126 doesn't exist
DS 127 doesn't exist
DS 128 doesn't exist
DS 129 doesn't exist
DS 130 doesn't exist
DS 131 doesn't exist
DS 132 doesn't exist
DS 133 doesn't exist
DS 134 doesn't exist
DS 135 doesn't exist
DS 136 doesn't exist
DS 137 doesn't exist
DS 138 doesn't exist
DS 139 doesn't exist
DS 140 doesn't exist
DS 141 doesn't exist
DS 142 doesn't exist
DS 143 doesn't exist
DS 144 doesn't exist
DS 145 doesn't exist
DS 146 doesn't exist
DS 147 doesn't exist

In [None]:
# Scratch check: scrape MATH 699 and view its description after re-decoding
# the Latin-1-decoded text as UTF-8.
scrape_course("MATH", 699)[0]["description"].encode("latin-1").decode("utf-8")

In [330]:
# Scratch check: run the display-name pattern against the raw catalog
# response for MATH 699C and show the resulting match object (or None).
re.compile(r"&#160;[0-9]{1,3}[A-Za-z]{,3}:(.+)</strong>", re.DOTALL).search(
    requests.get("https://catalog.iastate.edu/ribbit/?page=getcourse.rjs&code=MATH%20699C&edition=2023-24").text
)

In [14]:
# Scratch check: fetch the raw catalog entry for CPR E 529 and re-decode the
# response text from Latin-1 to UTF-8 to inspect the result.
(
    requests.get("https://catalog.iastate.edu/ribbit/?page=getcourse.rjs&code=CPR E%20529&edition=2023-24")
    .text
    .encode("latin-1")
    .decode("utf-8")
)

'<?xml version="1.0"?>\n<courseinfo>\n<course code="CPR E 529">\n<![CDATA[\n<div class="courseblock">\n<div class=\'courseblocktitle\'><a href=\'#\' class=\'toggle-accordion courseblocklink\'><strong>CPR&#160;E&#160;529: Data Analytics in Electrical and Computer Engineering</strong><span></span></a></div><div class="courseblockdesc accordion-content"><p class="credits noindent">\n(Cross-listed with E E).  (3-0) Cr. 3.\n S. \n</p><p class=\'prereq\'><em>Prereq: E E 322 or equivalent</em><br />Introduces a variety of data analytics techniques ‐ particularly those relevant for electrical and computer engineers ‐ from a foundational perspective. Topics to be covered include techniques for classification, visualization, and parameter estimation, with applications to signals, images, matrices, and graphs. Emphasis will be placed on rigorous analysis as well as principled design of such techniques.\n</p></div>\n</div>\n]]>\n</course>\n</courseinfo>\n'