In [None]:
import json
import re
from datetime import datetime

import mysql.connector
import requests
from mysql.connector import IntegrityError


In [None]:
from mysql.connector import DataError


def _repair_mojibake(text):
    """Undo UTF-8-decoded-as-latin-1 mojibake; return ``text`` unchanged if that does not apply."""
    if text is None:
        return None
    try:
        return text.encode("latin-1").decode("utf-8")
    except UnicodeError:
        # Either the text contains characters outside latin-1 or the bytes are
        # not valid UTF-8: it was not mojibake, so keep it as it is.
        return text


def insert_course(course_info: dict, course_json: dict, year: int = 2024, season: int = 1):
    """Insert one course, its sections and their schedules into the Scheduler database.

    A row is inserted into ``Course``; then, for every section returned by
    ``get_sections(course_json)``, the stored procedure ``addSection`` is called,
    followed by ``addSectionSchedule`` for each of the section's schedule entries.
    Duplicate rows (``IntegrityError``) are reported and skipped so the function
    can be re-run over data that is already loaded.

    Args:
        course_info: Dict with the keys ``program_identifier``, ``num``,
            ``displayName``, ``description``, ``credits``, ``is_variable_credit``
            and ``is_graded``. ``None`` makes the call a no-op.
        course_json: Raw course record that ``get_sections`` reads.
        year: Year passed to ``addSection``.
        season: Season code passed to ``addSection``.

    Raises:
        Any database error other than the ``IntegrityError``/``DataError`` cases
        handled below propagates to the caller; the cursor and connection are
        closed first.
    """
    if course_info is None:
        return

    # NOTE(review): root credentials are hard-coded here; they should come from
    # the environment or a secrets store rather than live in the notebook.
    connection = mysql.connector.connect(
        host="coms-309-029.class.las.iastate.edu",
        port=3306,
        user="root",
        password="5b36fb66f5d37314",
        database="Scheduler"
    )
    try:
        cursor = connection.cursor()
        try:
            sql = (
                "INSERT INTO Course (program_identifier, num, display_name, description, credits, is_variable_credit, is_graded)" +
                "VALUES (%s, %s, %s, %s, %s, %s, %s)"
            )
            values = (
                course_info["program_identifier"],
                course_info["num"],
                course_info["displayName"],
                _repair_mojibake(course_info["description"]),
                course_info["credits"],
                course_info["is_variable_credit"],
                course_info["is_graded"]
            )
            try:
                cursor.execute(sql, values)
                connection.commit()
            except IntegrityError as e:
                print(e.msg)
                print(f"Duplicate entry: {course_info['program_identifier']} {course_info['num']}")

            for sec in get_sections(course_json):
                try:
                    cursor.callproc("addSection", (
                        sec["ref_num"],
                        course_info["program_identifier"],
                        course_info["num"],
                        sec["section"],
                        year,
                        season,
                        sec["is_online"],
                    ))
                    connection.commit()
                except IntegrityError:
                    print(f"Duplicate entry: {sec['section']}")
                except DataError:
                    print("bad data")

                # Schedules are attempted even when the section insert was
                # skipped above (e.g. the section already existed).
                for t in sec["schedule"]:
                    try:
                        cursor.callproc("addSectionSchedule", (
                            course_info["program_identifier"],
                            course_info["num"],
                            sec["section"],
                            t["start_time"],
                            t["end_time"],
                            t["meet_days_bitmask"],
                            t["location"],
                            t["instructor"],
                            t["instruction_type"]
                        ))
                        connection.commit()
                    except IntegrityError:
                        print(f"Duplicate entry: {sec['section']}")

            connection.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when an unexpected error escapes.
        connection.close()

In [None]:
def scrape_course(program: str, num: int | str, sem: int = 3) -> list[tuple[dict, dict]] | None:
    """Query classes.iastate.edu for courses and pair each with its catalog details.

    Args:
        program: Department code sent as ``selectedDepartment``.
        num: Course number sent as ``courseNumber`` (the notebook passes ``""``
            when it wants every course of a department).
        sem: Value sent as both ``defSem`` and ``selectedTerm``.

    Returns:
        ``None`` when the API's ``response`` list is empty. Otherwise a list of
        ``(course_info, course_data)`` tuples, where ``course_data`` is the raw
        API record and ``course_info`` has the keys ``program_identifier``,
        ``num``, ``displayName``, ``description``, ``credits``,
        ``is_variable_credit`` and ``is_graded``. Experimental courses (class
        number ending in ``X``) and courses whose number or catalog entry cannot
        be parsed are left out.
    """
    url = "https://classes.iastate.edu/app/rest/courses/preferences"
    headers: dict = {"Content-Type": "application/json"}
    payload: str = json.dumps({
        "defSem": sem,
        "selectedTerm": sem,
        "selectedDepartment": program,
        "courseNumber": num
    })

    response: dict = requests.post(url, headers=headers, data=payload).json()
    if len(response["response"]) == 0:
        return None

    courses = []
    for course_data in response["response"]:
        class_number = course_data["classNumber"]

        # Don't want experimental courses. Checked before any catalog request so
        # skipped courses cost no extra HTTP round trip; endswith() also copes
        # with an empty class number.
        if class_number.endswith("X"):
            continue

        number_match = re.search(r"([0-9]{1,3})[a-zA-Z]{,3}", class_number)
        if number_match is None:
            continue

        catalog_data: str = requests.get(course_data["catalogUrl"]).text

        display_name = None
        description = None
        # NOTE(review): 55 is presumably the length of the catalog's empty
        # "no such course" reply; shorter responses carry no details - confirm.
        if len(catalog_data) > 55:
            name_match = re.search(r"&#160;[0-9]{1,3}[A-Za-z]{,3}:(.+)</strong>", catalog_data, re.DOTALL)
            description_match = re.search(r"><br />(.+)</p>", catalog_data, re.DOTALL)
            if name_match is None or description_match is None:
                # Catalog markup did not have the expected shape; skip the course.
                continue
            display_name = name_match.group(1).strip()
            description = description_match.group(1).strip()

        courses.append(({
            "program_identifier": course_data["deptCode"],
            "num": int(number_match.group(1)),
            "displayName": display_name,
            "description": description,
            "credits": int(course_data["creditLow"]),
            "is_variable_credit": course_data["creditType"] == 'V',
            "is_graded": course_data["gradeType"] == ""
        }, course_data))

    return courses


def meet_days_to_bitmask(s: str):
    """Fold a string of day letters into a bitmask.

    Bits: ``M`` = 1, ``T`` = 2, ``W`` = 4, ``R`` = 8, ``F`` = 16. Any other
    character contributes nothing, and repeated letters set their bit once.
    """
    day_bits = {"M": 1, "T": 2, "W": 4, "R": 8, "F": 16}
    bitmask = 0
    for day_code in s:
        bitmask |= day_bits.get(day_code, 0)
    return bitmask


def convert_time(s: str):
    """Turn an ``HH:MM:SS.ffffff`` clock string into whole minutes since midnight.

    Seconds and fractional seconds are parsed but dropped. ``ValueError`` is
    raised by ``strptime`` when the string does not match the format.
    """
    parsed = datetime.strptime(s, "%H:%M:%S.%f")
    hours, minutes = parsed.hour, parsed.minute
    return 60 * hours + minutes


def get_sections(course_json: dict) -> list[dict]:
    """Flatten the ``sections`` of a raw course record into insert-ready dicts.

    Args:
        course_json: Course record with a ``sections`` list; each section is read
            for ``deliveryTypeDisplay``, ``sectionID``, ``referenceNumber`` and
            ``sectionTimes``.

    Returns:
        One dict per section with the keys ``section``, ``ref_num``,
        ``is_online`` and ``schedule``. ``schedule`` is a list with one dict per
        ``sectionTimes`` entry (``start_time``, ``end_time``,
        ``meet_days_bitmask``, ``location``, ``instructor``,
        ``instruction_type``). Blank strings become ``None``.
    """
    sections: list[dict] = []
    for section_data in course_json["sections"]:
        delivery = section_data["deliveryTypeDisplay"]
        # None means the delivery type is unknown, not "in person".
        is_online = None if delivery == "" else "online" in delivery

        # The section ID belongs to the section, not to a meeting time. Reading
        # it here (rather than inside the loop below) keeps it correct for
        # sections that have no sectionTimes entries at all.
        section = section_data["sectionID"].strip()

        times = []
        for section_time in section_data["sectionTimes"]:
            location = f'{section_time["buildingName"]} {section_time["roomNum"]}'.strip()
            instructor = section_time["instrName"].strip()
            instruction_type = section_time["instructionType"].strip()
            meet_days = section_time["meetDays"].strip()

            times.append({
                # Clock times are only parsed when the section is not known to be online.
                "start_time": None if is_online else convert_time(section_time["startTime"]),
                "end_time": None if is_online else convert_time(section_time["stopTime"]),
                "meet_days_bitmask": None if meet_days == "" else meet_days_to_bitmask(meet_days),
                "location": None if location == "" else location,
                "instructor": None if instructor == "" else instructor,
                "instruction_type": None if instruction_type == "" else instruction_type
            })

        sections.append({
            "section": None if section == "" else section,
            "ref_num": section_data["referenceNumber"],
            "is_online": is_online,
            "schedule": times
        })

    return sections

In [None]:
# Scratch check: scrape every "DS" course (blank course number) and display the
# first (course_info, course_data) tuple. Fails if scrape_course returns None.
scrape_course("DS", "")[0]

In [None]:
# Department codes handed to scrape_course one at a time by the loop below.
# NOTE(review): the list starts at "DANCE"; presumably the earlier departments
# were loaded in a previous run - confirm before relying on it for a full load.
depts = ["DANCE", "DS", "DES", "DSN S", "DIET",
         "E C P", "EEB", "EEOB", "ECON", "EDUC", "EDADM", "EL PS", "E E", "ENGR", "E M", "ENGL", "ENT", "ENTSP",
         "ENV E", "ENSCI", "ENV S", "EVENT", "FCEDS", "FFP", "FIN", "FS HN", "FOR", "FRNCH", "GEN", "GENET", "GDCB",
         "GEOL", "GER", "GERON", "GLOBE", "GR ST", "ARTGR", "H S", "HCM", "HG ED", "HIST", "HON", "HORT", "HSP M",
         "HCI", "HD FS", "H SCI", "IMBIO", "IND D", "I E", "ARTIS", "IGS", "ARTID", "INTST", "JL MC", "KIN", "L A",
         "LATIN", "LD ST", "L L S", "LAS", "LIB", "LING", "MGMT", "MIS", "MKT", "MAT E", "M S E", "MATH", "M E",
         "MTEOR", "MICRO", "M S", "MCDB", "MUSIC", "NREM", "N S", "NEURO", "NRS", "NUTRS", "OTS", "PERF", "PHIL",
         "PHYS", "PLBIO", "PL P", "POL S", "PORT", "PSYCH", "P R", "RELIG", "RESEV", "RUS", "STB", "SOC", "S E", "SPAN",
         "SP ED", "SP CM", "STAT", "SCM", "SUSAG", "SUS E", "TSM", "THTRE", "TOX", "TRANS", "US LS", "U ST", "URB D",
         "VDPAM", "V C S", "V MPM", "V PTH", "WESEP", "WISE", "WGS", "W F S", "WLC", "YTH"
         ]
# Best-effort bulk load: scrape each department and insert every course found.
# Failures are reported and skipped so one bad department or course does not
# stop the whole run.
for dept in depts:
    try:
        courses = scrape_course(dept, "")
    except Exception as e:
        print(f"{dept}: scrape failed: {e}")
        continue

    # scrape_course returns None when the API has no courses for the department.
    if courses is None:
        print(f"{dept} doesn't exist")
        continue

    print(f"got {dept}: {len(courses)}")
    for c in courses:
        print(f"Inserting {c[0]['program_identifier']} {c[0]['num']}")
        try:
            insert_course(c[0], c[1])
        except Exception as e:
            # Keep going with the remaining courses of this department.
            print(f"{c[0]['program_identifier']} {c[0]['num']}: insert failed: {e}")


In [None]:
# Scratch check of the mojibake repair on a real description. scrape_course
# returns (course_info, course_data) tuples, so the first [0] picks the course
# and the second [0] picks its course_info dict.
scrape_course("MATH", 699)[0][0]["description"].encode("latin-1").decode("utf-8")

In [None]:
# Scratch check: query course number 101 with a blank department code and
# display whatever scrape_course returns (a list of tuples, or None).
scrape_course("", 101)

In [None]:
# Scratch check: run the displayName pattern used in scrape_course against the
# live catalog response for MATH 699C and display the resulting match (or None).
re.search(r"&#160;[0-9]{1,3}[A-Za-z]{,3}:(.+)</strong>",
          requests.get("https://catalog.iastate.edu/ribbit/?page=getcourse.rjs&code=MATH%20699C&edition=2023-24").text,
          re.DOTALL)

In [None]:
# Scratch check: fetch the raw catalog response for CPR E 529 and re-decode it
# (latin-1 bytes read back as UTF-8) to inspect the text after mojibake repair.
requests.get("https://catalog.iastate.edu/ribbit/?page=getcourse.rjs&code=CPR E%20529&edition=2023-24").text.encode(
    "latin-1").decode("utf-8")