In [1]:
import pandas as pd

# Course catalogue; extract() reads its 'Course Code' column to build the search batches.
courses = pd.read_csv('ucdavis_courses.csv')

In [2]:
import requests
import requests_cache
from datetime import timedelta
from ratelimit import limits, sleep_and_retry
import warnings
# NOTE(review): this silences every warning raised in the kernel, including
# pandas deprecation warnings; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Enable caching.
# requests_cache only stores GET/HEAD responses by default, and every request
# this notebook sends is a POST, so POST has to be allowed explicitly or the
# "registrar" cache never stores a response.
requests_cache.install_cache("registrar", allowable_methods=("GET", "POST"))

# HTTP headers sent with every search request (form-encoded body).
headers = {
    'Accept': '*/*',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}


# Registrar term codes to query.
# Presumably Winter / Spring / Fall 2024 -- TODO confirm against the registrar.
term_codes = ['202401', '202403', '202410']

# Template for the search form body. extract() fills in 'termCode' and
# 'multiCourse' for each request; the remaining fields are sent as-is.
# NOTE(review): '-' looks like the form's "no filter" value -- confirm.
data = {
    'termCode': '',
    'course_number': '',
    'multiCourse': '',
    'course_title': '',
    'instructor': '',
    'subject': '-',
    'course_start_eval': '-',
    'course_start_time': '-',
    'course_end_eval': '-',
    'course_end_time': '-',
    'course_status': '-',
    'course_level': '-',
    'course_units': '-',
    'virtual': '-',
    'runMe': '1',
    'clearMe': '1',
    'reorder': '',
    'gettingResults': '0',
}

# Accumulator for the scraped rows; extract() appends to it.
registrar_table = pd.DataFrame(columns=['CourseId', 'ProfName', 'Quarter'])

@sleep_and_retry
@limits(calls=10, period=timedelta(seconds=10).total_seconds())
def _post_course_search(payload):
    """POST one search form to the registrar and return the response.

    The rate limit is attached here, on the single HTTP call, so that it
    throttles every request (at most 10 calls per 10 seconds). Decorating
    extract() itself would only limit how often the whole scrape is started.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    response = requests.post(
        'https://registrar-apps.ucdavis.edu/courses/search/course_search_results.cfm',
        headers=headers,
        data=payload,
        timeout=60,  # never hang forever on an unresponsive server
    )
    response.raise_for_status()
    return response


def _parse_search_results(html, term):
    """Turn one search-results page into a CourseId / ProfName / Quarter frame.

    Args:
        html: body of the search-results response.
        term: term code the page was requested for; stored in 'Quarter'.

    Raises:
        ValueError: if the page has no table, or the first table does not
            have exactly seven columns.
    """
    from io import StringIO  # local import keeps this block self-contained

    # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
    # The first four rows of the first table are skipped, as before.
    # .copy() so the column assignment below does not touch a slice view.
    results = pd.read_html(StringIO(html))[0].iloc[4:].copy()
    results.columns = ['0', '1', '2', '3', '4', '5', '6']
    return (
        results[['1', '4']]
        .rename(columns={'1': 'CourseId', '4': 'ProfName'})
        .assign(Quarter=term)
    )


def extract():
    """Scrape instructor listings for every course code in `courses`.

    Course codes are sent in batches of 250, once per entry in `term_codes`.
    The parsed rows are appended to the module-level `registrar_table`.

    A failed request or unparsable page is reported and skipped, so one bad
    batch does not abort the run. Rows collected so far are written to
    `registrar_table` even if the run is interrupted (e.g. KeyboardInterrupt).
    The shared `data` template is copied per request rather than mutated.
    """
    global registrar_table
    batch_size = 250
    # Drop missing codes and force str so ','.join cannot fail on NaN.
    course_list = list(courses['Course Code'].dropna().astype(str))

    # Collect frames and concatenate once; concat inside the loop is quadratic.
    frames = [registrar_table]
    try:
        for start in range(0, len(course_list), batch_size):
            search_text = ','.join(course_list[start:start + batch_size])
            for term in term_codes:
                payload = {**data, 'multiCourse': search_text, 'termCode': term}
                try:
                    response = _post_course_search(payload)
                    frames.append(_parse_search_results(response.text, term))
                except Exception as e:
                    # Best effort: say what failed and why, then carry on.
                    print(
                        f'Failed for term {term}, courses '
                        f'{start}-{start + batch_size - 1}: {type(e).__name__}: {e}'
                    )
    finally:
        registrar_table = pd.concat(frames, ignore_index=True)
                
# Run the scrape; extract() accumulates its results in the module-level registrar_table.
extract()

KeyboardInterrupt: 

In [39]:
import re
# Lookup tables keyed by course code. Each keeps the value from the first
# usable row seen for that code, so both dicts always share the same keys in
# the same order.
prof_dict = {}
quarter_dict = {}

for course_id, prof_name, quarter in zip(
    registrar_table['CourseId'],
    registrar_table['ProfName'],
    registrar_table['Quarter'],
):
    # A missing instructor cell (NaN) is not a string; skip it instead of
    # crashing on the substring checks below.
    if not isinstance(prof_name, str):
        continue
    # Skip rows whose instructor cell holds the '@ Denotes' text or 'The Staff'.
    if '@ Denotes' in prof_name or 'The Staff' in prof_name:
        continue

    # Explicit "no course code" check instead of catching an IndexError.
    code_match = (
        re.search(r'(\w+ \d+[A-Z]*)', course_id)
        if isinstance(course_id, str) else None
    )
    if code_match is None:
        # Rows without a course code (e.g. "** TOPIC: ... **") are reported and skipped.
        print(course_id)
        continue
    course_code = code_match.group(1)

    # Strip digit runs joined by one character from the instructor cell.
    # NOTE(review): the '.' is unescaped, so it matches any character, not
    # only a decimal point. Left unchanged; confirm whether r'\d+\.\d+' was intended.
    prof_name = re.sub(r'\d+.\d+', '', prof_name)

    prof_dict.setdefault(course_code, prof_name)
    quarter_dict.setdefault(course_code, quarter)

** TOPIC: TO BE ANNOUNCED **
** TOPIC: TO BE ANNOUNCED **
** TOPIC: TO BE ANNOUNCED **
** TOPIC: FINAL FANTASY VII: JRPGS, ECO-CRITICISMAND ADAPTION **
** TOPIC: FINAL FANTASY VII: JRPGS, ECO-CRITICISMAND ADAPTION **
** TOPIC: LATINA/O/X LITERATURE & MEDIA **
** TOPIC: LATINA/O/X LITERATURE & MEDIA **
** TOPIC: EMPIRE, RACIAL CAPITALISM, AND LITERATURESOF SPECULATION **


In [43]:
# Rebuild registrar_table with one row per course code.
# The quarter is looked up by course code rather than zipped by position, so
# rows cannot be silently misaligned if the two dicts ever differ in order.
# NOTE: this replaces the raw scraped table held under the same name.
registrar_table = pd.DataFrame(
    [
        (course_code, prof_name, quarter_dict[course_code])
        for course_code, prof_name in prof_dict.items()
    ],
    columns=['CourseId', 'ProfName', 'Quarter'],
)

# Persist the de-duplicated table; index=False keeps the row index out of the file.
registrar_table.to_csv('registrar_data.csv', index=False)

In [3]:
# Count distinct values in the ProfName column of registrar_table.
print(
    'Number of unique professors - ',
    len(set(registrar_table['ProfName'])),
)

Number of unique professors -  1957
