In [5]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

In [6]:
def get_dept_info(dept):
    """Scrape the Dartmouth timetable for one department into a DataFrame.

    Drives a Chrome browser through the timetable search form (subject-area
    search, "all terms", the requested department), reads the resulting
    ``data-table`` row by row, and downloads the page linked from each
    course's syllabus cell.

    Parameters
    ----------
    dept : str
        Department code; it is upper-cased and matched against the ``value``
        attribute of the form's controls (e.g. ``'cosc'`` -> ``'COSC'``).

    Returns
    -------
    pandas.DataFrame
        One row per table row, using the table's ``<th>`` texts as column
        names, plus a ``'Syllabus'`` column with the text of the linked page.
        Rows whose course number has no fetched syllabus get an empty string.

    Notes
    -----
    The browser is always closed, even when scraping fails part-way.
    The code assumes the table has a ``'Num'`` header, that cell index 3 holds
    that same course number, and that cell index 5 holds the syllabus link --
    NOTE(review): confirm against the live page layout.
    """
    dept_code = dept.upper()
    print("Init page...")

    # init driver
    driver = webdriver.Chrome()
    try:
        driver.get("https://oracle-www.dartmouth.edu/dart/groucho/timetable.main")
        time.sleep(2)

        # click through to course selection page
        driver.find_element(By.CSS_SELECTOR, "[value='Subject Area(s)']").click()
        time.sleep(2)

        print(f"Selecting dept: {dept_code}...")

        # all terms
        driver.find_element(By.CSS_SELECTOR, "#allterms").click()
        time.sleep(2)

        # specific department
        driver.find_element(By.CSS_SELECTOR, f"[value='{dept_code}']").click()
        time.sleep(2)

        print("Getting course list...")

        # submit the search
        driver.find_element(By.CSS_SELECTOR, "[value='Search for Courses']").click()
        time.sleep(2)

        print("Producing data...")

        # first <tr> supplies the headers, the remaining ones are course rows
        course_table = driver.find_element(By.CSS_SELECTOR, "[class='data-table']")
        course_rows = course_table.find_elements(By.TAG_NAME, "tr")
        course_columns = [col.text for col in course_rows[0].find_elements(By.TAG_NAME, "th")]

        course_mat = []
        syllabi = {}
        prev_course = None

        for course in course_rows[1:]:
            cells = course.find_elements(By.TAG_NAME, "td")
            row_text = [cell.text for cell in cells]
            course_mat.append(row_text)

            # course number cell; None when the row is too short to have one
            curr_course = row_text[3] if len(row_text) > 3 else None

            # Only fetch when the course number changes from the previous row,
            # so consecutive rows of the same course share a single download.
            if len(cells) > 5 and curr_course != prev_course:
                # find_elements returns [] instead of raising when the cell
                # carries no link, so such rows are skipped rather than fatal.
                links = cells[5].find_elements(By.TAG_NAME, "a")
                if links:
                    raw_link = links[0].get_attribute("href")
                    # keep the text between the first "(" and the next "%",
                    # minus quotes -- presumably the href wraps the real URL
                    # in a javascript call; verify if the site changes
                    link = raw_link.split("(")[1].split("%")[0].strip("'")

                    # timeout so one stalled page cannot hang the whole scrape
                    main_content = requests.get(link, timeout=30)
                    syllabi[curr_course] = BeautifulSoup(main_content.text, 'lxml').text.strip("\n")

            prev_course = curr_course
    finally:
        # release the browser process no matter how the scrape ended
        driver.quit()

    course_df = pd.DataFrame(course_mat, columns=course_columns)
    # .get: rows without a fetched syllabus become "" instead of a KeyError
    course_df['Syllabus'] = course_df['Num'].apply(lambda num: syllabi.get(num, ""))

    return course_df

In [7]:
# Scrape the COSC department; this opens a Chrome window and fetches each linked syllabus page.
cs_info = get_dept_info('cosc')

Init page...
Selecting dept: COSC...
Getting course list...
Producing data...


In [8]:
# API Exploration

In [9]:
import openai
import os
from dotenv import load_dotenv

In [10]:
# Load environment variables (presumably from a local .env file) so the API key can be read below.
load_dotenv()

True

In [11]:
# Read the key from the OPENAI_KEY environment variable; os.getenv yields None when it is unset.
openai.api_key = os.getenv("OPENAI_KEY")

In [12]:
def produce_syllabus_text(syllabus_list,start,end):
    """Concatenate ``syllabus_list[start:end]`` into one delimited string.

    Each selected syllabus is followed by a ``--`` marker surrounded by
    whitespace padding, so the pieces stay distinguishable in the result.

    Parameters
    ----------
    syllabus_list : list of str
        Syllabus texts to draw from.
    start : int
        Index of the first syllabus to include.
    end : int
        Exclusive end index; ``end == len(syllabus_list)`` selects through the
        last syllabus.

    Returns
    -------
    str
        The concatenated text, or the sentinel ``"Bad range"`` when
        ``start < 0`` or ``end > len(syllabus_list)``.
    """
    # `end` is exclusive (it feeds range()), so len(syllabus_list) is a valid
    # upper bound; only values beyond it are out of range.
    if start<0 or end>len(syllabus_list):
        return "Bad range"

    pad = "                                                                                                                                              "
    delimiter = pad + "--" + pad

    return "".join(syllabus_list[i] + delimiter for i in range(start,end))

In [25]:
def get_response(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Parameters
    ----------
    prompt : str
        Complete prompt text, sent as the only message with role ``"user"``.
    model : str, optional
        Chat model name passed to the API. Defaults to ``"gpt-3.5-turbo"``.

    Returns
    -------
    The ``message.content`` of the first choice in the API response.
    """
    raw_response = openai.chat.completions.create(
        model=model,
        messages=[{"role":"user","content":prompt}]
    )

    # only the first choice is used
    return raw_response.choices[0].message.content

In [48]:
def analyze_courses(course_info,dept,max_courses=18):
    """Ask the chat model to tag and summarise a department's courses.

    Builds one prompt from fixed instructions, the department label, and the
    concatenated unique syllabi, then sends it through ``get_response``.

    Parameters
    ----------
    course_info : pandas.DataFrame
        Must contain a ``'Syllabus'`` column; its unique values are used.
    dept : str
        Department label interpolated into the prompt.
    max_courses : int, optional
        Exclusive end index passed to ``produce_syllabus_text`` (start is 0).
        Defaults to 18.

    Returns
    -------
    tuple
        ``(complete_prompt, output)``: the prompt that was sent and the
        model's reply.

    Raises
    ------
    ValueError
        If ``produce_syllabus_text`` rejects the requested range. Without
        this check the ``"Bad range"`` sentinel would be sent to the API as
        if it were course text.
    """
    # list of unique courses' syllabi
    syllabus_list=list(course_info['Syllabus'].unique())

    # prompting -- the wording (including the 'Pre-requesite' spelling) is kept as is,
    # since the model echoes these labels in its reply
    init_prompt = "I'm going to give you a college department as well as a series of courses (name and number) as well as their syllabi. For each, create a list of 'tags' (~3 for *each* course) in terms of the specific areas within the department that characterize the course. In addition, create at 6 short bullets (each only a few words/phrase): for the first 3, write 'Pre-requesite skills' before and then list prerequisite skills needed or whether something clearly emphasizes that something is not needed, etc. (if any are glaring, otherwise don't list any and say 'None specified'); for the other 3 (has to be 3), first write 'Skills taught' and then list skills taught. These max. 6 bullets must be done for each course. Please also end the info for each course with a new line containing '--' to separate one course from another:"
    course_txt = produce_syllabus_text(syllabus_list,0,max_courses)

    # fail before spending an API call on a prompt that contains no syllabi
    if course_txt == "Bad range":
        raise ValueError(
            f"max_courses={max_courses} is outside the range accepted by "
            f"produce_syllabus_text for {len(syllabus_list)} unique syllabi"
        )

    complete_prompt = init_prompt +     "                                                                                                                        --                                                                                                                                           " + f"*Department: {dept} -- *" + course_txt

    # output
    output = get_response(complete_prompt)

    return complete_prompt,output

In [49]:
# Sends one chat-completion request; the result is a (prompt, model_output) tuple.
course_analysis = analyze_courses(cs_info,"COSC/Computer Science")

In [59]:
# Element 1 is the model's reply; element 0 is the prompt that was sent.
print(course_analysis[1])

Tags for COSC 001:
1. Introduction to Computer Science
2. Data Analysis
3. Programming Fundamentals

Pre-requesite skills:
None specified

Skills taught:
1. Problem decomposition
2. Program efficiency
3. Programming style

--

Tags for COSC 002:
1. Visual Art
2. Interactive Works
3. Processing Language

Pre-requesite skills:
None specified

Skills taught:
1. Color manipulation
2. Shape representation
3. Image manipulation

--

Tags for COSC 010:
1. Problem Solving
2. Data Structures
3. Algorithms

Pre-requesite skills:
None specified

Skills taught:
1. Abstraction
2. Modularity
3. Object-oriented programming

--

Tags for COSC 016:
1. Computational Neuroscience
2. Brain Computations
3. Robotics

Pre-requesite skills:
None specified

Skills taught:
1. Anatomical circuit design
2. Physiological operating rules
3. Development of applications

--

Tags for COSC 19.01:
1. Writing about Technology
2. Technical Background
3. Prose Writing

Pre-requesite skills:
None specified

Skills taught:


In [58]:
# Split the reply on the '--' delimiter the prompt asks the model to emit after each course.
# The pieces keep their surrounding newlines/whitespace.
course_blurbs = course_analysis[1].split("--")