In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Path to a local chromedriver binary.
# NOTE(review): this variable is never passed to webdriver.Chrome() below, so it
# currently has no effect on which driver binary gets launched.
driver_path = '/Users/rupampatir/Downloads/chromedriver_mac64/chromedriver'  # machine-specific absolute path

# Start a Chrome session with default options; `driver` is shared by every later cell.
driver = webdriver.Chrome()
# Element lookups keep polling for up to 5 seconds before raising.
driver.implicitly_wait(5)

In [2]:
# Load the class-schedule page that hosts the term dropdown.
driver.get('https://engineering.buffalo.edu/computer-science-engineering/graduate/courses/class-schedule.html')

# Look up the term dropdown by its element id and wrap it in Selenium's
# Select helper so its options can be enumerated.
select = Select(driver.find_element(By.ID, "termsourcekey_param"))

# Visible label of every option, in page order; these labels are what
# go_to_semester() later passes to select_by_visible_text().
semesters = [option.text for option in select.options]

In [3]:
def go_to_semester(semester):
    """Reload the class-schedule page and pick `semester` in the term dropdown.

    Args:
        semester: Visible text of the dropdown option to select.

    Uses the module-level `driver`. The page is always reloaded first, so the
    function can be called from any page the browser is currently showing.
    """
    driver.get('https://engineering.buffalo.edu/computer-science-engineering/graduate/courses/class-schedule.html')
    term_dropdown = Select(driver.find_element(By.ID, "termsourcekey_param"))
    term_dropdown.select_by_visible_text(semester)

In [4]:
def go_to_course(course_idx):
    """Click the `course_idx`-th element with CSS class "linklike" on the current page.

    Args:
        course_idx: Zero-based position in the list of "linklike" elements.

    Raises:
        IndexError: If the page has no element at that position.

    NOTE(review): callers pass a row index from the schedule table, which
    presumes one "linklike" element per course row, in table order. Confirm
    against the page markup.
    """
    course_links = driver.find_elements(By.CLASS_NAME, "linklike")
    course_links[course_idx].click()

In [5]:
def fetch_course_details(html):
    """Parse a course detail page into a plain dictionary.

    Args:
        html: HTML source of the course detail page.

    Returns:
        dict with four keys:
          * "Course Title" / "Course Description": text of the first <p>
            sibling following the <h3> with that exact heading, or None when
            the heading or the paragraph is missing.
          * "Program Requirements": dict keyed by the first CSS class of each
            of the first two <td> cells per row of the first table, mapped to
            that cell's text. A later row overwrites an earlier row that uses
            the same class. Empty when the page has no table.
          * "Course Instances": one dict per row of the second table (header
            row skipped) with keys Term, Title, Instructor, Dates, Days,
            Times, Credit Hours and Enrolled. Rows with fewer than eight cells
            are skipped. Empty when the page has fewer than two tables.

    Missing sections yield None or empty containers instead of raising, so a
    single malformed page does not abort a long scraping run.
    """
    soup = BeautifulSoup(html, 'html.parser')

    def paragraph_after(heading):
        # `string=` is the current spelling of the deprecated `text=` filter.
        header = soup.find('h3', string=heading)
        paragraph = header.find_next_sibling('p') if header is not None else None
        return paragraph.text if paragraph is not None else None

    course_title = paragraph_after('Course Title')
    course_description = paragraph_after('Course Description')

    # Look the tables up once; both sections below index into this list.
    tables = soup.find_all('table')

    # First table: requirement cells, keyed by the cell's first CSS class.
    program_requirements = {}
    if tables:
        for row in tables[0].find_all('tr'):
            for cell in row.find_all('td')[:2]:
                css_classes = cell.get('class')
                if css_classes:
                    program_requirements[css_classes[0]] = cell.text

    # Second table: one offering of the course per row.
    instance_fields = (
        'Term', 'Title', 'Instructor', 'Dates',
        'Days', 'Times', 'Credit Hours', 'Enrolled',
    )
    course_instances = []
    if len(tables) > 1:
        for row in tables[1].find_all('tr')[1:]:  # skip the header row
            cols = [td.text.strip() for td in row.find_all('td')]
            # Rows without a full set of cells (spacers, notes) are ignored.
            if len(cols) >= len(instance_fields):
                course_instances.append(dict(zip(instance_fields, cols)))

    return {
        "Course Title": course_title,
        "Course Description": course_description,
        "Program Requirements": program_requirements,
        "Course Instances": course_instances
    }

In [6]:
def get_semester_info(html):
    """Parse the schedule table of a semester page into a list of course rows.

    Args:
        html: HTML source of the class-schedule page after a term was selected.

    Returns:
        A list with one dict per data row of the first <table> on the page,
        in page order. Each dict has the keys "Course Code", "Section",
        "Title", "Instructor", "Dates", "Days", "Times", "Room",
        "Instruction Mode" and "Enrolled", filled from the first ten <td>
        cells with surrounding whitespace stripped. Returns [] when the page
        has no table.

    The first <tr> is treated as the header and skipped. Rows with fewer than
    ten <td> cells (for example header-style or spacer rows) are skipped
    rather than raising IndexError.
    """
    field_names = (
        "Course Code", "Section", "Title", "Instructor", "Dates",
        "Days", "Times", "Room", "Instruction Mode", "Enrolled",
    )

    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table')
    if table is None:
        return []

    courses = []
    for row in table.find_all('tr')[1:]:  # skip the header row
        cells = [td.text.strip() for td in row.find_all('td')]
        if len(cells) < len(field_names):
            # Not a complete course row; nothing reliable to extract.
            continue
        # zip() stops at the shorter input, so extra trailing cells are ignored.
        courses.append(dict(zip(field_names, cells)))

    return courses

In [7]:
# semester label -> list of course rows shown for that semester
semester_info = {}
# course code -> parsed detail page; each code is fetched once across all semesters
course_details = {}
for semester in semesters:
    go_to_semester(semester)
    listed_courses = get_semester_info(driver.page_source)
    semester_info[semester] = listed_courses
    for course_idx, listing in enumerate(listed_courses):
        course_code = listing["Course Code"]
        if course_code not in course_details:
            # Opening a course leaves the schedule page, so reload the
            # semester listing before clicking the next course link.
            go_to_semester(semester)
            go_to_course(course_idx)
            print("Processing ", listing["Title"])
            course_details[course_code] = fetch_course_details(driver.page_source)

In [8]:
import json
# Write both result dictionaries to disk as indented JSON, one file each.
for output_name, payload in (
    ("course_details", course_details),
    ("semester_details", semester_info),
):
    with open(output_name, 'w') as file:
        json.dump(payload, file, indent=4)