In [1]:
%pip install selenium webdriver-manager

Collecting selenium
  Downloading selenium-4.25.0-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.27.0-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting typing_extensions~=4.9 (from selenium)
  Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting websocket-client~=1.8 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting sniffio>=1.3.0 (from trio~=0.17->selenium)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 k

In [47]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Options class used to configure the Chrome browser below
from selenium.webdriver.chrome.options import Options

import pandas as pd

# Codes to look up in the program dropdown, one scrape per code
course_codes = ['901', '904', '906', '908', '909', '911']

# Accumulates one DataFrame per successfully scraped code
all_data = []

# Build the Chrome option set; flags are applied in the order listed
chrome_options = Options()
for chrome_flag in ('--ignore-certificate-errors', '--disable-gpu'):
    chrome_options.add_argument(chrome_flag)
# chrome_options.add_argument('--headless')  # Uncomment to run in headless mode

# Resolve a chromedriver binary through webdriver-manager, then start Chrome
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# Scrape the "Semester Information" search results for every code in
# `course_codes`: one CSV per code plus a combined CSV at the end. The browser
# is closed in the `finally` clause whether or not a step raises.

# Every select2 dropdown used below exposes its choices as these label nodes.
SELECT2_OPTION_XPATH = "//ul[@class='select2-results']//div[@class='select2-result-label']"

try:
    # The wait helper only holds the driver and the 30-second timeout, so it is
    # built once instead of once per iteration.
    wait = WebDriverWait(driver, 30)

    for course_code in course_codes:
        # Each iteration reloads the landing page and navigates to the form again
        driver.get('https://sis.metu.edu.tr/')

        courses_link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Courses')))
        courses_link.click()

        semester_info_link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Semester Information')))
        semester_info_link.click()

        wait.until(EC.presence_of_element_located((By.TAG_NAME, 'body')))

        # Open the semester dropdown
        semester_select2 = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#s2id_selectSemester .select2-choice')
        ))
        semester_select2.click()

        # Pick the second entry of the semester list (XPath index [2]);
        # presumably that is the most recent semester -- TODO confirm.
        second_option = wait.until(EC.element_to_be_clickable(
            (By.XPATH, f"({SELECT2_OPTION_XPATH})[2]")
        ))
        second_option.click()

        # Open the program dropdown and wait until its options exist in the DOM
        program_select2 = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#s2id_selectProgram .select2-choice')
        ))
        program_select2.click()

        wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECT2_OPTION_XPATH)))
        options = driver.find_elements(By.XPATH, SELECT2_OPTION_XPATH)

        # First option whose visible text contains the code.
        # NOTE(review): this is a substring match, so a code that also occurs
        # inside another option's label would select that option instead.
        desired_program_option = next(
            (option for option in options if course_code in option.text),
            None,
        )

        if desired_program_option is None:
            print(f"Desired program with course code '{course_code}' not found.")
            continue  # Skip to the next course code

        # Scroll into view and click the option
        driver.execute_script("arguments[0].scrollIntoView();", desired_program_option)
        desired_program_option.click()

        submit_button = wait.until(EC.element_to_be_clickable((By.ID, 'submitSearchForm')))
        submit_button.click()

        wait.until(EC.presence_of_element_located((By.ID, 'SearchResults')))

        # Open the page-length dropdown and choose 'All' so every row is rendered
        display_length_select2 = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#SearchResults_length .select2-container')
        ))
        display_length_select2.click()

        wait.until(EC.visibility_of_element_located((By.ID, 'select2-drop')))

        all_option = wait.until(EC.element_to_be_clickable(
            (By.XPATH, "//ul[@class='select2-results']//div[text()='All']")
        ))
        all_option.click()

        # NOTE(review): the table element already exists at this point, so this
        # presence check returns immediately; it does not by itself guarantee
        # that the rows have been redrawn.
        wait.until(EC.presence_of_element_located((By.ID, 'SearchResults')))

        # --- Code to Select All Columns ---

        column_toggler_button = wait.until(EC.element_to_be_clickable(
            (By.XPATH, "//div[contains(@class, 'btn-group')]/a[contains(text(), 'Columns')]")
        ))
        column_toggler_button.click()

        # Wait for the dropdown menu to be visible
        wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@id='SearchResults_column_toggler']")))

        checkboxes = driver.find_elements(By.XPATH, "//div[@id='SearchResults_column_toggler']//input[@type='checkbox']")

        # Tick every unchecked column box via JavaScript and fire 'change' so the
        # page's own handler reacts to the new state
        for checkbox in checkboxes:
            if not checkbox.is_selected():
                driver.execute_script("arguments[0].checked = true;", checkbox)
                driver.execute_script("arguments[0].dispatchEvent(new Event('change'));", checkbox)

        # Second click on the same button closes the column menu again
        column_toggler_button.click()

        wait.until(EC.presence_of_element_located((By.ID, 'SearchResults')))

        # Extract data from the table: header texts first, then one list per row
        header_elements = driver.find_elements(By.XPATH, "//table[@id='SearchResults']//th")
        headers = [header.text.strip() for header in header_elements]

        data = []
        skipped_rows = 0
        rows = driver.find_elements(By.XPATH, "//table[@id='SearchResults']/tbody/tr")
        for row in rows:
            cell_data = [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, 'td')]
            # A row whose cell count differs from the header count cannot be
            # placed in the frame and would make pd.DataFrame raise ValueError
            # (presumably this is the single-cell "no data" placeholder row of
            # the table widget -- TODO confirm). Skip it instead of aborting.
            if len(cell_data) != len(headers):
                skipped_rows += 1
                continue
            data.append(cell_data)

        if skipped_rows:
            print(f"Skipped {skipped_rows} row(s) for course code {course_code}: "
                  f"cell count did not match the {len(headers)} table headers.")

        # Create a pandas DataFrame and tag every row with the code it came from
        df = pd.DataFrame(data, columns=headers)
        df['Course Code'] = course_code

        # Append the DataFrame to the list
        all_data.append(df)

        # Save the DataFrame for the current course
        df.to_csv(f'course_data_{course_code}.csv', index=False)
        print(f"Data for course code {course_code} saved.")

    # After processing all course codes, combine all DataFrames
    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        combined_df.to_csv('all_course_data.csv', index=False)
        print("Combined data for all courses saved.")

finally:
    driver.quit()


Data for course code 901 saved.
Data for course code 904 saved.
Data for course code 906 saved.
Data for course code 908 saved.
Data for course code 909 saved.
Data for course code 911 saved.
Combined data for all courses saved.
