In [1]:
# Cell 1: Import and setup
import re
import json
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from tqdm import tqdm
import google.generativeai as genai
import time
import os
from dotenv import load_dotenv
import psycopg2
from psycopg2.extras import RealDictCursor, Json
import logging
from datetime import datetime
from typing import Dict, List, Optional

# Notebook-wide logging: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the classes and functions defined in later cells.
logger = logging.getLogger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Cell 2: Configure API and Database
# Read settings from the .env file into the process environment, then pick out
# the two values the rest of the notebook depends on.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
DATABASE_URL = os.environ.get("DATABASE_URL")

# Fail fast: an unset or empty value makes every later cell useless.
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY not found in .env file")

if DATABASE_URL is None or DATABASE_URL == "":
    raise ValueError("DATABASE_URL not found in .env file")

genai.configure(api_key=GEMINI_API_KEY)

# Catalog root and the course index page that the scraping functions start from.
BASE_URL = "https://catalog.unc.edu"
COURSE_INDEX_URL = BASE_URL + "/courses/#text"

In [3]:
# Cell 3: Database Manager class
class DatabaseManager:
    """Persist scraped course data to PostgreSQL through one psycopg2 connection.

    The connection runs with autocommit off, so callers decide when to
    ``commit()`` or ``rollback()``. Primary keys of known departments and
    courses are cached in memory (``department_cache``: department code -> id,
    ``course_id_cache``: course code -> id) so requisite references can be
    resolved without a query per lookup.
    """

    # Splits a whitespace-free course code into its alphabetic department prefix
    # and the remainder starting at the first digit (e.g. "BIOL101L").
    _COURSE_CODE_RE = re.compile(r'^([A-Za-z]+)(\d\S*)$')

    # Corequisite groups are stored in the same table as prerequisite groups;
    # the offset keeps their group numbers apart (presumably to avoid clashing
    # on a shared uniqueness constraint -- confirm against the schema).
    _COREQ_GROUP_OFFSET = 1000

    def __init__(self, db_url: str):
        """Initialize database connection and caches.

        Args:
            db_url: PostgreSQL connection URL of the form
                ``scheme://user:password@host:port/database``.
        """
        # Parse the URL ourselves so the extra gssencmode parameter can be added.
        from urllib.parse import unquote, urlparse
        url = urlparse(db_url)

        conn_params = {
            "host": url.hostname,
            "port": url.port,
            "database": url.path[1:],  # Remove leading '/'
            # urlparse leaves percent-escapes in place; decode them so that
            # credentials containing reserved characters (e.g. '@' written as
            # '%40') reach the server in their real form.
            "user": unquote(url.username) if url.username else url.username,
            "password": unquote(url.password) if url.password else url.password,
            "sslmode": "require",
            "gssencmode": "disable"  # Fix for Windows GSSAPI error
        }

        self.conn = psycopg2.connect(**conn_params)
        self.conn.autocommit = False
        self.cur = self.conn.cursor(cursor_factory=RealDictCursor)

        # Cache for lookups
        self.department_cache = {}
        self.course_id_cache = {}

        # Load existing data into cache
        self._load_cache()

    def _load_cache(self):
        """Load existing departments and courses into cache."""
        self.cur.execute("SELECT id, code FROM departments")
        for row in self.cur.fetchall():
            self.department_cache[row['code']] = row['id']

        self.cur.execute("SELECT id, course_id FROM courses")
        for row in self.cur.fetchall():
            self.course_id_cache[row['course_id']] = row['id']

        logger.info(f"Loaded {len(self.department_cache)} departments and {len(self.course_id_cache)} courses into cache")

    def get_or_create_department(self, dept_code: str) -> int:
        """Get or create a department, returning its ID.

        The cache is consulted first; on a miss the department is upserted and
        the returned id is cached.
        """
        if dept_code in self.department_cache:
            return self.department_cache[dept_code]

        # The no-op DO UPDATE makes RETURNING yield the id even when the row
        # already exists.
        self.cur.execute("""
            INSERT INTO departments (code) 
            VALUES (%s) 
            ON CONFLICT (code) DO UPDATE SET code = EXCLUDED.code
            RETURNING id
        """, (dept_code,))

        dept_id = self.cur.fetchone()['id']
        self.department_cache[dept_code] = dept_id
        return dept_id

    def _lookup_course_id(self, course_code) -> Optional[int]:
        """Resolve a course code to its cached database id, or None.

        Tries the code as given, with all whitespace removed, and in the
        "DEPT NUMBER" form, so "COMP 110", "COMP110" and "ART101" all resolve
        regardless of how long the department prefix is. Non-string input
        yields None.
        """
        if not isinstance(course_code, str):
            return None

        stripped = course_code.strip()
        compact = ''.join(stripped.split())
        candidates = [stripped, compact]
        match = self._COURSE_CODE_RE.match(compact)
        if match:
            candidates.append(f"{match.group(1)} {match.group(2)}")

        for candidate in candidates:
            course_db_id = self.course_id_cache.get(candidate)
            if course_db_id is not None:
                return course_db_id
        return None

    def _rollback_savepoint(self):
        """Undo everything since the save_course savepoint; never raises."""
        try:
            self.cur.execute("ROLLBACK TO SAVEPOINT save_course_sp")
            self.cur.execute("RELEASE SAVEPOINT save_course_sp")
        except Exception as rollback_error:
            logger.error(f"Could not roll back to savepoint: {rollback_error}")

    def save_course(self, course_data: Dict) -> Optional[int]:
        """Save a course to the database.

        Upserts the course row, then replaces its prerequisites and grade
        requirements when those are present in ``course_data``. All statements
        run inside a savepoint: if anything fails, only this course's changes
        are undone and the surrounding transaction stays usable, so a caller
        that catches the error can carry on with the next course.

        Args:
            course_data: Dict with at least 'department', 'course_id',
                'course_number' and 'course_name'; optional 'description',
                'credits', 'gen_ed', 'grading_status', 'requisites' and
                'grade_requirements'.

        Returns:
            The database id of the saved course.

        Raises:
            Exception: Any error from building or executing the statements is
                logged and re-raised.
        """
        savepoint_open = False
        # Defaults mean "nothing to undo" if we fail before inspecting the caches.
        dept_was_cached = True
        course_was_cached = True
        dept_code = course_data.get('department')
        course_code = course_data.get('course_id')

        try:
            dept_was_cached = dept_code in self.department_cache
            course_was_cached = course_code in self.course_id_cache

            self.cur.execute("SAVEPOINT save_course_sp")
            savepoint_open = True

            dept_id = self.get_or_create_department(course_data['department'])

            # Extract gen_ed as array
            gen_ed = []
            if course_data.get('gen_ed'):
                gen_ed = [course_data['gen_ed']] if isinstance(course_data['gen_ed'], str) else course_data['gen_ed']

            # Insert or update course
            self.cur.execute("""
                INSERT INTO courses 
                (course_id, department_id, course_number, name, description, 
                 credits, gen_ed, grading_status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT (course_id) DO UPDATE SET
                    name = EXCLUDED.name,
                    description = EXCLUDED.description,
                    credits = EXCLUDED.credits,
                    gen_ed = EXCLUDED.gen_ed,
                    grading_status = EXCLUDED.grading_status,
                    updated_at = NOW()
                RETURNING id
            """, (
                course_data['course_id'],
                dept_id,
                course_data['course_number'],
                course_data['course_name'],
                course_data.get('description'),
                course_data.get('credits'),
                gen_ed,
                course_data.get('grading_status')
            ))

            course_db_id = self.cur.fetchone()['id']
            self.course_id_cache[course_data['course_id']] = course_db_id

            # Save prerequisites if present
            if course_data.get('requisites'):
                self._save_prerequisites(course_db_id, course_data['requisites'])

            # Save grade requirements if present
            if course_data.get('grade_requirements'):
                self._save_grade_requirements(course_db_id, course_data['grade_requirements'])

            self.cur.execute("RELEASE SAVEPOINT save_course_sp")
            return course_db_id

        except Exception as e:
            logger.error(f"Error saving course {course_data.get('course_id')}: {e}")
            if savepoint_open:
                # Rows created since the savepoint are about to vanish, so drop
                # any ids cached for them during this call.
                if not dept_was_cached:
                    self.department_cache.pop(dept_code, None)
                if not course_was_cached:
                    self.course_id_cache.pop(course_code, None)
                self._rollback_savepoint()
            raise

    def _insert_requisite_groups(self, course_db_id: int, groups, group_offset: int, is_corequisite: bool):
        """Insert one row per resolvable course in each requisite group."""
        for group_idx, group in enumerate(groups or []):
            if isinstance(group, str):
                group = [group]  # tolerate a bare code where a list was expected
            for course_code in group or []:
                requisite_db_id = self._lookup_course_id(course_code)
                # Courses not in the cache (not scraped yet, or unparseable)
                # are skipped rather than treated as errors.
                if requisite_db_id:
                    self.cur.execute("""
                        INSERT INTO prerequisites 
                        (course_id, prereq_group, prereq_course_id, is_corequisite)
                        VALUES (%s, %s, %s, %s)
                        ON CONFLICT DO NOTHING
                    """, (course_db_id, group_idx + group_offset, requisite_db_id, is_corequisite))

    def _save_prerequisites(self, course_db_id: int, requisites: Dict):
        """Replace the stored prerequisites and corequisites of a course.

        ``requisites`` holds 'prerequisites' and 'corequisites', each a list of
        groups (lists of course codes). Every group is stored under its own
        group number.
        """
        # Clear existing prerequisites
        self.cur.execute("DELETE FROM prerequisites WHERE course_id = %s", (course_db_id,))

        self._insert_requisite_groups(
            course_db_id, requisites.get('prerequisites', []), 0, False)
        self._insert_requisite_groups(
            course_db_id, requisites.get('corequisites', []), self._COREQ_GROUP_OFFSET, True)

    def _save_grade_requirements(self, course_db_id: int, grade_requirements: Dict):
        """Replace the stored minimum-grade requirements of a course.

        ``grade_requirements`` maps a required course code to its minimum
        grade. Codes that cannot be resolved to a cached course are skipped.
        """
        # Clear existing grade requirements
        self.cur.execute("DELETE FROM grade_requirements WHERE course_id = %s", (course_db_id,))

        for req_course_code, min_grade in grade_requirements.items():
            req_db_id = self._lookup_course_id(req_course_code)
            if req_db_id:
                self.cur.execute("""
                    INSERT INTO grade_requirements 
                    (course_id, required_course_id, minimum_grade)
                    VALUES (%s, %s, %s)
                    ON CONFLICT DO NOTHING
                """, (course_db_id, req_db_id, min_grade))

    def commit(self):
        """Commit the current transaction."""
        self.conn.commit()

    def rollback(self):
        """Rollback the current transaction."""
        self.conn.rollback()

    def close(self):
        """Close the cursor and the connection, even if closing the cursor fails."""
        try:
            self.cur.close()
        finally:
            self.conn.close()

In [4]:
# Cell 4: RequisiteParser class
class RequisiteParser:
    """Turn free-text requisite statements into structured data via Gemini.

    Each parse returns a dict with 'prerequisites' and 'corequisites' (lists of
    groups, each group a list of course codes), 'grade_requirements' (course
    code -> minimum grade) and 'requisites_note'. When the model call or the
    JSON decoding fails, a regex-based fallback is used and the failure is
    recorded in ``failed_parses``.
    """

    def __init__(self, model="gemini-2.5-flash", delay: float = 2.1):
        """Initialize the parser with Gemini API.

        Args:
            model: Name of the Gemini model to instantiate.
            delay: Minimum number of seconds between two API calls.
        """
        self.model = genai.GenerativeModel(model)
        self.delay = delay
        self.api_calls = 0          # number of model calls attempted
        self.failed_parses = []     # (course_id, error message) per failed call
        self.last_call_time = 0
        
    def parse_requisites(self, raw: str, course_id: Optional[str] = None) -> dict:
        """Parse requisites using Gemini API.

        Args:
            raw: The requisite statement as scraped from the catalog.
            course_id: Course the statement belongs to; only used to label
                entries in ``failed_parses``.

        Returns:
            The validated result dict. Blank input returns an all-empty result
            without calling the API; any failure returns the fallback parse.
        """
        if not raw or not raw.strip():
            return {
                "prerequisites": [],
                "corequisites": [],
                "grade_requirements": {},
                "requisites_note": None
            }
        
        # Rate limiting: wait until at least `delay` seconds have passed since
        # the previous call.
        current_time = time.time()
        time_since_last_call = current_time - self.last_call_time
        if time_since_last_call < self.delay:
            sleep_time = self.delay - time_since_last_call
            time.sleep(sleep_time)
        
        self.last_call_time = time.time()
        self.api_calls += 1
        
        # NOTE(review): the "Example complex requisite" at the end of this prompt
        # encodes "(211 and 301) or (401, 410 and 411)" as two inner lists, which
        # under the schema stated at the top of the prompt reads as
        # (211 or 301) AND (401 or 410 or 411). The prompt text is left as-is
        # because changing it changes model behavior -- confirm the intended
        # encoding before editing.
        prompt = f"""Parse the following course requisite statement and return a JSON object with this exact structure:

{{
    "prerequisites": [
        // List of AND-groups, where each group is a list of courses that can be taken as alternatives (OR)
        // Example: [["COMP 110"], ["MATH 231", "MATH 241"]] means COMP 110 AND (MATH 231 OR MATH 241)
    ],
    "corequisites": [
        // Same structure as prerequisites but for co-requisites
    ],
    "grade_requirements": {{
        // Map of course to required grade
        // Example: {{"COMP 110": "C", "MATH 231": "C+"}}
    }},
    "requisites_note": // String with any additional requirements like "permission of instructor" or null if none
}}

CRITICAL PARSING RULES:

1. AND relationships (all required):
   - Separated by "and", semicolons (;), or commas in a list
   - Example: "COMP 110 and MATH 231" → [["COMP 110"], ["MATH 231"]]
   - Example: "COMP 210; COMP 211; COMP 301" → [["COMP 210"], ["COMP 211"], ["COMP 301"]]

2. OR relationships (choose one):
   - Separated by "or"
   - Example: "COMP 283 or MATH 381 or STOR 315" → [["COMP 283", "MATH 381", "STOR 315"]]

3. Mixed AND/OR:
   - Example: "MATH 231 or 241; COMP 210, COMP 211, and COMP 301"
   - Parse as: [["MATH 231", "MATH 241"], ["COMP 210"], ["COMP 211"], ["COMP 301"]]
   - The semicolon separates AND groups, "or" creates OR options within a group

4. Pre- or corequisites:
   - Add the SAME courses to BOTH prerequisites and corequisites arrays
   - Example: "Pre- or corequisites, COMP 283 or MATH 381"
   - Prerequisites: [["COMP 283", "MATH 381"]]
   - Corequisites: [["COMP 283", "MATH 381"]]

5. Grade requirements:
   - Look for "grade of X or better", "C or better", etc.
   - Apply to ALL courses mentioned in the same clause
   - Example: "COMP 211 and COMP 301; a grade of C or better is required in both"
   - grade_requirements: {{"COMP 211": "C", "COMP 301": "C"}}

6. Course code format:
   - Always format as "DEPT ###" with a space (e.g., "COMP 110", not "COMP110")
   - Include letter suffixes if present (e.g., "BIOL 101L")

7. Special requirements (if applicable, put in requisites_note):
   - "Permission of the instructor" → Include exact text
   - "May be repeated for credit" → Include this note
   - "Not open to students who have credit for X" → Include full restriction
   - "for students lacking the prerequisite" → Include context
   - Any GPA requirements → Include exact GPA needed
   - Class standing restrictions (e.g., "Juniors and seniors only")

Common patterns to recognize:
- "Prerequisites, X and Y" → both required
- "Prerequisite, X or Y" → choose one
- "Prerequisites, X; Y or Z" → X is required AND (Y OR Z)
- "one of the following" → all listed courses are OR options
- "all of the following" → all listed courses are AND requirements
- "permission of the instructor for students lacking the prerequisite" → courses are still required, but add note about permission option

Example complex requisite:
"Prerequisites, COMP 211 and 301, or COMP 401, 410, and 411; a grade of C or better is required in all prerequisite courses; permission of the instructor for students lacking the prerequisites; may be repeated for credit."

Should parse to:
{{
    "prerequisites": [["COMP 211", "COMP 301"], ["COMP 401", "COMP 410", "COMP 411"]],
    "corequisites": [],
    "grade_requirements": {{"COMP 211": "C", "COMP 301": "C", "COMP 401": "C", "COMP 410": "C", "COMP 411": "C"}},
    "requisites_note": "permission of the instructor for students lacking the prerequisites; may be repeated for credit"
}}

Requisite statement to parse:
{raw}

Return ONLY the JSON object, no explanation or markdown."""

        try:
            response = self.model.generate_content(prompt)
            json_text = response.text.strip()
            # Models sometimes wrap the answer in a Markdown fence despite the
            # instruction; accept a fence with or without a language tag.
            json_text = re.sub(r'^```[A-Za-z]*\s*', '', json_text)
            json_text = re.sub(r'\s*```$', '', json_text)
            
            result = json.loads(json_text)
            if not isinstance(result, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(result).__name__}")
            return self._validate_result(result)
            
        except Exception as e:
            if course_id:
                self.failed_parses.append((course_id, str(e)))
            return self._fallback_parse(raw)
    
    def _validate_result(self, result: dict) -> dict:
        """Validate and clean the parsed result.

        Guarantees the shape the rest of the pipeline relies on:
        'prerequisites'/'corequisites' become lists of non-empty lists of
        non-empty strings (a bare string is wrapped into a one-item group,
        anything else is dropped), and 'grade_requirements' becomes a dict of
        string -> non-empty string.
        """
        validated = {
            "prerequisites": result.get("prerequisites", []),
            "corequisites": result.get("corequisites", []),
            "grade_requirements": result.get("grade_requirements", {}),
            "requisites_note": result.get("requisites_note", None)
        }
        
        for key in ["prerequisites", "corequisites"]:
            groups = validated[key]
            if not isinstance(groups, list):
                validated[key] = []
                continue

            cleaned_list = []
            for item in groups:
                if isinstance(item, str):
                    item = [item]
                if not isinstance(item, list):
                    continue
                # Keep only real course-code strings; null or numeric entries
                # would otherwise break the string handling downstream.
                codes = [code.strip() for code in item
                         if isinstance(code, str) and code.strip()]
                if codes:
                    cleaned_list.append(codes)
            validated[key] = cleaned_list
        
        grades = validated["grade_requirements"]
        if not isinstance(grades, dict):
            validated["grade_requirements"] = {}
        else:
            validated["grade_requirements"] = {
                course: grade.strip()
                for course, grade in grades.items()
                if isinstance(course, str) and isinstance(grade, str) and grade.strip()
            }
        
        return validated
    
    def _fallback_parse(self, raw: str) -> dict:
        """Basic fallback parser if API fails.

        Extracts every course code found in ``raw`` and treats each one as its
        own required group; AND/OR structure is not recovered. Codes are
        emitted in "DEPT NUMBER" form in both 'prerequisites' and
        'grade_requirements'.
        """
        course_pattern = re.compile(r'\b([A-Z]{2,5})\s?(\d{2,3}[A-Z]?\d?[A-Z]?)\b')
        normalized_courses = [
            f"{found.group(1)} {found.group(2)}"
            for found in course_pattern.finditer(raw)
        ]
        
        prerequisites = [[course] for course in normalized_courses]
        
        grade_requirements = {}
        if 'C or better' in raw or 'grade of C' in raw:
            for course in normalized_courses:
                grade_requirements[course] = 'C'
        
        note = None
        if 'permission' in raw.lower() or 'instructor' in raw.lower():
            note = "Permission of instructor may be required"
        
        return {
            "prerequisites": prerequisites,
            "corequisites": [],
            "grade_requirements": grade_requirements,
            "requisites_note": note
        }

In [5]:
# Cell 5: Scraping functions
def get_department_links(only=None, timeout: float = 30):
    """Scrape all department links from the main courses page.

    Args:
        only: Optional collection of upper-case department codes; when given,
            only links whose code is in it are returned.
        timeout: Seconds to wait for the catalog server before giving up.

    Returns:
        List of ``(dept_code, absolute_url)`` tuples in page order.

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
        ValueError: If the page does not contain the A-Z index container.
    """
    response = requests.get(COURSE_INDEX_URL, timeout=timeout)
    # Without this an error page would be parsed as if it were the index.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    index_div = soup.find("div", {"id": "atozindex"})
    if index_div is None:
        raise ValueError(
            f"Could not find the department index (div#atozindex) at {COURSE_INDEX_URL}")

    links = []
    for a in index_div.find_all("a", href=True):
        # The code is the second-to-last '/'-separated piece of the href,
        # e.g. "/courses/comp/" -> "comp". Skip links too short to have one.
        href_parts = a['href'].split("/")
        if len(href_parts) < 2 or not href_parts[-2]:
            continue
        dept_code = href_parts[-2].upper()
        if only is None or dept_code in only:
            links.append((dept_code, urljoin(BASE_URL, a['href'])))

    return links

def parse_course_block(block, parser: RequisiteParser):
    """Parse a course block from the HTML.

    Args:
        block: Parsed HTML element for one course (supports ``find``).
        parser: RequisiteParser used to structure the requisites text.

    Returns:
        Dict with department, course_number, course_name, credits,
        description, requisites, grade_requirements, requisites_note, gen_ed
        and grading_status. The 'course_id' key ("DEPT NUMBER") is present only
        when the header's course code could be split into department and
        number.
    """
    data = {
        "department": None,
        "course_number": None,
        "course_name": None,
        "credits": None,
        "description": None,
        "requisites": {"prerequisites": [], "corequisites": []},
        "grade_requirements": {},
        "requisites_note": None,
        "gen_ed": None,
        "grading_status": None
    }

    # Header line: <strong> tags hold code, title and credits, in that order.
    header = block.find("div", class_="cols noindent")
    if header:
        strong_tags = header.find_all("strong")
        if len(strong_tags) >= 3:
            code = strong_tags[0].text.strip()
            # Split on any whitespace (including non-breaking spaces) and set
            # the identifier only when both halves exist, so a malformed code
            # can never produce the bogus id "None None".
            code_parts = code.split(None, 1)
            if len(code_parts) == 2:
                department, course_number = code_parts
                course_number = course_number.rstrip(".")
                data["department"] = department
                data["course_number"] = course_number
                data["course_id"] = f"{department} {course_number}"
            data["course_name"] = strong_tags[1].text.strip()
            # Handles both "3 Credits." and the singular "1 Credit.".
            data["credits"] = re.sub(r" Credits?\.", "", strong_tags[2].text.strip())

    # Description
    desc_block = block.find("p", class_="courseblockextra")
    if desc_block:
        data["description"] = desc_block.text.strip()

    # Requisites - using LLM parser
    req_span = block.find("span", class_="text detail-requisites margin--default")
    if req_span:
        course_id = data.get("course_id", "Unknown")
        req_data = parser.parse_requisites(req_span.text, course_id)
        data["requisites"] = {
            "prerequisites": req_data["prerequisites"],
            "corequisites": req_data["corequisites"]
        }
        data["grade_requirements"] = req_data["grade_requirements"]
        data["requisites_note"] = req_data["requisites_note"]

    # Gen Ed
    idea_span = block.find("span", class_="text detail-idea_action margin--default")
    if idea_span:
        idea_text = idea_span.text.strip().replace("IDEAs in Action Gen Ed:", "")
        data["gen_ed"] = idea_text.strip()

    # Grading
    grading_span = block.find("span", class_="text detail-grading_status margin--default")
    if grading_span:
        data["grading_status"] = grading_span.text.strip().replace("Grading Status: ", "")

    return data

def parse_department(url, parser: RequisiteParser, dept_code: str, db_manager: DatabaseManager, mode: str = 'database', timeout: float = 30):
    """Parse all courses from a department page.

    Args:
        url: Absolute URL of the department's course listing.
        parser: RequisiteParser used for each course's requisites.
        dept_code: Department code, used only in progress messages.
        db_manager: DatabaseManager to save into, or None to skip saving.
        mode: 'database', 'json' or 'both'; controls saving and collecting.
        timeout: Seconds to wait for the catalog server before giving up.

    Returns:
        List of parsed course dicts when mode is 'json' or 'both', otherwise
        an empty list.

    Raises:
        requests.RequestException: On network failure or a non-2xx response,
            so a broken page is not reported as a department with 0 courses.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    course_blocks = soup.find_all("div", class_="courseblock")
    
    total_courses = len(course_blocks)
    print(f"   Found {total_courses} courses to parse")
    
    courses = []
    saved_count = 0
    
    for i, cb in enumerate(course_blocks, 1):
        # Read the course code up front so progress can be shown before the
        # (slow) requisite parsing of this block starts.
        header = cb.find("div", class_="cols noindent")
        course_id = "Unknown"
        if header:
            strong_tags = header.find_all("strong")
            if strong_tags:
                course_id = strong_tags[0].text.strip()
        
        print(f"   Processing {course_id} ({i}/{total_courses})...", end='\r')
        
        course_data = parse_course_block(cb, parser)
        
        # Save to database if in database mode
        if mode in ['database', 'both'] and db_manager:
            try:
                db_manager.save_course(course_data)
                saved_count += 1
            except Exception as e:
                # One bad course must not stop the rest of the department.
                logger.error(f"Failed to save {course_id}: {e}")
        
        # Collect for JSON if needed
        if mode in ['json', 'both']:
            courses.append(course_data)
    
    print(f"   ✓ Completed all {total_courses} courses in {dept_code} (saved {saved_count} to database)     ")
    return courses

def scrape_all_courses(parser: RequisiteParser, db_manager: Optional[DatabaseManager] = None, 
                      only=None, mode='database', dry_run=False, update_existing=True):
    """
    Scrape all courses with flexible output options.
    
    Args:
        parser: RequisiteParser instance
        db_manager: DatabaseManager instance (required for database mode
            unless dry_run is True)
        only: Set of department codes to scrape (None for all)
        mode: 'database', 'json', or 'both'
        dry_run: If True, don't actually save anything
        update_existing: Echoed in the run banner only. NOTE: skipping of
            existing courses is not implemented; existing courses are always
            updated regardless of this flag.

    Returns:
        Dict mapping department code to its list of course dicts when mode is
        'json' or 'both'; an empty dict otherwise.

    Raises:
        ValueError: If mode is not one of the supported values, or if a
            database mode is requested for a real run without a db_manager.
            Checked before any scraping so a misconfigured run fails
            immediately instead of after all the API calls.
    """
    valid_modes = ('database', 'json', 'both')
    if mode not in valid_modes:
        raise ValueError(f"Unknown mode {mode!r}; expected one of {valid_modes}")
    if mode in ('database', 'both') and not dry_run and db_manager is None:
        raise ValueError(f"mode={mode!r} requires a db_manager unless dry_run=True")

    department_links = get_department_links(only=only)
    all_courses = {}
    
    print(f"\n🎯 Starting scrape of {len(department_links)} departments")
    print(f"   Mode: {mode}")
    print(f"   Dry run: {dry_run}")
    print(f"   Update existing: {update_existing}\n")
    
    overall_start_time = time.time()

    for dept_idx, (dept_code, url) in enumerate(department_links, 1):
        try:
            print(f"📚 [{dept_idx}/{len(department_links)}] Scraping {dept_code}...")
            dept_start_time = time.time()
            
            # Start each department from a clean transaction boundary so that a
            # later rollback only discards this department's work.
            if db_manager and not dry_run:
                db_manager.conn.commit()  # Commit any pending changes
            
            courses = parse_department(url, parser, dept_code, db_manager if not dry_run else None, mode)
            
            if mode in ['json', 'both']:
                all_courses[dept_code] = courses
            
            # Commit department transaction
            if db_manager and not dry_run and mode in ['database', 'both']:
                db_manager.commit()
            
            dept_elapsed = time.time() - dept_start_time
            print(f"✅ Successfully scraped {dept_code} in {dept_elapsed/60:.1f} minutes\n")
            
        except Exception as e:
            # A failed department is reported and rolled back; the run goes on.
            print(f"❌ Error scraping {dept_code}: {e}\n")
            if db_manager and not dry_run:
                db_manager.rollback()
    
    overall_elapsed = time.time() - overall_start_time
    print(f"⏱️  Total scraping time: {overall_elapsed/60:.1f} minutes")
    
    return all_courses

def save_to_json(data, filename="unc_courses.json"):
    """Write ``data`` to ``filename`` as indented UTF-8 JSON and report the path.

    Non-ASCII characters are written as-is rather than as \\u escapes.
    """
    out_handle = open(filename, mode="w", encoding="utf-8")
    with out_handle:
        json.dump(obj=data, fp=out_handle, ensure_ascii=False, indent=2)
    print("\n💾 Saved to {}".format(filename))

In [6]:
# Cell 6: Main execution
# Configuration options
MODE = 'database'  # 'database', 'json', or 'both'
DRY_RUN = False    # Set to True to test without saving
UPDATE_EXISTING = True  # Passed through to scrape_all_courses
# None scrapes every department; use a set such as {"COMP", "BIOL", "CHEM"}
# to scrape only a sample.
ONLY_DEPARTMENTS = None

# Initialize components
parser = RequisiteParser(model="gemini-2.5-flash-lite", delay=0.5)
db_manager = DatabaseManager(DATABASE_URL)

try:
    # Pass every configuration flag explicitly; previously DRY_RUN and
    # UPDATE_EXISTING were defined above but never reached the scraper.
    courses = scrape_all_courses(
        parser,
        db_manager,
        only=ONLY_DEPARTMENTS,
        mode=MODE,
        dry_run=DRY_RUN,
        update_existing=UPDATE_EXISTING
    )

    # Save JSON backup if requested
    if MODE in ['json', 'both'] and courses:
        json_filename = "unc_courses_all.json" if ONLY_DEPARTMENTS is None else "unc_courses_sample.json"
        save_to_json(courses, json_filename)
finally:
    # Close database connection even when the scrape is interrupted or fails.
    db_manager.close()

# Print statistics
print(f"\n📊 Statistics:")
print(f"   Total API calls: {parser.api_calls}")
print(f"   Failed parses: {len(parser.failed_parses)}")
if parser.failed_parses:
    print("\n⚠️  Failed to parse requisites for:")
    for course_id, error in parser.failed_parses[:5]:
        print(f"   - {course_id}: {error[:50]}...")
    if len(parser.failed_parses) > 5:
        print(f"   ... and {len(parser.failed_parses) - 5} more")


2025-07-22 11:55:57,473 - INFO - Loaded 7 departments and 564 courses into cache



🎯 Starting scrape of 152 departments
   Mode: database
   Dry run: False
   Update existing: True

📚 [1/152] Scraping AERO...
   Found 13 courses to parse
   ✓ Completed all 13 courses in AERO (saved 13 to database)     
✅ Successfully scraped AERO in 0.1 minutes

📚 [2/152] Scraping AAAD...
   Found 123 courses to parse
   ✓ Completed all 123 courses in AAAD (saved 123 to database)     
✅ Successfully scraped AAAD in 0.2 minutes

📚 [3/152] Scraping AMST...
   Found 108 courses to parse
   ✓ Completed all 108 courses in AMST (saved 108 to database)     
✅ Successfully scraped AMST in 0.2 minutes

📚 [4/152] Scraping ANTH...
   Found 253 courses to parse
   ✓ Completed all 253 courses in ANTH (saved 253 to database)     
✅ Successfully scraped ANTH in 0.5 minutes

📚 [5/152] Scraping APPL...
   Found 38 courses to parse
   ✓ Completed all 38 courses in APPL (saved 38 to database)     
✅ Successfully scraped APPL in 0.2 minutes

📚 [6/152] Scraping ARAB...
   Found 24 courses to parse
   ✓ 

In [None]:
# Cell 7: Database verification
def verify_scraping_results():
    """Print a short summary of what the database holds after scraping.

    Shows the overall statistics reported by ``CourseDatabase`` and, for a few
    fixed sample courses that exist, their name and number of prerequisite
    groups.
    """
    from db_queries import CourseDatabase

    courses_to_show = ("COMP 110", "COMP 211", "BIOL 101")

    with CourseDatabase() as db:
        stats = db.get_database_stats()
        print("\n🔍 Database Contents:")
        print(f"   Total courses: {stats['total_courses']}")
        print(f"   Courses with prerequisites: {stats['courses_with_prereqs']}")

        # Show sample courses
        print("\n📚 Sample courses:")
        for course_id in courses_to_show:
            course = db.get_course(course_id)
            if not course:
                continue  # sample course not in the database
            prereqs = db.get_course_prerequisites(course_id)
            print(f"   {course_id}: {course['name']}")
            prereq_groups = prereqs['prerequisites']
            if prereq_groups:
                print(f"      Prerequisites: {len(prereq_groups)} groups")

# Run verification
verify_scraping_results()

In [None]:
# Cell to count non-empty requisites across all departments
def count_requisites(only=None, seconds_per_call: float = 2.1, timeout: float = 30):
    """Count how many courses have non-empty requisites across departments.

    Only page fetches are made (no LLM calls), so this is a cheap way to
    estimate how many API calls a full scrape will need.

    Args:
        only: Optional set of department codes to analyze (None for all).
        seconds_per_call: Assumed time per API call, used for the estimate.
        timeout: Seconds to wait for each department page before giving up.

    Returns:
        Dict mapping department code to
        ``{"total", "with_requisites", "percentage"}``. Departments whose page
        could not be fetched are logged and left out.
    """
    department_links = get_department_links(only=only)
    
    total_courses = 0
    courses_with_requisites = 0
    dept_stats = {}
    
    print(f"🔍 Analyzing {len(department_links)} departments...\n")
    
    for dept_code, url in tqdm(department_links, desc="Scanning departments"):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            # Skip this department instead of counting an error page as
            # "0 courses" or aborting the whole analysis.
            logger.warning(f"Skipping {dept_code}: {e}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        course_blocks = soup.find_all("div", class_="courseblock")
        
        dept_total = len(course_blocks)
        dept_with_reqs = 0
        
        for block in course_blocks:
            req_span = block.find("span", class_="text detail-requisites margin--default")
            # A span holding nothing but its "Requisites:" label counts as empty.
            if req_span and req_span.text.strip() and req_span.text.strip() != "Requisites:":
                dept_with_reqs += 1
        
        dept_stats[dept_code] = {
            "total": dept_total,
            "with_requisites": dept_with_reqs,
            "percentage": (dept_with_reqs / dept_total * 100) if dept_total > 0 else 0
        }
        
        total_courses += dept_total
        courses_with_requisites += dept_with_reqs
    
    # Guard the overall percentage the same way as the per-department one, so
    # an empty result (e.g. `only` matched nothing) does not divide by zero.
    overall_percentage = (courses_with_requisites / total_courses * 100) if total_courses > 0 else 0

    # Print summary
    print(f"\n📊 Requisite Analysis Complete!\n")
    print(f"Total courses across all departments: {total_courses}")
    print(f"Courses with requisites: {courses_with_requisites}")
    print(f"Courses without requisites: {total_courses - courses_with_requisites}")
    print(f"Percentage with requisites: {overall_percentage:.1f}%")
    print(f"\n💡 You will need {courses_with_requisites} API calls")
    print(f"⏱️  Estimated time at {seconds_per_call}s/call: {courses_with_requisites * seconds_per_call / 60:.1f} minutes")
    
    # Show top departments by requisite count
    print(f"\n📈 Top 10 departments by requisite count:")
    sorted_depts = sorted(dept_stats.items(), key=lambda x: x[1]['with_requisites'], reverse=True)[:10]
    for dept, stats in sorted_depts:
        print(f"   {dept}: {stats['with_requisites']}/{stats['total']} courses ({stats['percentage']:.0f}%)")
    
    return dept_stats

# Run the analysis
# For all departments:
dept_stats = count_requisites()

# Or for specific departments:
# dept_stats = count_requisites(only={"COMP", "MATH", "BIOL", "CHEM", "PHYS"})

🔍 Analyzing 152 departments...



Scanning departments: 100%|██████████| 152/152 [01:04<00:00,  2.35it/s]


📊 Requisite Analysis Complete!

Total courses across all departments: 10212
Courses with requisites: 2759
Courses without requisites: 7453
Percentage with requisites: 27.0%

💡 You will need 2759 API calls
⏱️  Estimated time at 2.1s/call: 96.6 minutes

📈 Top 10 departments by requisite count:
   BIOL: 167/264 courses (63%)
   PSYC: 101/183 courses (55%)
   NURS: 91/169 courses (54%)
   ECON: 81/145 courses (56%)
   COMP: 79/109 courses (72%)
   PHCY: 75/112 courses (67%)
   CHEM: 74/112 courses (66%)
   SPAN: 71/119 courses (60%)
   MATH: 70/110 courses (64%)
   COMM: 69/213 courses (32%)



