In [128]:
import numpy as np
import pandas as pd
from neo4j import GraphDatabase
from typing import List, Dict
import os
import streamlit as st
import re
import pandas as pd
import ast
from ast import literal_eval

## Processing Raw Data(Scraped from course catalogue)

### Generate Current Course Catalogue(Used for Scraping course indexes)

In [2]:
directory = 'data'
file_name = 'Course_Catalogue.txt'
file_path = os.path.join(directory, file_name)
combined_lines = ''
# Combine lines with '|'
try:
    with open(file_path, 'r') as file:
        combined_lines = '|'.join(line.strip() for line in file if line.strip())
        combined_lines = '('+combined_lines+')'
    print("Combined Lines:\n", combined_lines)
except FileNotFoundError:
    print(f"The file '{file_name}' was not found in the directory '{directory}'.")

The file 'Course_Catalogue.txt' was not found in the directory 'data'.


## Opening and Specifying Directory

In [None]:
source_directory = 'data/raw'
target_directory = 'data/processed'

# Define the source and target directories
def is_graduate_course(title):
    match = re.search(r'\b\d{1,3}', title)  # Extract course number
    if match:
        course_number = int(match.group())  # Convert to integer
        return course_number < 200  # Check if it's a graduate course
    return False  # If no number is found, consider it not a graduate course

# Split the title and extract the first two elements
def get_index(title):
    split_title = title.split()  # Split the title by spaces
    return " ".join(split_title[:2]).strip(".")  # Join the first two elements back into a string

def get_name(title):
    # Split the title by whitespace
    parts = title.split()

    # Remove the first two elements (course code and number)
    course_name_parts = parts[2:-1]  # Exclude the last element (e.g., '(4)')

    # Join the remaining parts to reconstruct the course name
    course_name = ' '.join(course_name_parts)

    return course_name

def get_unit(description):
    # Use regex to find the number inside parentheses
    match = re.search(r'\((\d+)\)', description)
    if match:
        return int(match.group(1))  # Convert the unit to an integer
    return None  # Return None if no unit is found

def get_tags(description):
    # Split the string by "Tags:" and check if tags exist
    if "Tags:" in description:
        tags_part = description.split("Tags:")[1]  # Extract the part after "Tags:"
        # Split tags into a list and strip any extra whitespace
        tags = [tag.strip() for tag in tags_part.split(',')]
        return tags
    return []  # Return an empty list if no tags are found

def split_and_extract_courses(data):
    """
    Split the input by "and" and extract valid course numbers for each group.

    Args:
        data (list or str): Input data containing course information.

    Returns:
        list: A list where each element represents an "AND" group with extracted course numbers.
    """
    # Regular expression to match valid course numbers
    course_pattern = rf'(?i){combined_lines} (\d+[A-Z]?[A-Z]?)'

    def tuples_to_strings(tuples_list):
        """
        Convert a list of tuples into a list of regular strings.

        Args:
            tuples_list (list of tuples): Each tuple contains two strings, e.g., [('MATH', '20AC'), ('MATH', '20BC')].

        Returns:
            list of str: A list of strings where the tuple elements are joined with a space.
        """
        return [f"{dept} {course}" for dept, course in tuples_list]

    def process_string(s):
        """Helper function to process a string, split by 'and', and extract course numbers."""
        and_groups = s.split(' and ')
        courses = [re.findall(course_pattern, group) for group in and_groups]
        itemized_courses = []
        for course in courses:
            itemized_courses.append(tuples_to_strings(course))
        return itemized_courses

    # If the input is a string, process it
    if isinstance(data, str):
        return process_string(data)





## Extracting Key Informations(Prereqs, Units, Major Restrictions)

In [None]:
for file_name in os.listdir(source_directory):

    if file_name.endswith('.csv'):  # Only process CSV files
        prerequisites = []
        major_restrictions = []
        course_indexes = []
        course_units = []
        course_titles = []
        course_descriptions = []
        course_tags = []
        source_file_path = os.path.join(source_directory, file_name)
        target_file_path = os.path.join(target_directory, file_name)

        # Read the CSV file
        raw_data = pd.read_csv(source_file_path)
        data = raw_data[raw_data['Title'].apply(is_graduate_course)]

        # Extract course titles into a list
        course_indexes = data['Title'].apply(get_index).tolist()
        course_titles = data['Title'].apply(get_name).tolist()
        course_units = data['Title'].apply(get_unit).tolist()
        course_tags = data['Title'].apply(get_tags).tolist()


        # Extract descriptions into a list
        course_descriptions = data['Description'].tolist()

        # Loop through the course descriptions to extract key info
        for description in course_descriptions:
            # Check for major restrictions
            if "restricted to" in description.lower():
                # Extract the part mentioning restricted majors
                match = re.search(r"restricted to (.+?)(?:\.|$)", description, re.IGNORECASE)
                if match:
                    restriction_text = match.group(1)
                    # Extract major codes (e.g., CS25, EC26)
                    majors = re.findall(r"[A-Z]{2}\d{2}", restriction_text)

                    # Check if unrestricted condition is also mentioned
                    if "all other students will be allowed" in description.lower():
                        major_restrictions.append([])  # Unrestricted as space permits
                    else:
                        major_restrictions.append(majors)  # Only restricted to listed majors
                else:
                    major_restrictions.append([])  # No valid restriction found
            else:
                major_restrictions.append([])  # No restrictions mentioned

            # Match prerequisites that look like course numbers (e.g., "CSE 12", "MATH 10A")
            match = re.search(r"Prerequisite[s]*: (.+?)(?:;|\.|$)", description)

            if match:
                # Extract the part of the description mentioning prerequisites
                prereqs_text = split_and_extract_courses(match.group(1))
                print('out', prereqs_text)
            else:
                prereqs_text = []  # No prerequisites found
            prerequisites.append(prereqs_text)

        # Display the results in a structured format
        structured_data = {
            "Course_Index": course_indexes,
            "Course_Title": course_titles,
            "Course_Units" : course_units,
            "Course_Description": course_descriptions,
            "Course_Prerequisites": prerequisites,
            "Major_Restriction" : major_restrictions,
            "Course_Tags": course_tags
        }

        df_structured = pd.DataFrame(structured_data)
        df_structured.to_csv(target_file_path, index=False)


        print("Sent to", target_file_path)


out [[]]
out [['MATH 10A', 'MATH 20A']]
out [['MATH 10A', 'MATH 20A']]
out [[]]
out [['CSE 8A']]
out [[]]
out [['CSE 8B', 'CSE 11']]
out [['CSE 8B', 'CSE 11', 'CSE 12', 'DSC 30']]
out [['CSE 11', 'CSE 6R', 'CSE 8A', 'CSE 8B', 'ECE 15']]
out [['CSE 20', 'MATH 15A', 'MATH 31CH']]
out [['CSE 11', 'CSE 8B', 'ECE 15']]
out [['CSE 15L', 'CSE 29', 'ECE 15']]
out [[], []]
out [['CSE 12']]
out [[]]
out [[]]
out [[]]
out [[]]
out [['CSE 21', 'MATH 154', 'MATH 158', 'MATH 184', 'MATH 188'], ['CSE 12'], ['CSE 15L'], ['CSE 30', 'ECE 15']]
out [['CSE 21', 'MATH 154', 'MATH 158', 'MATH 184', 'MATH 188'], ['CSE 12'], ['CSE 15L'], ['CSE 30', 'ECE 15']]
out [['CSE 21', 'MATH 154', 'MATH 158', 'MATH 184', 'MATH 188'], ['CSE 12', 'DSC 30']]
out [['MATH 20B'], ['CSE 21', 'MATH 154', 'MATH 158', 'MATH 184', 'MATH 188']]
out [['CSE 12'], ['CSE 20', 'MATH 109', 'MATH 15A', 'MATH 31CH'], ['CSE 21', 'MATH 100A', 'MATH 103A', 'MATH 154', 'MATH 158', 'MATH 184', 'MATH 188']]
out [['MATH 18', 'MATH 31AH'], ['MATH 

## Main DataBase Class

In [197]:
from neo4j import GraphDatabase
from typing import List, Dict

class CourseDatabase:
    def __init__(self, uri: str, username: str, password: str):
        """Initialize connection to Neo4j database."""
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def close(self):
        """Close the database connection."""
        self.driver.close()

    def add_course(self, code: str, name: str) -> None:
        """Add a new course to the database."""
        with self.driver.session() as session:
            session.execute_write(self._create_course, code, name)

    @staticmethod
    def _create_course(tx, code: str, name: str):
        query = """
        MERGE (c:Course {code: $code})
        SET c.name = $name
        RETURN c
        """
        result = tx.run(query, code=code, name=name)
        return result.single()



    def get_prerequisites(self, course_code: str, recursive: bool = True) -> List[Dict]:
        """
        Get prerequisites for a course, including ORGroup members. If recursive=True, fetch indirect prerequisites as well.

        Args:
            course_code (str): The course code to query.
            recursive (bool): Whether to fetch indirect prerequisites (default: True).

        Returns:
            List[Dict]: A list of dictionaries containing prerequisite course codes, names, and ORGroups.
        """
        with self.driver.session() as session:
            return session.execute_read(self._get_prerequisites, course_code, recursive)

    @staticmethod
    def _get_prerequisites(tx, course_code: str, recursive: bool) -> List[Dict]:
        """
        Helper method to retrieve prerequisites, including ORGroup members.

        Args:
            tx: The Neo4j transaction object.
            course_code (str): The course code to query.
            recursive (bool): Whether to fetch indirect prerequisites.

        Returns:
            List[Dict]: A list of prerequisite courses and their ORGroups as dictionaries.
        """
        depth = "*" if recursive else "1"
        query = f"""
        MATCH path = (c:Course {{code: $course_code}})-[:REQUIRES{depth}]->(group:ORGroup)
        WITH group, nodes(path) AS pathNodes
        OPTIONAL MATCH (group)<-[:MEMBER_OF]-(prereq:Course)
        RETURN DISTINCT group.name AS ORGroup, collect(DISTINCT prereq.code) AS PrerequisiteCodes,
            pathNodes AS PathNodes
        """
        result = tx.run(query, course_code=course_code)
        prerequisites = []
        for record in result:
            prerequisites.append({
                "ORGroup": record["ORGroup"],
                "PrerequisiteCodes": record["PrerequisiteCodes"],
                "PathNodes": [node["code"] for node in record["PathNodes"] if "code" in node]
            })
        return prerequisites



    def get_available_courses(self, completed_courses: List[str]) -> List[Dict]:
        """Get courses where all prerequisites have been completed."""
        with self.driver.session() as session:
            return session.execute_read(self._get_available_courses, completed_courses)

    @staticmethod
    def _get_available_courses(tx, completed_courses: List[str]):
        query = """
        MATCH (c:Course)
        WHERE (
            // Either has no prerequisites
            NOT EXISTS((c)-[:REQUIRES]->(:Course))
            OR
            // Or all prerequisites are completed
            ALL(prereq IN [(c)-[:REQUIRES]->(p) | p.code]
                WHERE prereq IN $completed_courses)
        )
        // Exclude courses that are already completed
        AND NOT c.code IN $completed_courses
        RETURN c.code as code, c.name as name
        """
        result = tx.run(query, completed_courses=completed_courses)
        return [dict(record) for record in result]
    
    def add_prerequisites(self, course_code: str, prereq_groups: List[List[str]]) -> None:
        """Add prerequisite relationships between courses."""
        print("hai")
        with self.driver.session() as session:
            session.execute_write(self._create_prerequisites, course_code, prereq_groups)

    @staticmethod
    def _create_prerequisites(tx, course_code: str, prereq_groups: List[List[str]]):
        """
        Creates prerequisite relationships with explicit ORGroup nodes.
        """
        for group_index, prereq_group in enumerate(prereq_groups):
            if not prereq_group:  # Skip empty prerequisite groups
                continue

            # Create an ORGroup node
            or_group_name = f"{course_code}_ORGroup_{group_index}"
            create_or_group_query = """
            MERGE (group:ORGroup {name: $or_group_name})
            MERGE (course:Course {code: $course_code})
            MERGE (course)-[:REQUIRES]->(group)
            """
            tx.run(create_or_group_query, or_group_name=or_group_name, course_code=course_code)

            # Link each prerequisite to the ORGroup
            for prereq_code in prereq_group:
                link_prereq_query = """
                MATCH (group:ORGroup {name: $or_group_name})
                MATCH (prereq:Course {code: $prereq_code})
                MERGE (prereq)-[:MEMBER_OF]->(group)
                """
                tx.run(link_prereq_query, or_group_name=or_group_name, prereq_code=prereq_code)

    def create_lower_division_category(self):
        with self.driver.session() as session:
            session.run("CREATE (:Category {name: 'Lower Division'})")

            # Add Mathematics Sequence OR Group
            session.run("CREATE (:OrGroup {name: 'Mathematics Sequence'})")
            session.run("""
                MATCH (c:Category {name: 'Lower Division'}), (og:OrGroup {name: 'Mathematics Sequence'})
                CREATE (c)-[:INCLUDES]->(og)
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Mathematics Sequence'})
                MERGE (:Course {code: 'MATH 18', name: 'Linear Algebra'})
                MERGE (:Course {code: 'MATH 20A', name: 'Calculus I'})
                MERGE (:Course {code: 'MATH 20B', name: 'Calculus II'})
                MERGE (:Course {code: 'MATH 20C', name: 'Calculus III'})
                MERGE (:Course {code: 'MATH 20D', name: 'Differential Equations'})
                MERGE (:Course {code: 'MATH 31AH', name: 'Honors Linear Algebra'})
                MERGE (:Course {code: 'MATH 31BH', name: 'Honors Calculus I'})
                MERGE (:Course {code: 'MATH 31CH', name: 'Honors Calculus II'})
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Mathematics Sequence'})
                MATCH (course:Course)
                WHERE course.code IN ['MATH 18', 'MATH 20A', 'MATH 20B', 'MATH 20C', 'MATH 20D', 'MATH 31AH', 'MATH 31BH', 'MATH 31CH']
                CREATE (og)-[:INCLUDES]->(course)
            """)

            # Add Introduction to Computer Science OR Group
            session.run("CREATE (:OrGroup {name: 'Introduction to Computer Science'})")
            session.run("""
                MATCH (c:Category {name: 'Lower Division'}), (og:OrGroup {name: 'Introduction to Computer Science'})
                CREATE (c)-[:INCLUDES]->(og)
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Introduction to Computer Science'})
                MERGE (:Course {code: 'CSE 8A-B', name: 'Introduction to Computer Science: Java'})
                MERGE (:Course {code: 'CSE 11', name: 'Accelerated Introduction to Computer Science: Java'})
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Introduction to Computer Science'})
                MATCH (course:Course)
                WHERE course.code IN ['CSE 8A-B', 'CSE 11']
                CREATE (og)-[:INCLUDES]->(course)
            """)

            # Add Software Tools and Laboratory OR Group
            session.run("CREATE (:OrGroup {name: 'Software Tools and Laboratory'})")
            session.run("""
                MATCH (c:Category {name: 'Lower Division'}), (og:OrGroup {name: 'Software Tools and Laboratory'})
                CREATE (c)-[:INCLUDES]->(og)
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Software Tools and Laboratory'})
                MERGE (:Course {code: 'CSE 15L', name: 'Software Tools and Techniques Laboratory'})
                MERGE (:Course {code: 'CSE 29', name: 'Software Tools Laboratory'})
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Software Tools and Laboratory'})
                MATCH (course:Course)
                WHERE course.code IN ['CSE 15L', 'CSE 29']
                CREATE (og)-[:INCLUDES]->(course)
            """)

            # Add Basic Data Structures and Object-Oriented Design directly
            session.run("""
                MATCH (c:Category {name: 'Lower Division'})
                MERGE (:Course {code: 'CSE 12', name: 'Basic Data Structures and Object-Oriented Design'})
                WITH c
                MATCH (course:Course {code: 'CSE 12'})
                CREATE (c)-[:INCLUDES]->(course)
            """)

            print("Lower Division courses created successfully!")

    def create_upper_division_courses(self):
        with self.driver.session() as session:
            # Create the main category for Upper Division
            session.run("CREATE (:Category {name: 'Upper Division'})")

            # Add Mathematical Reasoning
            session.run("""
                MATCH (c:Category {name: 'Upper Division'})
                MERGE (:Course {code: 'MATH 109', name: 'Mathematical Reasoning'})
                WITH c
                MATCH (course:Course {code: 'MATH 109'})
                CREATE (c)-[:INCLUDES]->(course);
            """)

            session.run("""
                MATCH (c:Category {name: 'Upper Division'})
                MERGE (:Course {code: 'CSE 101', name: 'Design and Analysis of Algorithms'})
                WITH c
                MATCH (course:Course {code: 'CSE 101'})
                CREATE (c)-[:INCLUDES]->(course);
            """)

            session.run("""
                CREATE (:SequenceRequirement {name: 'Modern/Applied Algebra Sequence Requirement'});
            """)
            session.run("""
                CREATE (:OrGroup {name: 'Modern Algebra Sequence'});
            """)
            session.run("""
                CREATE (:OrGroup {name: 'Applied Algebra Sequence'});
            """)
            session.run("""
                MATCH (sr:SequenceRequirement {name: 'Modern/Applied Algebra Sequence Requirement'}),
                    (og1:OrGroup {name: 'Modern Algebra Sequence'}),
                    (og2:OrGroup {name: 'Applied Algebra Sequence'})
                CREATE (sr)-[:INCLUDES]->(og1),
                    (sr)-[:INCLUDES]->(og2);
            """)
            session.run("""
                MATCH (c:Category {name: 'Upper Division'}),
                    (sr:SequenceRequirement {name: 'Modern/Applied Algebra Sequence Requirement'})
                CREATE (c)-[:INCLUDES]->(sr);
            """)
            session.run("""
                MATCH (og1:OrGroup {name: 'Modern Algebra Sequence'})
                MERGE (:Course {code: 'MATH 103A', name: 'Modern Algebra A'})
                MERGE (:Course {code: 'MATH 103B', name: 'Modern Algebra B'})
                WITH og1
                MATCH (og1), (course:Course)
                WHERE course.code IN ['MATH 103A', 'MATH 103B']
                CREATE (og1)-[:INCLUDES]->(course);
            """)
            session.run("""
                MATCH (og2:OrGroup {name: 'Applied Algebra Sequence'})
                MERGE (:Course {code: 'MATH 100A', name: 'Applied Algebra A'})
                MERGE (:Course {code: 'MATH 100B', name: 'Applied Algebra B'})
                WITH og2
                MATCH (og2), (course:Course)
                WHERE course.code IN ['MATH 100A', 'MATH 100B']
                CREATE (og2)-[:INCLUDES]->(course);
            """)

            session.run("""
                MATCH (c:Category {name: 'Upper Division'})
                MERGE (:Course {code: 'CSE 105', name: 'Theory of Computability'})
                WITH c
                MATCH (course:Course {code: 'CSE 105'})
                CREATE (c)-[:INCLUDES]->(course);
            """)

            session.run("""
                CREATE (:OrGroup {name: 'Probability'});
            """)
            session.run("""
                MATCH (c:Category {name: 'Upper Division'}), (og:OrGroup {name: 'Probability'})
                CREATE (c)-[:INCLUDES]->(og);
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Probability'})
                MERGE (c1:Course {code: 'MATH 180A', name: 'Intro to Probability A'})
                MERGE (c2:Course {code: 'MATH 183', name: 'Applied Probability'})
                WITH og, c1, c2
                CREATE (og)-[:INCLUDES]->(c1),
                    (og)-[:INCLUDES]->(c2)
            """)

            session.run("""
                CREATE (:OrGroup {name: 'Discrete Math'});
            """)
            session.run("""
                MATCH (c:Category {name: 'Upper Division'}), (og:OrGroup {name: 'Discrete Math'})
                CREATE (c)-[:INCLUDES]->(og);
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Discrete Math'})
                MERGE (:Course {code: 'MATH 154', name: 'Discrete Structures'})
                MERGE (:Course {code: 'MATH 158', name: 'Discrete Mathematics'})
                MERGE (:Course {code: 'MATH 184', name: 'Combinatorics A'})
                MERGE (:Course {code: 'MATH 188', name: 'Combinatorics B'});
            """)
            session.run("""
                MATCH (og:OrGroup {name: 'Discrete Math'})
                MATCH (course:Course)
                WHERE course.code IN ['MATH 154', 'MATH 158', 'MATH 184', 'MATH 188']
                CREATE (og)-[:INCLUDES]->(course);
            """)
    

    # David's Functions
    def add_course(self, index, title="", units=0, description=""):
        """
        Adds new course node given input or updates corresponding course node given input. 
        """
        with self.driver.session() as session:
            result = session.run("""
                MERGE (c:Course {course_id: $index})
                SET c.name = $index,
                    c.title = CASE 
                        WHEN $title <> '' AND (c.title IS NULL OR c.title = '') 
                        THEN $title 
                        ELSE COALESCE(c.title, '')
                    END,
                    c.units = CASE 
                        WHEN $units <> 0 AND (c.units IS NULL OR c.units = 0) 
                        THEN $units 
                        ELSE COALESCE(c.units, 0)
                    END,
                    c.description = CASE 
                        WHEN $description <> '' AND (c.description IS NULL OR c.description = '') 
                        THEN $description 
                        ELSE COALESCE(c.description, '')
                    END
                RETURN c
                """,
                index=index,
                title=title,
                units=units,
                description=description
            )
            return result.single()
    
    def add_milestone(self, milestone_id, title="", units_required=0, description=""):
        with self.driver.session() as session:
            result = session.run("""
                MERGE (m:Milestone {milestone_id: $milestone_id})
                SET m.name = $milestone_id,
                    m.title = CASE 
                        WHEN $title <> '' AND (m.title IS NULL OR m.title = '') 
                        THEN $title 
                        ELSE COALESCE(m.title, '')
                    END,
                    m.units_required = CASE 
                        WHEN $units > 0 AND (m.units_required IS NULL OR m.units_required = 0) 
                        THEN $units 
                        ELSE COALESCE(m.units_required, 0)
                    END,
                    m.description = CASE 
                        WHEN $description <> '' AND (m.description IS NULL OR m.description = '') 
                        THEN $description 
                        ELSE COALESCE(m.description, '')
                    END
                RETURN m
                """,
                milestone_id=milestone_id,
                title=title,
                units=units_required,
                description=description
            )
            return result.single()

    def add_or_group(self, group_id):
        with self.driver.session() as session:
            result = session.run("""
                MERGE (og:OrGroup {group_id: $group_id})
                SET og.name = $group_id
                RETURN og
                """,
                group_id=group_id
            )
            return result.single()
    
    def add_milestone_to_milestone(self, milestone_id, minor_milestone_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (m:Milestone {milestone_id: $milestone_id})
                    MATCH (mm:Milestone {milestone_id: $minor_milestone_id})
                    WITH m, mm
                    WHERE NOT EXISTS((mm)-[:REQUIRED]->(m))
                    MERGE (mm)-[r:REQUIRED]->(m)
                    RETURN r
                    """,
                    milestone_id=milestone_id, minor_milestone_id=minor_milestone_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating required relation from {minor_milestone_id} to {milestone_id}: {str(e)}")

    def add_group_to_course(self, course_id, group_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (c:Course {course_id: $course_id})
                    MATCH (og:OrGroup {group_id: $group_id})
                    WITH c, og
                    WHERE NOT EXISTS((og)-[:REQUIRED]->(c))
                    MERGE (og)-[r:REQUIRED]->(c)
                    RETURN r
                    """,
                    course_id=course_id, group_id=group_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating prerequisite relation from {group_id} to {course_id}: {str(e)}")

    def add_course_to_group(self, group_id, course_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (c:Course {course_id: $course_id})
                    MATCH (og:OrGroup {group_id: $group_id})
                    WITH c, og
                    WHERE NOT EXISTS((c)-[:REQUIRED]->(og))
                    MERGE (c)-[r:REQUIRED]->(og)
                    RETURN r
                    """,
                    course_id=course_id, group_id=group_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating required relation from {course_id} to {group_id}: {str(e)}")

    def add_course_in_group(self, course_id, group_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (c:Course {course_id: $course_id})
                    MATCH (og:OrGroup {group_id: $group_id})
                    WITH c, og
                    WHERE NOT EXISTS((c)-[:INCLUDED_IN]->(og))
                    MERGE (c)-[r:INCLUDED_IN]->(og)
                    RETURN r
                    """,
                    course_id=course_id, group_id=group_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating course to group relation from {course_id} to {group_id}: {str(e)}")
    
    def add_group_in_milestone(self, group_id, milestone_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (m:Milestone {milestone_id: $milestone_id})
                    MATCH (og:OrGroup {group_id: $group_id})
                    WITH m, og
                    WHERE NOT EXISTS((og)-[:INCLUDED_IN]->(m))
                    MERGE (og)-[r:INCLUDED_IN]->(m)
                    RETURN r
                    """,
                    group_id=group_id, milestone_id=milestone_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating included in relation from {group_id} to {milestone_id}: {str(e)}")

    def add_course_in_milestone(self, course_id, milestone_id):
        with self.driver.session() as session:
            try:
                # Check if relationship already exists before creating
                result = session.run("""
                    MATCH (m:Milestone {milestone_id: $milestone_id})
                    MATCH (c:Course {course_id: $course_id})
                    WITH m, c
                    WHERE NOT EXISTS((c)-[:INCLUDED_IN]->(m))
                    MERGE (c)-[r:INCLUDED_IN]->(m)
                    RETURN r
                    """,
                    course_id=course_id, milestone_id=milestone_id
                )
                return result.single()
            except Exception as e:
                print(f"Error creating included in relation from {course_id} to {milestone_id}: {str(e)}")
    
    def get_course_info(self, course_id):
        with self.driver.session() as session:
            result = session.run("""
                MATCH (c:Course {course_id: $course_id})
                RETURN 
                    c.course_id as id,
                    c.title as title,
                    c.units as units,
                    c.description as description
            """, course_id=course_id)
            
            record = result.single()
            if record:
                return {
                    'course_id': record['id'],
                    'title': record['title'],
                    'units': record['units'],
                    'description': record['description']
                }
            return None

    def get_prerequisites(self, course_id):
        with self.driver.session() as session:
            # Query all prerequisites groups and their courses
            result = session.run("""
                MATCH (c:Course {course_id: $course_id})
                MATCH (og:OrGroup)-[:REQUIRED]->(c)
                MATCH (prereq:Course)-[:INCLUDED_IN]->(og)
                RETURN og.group_id as group_id, 
                    collect(prereq.course_id) as prereq_courses
                ORDER BY og.group_id
            """, course_id=course_id)
            
            # Convert result to list of lists format
            prereqs = []
            for record in result:
                prereqs.append(record['prereq_courses'])
                
            return prereqs
    
    

## Testing the get_prereqs function

In [198]:
uri = st.secrets["NEO4J_URI"]
username = st.secrets["NEO4J_USERNAME"]
password = st.secrets["NEO4J_PASSWORD"]

db = CourseDatabase(
    uri=uri,
    username=username,
    password=password
)

## Getting Course Prerequisites

In [34]:
course_code = "MATH 154"
prerequisites = db.get_prerequisites(course_code, recursive=True)

# Print the results
print(f"Prerequisites for {course_code}:")
print(prerequisites)
for prereq in prerequisites:
    print(f"{prereq['PrerequisiteCodes']}")

Failed to write data to connection ResolvedIPv4Address(('34.31.169.230', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection IPv4Address(('29a8fd63.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))


Prerequisites for MATH 154:
[{'ORGroup': 'MATH 154_ORGroup_1', 'PrerequisiteCodes': [], 'PathNodes': ['MATH 154']}, {'ORGroup': 'MATH 154_ORGroup_0', 'PrerequisiteCodes': [], 'PathNodes': ['MATH 154']}]
[]
[]


## Creating Categories for MATH CS major

In [203]:
# David's Functions
def add_major_requirements(major_req_data):
    # add the meta milestones
    for idx, row in major_req_data.iloc[:3,:].iterrows():
        db.add_milestone(
            row['Milestones'],
            row['Title'],
            row['Units Required'],
            row['Descriptions']
        )

        milestone_requirements = literal_eval(row['Requirements'])
        for group_idx, or_group in enumerate(milestone_requirements):
            print(or_group)
            db.add_milestone(or_group)
            db.add_milestone_to_milestone(row['Milestones'], or_group)
        print(f"added {row['Milestones']} to database.")

    # add the individual milestones and courses
    for idx, row in major_req_data.iloc[3:,:].iterrows():
        db.add_milestone(
            row['Milestones'],
            row['Title'],
            row['Units Required'],
            row['Descriptions']
        )

        milestone_requirements = literal_eval(row['Requirements'])
        for group_idx, or_group in enumerate(milestone_requirements):
            print(or_group)
            if isinstance(or_group, tuple):
                group_id = f"{row['Milestones']}_PATH_{group_idx}"
                db.add_or_group(group_id)
                db.add_group_in_milestone(group_id, row['Milestones'])

                for required_course in or_group:
                    db.add_course(required_course)
                    db.add_course_to_group(group_id, required_course)
            elif isinstance(or_group, str):
                db.add_course_in_milestone(or_group, row['Milestones'])
        print(f"added {row['Milestones']} to database.")
        
    print("Successfully added all requirement milestones.")


In [200]:
# David's Function
def add_department_catalog(dpt_cat_data):
    """
    Loads entire course catalog into neo4j db. 
    dpt_cat_data should include columns: ['Course_Index', 'Course_Title', 'Course_Units', 'Course_Description', 'Course_Prerequisites']
    """
    for idx, row in dpt_cat_data.iterrows():
        db.add_course(
            row['Course_Index'],
            row['Course_Title'],
            row['Course_Units'],
            row['Course_Description']
        )

        course_prereq_lst = literal_eval(row['Course_Prerequisites'])
        for group_idx, or_group in enumerate(course_prereq_lst):
            # Create unique group ID combining course and group number
            group_id = f"{row['Course_Index']}_GROUP_{group_idx}"
            print(group_id)
            # Add OrGroup
            db.add_or_group(group_id)

            # Add prerequisite relation from OrGroup to main course
            db.add_group_to_course(row['Course_Index'], group_id)

            # Add all prerequisite courses and their relations to OrGroup
            for prereq_course in or_group:
                print(prereq_course)
                # Add prerequisite course (will be ignored if already exists)
                db.add_course(prereq_course)  # Minimal info for prereq courses

                # Add relation from prerequisite course to OrGroup
                db.add_course_in_group(prereq_course, group_id)
        
        print(f"added {row['Course_Index']} to database.")
    print('\nSucessfully added all courses to database.\n')

In [201]:
math_data = pd.read_csv('data/processed/Math Course Data.csv')
add_department_catalog(math_data)

MATH 2_GROUP_0
added MATH 2 to database.
MATH 3B_GROUP_0
added MATH 3B to database.
MATH 3C_GROUP_0
MATH 3B
added MATH 3C to database.
MATH 4C_GROUP_0
MATH 3C
added MATH 4C to database.
MATH 10A_GROUP_0
MATH 3C
MATH 10A_GROUP_1
MATH 4C
added MATH 10A to database.
MATH 10B_GROUP_0
MATH 10A
MATH 10B_GROUP_1
MATH 20A
added MATH 10B to database.
MATH 10C_GROUP_0
MATH 10B
MATH 10C_GROUP_1
MATH 20B
added MATH 10C to database.
MATH 11_GROUP_0
MATH 10B
MATH 11_GROUP_1
MATH 20B
added MATH 11 to database.
MATH 15A_GROUP_0
CSE 8B
MATH 15A_GROUP_1
CSE 11
added MATH 15A to database.
MATH 18_GROUP_0
MATH 4C
MATH 18_GROUP_1
MATH 10A
MATH 18_GROUP_2
MATH 20A
added MATH 18 to database.
MATH 20A_GROUP_0
MATH 2C
MATH 20A_GROUP_1
MATH 4C
MATH 20A_GROUP_2
MATH 10A
added MATH 20A to database.
MATH 20B_GROUP_0
MATH 20A
MATH 20B_GROUP_1
MATH 10B
MATH 20B_GROUP_2
MATH 10C
added MATH 20B to database.
MATH 20C_GROUP_0
MATH 20B
added MATH 20C to database.
MATH 20D_GROUP_0
MATH 20C
MATH 20D_GROUP_1
MATH 21C
MATH 2

In [202]:
cse_data = pd.read_csv('data/processed/CSE Course Data.csv')
add_department_catalog(cse_data)

CSE 3_GROUP_0
added CSE 3 to database.
CSE 4GS_GROUP_0
MATH 10A
MATH 20A
added CSE 4GS to database.
CSE 6GS_GROUP_0
MATH 10A
MATH 20A
added CSE 6GS to database.
CSE 6R_GROUP_0
added CSE 6R to database.
CSE 8A_GROUP_0
added CSE 8A to database.
CSE 8B_GROUP_0
CSE 8A
added CSE 8B to database.
CSE 11_GROUP_0
added CSE 11 to database.
CSE 12_GROUP_0
CSE 8B
CSE 11
added CSE 12 to database.
CSE 15L_GROUP_0
CSE 8B
CSE 11
CSE 12
DSC 30
added CSE 15L to database.
CSE 20_GROUP_0
CSE 11
CSE 6R
CSE 8A
CSE 8B
ECE 15
added CSE 20 to database.
CSE 21_GROUP_0
CSE 20
MATH 15A
MATH 31CH
added CSE 21 to database.
CSE 29_GROUP_0
CSE 11
CSE 8B
ECE 15
added CSE 29 to database.
CSE 30_GROUP_0
CSE 15L
CSE 29
ECE 15
added CSE 30 to database.
CSE 42_GROUP_0
added CSE 42 to database.
CSE 86_GROUP_0
CSE 12
added CSE 86 to database.
CSE 87_GROUP_0
added CSE 87 to database.
CSE 89_GROUP_0
added CSE 89 to database.
CSE 90_GROUP_0
added CSE 90 to database.
CSE 91_GROUP_0
added CSE 91 to database.
CSE 95_GROUP_0
added 

In [204]:
# David's Function
mathcs_req = pd.read_csv('data/processed/MA30 requirements.csv')
add_major_requirements(mathcs_req)

MA30 Lower Division
MA30 Upper Division
added MA30 to database.
MA30_Lower_1
MA30_Lower_2
MA30_Lower_3
MA30_Lower_4
added MA30 Lower Division to database.
MA30_Upper_1
MA30_Upper_2
MA30_Upper_3
MA30_Upper_4
MA30_Upper_5
MA30_Upper_6
MA30_Upper_7
MA30_Upper_8
MA30_Upper_9
MA30_Upper_10
added MA30 Upper Division to database.
('MATH 18', 'MATH 20A', 'MATH 20B', 'MATH 20C', 'MATH 20D', 'MATH 20E')
('MATH 31AH', 'MATH 31BH', 'MATH 31CH', 'MATH 20D')
added MA30_Lower_1 to database.
('CSE 8A', 'CSE 8B')
CSE 11
added MA30_Lower_2 to database.
CSE 15L
CSE 29
added MA30_Lower_3 to database.
CSE 12
added MA30_Lower_4 to database.
MATH 109
added MA30_Upper_1 to database.
('MATH 103A', 'MATH 103B')
('MATH 100A', 'MATH 100B')
added MA30_Upper_2 to database.
CSE 105
added MA30_Upper_3 to database.
CSE 100
added MA30_Upper_4 to database.
MATH 180A
MATH 183
added MA30_Upper_5 to database.
MATH 154
MATH 158
MATH 184
MATH 188
added MA30_Upper_6 to database.
CSE 101
added MA30_Upper_7 to database.
MATH 17

In [12]:
# James's Functions
db.create_lower_division_category()


Lower Division courses created successfully!


In [32]:
# James's Function
db.create_upper_division_courses()

## Populating the Neo4j Database

In [None]:
# James's function
source_directory = "data/processed/"

for file_name in os.listdir(source_directory):
    if file_name.endswith('.csv'):
        # Load the CSV file to examine its structure
        file_path = os.path.join(source_directory, file_name)
        print(f"Processing: {file_path}")

        data = pd.read_csv(file_path)

        # Extract courses and prerequisites
        courses = list(zip(data['Course_Index'], data['Course_Title']))
        prerequisites = list(zip(data['Course_Index'], data['Course_Prerequisites']))

        # Add courses to the database
        for code, name in courses:
            db.add_course(code, name)

        # Add prerequisites as groups
        for code, prereq in prerequisites:
            if pd.notna(prereq):  # Ensure the prerequisites field is not NaN
                try:
                    prereq_groups = ast.literal_eval(prereq)  # Parse the prerequisite string
                    print(f"Calling _create_prerequisites with course_code={code}, prereq_groups={prereq_groups}")
                    db.add_prerequisites(code, prereq_groups)
                except (ValueError, SyntaxError) as e:
                    print(f"Error parsing prerequisites for {code}: {prereq} - {e}")

        print(f"Completed processing: {file_path}")


Processing: data/processed/Math Course Data.csv
Calling _create_prerequisites with course_code=MATH 2, prereq_groups=[]
hai
Calling _create_prerequisites with course_code=MATH 3B, prereq_groups=[]
hai
Calling _create_prerequisites with course_code=MATH 3C, prereq_groups=['MATH 3B']
hai
Calling _create_prerequisites with course_code=MATH 4C, prereq_groups=['MATH 3C']
hai
Calling _create_prerequisites with course_code=MATH 10A, prereq_groups=['MATH 3C', 'MATH 4C']
hai
Calling _create_prerequisites with course_code=MATH 10B, prereq_groups=['MATH 10A', 'MATH 20A']
hai
Calling _create_prerequisites with course_code=MATH 10C, prereq_groups=['MATH 10B', 'MATH 20B']
hai
Calling _create_prerequisites with course_code=MATH 11, prereq_groups=['MATH 10B', 'MATH 20B']
hai
Calling _create_prerequisites with course_code=MATH 15A, prereq_groups=['CSE 8B', 'CSE 11']
hai
Calling _create_prerequisites with course_code=MATH 18, prereq_groups=['MATH 4C', 'MATH 10A', 'MATH 20A']
hai
Calling _create_prerequi

Failed to write data to connection ResolvedIPv4Address(('34.31.169.230', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection IPv4Address(('29a8fd63.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.31.169.230', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection IPv4Address(('29a8fd63.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.31.169.230', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection IPv4Address(('29a8fd63.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.31.169.230', 7687)) (ResolvedIPv4Address(('34.31.169.230', 7687)))
Failed to write data to connection IPv4Address(('29a8fd63.databases.neo4j.io', 7687)) (Resolv

Calling _create_prerequisites with course_code=CSE 152B, prereq_groups=['CSE 152A', 'CSE 152', 'CSE 166']
hai
Calling _create_prerequisites with course_code=CSE 156, prereq_groups=['CSE 12', 'DSC 40B', 'CSE 15L', 'CSE 29', 'DSC 80', 'BENG 134', 'COGS 118D', 'CSE 103', 'ECE 109', 'ECON 120A', 'MAE 108', 'MATH 180A', 'MATH 180B', 'MATH 181A', 'MATH 183', 'MATH 186']
hai
Calling _create_prerequisites with course_code=CSE 158, prereq_groups=['CSE 12', 'DSC 40B', 'CSE 15L', 'CSE 29', 'DSC 80', 'BENG 100', 'BENG 134', 'COGS 118D', 'CSE 103', 'ECE 109', 'ECON 120A', 'MAE 108', 'MATH 180A', 'MATH 180B', 'MATH 181A', 'MATH 183', 'MATH 186']
hai
Calling _create_prerequisites with course_code=CSE 158R, prereq_groups=['CSE 12', 'DSC 40B', 'CSE 15L', 'CSE 29', 'DSC 80', 'BENG 100', 'BENG 134', 'COGS 118D', 'CSE 103', 'ECE 109', 'ECON 120A', 'MAE 108', 'MATH 180A', 'MATH 180B', 'MATH 181A', 'MATH 183', 'MATH 186']
hai
Calling _create_prerequisites with course_code=CSE 160, prereq_groups=['CSE 100', 

In [68]:
# print("Available courses (completed: CS101, MATH101):")
# available = db.get_available_courses(["CS101", "MATH101"])
# for course in available:
#     print(f"- {course['code']}: {course['name']}")

In [69]:
db.close()