In [None]:
import pandas as pd 

## Relationship-extraction helper, used by the entity and relationship extraction function below

In [None]:
def create_dynamic_relationship(df, from_type, from_id_col, to_type, to_id_col, relationship_type, output_col):
    """Attach a column of relationship dictionaries to ``df``.

    For every row, the values in ``from_id_col`` and ``to_id_col`` are each
    treated as a list of ids (a non-list value is wrapped in a one-element
    list). One dictionary is produced for every (from_id, to_id) pair in
    which neither id is missing according to ``pd.notna``.

    Args:
        df: DataFrame to read the id columns from; it is modified in place.
        from_type: Value stored under ``"from_type"`` in every dictionary.
        from_id_col: Name of the column holding the source id(s).
        to_type: Value stored under ``"to_type"`` in every dictionary.
        to_id_col: Name of the column holding the target id(s).
        relationship_type: Value stored under ``"type"`` in every dictionary.
        output_col: Name of the column that receives the per-row lists.

    Returns:
        The same ``df`` object, with ``output_col`` set to one list of
        relationship dictionaries per row (an empty list when no valid pair
        exists).
    """

    def as_list(cell):
        # Scalars become a single-element list so both columns iterate alike.
        if isinstance(cell, list):
            return cell
        return [cell]

    per_row_links = []

    for _, record in df.iterrows():
        sources = as_list(record[from_id_col])
        targets = as_list(record[to_id_col])

        links = []
        for source in sources:
            # Skip missing source ids entirely.
            if not pd.notna(source):
                continue
            for target in targets:
                # Skip missing target ids.
                if not pd.notna(target):
                    continue
                links.append(
                    {
                        "from_type": from_type,
                        "from_id": source,
                        "to_type": to_type,
                        "to_id": target,
                        "type": relationship_type,
                    }
                )

        per_row_links.append(links)

    # Store the collected relationships on the caller's DataFrame.
    df[output_col] = per_row_links
    return df

# Main entity and relationship extraction function

In [None]:
def entity_rs_extraction(file_name):
    """Build relationship dictionaries for one of the known entity CSV files.

    For the four ``*_info_final.csv`` files, the CSV is read from
    ``../entity_relationship_extraction/entity_extraction/``, a
    ``Relationship_Data`` column is built (one list of relationship
    dictionaries per row) and the result is written back to the same path.
    Student and module data are exploded to one relationship per row before
    being written; department and staff data keep the list per row.

    For ``mock_module_reviews.csv`` two relationship columns are added with
    ``create_dynamic_relationship`` and a preview is printed; nothing is
    written to disk.

    Any other ``file_name`` is ignored.

    Args:
        file_name: Name of the file to process (compared against the known
            file names with ``==``).

    Returns:
        None.
    """
    data_dir = '../entity_relationship_extraction/entity_extraction/'

    def relationship(from_type, from_id, to_type, to_id, rel_type):
        # Key order is kept stable because the dict is serialised into the CSV.
        return {'from_type': from_type, 'from_id': from_id, 'to_type': to_type, 'to_id': to_id, 'type': rel_type}

    def has_value(value):
        # Empty CSV cells are read as NaN (not the string 'nan'), so check
        # for real missing values as well as blank / literal 'nan' strings.
        return bool(pd.notna(value)) and str(value).strip() not in ('', 'nan')

    if file_name == 'student_info_final.csv':
        path = data_dir + file_name

        # load data
        student_info = pd.read_csv(path)

        # one list of relationship dictionaries per student row
        student_info['Relationship_Data'] = [
            [
                # student --> majors_in --> major
                relationship('Student', row['Student_Name'], 'Major', row['Major'], 'majors_in'),
                # student --> is_in_faculty --> faculty
                relationship('Student', row['Student_Name'], 'Faculty', row['Faculties'], 'is_in_faculty'),
                # student --> takes_module --> module
                relationship('Student', row['Student_Name'], 'Module', row['Modules_Completed'], 'takes_module'),
            ]
            for row in student_info.to_dict('records')
        ]

        # explode the list of dictionaries into separate rows
        student_info = student_info.explode('Relationship_Data')

        # write back to csv
        student_info.to_csv(path, index=False)

    elif file_name == "module_info_final.csv":
        path = data_dir + file_name
        semester_cols = ['semester_01', 'semester_02', 'semester_03', 'semester_04']

        # load data
        module_info = pd.read_csv(path)

        relationship_rows = []
        for row in module_info.to_dict('records'):
            code = row['moduleCode']

            # module --> offered_by --> department
            row_relationships = [relationship('Module', code, 'Department', row['department'], 'offered_by')]

            # module --> has_prerequisite --> prerequisite (only when present)
            if has_value(row['prerequisite']):
                row_relationships.append(relationship('Module', code, 'Module', row['prerequisite'], 'has_prerequisite'))

            # module --> has_preclusion --> preclusion (only when present)
            if has_value(row['preclusion']):
                row_relationships.append(relationship('Module', code, 'Module', row['preclusion'], 'has_preclusion'))

            # module --> teaches_skill --> skill
            row_relationships.append(relationship('Module', code, 'Skill', row['Skill_entities'], 'teaches_skill'))

            # module --> offered_in --> semester
            # The flag is evaluated for THIS row, not for the whole column.
            sems = [col for col in semester_cols if row[col] == 1]
            row_relationships.append(relationship('Module', code, 'Semester', sems, 'offered_in'))

            relationship_rows.append(row_relationships)

        module_info['Relationship_Data'] = relationship_rows

        # explode the list of dictionaries into separate rows; this must
        # happen before writing so the exploded rows reach the CSV
        module_info = module_info.explode('Relationship_Data')

        # write back to csv
        module_info.to_csv(path, index=False)

    elif file_name == "department_info_final.csv":
        path = data_dir + file_name

        # load data
        department_info = pd.read_csv(path)

        # department --> is_under_faculty --> faculty
        # NOTE(review): unlike student/module data, the list is not exploded
        # here; confirm whether that is intentional.
        department_info['Relationship_Data'] = [
            [relationship('Department', row['department'], 'Faculty', row['faculty'], 'is_under_faculty')]
            for row in department_info.to_dict('records')
        ]

        # write back to csv
        department_info.to_csv(path, index=False)

    elif file_name == "staff_info_final.csv":
        path = data_dir + file_name

        # load data
        staff_info = pd.read_csv(path)

        # NOTE(review): unlike student/module data, the list is not exploded
        # here; confirm whether that is intentional.
        staff_info['Relationship_Data'] = [
            [
                # staff --> teaches_module --> module
                relationship('Staff', row['Employee Name'], 'Module', row['Modules Taught'], 'teaches_module'),
                # staff --> is_in_department --> department
                relationship('Staff', row['Employee Name'], 'Department', row['Department'], 'is_in_department'),
            ]
            for row in staff_info.to_dict('records')
        ]

        # write back to csv
        staff_info.to_csv(path, index=False)

    elif file_name == "mock_module_reviews.csv":
        # 04 - mock_module_reviews.csv

        # NOTE(review): `module_review` is not defined in this function; it
        # must already exist as a global (e.g. from an earlier notebook cell)
        # holding the reviews DataFrame, otherwise this raises NameError.

        # Module --> teaches_skill --> Skill relationship
        module_review_rs = create_dynamic_relationship(
            module_review,
            "Module",
            "module_entities",
            "Skill",
            "skill_entities",
            "teaches_skill",
            "relationship1",
        )

        # Staff --> teaches_module --> Module relationship
        module_review_rs = create_dynamic_relationship(
            module_review_rs,
            "Staff",
            "staff_entities",
            "Module",
            "module_entities",
            "teaches_module",
            "relationship2",
        )

        # Display the DataFrame with relationships
        print(module_review_rs[['module_entities', 'skill_entities', 'staff_entities', 'relationship1', 'relationship2']].head())
            
