# Relationship Extraction

In [1]:
# import libraries
import numpy as np
import pandas as pd
import ast


## Student Info

In [18]:
# load data
student_info = pd.read_csv('../entity_extraction/student_info_final.csv') 
print(student_info.head())

      Student_Name Matric_Number       NRIC  Year  \
0      Tracy Lewis     A0216920B  XXXXX506Z     1   
1    Andrew Holden     A0225069H  XXXXX799Z     3   
2  Phillip Bullock     A0228204E  XXXXX194Z     1   
3   Valerie Rivera     A0200778Y  XXXXX150Z     3   
4      Robert Hall     A0222508M  XXXXX824Z     3   

                           Faculties                    Major  \
0                NUS Business School  Business Administration   
1          YST Conservatory of Music                    Music   
2  College of Design and Engineering   Electrical Engineering   
3                          Computing       Business Analytics   
4          YST Conservatory of Music                    Music   

              Second Major                                  Modules_Completed  \
0                      NaN  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
1                      NaN  ['CFA1111A', 'MUA1190', 'MUA2109', 'MUA1172', ...   
2                      NaN  ['ME1102', 'BN1111'

In [19]:
# create new column with empty list
student_info['Relationship_Data'] = [[] for i in range(len(student_info))]
print(student_info)

         Student_Name Matric_Number       NRIC  Year  \
0         Tracy Lewis     A0216920B  XXXXX506Z     1   
1       Andrew Holden     A0225069H  XXXXX799Z     3   
2     Phillip Bullock     A0228204E  XXXXX194Z     1   
3      Valerie Rivera     A0200778Y  XXXXX150Z     3   
4         Robert Hall     A0222508M  XXXXX824Z     3   
...               ...           ...        ...   ...   
3280  Renee Carpenter     A0275436A  XXXXX719Z     1   
3281    Garrett Mcgee     A0248365D  XXXXX212Z     1   
3282    Michael Colon     A0284143A  XXXXX269Z     3   
3283     Wesley Moran     A0276835Y  XXXXX301Z     3   
3284   Angel Peterson     A0218295X  XXXXX501Z     3   

                              Faculties                    Major  \
0                   NUS Business School  Business Administration   
1             YST Conservatory of Music                    Music   
2     College of Design and Engineering   Electrical Engineering   
3                             Computing       Business 

In [21]:
# for each row, create a dictionary for each relationship and add to list

for i in range(len(student_info)):
    # student --> majors_in --> major
    student_major = {'from_type': 'Student', 'from_id': student_info['Student_Name'][i], 'to_type': 'Major', 'to_id': student_info['Major'][i], 'type': 'majors_in'}
    # student --> is_in_faculty --> faculty
    student_faculty = {'from_type': 'Student', 'from_id': student_info['Student_Name'][i], 'to_type': 'Faculty', 'to_id': student_info['Faculties'][i], 'type': 'is_in_faculty'}
    # student --> takes_module --> module
    student_module = {'from_type': 'Student', 'from_id': student_info['Student_Name'][i], 'to_type': 'Module', 'to_id': student_info['Modules_Completed'][i], 'type': 'takes_module'}
    
    student_info['Relationship_Data'][i].append(student_major)
    student_info['Relationship_Data'][i].append(student_faculty)
    student_info['Relationship_Data'][i].append(student_module)

# explode the list of dictionaries into separate rows
student_info = student_info.explode('Relationship_Data')

print(student_info)

        Student_Name Matric_Number       NRIC  Year  \
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
1      Andrew Holden     A0225069H  XXXXX799Z     3   
1      Andrew Holden     A0225069H  XXXXX799Z     3   
...              ...           ...        ...   ...   
3283    Wesley Moran     A0276835Y  XXXXX301Z     3   
3283    Wesley Moran     A0276835Y  XXXXX301Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   

                      Faculties                    Major Second Major  \
0           NUS Business School  Business Administration          NaN   
0           NUS Business School  Business Administration          NaN   
0           NUS Business School  Business Administration          NaN   
1     YST Conservatory of Music                

In [22]:
# write back to csv
student_info.to_csv('../entity_extraction/student_info_final.csv', index=False)

## Module Info

In [23]:
# load data
module_info = pd.read_csv('../entity_extraction/module_info_final.csv')

In [28]:
# create new column with empty list
module_info['Relationship_Data'] = [[] for i in range(len(module_info))]

In [29]:
# for each row, create a dictionary for each relationship and add to list

for i in range(len(module_info)):
    # module --> offered_by --> department
    module_department = {'from_type': 'Module', 'from_id': module_info['moduleCode'][i], 'to_type': 'Department', 'to_id': module_info['department'][i], 'type': 'offered_by'}
    module_info['Relationship_Data'][i].append(module_department)
    # module --> has_prerequisite --> prerequisite
    if module_info['prerequisite'][i] != 'nan' or module_info['prerequisite'][i] != '':
        module_prerequisite = {'from_type': 'Module', 'from_id': module_info['moduleCode'][i], 'to_type': 'Module', 'to_id': module_info['prerequisite'][i], 'type': 'has_prerequisite'}
        module_info['Relationship_Data'][i].append(module_prerequisite)
    # module --> has_preclusion --> preclusion
    if module_info['preclusion'][i] != 'nan' or module_info['preclusion'][i] != '':
        module_preclusion = {'from_type': 'Module', 'from_id': module_info['moduleCode'][i], 'to_type': 'Module', 'to_id': module_info['preclusion'][i], 'type': 'has_preclusion'}
        module_info['Relationship_Data'][i].append(module_preclusion)
    # module --> teaches_skill --> skill
    module_skill = {'from_type': 'Module', 'from_id': module_info['moduleCode'][i], 'to_type': 'Skill', 'to_id': module_info['Skill_entities'][i], 'type': 'teaches_skill'}
    module_info['Relationship_Data'][i].append(module_skill)
    # module --> offered_in --> semester
    sems = [col for col in ['semester_01', 'semester_02', 'semester_03', 'semester_04'] if (module_info[col] == 1).all()]
    module_semester = {'from_type': 'Module', 'from_id': module_info['moduleCode'][i], 'to_type': 'Semester', 'to_id': sems, 'type': 'offered_in'}
    module_info['Relationship_Data'][i].append(module_semester)

# explode the list of dictionaries into separate rows
module_info = module_info.explode('Relationship_Data')


print(student_info)

        Student_Name Matric_Number       NRIC  Year            Faculties  \
0        Tracy Lewis     A0216920B  XXXXX506Z     1  NUS Business School   
0        Tracy Lewis     A0216920B  XXXXX506Z     1  NUS Business School   
0        Tracy Lewis     A0216920B  XXXXX506Z     1  NUS Business School   
0        Tracy Lewis     A0216920B  XXXXX506Z     1  NUS Business School   
0        Tracy Lewis     A0216920B  XXXXX506Z     1  NUS Business School   
...              ...           ...        ...   ...                  ...   
3284  Angel Peterson     A0218295X  XXXXX501Z     3  NUS Business School   
3284  Angel Peterson     A0218295X  XXXXX501Z     3  NUS Business School   
3284  Angel Peterson     A0218295X  XXXXX501Z     3  NUS Business School   
3284  Angel Peterson     A0218295X  XXXXX501Z     3  NUS Business School   
3284  Angel Peterson     A0218295X  XXXXX501Z     3  NUS Business School   

                        Major Second Major  \
0     Business Administration          Na

In [26]:
# write back to csv
student_info.to_csv('../entity_extraction/module_info_final.csv', index=False)

## Department Info

In [14]:
# load data
department_info = pd.read_csv('../entity_extraction/department_info_final.csv')

In [15]:
# create new df
department_faculty = pd.DataFrame()

# department --> is_under_faculty --> faculty
department_faculty['Subject'] = department_info['department']
department_faculty['Predicate'] = 'is_under_faculty'
department_faculty['Object'] = department_info['faculty']

print(department_faculty)

                            Subject         Predicate  \
0        NUS Medicine Dean's Office  is_under_faculty   
1                      Architecture  is_under_faculty   
2                        Accounting  is_under_faculty   
3      Communications and New Media  is_under_faculty   
4                           History  is_under_faculty   
..                              ...               ...   
106  Ridge View Residential College  is_under_faculty   
107   University Scholars Programme  is_under_faculty   
108  College of Alice and Peter Tan  is_under_faculty   
109            Residential Colleges  is_under_faculty   
110                Yale-NUS College  is_under_faculty   

                                Object  
0         Yong Loo Lin Sch of Medicine  
1    College of Design and Engineering  
2                  NUS Business School  
3              Arts and Social Science  
4              Arts and Social Science  
..                                 ...  
106                Residenti

## Staff Info

In [16]:
# load data
staff_info = pd.read_csv('../entity_extraction/staff_info_final.csv')

In [17]:
# create new df
staff_module = pd.DataFrame()

# staff --> teaches_module --> module
staff_module['Subject'] = staff_info['Employee Name']
staff_module['Predicate'] = 'teaches_module'
staff_module['Object'] = staff_info['Modules Taught']

print(staff_module)

                    Subject       Predicate      Object
0    Marin Sergio Hernandez  teaches_module     CEG5003
1           Kathryn Cordova  teaches_module     ESE2102
2           Barbara Sanchez  teaches_module   LAT4201HM
3               Bryce Lucas  teaches_module  DMB1203MNO
4            Judith Camacho  teaches_module    EC4401HM
..                      ...             ...         ...
995              Lisa Reyes  teaches_module    MUA3256B
996           Melanie Kelly  teaches_module    LLJ5342V
997          Ronald Spencer  teaches_module      PC2032
998          Charles Rogers  teaches_module      PL4251
999          Vanessa Taylor  teaches_module     MUA4107

[1000 rows x 3 columns]


In [18]:
# create new df
staff_department = pd.DataFrame()

# staff --> is_in_department --> department
staff_department['Subject'] = staff_info['Employee Name']
staff_department['Predicate'] = 'is_in_department'
staff_department['Object'] = staff_info['Department_ID']

print(staff_department)

                    Subject         Predicate  \
0    Marin Sergio Hernandez  is_in_department   
1           Kathryn Cordova  is_in_department   
2           Barbara Sanchez  is_in_department   
3               Bryce Lucas  is_in_department   
4            Judith Camacho  is_in_department   
..                      ...               ...   
995              Lisa Reyes  is_in_department   
996           Melanie Kelly  is_in_department   
997          Ronald Spencer  is_in_department   
998          Charles Rogers  is_in_department   
999          Vanessa Taylor  is_in_department   

                                  Object  
0    Electrical and Computer Engineering  
1    Civil and Environmental Engineering  
2            Centre for Language Studies  
3                      BIZ Dean's Office  
4                              Economics  
..                                   ...  
995                  YSTCM Dean's Office  
996                    FoL Dean's Office  
997                     

## Save Dataframes

In [None]:
# write all back to original csv files
student_major.to_csv('../entity_extraction/student_major.csv', index=False)
student_faculty.to_csv('../entity_extraction/student_faculty.csv', index=False)
student_module.to_csv('../entity_extraction/student_module.csv', index=False)

In [181]:
# write all back to  csv files


student_major.to_csv('student_major.csv', index=False)
student_faculty.to_csv('student_faculty.csv', index=False)
student_module.to_csv('student_module.csv', index=False)
module_department.to_csv('module_department.csv', index=False)
module_prerequisite.to_csv('module_prerequisite.csv', index=False)
module_preclusion.to_csv('module_preclusion.csv', index=False)
module_skill.to_csv('module_skill.csv', index=False)
module_semester.to_csv('module_semester.csv', index=False)
department_faculty.to_csv('department_faculty.csv', index=False)
staff_module.to_csv('staff_module.csv', index=False)
staff_department.to_csv('staff_department.csv', index=False)
