# Relationship Extraction

In [1]:
# import libraries
import numpy as np
import pandas as pd
import ast


## Student Info

In [8]:
# read the student table from the entity_extraction folder
student_info = pd.read_csv(
    filepath_or_buffer='../entity_relationship_extraction/entity_extraction/student_info_final.csv'
)
# preview the first rows (DataFrame.head defaults to five)
print(student_info.head())

    Student_Name Matric_Number       NRIC  Year                  Faculties  \
0    Tracy Lewis     A0216920B  XXXXX506Z     1        NUS Business School   
1    Tracy Lewis     A0216920B  XXXXX506Z     1        NUS Business School   
2    Tracy Lewis     A0216920B  XXXXX506Z     1        NUS Business School   
3  Andrew Holden     A0225069H  XXXXX799Z     3  YST Conservatory of Music   
4  Andrew Holden     A0225069H  XXXXX799Z     3  YST Conservatory of Music   

                     Major Second Major  \
0  Business Administration          NaN   
1  Business Administration          NaN   
2  Business Administration          NaN   
3                    Music          NaN   
4                    Music          NaN   

                                   Modules_Completed  \
0  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
1  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
2  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
3  ['CFA1111A', 'MUA1190', 'MUA2109', 'MUA1172',

In [19]:
# give every student row its own independent empty list; a fresh list is
# built per row so the rows never share one list object
student_info['Relationship_Data'] = [list() for _ in student_info.index]
print(student_info)

         Student_Name Matric_Number       NRIC  Year  \
0         Tracy Lewis     A0216920B  XXXXX506Z     1   
1       Andrew Holden     A0225069H  XXXXX799Z     3   
2     Phillip Bullock     A0228204E  XXXXX194Z     1   
3      Valerie Rivera     A0200778Y  XXXXX150Z     3   
4         Robert Hall     A0222508M  XXXXX824Z     3   
...               ...           ...        ...   ...   
3280  Renee Carpenter     A0275436A  XXXXX719Z     1   
3281    Garrett Mcgee     A0248365D  XXXXX212Z     1   
3282    Michael Colon     A0284143A  XXXXX269Z     3   
3283     Wesley Moran     A0276835Y  XXXXX301Z     3   
3284   Angel Peterson     A0218295X  XXXXX501Z     3   

                              Faculties                    Major  \
0                   NUS Business School  Business Administration   
1             YST Conservatory of Music                    Music   
2     College of Design and Engineering   Electrical Engineering   
3                             Computing       Business 

In [21]:
# Attach three relationship dictionaries to every student row, then flatten
# the per-row lists so each resulting row holds a single relationship.

# select the needed columns once instead of on every iteration
student_names = student_info['Student_Name']
student_majors = student_info['Major']
student_faculties = student_info['Faculties']
student_modules = student_info['Modules_Completed']
student_relationships = student_info['Relationship_Data']

for row in range(len(student_info)):
    name = student_names[row]
    student_relationships[row].extend([
        # student --> majors_in --> major
        {'from_type': 'Student', 'from_id': name, 'to_type': 'Major', 'to_id': student_majors[row], 'type': 'majors_in'},
        # student --> is_in_faculty --> faculty
        {'from_type': 'Student', 'from_id': name, 'to_type': 'Faculty', 'to_id': student_faculties[row], 'type': 'is_in_faculty'},
        # student --> takes_module --> module
        # (to_id is the whole Modules_Completed cell, exactly as loaded)
        {'from_type': 'Student', 'from_id': name, 'to_type': 'Module', 'to_id': student_modules[row], 'type': 'takes_module'},
    ])

# one output row per relationship dictionary; the other columns are repeated
student_info = student_info.explode('Relationship_Data')

print(student_info)

        Student_Name Matric_Number       NRIC  Year  \
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
0        Tracy Lewis     A0216920B  XXXXX506Z     1   
1      Andrew Holden     A0225069H  XXXXX799Z     3   
1      Andrew Holden     A0225069H  XXXXX799Z     3   
...              ...           ...        ...   ...   
3283    Wesley Moran     A0276835Y  XXXXX301Z     3   
3283    Wesley Moran     A0276835Y  XXXXX301Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   
3284  Angel Peterson     A0218295X  XXXXX501Z     3   

                      Faculties                    Major Second Major  \
0           NUS Business School  Business Administration          NaN   
0           NUS Business School  Business Administration          NaN   
0           NUS Business School  Business Administration          NaN   
1     YST Conservatory of Music                

## Module Info

In [30]:
# read the module table from the entity_extraction folder
module_info = pd.read_csv(
    filepath_or_buffer='../entity_relationship_extraction/entity_extraction/module_info_final.csv'
)

In [31]:
# give every module row its own independent empty list to collect relationships
module_info['Relationship_Data'] = [list() for _ in module_info.index]

In [32]:
# For each module row, build one relationship dictionary per outgoing edge and
# collect them in that row's Relationship_Data list.

# indicator columns compared against 1 to decide which semesters offer a module
SEMESTER_COLUMNS = ['semester_01', 'semester_02', 'semester_03', 'semester_04']


def _has_value(value):
    """Return True when a cell holds real content.

    Missing CSV cells are loaded by pandas as NaN, so NaN/None, the empty
    string and the literal text 'nan' are all treated as "no value".
    Expects a scalar cell value (as produced by pd.read_csv).
    """
    if pd.isna(value):
        return False
    return str(value).strip() not in ('', 'nan')


for i in range(len(module_info)):
    module_code = module_info['moduleCode'][i]
    relationships = module_info['Relationship_Data'][i]

    # module --> offered_by --> department
    module_department = {'from_type': 'Module', 'from_id': module_code, 'to_type': 'Department', 'to_id': module_info['department'][i], 'type': 'offered_by'}
    relationships.append(module_department)

    # module --> has_prerequisite --> prerequisite
    # only emitted when a prerequisite is recorded; a bare
    # `!= 'nan' or != ''` test is always true and would let empty cells through
    prerequisite = module_info['prerequisite'][i]
    if _has_value(prerequisite):
        module_prerequisite = {'from_type': 'Module', 'from_id': module_code, 'to_type': 'Module', 'to_id': prerequisite, 'type': 'has_prerequisite'}
        relationships.append(module_prerequisite)

    # module --> has_preclusion --> preclusion (same missing-value rule)
    preclusion = module_info['preclusion'][i]
    if _has_value(preclusion):
        module_preclusion = {'from_type': 'Module', 'from_id': module_code, 'to_type': 'Module', 'to_id': preclusion, 'type': 'has_preclusion'}
        relationships.append(module_preclusion)

    # module --> teaches_skill --> skill
    module_skill = {'from_type': 'Module', 'from_id': module_code, 'to_type': 'Skill', 'to_id': module_info['Skill_entities'][i], 'type': 'teaches_skill'}
    relationships.append(module_skill)

    # module --> offered_in --> semester
    # read this row's own flags; testing the whole column with .all() would
    # give every module the same semester list
    sems = [col for col in SEMESTER_COLUMNS if module_info[col][i] == 1]
    module_semester = {'from_type': 'Module', 'from_id': module_code, 'to_type': 'Semester', 'to_id': sems, 'type': 'offered_in'}
    relationships.append(module_semester)

# explode the list of dictionaries into separate rows
module_info = module_info.explode('Relationship_Data')


print(module_info)

     moduleCode                                     title  \
0       ABM5001                 Leadership in Biomedicine   
0       ABM5001                 Leadership in Biomedicine   
0       ABM5001                 Leadership in Biomedicine   
0       ABM5001                 Leadership in Biomedicine   
0       ABM5001                 Leadership in Biomedicine   
...         ...                                       ...   
6848     ZB4299  Applied Project in Computational Biology   
6848     ZB4299  Applied Project in Computational Biology   
6848     ZB4299  Applied Project in Computational Biology   
6848     ZB4299  Applied Project in Computational Biology   
6848     ZB4299  Applied Project in Computational Biology   

                                            description  moduleCredit  \
0     Leadership is fundamental to the success of in...           2.0   
0     Leadership is fundamental to the success of in...           2.0   
0     Leadership is fundamental to the success o

In [33]:
# persist the module frame without the index column
# NOTE(review): this is the same path the module load cell reads, so the
# input file is replaced by the exploded frame
module_info.to_csv(
    path_or_buf='../entity_relationship_extraction/entity_extraction/module_info_final.csv',
    index=False,
)

## Department Info

In [15]:
# read the department table from the entity_extraction folder
department_info = pd.read_csv(
    filepath_or_buffer='../entity_relationship_extraction/entity_extraction/department_info_final.csv'
)

In [16]:
# give every department row its own independent empty list to collect relationships
department_info['Relationship_Data'] = [list() for _ in department_info.index]
print(department_info)

    department_id                      department  \
0       NUSDP0001      NUS Medicine Dean's Office   
1       NUSDP0002                    Architecture   
2       NUSDP0003                      Accounting   
3       NUSDP0004    Communications and New Media   
4       NUSDP0005                         History   
..            ...                             ...   
106     NUSDP0107  Ridge View Residential College   
107     NUSDP0108   University Scholars Programme   
108     NUSDP0109  College of Alice and Peter Tan   
109     NUSDP0110            Residential Colleges   
110     NUSDP0111                Yale-NUS College   

                               faculty  \
0         Yong Loo Lin Sch of Medicine   
1    College of Design and Engineering   
2                  NUS Business School   
3              Arts and Social Science   
4              Arts and Social Science   
..                                 ...   
106                Residential College   
107                        

In [18]:
# Attach the single department -> faculty relationship to every department row.
# NOTE(review): unlike the student and module cells, the lists are not
# exploded here, so Relationship_Data stays a one-element list - confirm
# that downstream readers expect this shape.

# select the needed columns once instead of on every iteration
department_names = department_info['department']
department_faculties = department_info['faculty']
department_relationships = department_info['Relationship_Data']

for row in range(len(department_info)):
    # department --> is_under_faculty --> faculty
    department_relationships[row].append(
        {'from_type': 'Department', 'from_id': department_names[row], 'to_type': 'Faculty', 'to_id': department_faculties[row], 'type': 'is_under_faculty'}
    )

print(department_info)

    department_id                      department  \
0       NUSDP0001      NUS Medicine Dean's Office   
1       NUSDP0002                    Architecture   
2       NUSDP0003                      Accounting   
3       NUSDP0004    Communications and New Media   
4       NUSDP0005                         History   
..            ...                             ...   
106     NUSDP0107  Ridge View Residential College   
107     NUSDP0108   University Scholars Programme   
108     NUSDP0109  College of Alice and Peter Tan   
109     NUSDP0110            Residential Colleges   
110     NUSDP0111                Yale-NUS College   

                               faculty  \
0         Yong Loo Lin Sch of Medicine   
1    College of Design and Engineering   
2                  NUS Business School   
3              Arts and Social Science   
4              Arts and Social Science   
..                                 ...   
106                Residential College   
107                        

In [19]:
# persist the department frame without the index column
# NOTE(review): this is the same path the department load cell reads, so the
# input file is replaced
department_info.to_csv(
    path_or_buf='../entity_relationship_extraction/entity_extraction/department_info_final.csv',
    index=False,
)

## Staff Info

In [21]:
# read the staff table from the entity_extraction folder
staff_info = pd.read_csv(
    filepath_or_buffer='../entity_relationship_extraction/entity_extraction/staff_info_final.csv'
)

In [22]:
# give every staff row its own independent empty list to collect relationships
staff_info['Relationship_Data'] = [list() for _ in staff_info.index]
print(staff_info)

    Employee ID           Employee Name       NRIC                   DOB  \
0    NUSPF12345  Marin Sergio Hernandez  XXXXX479A  1983-02-23T00:00:00Z   
1    NUSPF12346         Kathryn Cordova  XXXXX815A  1985-09-02T00:00:00Z   
2    NUSPF12347         Barbara Sanchez  XXXXX777A  1971-07-30T00:00:00Z   
3    NUSPF12348             Bryce Lucas  XXXXX610A  1973-07-20T00:00:00Z   
4    NUSPF12349          Judith Camacho  XXXXX629A  1991-11-16T00:00:00Z   
..          ...                     ...        ...                   ...   
995  NUSPF13340              Lisa Reyes  XXXXX828A  1993-09-18T00:00:00Z   
996  NUSPF13341           Melanie Kelly  XXXXX978A  1985-02-20T00:00:00Z   
997  NUSPF13342          Ronald Spencer  XXXXX396A  1985-06-13T00:00:00Z   
998  NUSPF13343          Charles Rogers  XXXXX749A  1984-02-06T00:00:00Z   
999  NUSPF13344          Vanessa Taylor  XXXXX815A  1970-09-17T00:00:00Z   

                      DOJ                           Department Modules Taught  \
0    2

In [25]:
# Attach two relationship dictionaries (module taught, home department) to
# every staff row.
# NOTE(review): the lists are not exploded in this cell, so each row keeps a
# two-element list - confirm that downstream readers expect this shape.

# select the needed columns once instead of on every iteration
staff_names = staff_info['Employee Name']
staff_modules = staff_info['Modules Taught']
staff_departments = staff_info['Department']
staff_relationships = staff_info['Relationship_Data']

for row in range(len(staff_info)):
    employee = staff_names[row]
    staff_relationships[row].extend([
        # staff --> teaches_module --> module
        # (to_id is the whole 'Modules Taught' cell, exactly as loaded)
        {'from_type': 'Staff', 'from_id': employee, 'to_type': 'Module', 'to_id': staff_modules[row], 'type': 'teaches_module'},
        # staff --> is_in_department --> department
        {'from_type': 'Staff', 'from_id': employee, 'to_type': 'Department', 'to_id': staff_departments[row], 'type': 'is_in_department'},
    ])

print(staff_info)

    department_id                      department  \
0       NUSDP0001      NUS Medicine Dean's Office   
1       NUSDP0002                    Architecture   
2       NUSDP0003                      Accounting   
3       NUSDP0004    Communications and New Media   
4       NUSDP0005                         History   
..            ...                             ...   
106     NUSDP0107  Ridge View Residential College   
107     NUSDP0108   University Scholars Programme   
108     NUSDP0109  College of Alice and Peter Tan   
109     NUSDP0110            Residential Colleges   
110     NUSDP0111                Yale-NUS College   

                               faculty  \
0         Yong Loo Lin Sch of Medicine   
1    College of Design and Engineering   
2                  NUS Business School   
3              Arts and Social Science   
4              Arts and Social Science   
..                                 ...   
106                Residential College   
107                        

## Save Dataframes

In [None]:
# Write every frame back to the CSV file it was loaded from (index omitted).
# NOTE(review): these are the same paths the load cells read, so running the
# notebook again starts from frames that already contain Relationship_Data
# (and, for students and modules, already-exploded rows).
for frame, csv_path in (
    (student_info, '../entity_relationship_extraction/entity_extraction/student_info_final.csv'),
    (module_info, '../entity_relationship_extraction/entity_extraction/module_info_final.csv'),
    (department_info, '../entity_relationship_extraction/entity_extraction/department_info_final.csv'),
    (staff_info, '../entity_relationship_extraction/entity_extraction/staff_info_final.csv'),
):
    frame.to_csv(csv_path, index=False)