## Entity and Relationship Extractions function

- argument to be just the original csv file path
- if any column names is in the list of column names then we carry out the functions below
- one function for entity extraction (exlcuding module info and review csv)
- one function for relationship extraction


In [2]:
import pandas as pd
import ast 
import re
import spacy

In [3]:
# Load your spaCy model
nlp = spacy.load('en_core_web_sm')

In [29]:
def extract_entities_rs(csv_file_path): 

    # Predefined entity columns and their corresponding new column names for entity extraction 
    target_cols = ['Student_Name', 'Faculties', 'Degree', 'Major', 'Module', 'module_code', 'moduleCode', 'Skills', 'Staff', 
                   'Modules_Completed', 'department', 'faculty', 'prerequisite', 'preclusion', 'Employee Name', 
                   'Department', 'Modules Taught', 'Title', 'Job Title', 'Tech Skills', 'university'
                   'school', 'degree', 'description', 'message'] # Add any other columns you want to check for
    # 'description', 'message']  

    new_entity_cols = {
        'Student_Name': ('student_entities', 'STUDENT'),
        'Degree': ('degree_entities', 'DEGREE'),
        'degree': ('degree_entities', 'DEGREE'),
        'Major': ('major_entities', 'MAJOR'),
        'Module': ('module_entities', 'MODULE'),
        'Modules_Completed': ('module_entities', 'MODULE'),
        'module_code': ('module_entities', 'MODULE'),
        'moduleCode': ('module_entities', 'MODULE'),
        'Modules Taught': ('module_entities', 'MODULE'),
        'prerequisite': ('prerequisite_entities', 'PREREQUISITEGROUP'),
        'preclusion': ('preclusion_entities', 'PRECLUSIONGROUP'),
        'Skills': ('skill_entities', 'SKILL'),
        'Tech Skills': ('skill_entities', 'SKILL'),
        'Staff': ('staff_entities', 'STAFF'),
        'Employee Name': ('staff_entities', 'STAFF'),
        'department': ('department_entities', 'DEPARTMENT'), 
        'Department': ('department_entities', 'DEPARTMENT'), 
        'Faculties': ('faculty_entities', 'FACULTY'),
        'faculty': ('faculty_entities', 'FACULTY'),
        'school': ('faculty_entities', 'FACULTY'),
        'Title': ('job_entities', 'JOB'),
        'Job Title': ('job_entities', 'JOB'),
        'university': ('university_entities', 'UNIVERSITY'),
        # 'description': ('description_entities', 'DESCRIPTION'),
        # 'message': ('message_entities', 'MESSAGE')
    }

    # Relationship mappings
    relationship_mappings = {
        ('student_entities', 'faculty_entities'): {"from_type": "STUDENT", "to_type": "FACULTY", "relationship_type": "STUDYING_UNDER"},
        ('student_entities', 'major_entities'): {"from_type": "STUDENT", "to_type": "MAJOR", "relationship_type": "MAJOR_IN"},
        ('student_entities', 'module_entities'): {"from_type": "STUDENT", "to_type": "MODULE", "relationship_type": "COMPLETED"},
        ('module_entities', 'department_entities'): {"from_type": "MODULE", "to_type": "DEPARTMENT", "relationship_type": "BELONGS_TO"},
        ('module_entities', 'prerequisite_entities', 'MUST_HAVE_TAKEN_ONE_OF'): {"from_type": "MODULE", "to_type": "DEPARTMENT", "relationship_type": "MUST_HAVE_TAKEN_ONE_OF"},
        ('module_entities', 'preclusion_entities', 'MUST_NOT_HAVE_TAKEN_ONE_OF'): {"from_type": "MODULE", "to_type": "DEPARTMENT", "relationship_type": "MUST_NOT_HAVE_TAKEN_ONE_OF"},

        ('module_entities', 'prerequisite_entities', 'INCLUDED_AS_PREREQUISITE'): {"from_type": "MODULE", "to_type": "DEPARTMENT", "relationship_type": "INCLUDED_AS_PREREQUISITE"},
        ('module_entities', 'preclusion_entities', 'INCLUDED_AS_PRECLUSION'): {"from_type": "MODULE", "to_type": "DEPARTMENT", "relationship_type": "INCLUDED_AS_PRECLUSION"},
        ('module_entities', 'semester_entities', 'OFFERED_IN'): {"from_type": "MODULE", "to_type": "SEMESTER", "relationship_type": "OFFERED_IN"},
        #SEMESTER (can be excluded as this is exception for module info?)
        ('module_entities', 'staff_entities'): {"from_type": "MODULE", "to_type": "STAFF", "relationship_type": "TAUGHT_BY"},
        ('staff_entities', 'department_entities'): {"from_type": "STAFF", "to_type": "DEPARTMENT", "relationship_type": "EMPLOYED_UNDER"},
        ('department_entities', 'faculty_entities'): {"from_type": "DEPARTMENT", "to_type": "FACULTY", "relationship_type": "PART_OF"},
        ('job_entities', 'faculty_entities'): {"from_type": "DEPARTMENT", "to_type": "FACULTY", "relationship_type": "PART_OF"},
        ('major_entities', 'degree_entities'): {"from_type": "MAJOR", "to_type": "DEGREE", "relationship_type": "IS_UNDER"},
        ('job_entities', 'skill_entities'): {"from_type": "JOB", "to_type": "SKILL", "relationship_type": "REQUIRES"},
        ('module_entities', 'skill_entities'): {"from_type": "MODULE", "to_type": "SKILL", "relationship_type": "SKILL_TAUGHT"},
        ## ADDED
        ('university_entities', 'degree_entities'): {"from_type": "UNIVERSTITY", "to_type": "DEGREE", "relationship_type": "OFFERS"},

    }

    # Extract entities function 
    def extract_entities(csv_file_path):
        def parse_entity(x, entity_type):
            # Handle dictionary strings e.g. 
            if isinstance(x, str) and x.startswith('{') and x.endswith('}'):
                return ast.literal_eval(x)  # Convert to dictionary
            # Handle already existing list 
            elif isinstance(x, list):
                # return [str(item).strip() for item in flatten_list(x)]  
                return [(str(item).strip(), entity_type) for item in flatten_list(x)]
            # Handle list strings
            elif isinstance(x, str) and x.startswith('[') and x.endswith(']'):
                try:
                    parsed_list = ast.literal_eval(x)  # Convert string representation of list to actual list
                    # return [str(item).strip() for item in flatten_list(parsed_list)]
                    return [(str(item).strip(), entity_type) for item in flatten_list(parsed_list)]
                except (ValueError, SyntaxError):
                    # return [x.strip()] 
                    return [(x.strip(), entity_type)]
                
            # Handle comma-separated strings
            # NEED TO REVISE THIS AS SOME VALUES HAVE COMMA IN ITSELF E.G. COLLEGE OF HUMANITIES, ARTS & SOCIAL SCIENCES 
            elif pd.notna(x):
                # return [str(item).strip() for item in str(x).split(',')]
                # return [x.strip()]
                return [(x.strip(), entity_type)]
            
            # Return an empty list for NaN or other invalid entries
            return []
        
        # Helper function to flatten nested lists
        def flatten_list(nested_list):
            flat_list = []
            for i in nested_list:
                if isinstance(i, list):
                    flat_list.extend(flatten_list(i))
                else:
                    flat_list.append(i)
            return flat_list

        df = pd.read_csv(csv_file_path)

        # Extract unique skills from the 'Skills' column in a separate CSV
        skills_csv_file_path = '../../backend/data/07 - Jobs and relevant skillset (linkedin).csv'
        df_skills = pd.read_csv(skills_csv_file_path)
        unique_skills = []
        
        if 'Skills' in df_skills.columns:
            # Modified skills parsing
            skills_list = []
            for skill_text in df_skills["Skills"].dropna():
                # Handle the case where skills are comma-separated
                if isinstance(skill_text, str):
                    # Remove any square brackets if present
                    skill_text = skill_text.strip('[]')
                    # Split by comma and clean up each skill
                    skills = [skill.strip().strip('"\'') for skill in skill_text.split(',')]
                    skills_list.extend(skills)
            
            # Clean up skills and remove duplicates
            unique_skills = list(set([re.sub(r'\s\(.*\)', '', skill) for skill in skills_list if skill]))

            # remove bracketed abbreviations from skills and the space before it
            unique_skills = [re.sub(r'\s\(.*\)', '', skill) for skill in unique_skills]

            # remove 'Microsoft ' substring before skills
            unique_skills = [re.sub(r'Microsoft\s', '', skill) for skill in unique_skills]

        # Function to extract skills using PhraseMatcher
        def extract_skills(text):
            if not isinstance(text, str):
                return []  # Return an empty list if the input is not a valid string
            
            doc = nlp(text)
            skills = [] 

            # extract skill entities
            for skill in unique_skills:
                # create a regex pattern with word boundaries around the job title
                pattern = r"\b" + re.escape(skill) + r"\b"
    
                # search for the job title in the text (case-insensitive)
                if re.search(pattern, text, re.IGNORECASE):
                    skills.append(skill)

            return skills

        # Function to extract staff names
        def extract_staff_names(text):
            if isinstance(text, str):
                doc = nlp(text)
                staff = []

                # Regex pattern to capture staff names with titles like 'Prof', 'Dr', 'Lecturer', 'Tutor'
                staff_pattern = re.compile(r'\b(Prof|Professor|Dr|Lecturer|Tutor|Instructor)\s*[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?', re.IGNORECASE)
                
                for ent in doc.ents:
                    match = staff_pattern.search(ent.text)
                    if match:
                        staff_name = match.group(0)

                        # Exclude unwanted phrases that are falsely detected as staff names
                        if not any(word in staff_name.lower() for word in ['tutorial', 'attendance', 'assignment', 'participation', 'ratios', 'draft', 'profile']):
                            staff.append(staff_name.strip())

                # Remove duplicates in staff
                return list(set(staff))  
            
            return [] 

        # Extract semester entities
        semester_cols = ['semester_01', 'semester_02', 'semester_03', 'semester_04']
        if all(col in df.columns for col in semester_cols):
            df['semester_entities'] = df.apply(lambda row: [(col, 'SEMESTER') for col in semester_cols if row[col] == 1], axis=1)

        # Extract entities using appropriate functions 
        for col in target_cols:
            if col in df.columns:
                # new_entity_col = new_entity_cols[col]     
                new_entity_col, entity_type = new_entity_cols.get(col, (col, 'UNKNOWN'))  # Default entity type as 'UNKNOWN'  

                # If the column is description or message, we apply special extraction
                if col in ['description']:
                    # df['skill_entities'] = df[col].apply(lambda text: extract_skills(text))
                    df['skill_entities'] = df[col].apply(lambda text: [(skill, 'SKILL') for skill in extract_skills(text)])

                elif col in ['description', 'message']:
                    # df['skill_entities'] = df[col].apply(lambda text: extract_skills(text))
                    # df['staff_entities'] = df[col].apply(lambda text: extract_staff_names(text))
                    df['skill_entities'] = df[col].apply(lambda text: [(skill, 'SKILL') for skill in extract_skills(text)])
                    df['staff_entities'] = df[col].apply(lambda text: [(staff, 'STAFF') for staff in extract_staff_names(text)])

                else:
                    # Create the new column with extracted entities, using the helper function
                    # df[new_entity_col] = df[col].apply(parse_entity)
                    df[new_entity_col] = df[col].apply(lambda x: parse_entity(x, entity_type))

        
        return df
    
    # Extract relationships function 
    def create_dynamic_relationship(df, from_type, from_id_col, to_type, to_id_col, relationship_type, output_col):
        # List to store formatted relationship dictionaries for each row
        relationship_column = []

        # Iterate through each row of the DataFrame
        for _, row in df.iterrows():
            # Extract the from_id and to_id values from the specified columns
            # from_ids = row[from_id_col] if isinstance(row[from_id_col], list) else [row[from_id_col]]
            # to_ids = row[to_id_col] if isinstance(row[to_id_col], list) else [row[to_id_col]]
            from_ids = [entity[0] for entity in row[from_id_col] if isinstance(entity, tuple)] if isinstance(row[from_id_col], list) else [row[from_id_col]]
            to_ids = [entity[0] for entity in row[to_id_col] if isinstance(entity, tuple)] if isinstance(row[to_id_col], list) else [row[to_id_col]]

            # Create a list of dictionaries
            relationship_dict = [
                {
                    "from_type": from_type,
                    "from_id": from_id,
                    "to_type": to_type,
                    "to_id": to_id,
                    "type": relationship_type
                }
                for from_id in from_ids if pd.notna(from_id)
                for to_id in to_ids if pd.notna(to_id)  # Only include non-NaN to_id values
            ]

            # Append the relationship dictionary or an empty list if no valid to_id found
            relationship_column.append(relationship_dict if relationship_dict else [])

        # Add the relationships as a new column to the DataFrame
        df[output_col] = relationship_column
        
        # Return the updated DataFrame with the new relationships column
        return df
    
    # Step 1: Extract Entities 
    df = extract_entities(csv_file_path)

    # Step 2: Extract Relationships based on predefined mappings 
    for key, relationship_info in relationship_mappings.items():
        if len(key) == 3:
            from_col, to_col, rel_key = key
        elif len(key) == 2:
            from_col, to_col = key
            rel_key = ''  # Set rel_key as an empty string or any default value as needed

        if from_col in df.columns and to_col in df.columns:
            output_col = f"{from_col}_to_{to_col}_{relationship_info['relationship_type'].lower()}_relationship"
            df = create_dynamic_relationship(
                df,
                relationship_info['from_type'],
                from_col,
                relationship_info['to_type'],
                to_col,
                relationship_info['relationship_type'],
                output_col
            )

    # Combine all relationship columns into one
    relationship_columns = [col for col in df.columns if '_relationship' in col]
    df['relationships'] = df[relationship_columns].apply(lambda row: [item for sublist in row if isinstance(sublist, list) for item in sublist], axis=1)

    # Drop the individual relationship columns if no longer needed
    df = df.drop(columns=relationship_columns)

    # Step 3: Output final df
    return df


# Extract from existing cleaned datasets 
csv_file_path = '../../backend/data/00 - mock_student_data.csv'
# csv_file_path = '../../backend/data/01 - mock_module_info.csv'
# csv_file_path = '../../backend/data/02 - mock_department_list.csv'
# csv_file_path = '../../backend/data/03 - mock_staff_info.csv'
# csv_file_path = '../../backend/data/04 - mock_module_reviews.csv'
# csv_file_path = '../../backend/data/05 - mock_venue_info.csv'
# csv_file_path = '../../backend/data/06 - nus_undergraduate_programmes.csv'
# csv_file_path = '../../backend/data/07 - Jobs abd relevant skillset (linkedin).csv'
# csv_file_path = '../../backend/data/08 - jobs_and_tech (ONET).csv'
# csv_file_path = '../../backend/data/09 - jobs_and_skills (ONET).csv'
# csv_file_path = '../../backend/data/10 - Graduate Employment Survey.csv'


df = extract_entities_rs(csv_file_path)
print(df.head())
df.to_csv('test.csv', index=False)

      Student_Name Matric_Number       NRIC  Year  \
0      Tracy Lewis     A0216920B  XXXXX506Z     1   
1    Andrew Holden     A0225069H  XXXXX799Z     3   
2  Phillip Bullock     A0228204E  XXXXX194Z     1   
3    Stephen Owens     A0263298Z  XXXXX790Z     1   
4   Valerie Rivera     A0200778Y  XXXXX150Z     3   

                           Faculties                    Major Second Major  \
0                NUS Business School  Business Administration          NaN   
1          YST Conservatory of Music                    Music          NaN   
2  College of Design and Engineering   Electrical Engineering          NaN   
3                          Dentistry                Dentistry          NaN   
4                          Computing       Business Analytics          NaN   

                                   Modules_Completed  \
0  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
1  ['CFA1111A', 'MUA1190', 'MUA2109', 'MUA1172', ...   
2  ['ME1102', 'BN1111', 'PF1103', 'CN1101A',

## Ze Ming's draft

- argument for relationship extraction should be just the df that is returned from the entity extraction function + the new csv file path


In [57]:
def extract_entities(csv_file_path):

    # Predefined entity columns and their corresponding new column names
    target_cols = ['Degree', 'Major', 'Module', 'module_code', 'Skills', 'Staff', 'Modules_Completed', 'department', 'faculty', 
                   'Employee Name', 'Department', 'Modules Taught', 'Title', 'Job Title', 'Tech Skills', 'School', 'University']  # Add any other columns you want to check for
    
    new_entity_cols = {
        'Degree': 'degree_entities',
        'Major': 'major_entities',
        'Module': 'module_entities',
        'module_code': 'module_entities',
        'Skills': 'skills_entities',
        'Tech Skills': 'skills_entities',
        'Staff': 'staff_entities',
        'Modules_Completed': 'modules_completed_entities',
        'department': 'department_entities', 
        'faculty': 'faculty_entities',
        'School': 'faculty_entities',
        'Employee Name': 'staff_entities', 
        'Department': 'department_entities', 
        'Modules Taught': 'module_entities',
        'Title': 'job_entities',
        'Job Title': 'job_entities',
        'University': 'university_entities'
    }


    def parse_entity(x):
        # Handle dictionary strings
        if isinstance(x, str) and x.startswith('{') and x.endswith('}'):
            return ast.literal_eval(x)  # Convert to dictionary
        
        # Handle list strings
        elif isinstance(x, str) and x.startswith('[') and x.endswith(']'):
            return [f"'{str(item).strip()}'" for item in ast.literal_eval(x)]
        
        # Handle comma-separated strings
        elif pd.notna(x):
            return [f"'{str(item).strip()}'" for item in str(x).split(',')]
        
        # Return an empty list for NaN or other invalid entries
        return []

    df = pd.read_csv(csv_file_path)

    # Process each specified entity column
    for col in target_cols:
        if col in df.columns:
            # Determine the new column name based on new_col_names dictionary if provided
            new_entity_col = new_entity_cols[col]
            
            # Create the new column with extracted entities, using the helper function
            df[new_entity_col] = df[col].apply(parse_entity)
    
    return df

In [None]:
def create_dynamic_relationship(df, from_type, from_id_col, to_type, to_id_col, relationship_type, output_col):
    # List to store formatted relationship dictionaries for each row
    relationship_column = []

    # Iterate through each row of the DataFrame
    for _, row in df.iterrows():
        # Extract the `from_id` and `to_id` values from the specified columns
        from_ids = row[from_id_col] if isinstance(row[from_id_col], list) else [row[from_id_col]]
        to_ids = row[to_id_col] if isinstance(row[to_id_col], list) else [row[to_id_col]]

        # Create a list of dictionaries for each `to_id`
        relationship_dict = [
            {
                "from_type": from_type,
                "from_id": from_id,
                "to_type": to_type,
                "to_id": to_id,
                "type": relationship_type
            }
            for from_id in from_ids if pd.notna(from_id)
            for to_id in to_ids if pd.notna(to_id)  # Only include non-NaN `to_id` values
        ]

        # Append the relationship dictionary or an empty list if no valid `to_id` found
        relationship_column.append(relationship_dict if relationship_dict else [])

    # Add the relationships as a new column to the DataFrame
    df[output_col] = relationship_column
    
    # Return the updated DataFrame with the new relationships column
    return df

## Chloe's draft


In [45]:
# def extract_entities(csv_file_path, entity_cols, new_col_names=None):
#     df = pd.read_csv(csv_file_path)    
#     # Process each specified entity column
#     for col in entity_cols:
#         if col in df.columns:
#             # Determine the new column name based on new_col_names dictionary if provided
#             new_col_name = new_col_names[col]
#             # new_col_name = new_col_names.get(col, f'{col}_entities') if new_col_names else f'{col}_entities'
            
#             # Create the new column with extracted entities and rename it as needed if its not in a list 
#             df[new_col_name] = df[col].apply(
#                 lambda x: x if isinstance(x, list) else [item.strip() for item in str(x).split(',')] if pd.notna(x) else []
#             )
    
#     return df

In [53]:
# 00 - mock_student_data.csv
csv_file_path = '../../backend/data/00 - mock_student_data.csv'
entity_cols = ['Student_Name', 'Modules_Completed', 'Grades', 'Faculties', 'Major']
new_col_names = {'Student_Name': 'student_entities', 'Modules_Completed': 'module_entities', 'Grades': 'grade_entities', 'Faculties': 'faculty_entities', 'Major': 'major_entities'}

df = extract_entities(csv_file_path, entity_cols, new_col_names)
print(df.head())

      Student_Name Matric_Number       NRIC  Year  \
0      Tracy Lewis     A0216920B  XXXXX506Z     1   
1    Andrew Holden     A0225069H  XXXXX799Z     3   
2  Phillip Bullock     A0228204E  XXXXX194Z     1   
3    Stephen Owens     A0263298Z  XXXXX790Z     1   
4   Valerie Rivera     A0200778Y  XXXXX150Z     3   

                           Faculties                    Major Second Major  \
0                NUS Business School  Business Administration          NaN   
1          YST Conservatory of Music                    Music          NaN   
2  College of Design and Engineering   Electrical Engineering          NaN   
3                          Dentistry                Dentistry          NaN   
4                          Computing       Business Analytics          NaN   

                                   Modules_Completed  \
0  ['ACC1701B', 'DMB1202ACC', 'DMB1201MKT', 'MNO1...   
1  ['CFA1111A', 'MUA1190', 'MUA2109', 'MUA1172', ...   
2  ['ME1102', 'BN1111', 'PF1103', 'CN1101A',

In [18]:
# 02 - mock_department_list
csv_file_path = '../../backend/data/02 - mock_department_list.csv'
entity_cols = ['department', 'faculty']
new_col_names = {'department': 'department_entities', 'faculty': 'faculty_entities'}

df = extract_entities(csv_file_path, entity_cols, new_col_names)
print(df.head())

  department_id                    department  \
0     NUSDP0001    NUS Medicine Dean's Office   
1     NUSDP0002                  Architecture   
2     NUSDP0003                    Accounting   
3     NUSDP0004  Communications and New Media   
4     NUSDP0005                       History   

                             faculty             department_entities  \
0       Yong Loo Lin Sch of Medicine    [NUS Medicine Dean's Office]   
1  College of Design and Engineering                  [Architecture]   
2                NUS Business School                    [Accounting]   
3            Arts and Social Science  [Communications and New Media]   
4            Arts and Social Science                       [History]   

                      faculty_entities  
0       [Yong Loo Lin Sch of Medicine]  
1  [College of Design and Engineering]  
2                [NUS Business School]  
3            [Arts and Social Science]  
4            [Arts and Social Science]  


In [19]:
# 03 - mock_staff_info
csv_file_path = '../../backend/data/03 - mock_staff_info.csv'
entity_cols = ['Employee Name', 'Department', 'Modules Taught']
new_col_names = {'Employee Name': 'staff_entities', 'Department': 'department_entities', 'Modules Taught': 'module_entities'}

df = extract_entities(csv_file_path, entity_cols, new_col_names)
print(df.head())

  Employee ID          staff_entities       NRIC                   DOB  \
0  NUSPF12345  Marin Sergio Hernandez  XXXXX479A  1983-02-23T00:00:00Z   
1  NUSPF12346         Kathryn Cordova  XXXXX815A  1985-09-02T00:00:00Z   
2  NUSPF12347         Barbara Sanchez  XXXXX777A  1971-07-30T00:00:00Z   
3  NUSPF12348             Bryce Lucas  XXXXX610A  1973-07-20T00:00:00Z   
4  NUSPF12349          Judith Camacho  XXXXX629A  1991-11-16T00:00:00Z   

                    DOJ                  department_entities module_entities  \
0  2009-10-31T00:00:00Z  Electrical and Computer Engineering         CEG5003   
1  2009-06-07T00:00:00Z  Civil and Environmental Engineering         ESE2102   
2  2008-05-09T00:00:00Z          Centre for Language Studies       LAT4201HM   
3  2002-01-17T00:00:00Z                    BIZ Dean's Office      DMB1203MNO   
4  2000-02-13T00:00:00Z                            Economics        EC4401HM   

             staff_entities                    department_entities  \
0  [

In [20]:
# 05 - mock_venue_info


In [21]:
# 06 - nus_undergraduate_programmes.csv
csv_file_path = '../../backend/data/06 - nus_undergraduate_programmes.csv'
entity_cols = ['Degree', 'Major']
new_col_names = {'Degree': 'degree_entities', 'Major': 'major_entities'}

df = extract_entities(csv_file_path, entity_cols, new_col_names)
print(df.head())

                       degree_entities  \
0  Bachelor of Business Administration   
1  Bachelor of Business Administration   
2  Bachelor of Business Administration   
3  Bachelor of Business Administration   
4  Bachelor of Business Administration   

                            major_entities  \
0               Applied Business Analytics   
1                       Business Economics   
2                                  Finance   
3          Innovation and Entrepreneurship   
4  Leadership and Human Capital Management   

                         degree_entities  \
0  [Bachelor of Business Administration]   
1  [Bachelor of Business Administration]   
2  [Bachelor of Business Administration]   
3  [Bachelor of Business Administration]   
4  [Bachelor of Business Administration]   

                              major_entities  
0               [Applied Business Analytics]  
1                       [Business Economics]  
2                                  [Finance]  
3          [Innov