# Import Libraries

In [1]:
# Data Processing 
import pandas as pd
import re
import ast
from collections import OrderedDict

# Data Manipulation
import numpy as np

# Data Visualization
import seaborn as sns 
import plotly.express as px
from termcolor import colored
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Data Wrangling

## Gathering Data

In [2]:
# Load the scraped LinkedIn alumni data (path is relative to the working directory)
df_alumni = pd.read_csv("Data/Linkedin_SCU_Alumni.csv")
# Preview only the first rows instead of rendering the entire frame
df_alumni.head()

Unnamed: 0,Name,Headlines,Linkedin Link,Profile Picture,Experience,Education,Licenses & Certifications
0,Njo Joanna Nydia Siswanto,Fresh Graduate | Bachelor of Food Technology |...,https://www.linkedin.com/in/njo-joanna-nydia-s...,https://media.licdn.com/dms/image/v2/D5635AQFM...,"[""Research And Development Intern\nResearch An...","[""Unika Soegijapranata Semarang\nUnika Soegija...",['TOEFL ITP\nTOEFL ITP\nETS\nETS\nIssued Sep 2...
1,Satya Okky Saputra,Everything Comes to at the right Moment,https://www.linkedin.com/in/satya-okky-saputra...,https://media.licdn.com/dms/image/v2/D4D03AQEw...,['Pengawas\nPengawas\nPT. Inovasi Anak Indones...,['Unika Soegijapranata Semarang\nUnika Soegija...,[]
2,Erwin Eldani,love life,https://www.linkedin.com/in/erwin-eldani-b485b...,https://media.licdn.com/dms/image/v2/D5635AQFr...,['Universitas Katolik Soegijapranata\nUniversi...,['Landscape Design\nLandscape Design\n2 experi...,[]
3,Ignatius Wisnu Widi Nugroho,Bachelor of Psychology,https://www.linkedin.com/in/ignatius-wisnu-wid...,https://media.licdn.com/dms/image/v2/D5635AQGj...,['Technical Support Specialist\nTechnical Supp...,"[""Unika Soegijapranata Semarang\nUnika Soegija...","['Microsoft Office\nMicrosoft Office', 'Proble..."
4,Richo Ardanny,Psychology Student | Seeking 2025 Internship |...,https://www.linkedin.com/in/richo-ardanny-5455...,https://media.licdn.com/dms/image/v2/D4E35AQEe...,"[""Unika Soegijapranata Semarang\nUnika Soegija...",['Public Speaking\nPublic Speaking\n2 educatio...,['BEM Soegijapranata Catholic University\nBEM ...


**Insight:**

## Assessing Data

In [3]:
# Inspect column names, non-null counts, and dtypes to spot missing values or wrong types
df_alumni.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Name                       5 non-null      object
 1   Headlines                  5 non-null      object
 2   Linkedin Link              5 non-null      object
 3   Profile Picture            5 non-null      object
 4   Experience                 5 non-null      object
 5   Education                  5 non-null      object
 6   Licenses & Certifications  5 non-null      object
dtypes: object(7)
memory usage: 412.0+ bytes


In [4]:
# Count rows that are exact duplicates (across all columns) of an earlier row
df_alumni.duplicated().sum()

0

In [5]:
# Summary statistics per column; for these text columns that is count, unique, top, and freq
df_alumni.describe()

Unnamed: 0,Name,Headlines,Linkedin Link,Profile Picture,Experience,Education,Licenses & Certifications
count,5,5,5,5,5,5,5
unique,5,5,5,5,5,5,4
top,Njo Joanna Nydia Siswanto,Fresh Graduate | Bachelor of Food Technology |...,https://www.linkedin.com/in/njo-joanna-nydia-s...,https://media.licdn.com/dms/image/v2/D5635AQFM...,"[""Research And Development Intern\nResearch An...","[""Unika Soegijapranata Semarang\nUnika Soegija...",[]
freq,1,1,1,1,1,1,2


**Insight:**

## Cleaning Data

### DataFrame df_alumni Formatting

In [6]:
def clean_experience_data(raw_experience):
    """Parse one scraped "Experience" cell into a list of structured job records.

    Parameters
    ----------
    raw_experience : str or list
        String representation of a Python list (as stored in the CSV) whose
        elements are experience entries with their fields separated by
        newlines. An already-parsed list/tuple is accepted as-is. Any other
        value (e.g. ``None`` or ``NaN`` from a missing cell) or a blank string
        is treated as "no experience".

    Returns
    -------
    list of dict
        One dict per entry with the keys "Job Title", "Company Name",
        "Working Type", "Datetime", "Job Location" and "Description".
        Fields that cannot be found are set to "Unknown" ("Description"
        falls back to an empty list).

    Raises
    ------
    ValueError, SyntaxError
        If a non-blank string is not a valid Python literal.
    """
    if isinstance(raw_experience, (list, tuple)):
        experience_list = raw_experience
    elif isinstance(raw_experience, str) and raw_experience.strip():
        # Convert string representation of list to list
        experience_list = ast.literal_eval(raw_experience)
    else:
        # Missing cell (None / NaN) or blank string: nothing to parse
        return []

    structured_data = []

    for entry in experience_list:
        # Remove duplicates while preserving order. Note that this also
        # collapses lines that are legitimately identical (e.g. a job title
        # equal to the company name), so an entry may end up with fewer
        # lines than the positional lookups below expect.
        lines = list(OrderedDict.fromkeys(entry.split('\n')))

        # Positional fields: guard every index so short entries yield
        # "Unknown" instead of raising IndexError.
        job_title = lines[0]
        company_name = lines[1] if len(lines) > 1 else "Unknown"
        working_type = lines[2] if len(lines) > 2 and '·' in lines[2] else "Unknown"

        # Pattern-based fields: first line that matches wins.
        # NOTE(review): the pattern only matches "YYYY - YYYY"; ranges written
        # with month names or "Present" would fall back to "Unknown" - confirm
        # against the scraped data.
        date_range = next((line for line in lines if re.search(r'\d{4} - \d{4}', line)), "Unknown")
        job_location = next((line for line in lines if 'Indonesia' in line), "Unknown")
        descriptions = [line for line in lines if line.startswith('- ')]

        structured_data.append({
            "Job Title": job_title,
            "Company Name": company_name,
            "Working Type": working_type,
            "Datetime": date_range,
            "Job Location": job_location,
            "Description": descriptions
        })

    return structured_data

In [7]:
# Parse the raw "Experience" strings into structured records and drop the raw column.
# The membership check keeps this cell safe to re-run: once "Experience" has been
# dropped, running the cell again would otherwise raise KeyError.
if "Experience" in df_alumni.columns:
    df_alumni = df_alumni.assign(
        Experience_Formated=df_alumni["Experience"].apply(clean_experience_data)
    ).drop(columns=["Experience"])
df_alumni.head()

Unnamed: 0,Name,Headlines,Linkedin Link,Profile Picture,Education,Licenses & Certifications,Experience_Formated
0,Njo Joanna Nydia Siswanto,Fresh Graduate | Bachelor of Food Technology |...,https://www.linkedin.com/in/njo-joanna-nydia-s...,https://media.licdn.com/dms/image/v2/D5635AQFM...,"[""Unika Soegijapranata Semarang\nUnika Soegija...",['TOEFL ITP\nTOEFL ITP\nETS\nETS\nIssued Sep 2...,[{'Job Title': 'Research And Development Inter...
1,Satya Okky Saputra,Everything Comes to at the right Moment,https://www.linkedin.com/in/satya-okky-saputra...,https://media.licdn.com/dms/image/v2/D4D03AQEw...,['Unika Soegijapranata Semarang\nUnika Soegija...,[],"[{'Job Title': 'Pengawas', 'Company Name': 'PT..."
2,Erwin Eldani,love life,https://www.linkedin.com/in/erwin-eldani-b485b...,https://media.licdn.com/dms/image/v2/D5635AQFr...,['Landscape Design\nLandscape Design\n2 experi...,[],[{'Job Title': 'Universitas Katolik Soegijapra...
3,Ignatius Wisnu Widi Nugroho,Bachelor of Psychology,https://www.linkedin.com/in/ignatius-wisnu-wid...,https://media.licdn.com/dms/image/v2/D5635AQGj...,"[""Unika Soegijapranata Semarang\nUnika Soegija...","['Microsoft Office\nMicrosoft Office', 'Proble...","[{'Job Title': 'Technical Support Specialist',..."
4,Richo Ardanny,Psychology Student | Seeking 2025 Internship |...,https://www.linkedin.com/in/richo-ardanny-5455...,https://media.licdn.com/dms/image/v2/D4E35AQEe...,['Public Speaking\nPublic Speaking\n2 educatio...,['BEM Soegijapranata Catholic University\nBEM ...,[{'Job Title': 'Unika Soegijapranata Semarang'...


**Insight:**

In [8]:
def clean_education_data(raw_education):
    """Parse one scraped "Education" cell into a list of structured records.

    Parameters
    ----------
    raw_education : str or list
        String representation of a Python list (as stored in the CSV) whose
        elements are education entries with their fields separated by
        newlines. An already-parsed list/tuple is accepted as-is. Any other
        value (e.g. ``None`` or ``NaN`` from a missing cell) or a blank string
        is treated as "no education".

    Returns
    -------
    list of dict
        One dict per entry with the keys "University" (the first line of the
        entry) and "Graduation Year" (the first line containing a
        "YYYY - YYYY" range, or "Unknown" when there is none).

    Raises
    ------
    ValueError, SyntaxError
        If a non-blank string is not a valid Python literal.
    """
    if isinstance(raw_education, (list, tuple)):
        education_list = raw_education
    elif isinstance(raw_education, str) and raw_education.strip():
        # Convert string representation of list to list
        education_list = ast.literal_eval(raw_education)
    else:
        # Missing cell (None / NaN) or blank string: nothing to parse
        return []

    structured_data = []

    for entry in education_list:
        # Remove duplicates while preserving order
        lines = list(OrderedDict.fromkeys(entry.split('\n')))

        university = lines[0]
        # The whole matching line is stored, i.e. the full "YYYY - YYYY" range
        # (plus any surrounding text), not just the final year.
        graduation_year = next((line for line in lines if re.search(r'\d{4} - \d{4}', line)), "Unknown")

        structured_data.append({
            "University": university,
            "Graduation Year": graduation_year
        })

    return structured_data

In [9]:
# Parse the raw "Education" strings into structured records and drop the raw column.
# The membership check keeps this cell safe to re-run: once "Education" has been
# dropped, running the cell again would otherwise raise KeyError.
if "Education" in df_alumni.columns:
    df_alumni = df_alumni.assign(
        Education_Cleaned=df_alumni["Education"].apply(clean_education_data)
    ).drop(columns=["Education"])
df_alumni.head()

Unnamed: 0,Name,Headlines,Linkedin Link,Profile Picture,Licenses & Certifications,Experience_Formated,Education_Cleaned
0,Njo Joanna Nydia Siswanto,Fresh Graduate | Bachelor of Food Technology |...,https://www.linkedin.com/in/njo-joanna-nydia-s...,https://media.licdn.com/dms/image/v2/D5635AQFM...,['TOEFL ITP\nTOEFL ITP\nETS\nETS\nIssued Sep 2...,[{'Job Title': 'Research And Development Inter...,[{'University': 'Unika Soegijapranata Semarang...
1,Satya Okky Saputra,Everything Comes to at the right Moment,https://www.linkedin.com/in/satya-okky-saputra...,https://media.licdn.com/dms/image/v2/D4D03AQEw...,[],"[{'Job Title': 'Pengawas', 'Company Name': 'PT...",[{'University': 'Unika Soegijapranata Semarang...
2,Erwin Eldani,love life,https://www.linkedin.com/in/erwin-eldani-b485b...,https://media.licdn.com/dms/image/v2/D5635AQFr...,[],[{'Job Title': 'Universitas Katolik Soegijapra...,"[{'University': 'Landscape Design', 'Graduatio..."
3,Ignatius Wisnu Widi Nugroho,Bachelor of Psychology,https://www.linkedin.com/in/ignatius-wisnu-wid...,https://media.licdn.com/dms/image/v2/D5635AQGj...,"['Microsoft Office\nMicrosoft Office', 'Proble...","[{'Job Title': 'Technical Support Specialist',...",[{'University': 'Unika Soegijapranata Semarang...
4,Richo Ardanny,Psychology Student | Seeking 2025 Internship |...,https://www.linkedin.com/in/richo-ardanny-5455...,https://media.licdn.com/dms/image/v2/D4E35AQEe...,['BEM Soegijapranata Catholic University\nBEM ...,[{'Job Title': 'Unika Soegijapranata Semarang'...,"[{'University': 'Public Speaking', 'Graduation..."


**Insight:**

## Removing Duplicates

**Insight:**