# Import Libraries

In [2]:
# Data Processing 
import pandas as pd
import re
import ast
from collections import OrderedDict

# Data Manipulation
import numpy as np

# Data Visualization
import seaborn as sns 
import plotly.express as px
from termcolor import colored
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Data Wrangling

## Gathering Data

In [12]:
# Load the scraped data
df_alumni = pd.read_csv("Data/Linkedin_SCU_Alumni.csv")
df_alumni 

Unnamed: 0,City,Name,Headlines,Linkedin Link,Profile Picture,Experience,Education,Licenses & Certifications
0,Surabaya,Wenseslaus Garendy,Infrastructure and Enviroment Engineering | He...,https://www.linkedin.com/in/wenseslaus-garendy...,https://media.licdn.com/dms/image/v2/D5635AQHT...,[{'Job Title': 'Health Safety Environment Engi...,"[{'School': 'Unika Soegijapranata Semarang', '...",[{'Certification': 'Sertifikasi SISTEM MANAJEM...
1,Jakarta,Imelda Aprilia Wibisono,Branch Manager at Edukita,https://www.linkedin.com/in/imeldaaw?miniProfi...,https://media.licdn.com/dms/image/v2/D5603AQGJ...,"[{'Job Title': 'Branch Manager', 'Company': 'E...","[{'School': 'Unika Soegijapranata Semarang', '...","[{'Certification': 'EF Set Certificate', 'Issu..."
2,Jakarta,Angela Vinky Darudono,HCBP Operations at PT Link Net Tbk,https://www.linkedin.com/in/angela-vinky-darud...,https://media.licdn.com/dms/image/v2/C5603AQFH...,"[{'Job Title': 'PT Link Net Tbk', 'Company': '...","[{'School': 'Unika Soegijapranata Semarang', '...",[]
3,Jakarta,Trisha Monica,Talent Acquisition Specialist @ Somethinc-Beau...,https://www.linkedin.com/in/trisha-monica-22b0...,https://media.licdn.com/dms/image/v2/D5635AQEM...,[{'Job Title': 'Talent Acquisition Specialist'...,"[{'School': 'Unika Soegijapranata Semarang', '...",[]
4,Jakarta,Sherline Nugroho,Finance Accounting,https://www.linkedin.com/in/sherline-nugroho?m...,https://media.licdn.com/dms/image/v2/D5603AQFt...,"[{'Job Title': 'Finance Accounting', 'Company'...","[{'School': 'Unika Soegijapranata Semarang', '...",[]
5,Jakarta,Maria Giovanni,Senior Learning Quality Assurance at Wings Gro...,https://www.linkedin.com/in/mariagiovannihnios...,"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP//...",[{'Job Title': 'Wings Group Indonesia (Sayap M...,"[{'School': 'Unika Soegijapranata Semarang', '...",[{'Certification': 'Certified Human Capital St...
6,Jakarta,Amadea Carissa,Corporate Marketing Manager at Long John Silve...,https://www.linkedin.com/in/amadeacarissa?mini...,https://media.licdn.com/dms/image/v2/D5603AQG7...,"[{'Job Title': 'Corporate Marketing Manager', ...","[{'School': 'Unika Soegijapranata Semarang', '...","[{'Certification': 'TOEFL ITP', 'Issued By': '..."
7,Surabaya,Rizky Kurniawan,Junior Architect,https://www.linkedin.com/in/rizky-kurniawan-a7...,https://media.licdn.com/dms/image/v2/D5603AQH5...,"[{'Job Title': 'Junior Architect', 'Company': ...","[{'School': 'Unika Soegijapranata Semarang', '...",[]
8,Surabaya,Frida Catalina K,Human Resources | Talent Acquisition,https://www.linkedin.com/in/frida-catalina-k-8...,https://media.licdn.com/dms/image/v2/C4D03AQGP...,"[{'Job Title': 'Human Resources', 'Company': '...","[{'School': 'Unika Soegijapranata Semarang', '...",[]
9,Surabaya,Monicaningtyas Inferlambang,Child and Adolescent Psychologist,https://www.linkedin.com/in/monicaningtyas-inf...,https://media.licdn.com/dms/image/v2/D5603AQE4...,"[{'Job Title': 'Clinical Psychologist', 'Compa...","[{'School': 'Unika Soegijapranata Semarang', '...",[{'Certification': 'Surat Tanda Registrasi Psi...


**Insight:**

## Assessing Data

In [4]:
df_alumni.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   City                       12 non-null     object
 1    Name                      12 non-null     object
 2   Headlines                  12 non-null     object
 3   Linkedin Link              12 non-null     object
 4   Profile Picture            12 non-null     object
 5   Experience                 12 non-null     object
 6   Education                  12 non-null     object
 7   Licenses & Certifications  12 non-null     object
dtypes: object(8)
memory usage: 900.0+ bytes


In [5]:
df_alumni.duplicated().sum()

0

In [6]:
df_alumni.describe()

Unnamed: 0,City,Name,Headlines,Linkedin Link,Profile Picture,Experience,Education,Licenses & Certifications
count,12,12,12,12,12,12,12,12
unique,2,12,12,12,12,12,12,6
top,Surabaya,Wenseslaus Garendy,Infrastructure and Enviroment Engineering | He...,https://www.linkedin.com/in/wenseslaus-garendy...,https://media.licdn.com/dms/image/v2/D5635AQHT...,[{'Job Title': 'Health Safety Environment Engi...,"[{'School': 'Unika Soegijapranata Semarang', '...",[]
freq,6,1,1,1,1,1,1,7


**Insight:**

## Cleaning Data

### Dataframe df_alumni Formating

In [13]:
def clean_and_format_text_sheets(data):
    if isinstance(data, str) and data.strip():
        try:
            items = ast.literal_eval(data)  # Convert string to list of dictionaries
            if isinstance(items, list) and all(isinstance(i, dict) for i in items):  
                formatted_entries = []
                for entry in items:
                    formatted_entry = "\n".join([f"- {key}: {value}" for key, value in entry.items() if value])
                    formatted_entries.append(formatted_entry)
                return "\n \n".join(formatted_entries)  # Add spacing between entries
            else:
                return data.strip()
        except (SyntaxError, ValueError):
            return data.strip()
    return "N/A"

# Apply the function to DataFrame columns
df_alumni["Experience"] = df_alumni["Experience"].apply(clean_and_format_text_sheets)
df_alumni["Education"] = df_alumni["Education"].apply(clean_and_format_text_sheets)
df_alumni["Licenses & Certifications"] = df_alumni["Licenses & Certifications"].apply(clean_and_format_text_sheets)

df_alumni

Unnamed: 0,City,Name,Headlines,Linkedin Link,Profile Picture,Experience,Education,Licenses & Certifications
0,Surabaya,Wenseslaus Garendy,Infrastructure and Enviroment Engineering | He...,https://www.linkedin.com/in/wenseslaus-garendy...,https://media.licdn.com/dms/image/v2/D5635AQHT...,- Job Title: Health Safety Environment Enginee...,- School: Unika Soegijapranata Semarang\n- Deg...,- Certification: Sertifikasi SISTEM MANAJEMEN ...
1,Jakarta,Imelda Aprilia Wibisono,Branch Manager at Edukita,https://www.linkedin.com/in/imeldaaw?miniProfi...,https://media.licdn.com/dms/image/v2/D5603AQGJ...,- Job Title: Branch Manager\n- Company: Edukit...,- School: Unika Soegijapranata Semarang\n- Deg...,- Certification: EF Set Certificate\n- Issued ...
2,Jakarta,Angela Vinky Darudono,HCBP Operations at PT Link Net Tbk,https://www.linkedin.com/in/angela-vinky-darud...,https://media.licdn.com/dms/image/v2/C5603AQFH...,- Job Title: PT Link Net Tbk\n- Company: Full-...,- School: Unika Soegijapranata Semarang\n- Deg...,
3,Jakarta,Trisha Monica,Talent Acquisition Specialist @ Somethinc-Beau...,https://www.linkedin.com/in/trisha-monica-22b0...,https://media.licdn.com/dms/image/v2/D5635AQEM...,- Job Title: Talent Acquisition Specialist\n- ...,- School: Unika Soegijapranata Semarang\n- Deg...,
4,Jakarta,Sherline Nugroho,Finance Accounting,https://www.linkedin.com/in/sherline-nugroho?m...,https://media.licdn.com/dms/image/v2/D5603AQFt...,- Job Title: Finance Accounting\n- Company: Ag...,- School: Unika Soegijapranata Semarang\n- Deg...,
5,Jakarta,Maria Giovanni,Senior Learning Quality Assurance at Wings Gro...,https://www.linkedin.com/in/mariagiovannihnios...,"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP//...",- Job Title: Wings Group Indonesia (Sayap Mas ...,- School: Unika Soegijapranata Semarang\n- Deg...,- Certification: Certified Human Capital Staff...
6,Jakarta,Amadea Carissa,Corporate Marketing Manager at Long John Silve...,https://www.linkedin.com/in/amadeacarissa?mini...,https://media.licdn.com/dms/image/v2/D5603AQG7...,- Job Title: Corporate Marketing Manager\n- Co...,- School: Unika Soegijapranata Semarang\n- Deg...,- Certification: TOEFL ITP\n- Issued By: TOEFL...
7,Surabaya,Rizky Kurniawan,Junior Architect,https://www.linkedin.com/in/rizky-kurniawan-a7...,https://media.licdn.com/dms/image/v2/D5603AQH5...,- Job Title: Junior Architect\n- Company: Spas...,- School: Unika Soegijapranata Semarang\n- Deg...,
8,Surabaya,Frida Catalina K,Human Resources | Talent Acquisition,https://www.linkedin.com/in/frida-catalina-k-8...,https://media.licdn.com/dms/image/v2/C4D03AQGP...,- Job Title: Human Resources\n- Company: PT. S...,- School: Unika Soegijapranata Semarang\n- Deg...,
9,Surabaya,Monicaningtyas Inferlambang,Child and Adolescent Psychologist,https://www.linkedin.com/in/monicaningtyas-inf...,https://media.licdn.com/dms/image/v2/D5603AQE4...,- Job Title: Clinical Psychologist\n- Company:...,- School: Unika Soegijapranata Semarang\n- Deg...,- Certification: Surat Tanda Registrasi Psikol...


In [14]:
# Save the improved file for Google Sheets
sheets_friendly_file_path = "Data/Cleaned_Linkedin_SCU_Alumni.csv"
df_alumni.to_csv(sheets_friendly_file_path, index=False)

# Return the new file path
sheets_friendly_file_path

'Data/Cleaned_Linkedin_SCU_Alumni.csv'

**Insight:**

## Removing Duplicates

**Insight:**