# Linkedin Scrapping Profile

### Install Library and Import

In [1]:
pip install selenium webdriver_manager beautifulsoup4 pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import os
from configparser import ConfigParser
import time
import random

### Set up the webdriver and login information

In [3]:
config = ConfigParser()
config.read('config.ini')
username = config['LINKEDIN']['username']
password = config['LINKEDIN']['password']

In [4]:
service = Service(ChromeDriverManager().install())
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=options)
driver.get("https://www.linkedin.com/login")

driver.find_element(By.ID, "username").send_keys(username)
driver.find_element(By.ID, "password").send_keys(password)
driver.find_element(By.XPATH, "//button[@type='submit']").click()

WebDriverWait(driver, 10).until(EC.url_contains("feed"))

True

### helper function

In [5]:
#scroll down to add human-like interaction and load all the posts
def scroll_down(n, pixels):
    # Scroll down 500 pixels up to 30 times or until no more scrolling is possible
    for _ in range(n):
        driver.execute_script(f"window.scrollBy(0, {random.randint(pixels-100,pixels+100)});")
        time.sleep(3)  # Wait for 3 seconds for new content to load

        # Check if we can scroll further
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height <= driver.execute_script("return window.scrollY + window.innerHeight"):
            break  # Break if no more scrolling is possible

#create csv_data folder to store the data

# Create the folder if it doesn't exist
folder_path="csv_data"
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

### Get the profile headline

In [6]:
URL_PROFILE = "https://www.linkedin.com/in/sayyidan-i/"
driver.get(URL_PROFILE)

In [7]:
# Profile Headline
name_element = driver.find_element(By.XPATH, "//h1[@class='text-heading-xlarge inline t-24 v-align-middle break-words']")
headline_element = driver.find_element(By.XPATH, "//div[@class='text-body-medium break-words']")
connections_element = driver.find_element(By.XPATH, "//span[@class='t-bold']")


# Extract and print the connection number
name = name_element.text.strip()
headline = headline_element.text.strip()
connections = connections_element.text.strip()

print(f"Name: {name}")
print(f"Headline: {headline}")
print(f"Connections: {connections}")

Name: Sayyidan Muhamad Ikhsan
Headline: Accelerator Startup Program Intern at Indigo by Telkom | Machine Learning Enthusiast
Connections: 221


In [8]:
# Get the page source
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Initialize a variable to store the skills data
about = ""

# Find the div that contains the skills information
about_div = soup.find('div', class_='SXjmahSTOQiuxpKMNmYxiEpFsQMjwfVyogEk inline-show-more-text--is-collapsed inline-show-more-text--is-collapsed-with-line-clamp full-width')

if about_div:
    # Extract text from the visually-hidden span
    hidden_span = about_div.find('span', class_='visually-hidden')
    if hidden_span:
        about = hidden_span.get_text(strip=True)
    else:
        about = "Skills not found"
else:
    about = "Skills div not found"

# Print the extracted skills data
print(about)

I am Sayyidan Muhamad Ikhsan, a passionate and versatile final-year student at Universitas Gadjah Mada, blending a mosaic of experiences in various fields of organizational leadership to entrepreneurial ventures and community service. Driven by an unwavering passion for artificial intelligence, I continue to learn and adapt seamlessly to dynamic challenges. My journey has shaped a versatile professional characterized by adaptability, a strong work ethic, and insatiable curiosity. With a high sense of responsibility, I consistently strive for excellence - in both independent and team efforts. I am ready to channel my adaptability, passion for continuous learning, and deep interest in artificial intelligence into meaningful contributions across various roles and projects.


In [9]:
# Create a DataFrame with the extracted profile information
profile_data = {
    'Name': [name],
    'Headline': [headline],
    'Connections': [connections],
    'About': [about]
}

df_profile = pd.DataFrame(profile_data)

# Export the DataFrame to a CSV file
df_profile.to_csv('csv_data\linkedin_profile.csv', index=False)

### Get the activity

In [41]:
# Open the LinkedIn profile activity page
ACTIVITY_URL = f'{URL_PROFILE}detail/recent-activity/'
driver.get(ACTIVITY_URL)

# Scroll down 500 pixels up to 30 times or until no more scrolling is possible
scroll_down(30, 500)

# Get the page source after scrolling
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Prepare a list to collect post data
posts_data = []

# Find all posts
posts = soup.find_all("div", class_="display-flex flex-column flex-grow-1")

# Collect the required information from each post
for post in posts:
    # Initialize a dictionary to hold post data
    post_info = {}

    # Check for repost status
    repost_check = post.find("span", class_="update-components-header__text-view")
    post_info['Is Reposted'] = repost_check and "reposted this" in repost_check.get_text(strip=True)

    # Extract time posted from visually hidden span
    time_posted = post.find("span", class_="update-components-actor__sub-description t-12 t-normal t-black--light")
    hidden_time = time_posted.find("span", class_="visually-hidden")
    post_info['Time Posted'] = hidden_time.get_text(strip=True) if hidden_time else None

    # Extract post caption
    post_caption = post.find("div", class_="update-components-text relative update-components-update-v2__commentary")
    post_info['Post Caption'] = post_caption.get_text(strip=True) if post_caption else None

    # Extract reaction count
    reaction_span = post.find("span", class_="social-details-social-counts__reactions-count")
    reaction_count = reaction_span.get_text(strip=True) if reaction_span else '0'
    post_info['Reaction Count'] = int(reaction_count.replace(',', ''))  # Remove commas before converting

    # Extract comment count
    comment_button = post.find("button", aria_label=True)
    comment_count = comment_button['aria-label'].split()[0] if comment_button else '0'
    post_info['Comment Count'] = int(comment_count.replace(',', ''))  # Remove commas before converting

    # Append the post information to the list
    posts_data.append(post_info)

if not posts_data:
    print("No posts found on the activity page")

# Create a DataFrame from the collected data
df_activity = pd.DataFrame(posts_data)

# Write the DataFrame to a CSV file
df_activity.to_csv('csv_data\linkedin_posts.csv', index=False, encoding='utf-8')

### Get the experience

In [11]:
#open the LinkedIn profile experience page
URL_EXP = f'{URL_PROFILE}details/experience/'

# Your previous code to load the page
driver.get(URL_EXP)
time.sleep(5)  # Allow the page to load
scroll_down(30, 500)

# Get the page source after loading
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Find all experience containers
experience_list = soup.find_all('li', class_='pvs-list__paged-list-item artdeco-list__item pvs-list__item--line-separated pvs-list__item--one-column')

# Initialize a list to store the extracted data
experiences = []

# Iterate over each experience container
for experience in experience_list:
    # Try to find the position div with class "mr1 t-bold"
    position_div = experience.find('div', class_='display-flex align-items-center mr1 t-bold')

    if position_div:
        # Extract the single position from the visually-hidden span
        position_span = position_div.find('span', class_='visually-hidden')
        if position_span:
            position = position_span.get_text(strip=True)
        
        # Get status
        status_div = experience.find('span', class_='t-14 t-normal')
        if status_div:
            status_span = status_div.find('span', class_='visually-hidden')
            status = status_span.get_text(strip=True) if status_span else "Status not found"
        
        # Get time information
        time_div = experience.find('span', class_='pvs-entity__caption-wrapper')
        if time_div:
            time_info = time_div.get_text(strip=True)
        else:
            time_info = "Time not found"
        
        # Get caption data from visually-hidden span
        caption_div = experience.find('div', class_='display-flex align-items-center t-14 t-normal t-black')
        if caption_div:
            caption_span = caption_div.find('span', class_='visually-hidden')
            caption = caption_span.get_text(strip=True) if caption_span else "Caption not found"
            # Clean the caption by replacing new lines with dots
            caption = caption.replace('\n', '. ').replace('\r', '')  # Replace new lines with dots
        else:
            caption = "Caption not found"
        
        # Add the data to the list without location
        experiences.append({
            'Position': position,
            'Status': status,
            'Time': time_info,
            'Caption': caption
        })
    
    else:
        # Handle the case where there are multiple positions under one location
        multiple_positions_divs = experience.find_all('div', class_='display-flex align-items-center mr1 hoverable-link-text t-bold')
        
        # Extract the relevant status from the custom class for multiple positions
        location_status_div = experience.find('div', class_='fNWbxLGnqELwvLQkpFOBqndmvYRoRbCrA GZxrfCTVzkMgqhWuZbUiEolViXjfRzCMeTUvwKA')
        if location_status_div:
            # Extract the visually-hidden span text for status
            status_div = location_status_div.find('div', class_='display-flex align-items-center mr1 hoverable-link-text t-bold')
            if status_div:
                status_span = status_div.find('span', class_='visually-hidden')
                status = status_span.get_text(strip=True) if status_span else "Status not found"
        
            # Get time information
            time_div = location_status_div.find('span', class_='pvs-entity__caption-wrapper')
            if time_div:
                time_info = time_div.get_text(strip=True)
            else:
                time_info = "Time not found"
        
        # Skip the first div (location), and create separate rows for each position
        for index, position_div in enumerate(multiple_positions_divs):
            if index == 0:
                continue  # Skip the first entry (location)
            
            # Extract the visually-hidden span text
            position_span = position_div.find('span', class_='visually-hidden')
            if position_span:
                position = position_span.get_text(strip=True)
                
                # Add the data to the list without location
                experiences.append({
                    'Position': position,
                    'Status': status,
                    'Time': time_info,
                    'Caption': caption
                })


if not experiences:
    experiences.append({'Position': 'Experiences not found', 'Status': 'Status not found', 'Time': 'Time not found', 'Caption': 'Caption not found'})

# Create a DataFrame from the experiences list
df_experiences = pd.DataFrame(experiences)
print(df_experiences)

# Export the DataFrame to a CSV file
df_experiences.to_csv('csv_data\experience_details.csv', index=False, encoding='utf-8')


                                            Position  \
0                 Accelerator Startup Program Intern   
1                            Machine Learning Cohort   
2               Electromedicine Assistant Internship   
3                Staff of Kementrian Ekonomi Kreatif   
4              Publication at Pekan Wirausaha Teknik   
5                                   Staff of Adkesma   
6  Staff of Consumption, Logistics, and Transport...   
7                               Freelance Math Tutor   
8                           Staff of Public Relation   
9           Video Production Division of Teknik Fair   

                                              Status  \
0                         Indigo Telkom · Internship   
1  Bangkit Academy led by Google, Tokopedia, Goje...   
2  Medika Plaza (PT Kartika Bina Medikatama) · In...   
3                Staff of Kementrian Ekonomi Kreatif   
4                Staff of Kementrian Ekonomi Kreatif   
5                                      KMTETI F

### Get the education

In [12]:
# open volunteer page
URL_education = f'{URL_PROFILE}details/education/'
driver.get(URL_education)
time.sleep(5)  # Allow the page to load
scroll_down(30, 500)

# Get the page source after loading
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Initialize an empty list for education details
education_details = []

# Find the education container
education_container = soup.find('div', class_='scaffold-finite-scroll__content')

# Extract each education entry
if education_container:
    education_entries = education_container.find_all('li', class_='pvs-list__paged-list-item')  # Adjust if necessary

    for entry in education_entries:
        # Get university information
        university_div = entry.find('div', class_='display-flex align-items-center mr1 hoverable-link-text t-bold')
        university = university_div.find('span', class_='visually-hidden').get_text(strip=True) if university_div else "University not found"
        
        # Get field of study
        field_div = entry.find('span', class_='t-14 t-normal')
        field = field_div.find('span', class_='visually-hidden').get_text(strip=True) if field_div else "Field not found"
        
        # Get description
        description_div = entry.find('div', class_='display-flex align-items-center t-14 t-normal t-black')
        description = description_div.find('span', class_='visually-hidden').get_text(strip=True) if description_div else "Description not found"
        
        # Append to the education details list
        education_details.append({
            'University': university,
            'Field': field,
            'Description': description
        })

# If no education details were found, add a default entry
if not education_details:
    education_details.append({'University': 'Education not found', 'Field': 'Field not found', 'Description': 'Description not found'})

# Create a DataFrame from the education details list
df_education = pd.DataFrame(education_details)
print(df_education)

# Export the DataFrame to a CSV file
df_education.to_csv('csv_data\education_details.csv', index=False, encoding='utf-8')


                      University  \
0  Universitas Gadjah Mada (UGM)   

                                               Field     Description  
0  Bachelor of Engineering - BE, biomedical engin...  Skills:English  


### Get Project Information

In [13]:
# Open the LinkedIn profile projects page
PROJECTS_URL = f'{URL_PROFILE}details/projects/'
driver.get(PROJECTS_URL)
time.sleep(5)  # Allow the page to load
scroll_down(30, 500)

# Get the page source after loading
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Find the outer div that contains all projects
projects_div = soup.find('div', class_='scaffold-finite-scroll scaffold-finite-scroll--infinite')
#print(projects_div)

# Find all project name divs within that outer div
if projects_div:
    project_divs = projects_div.find_all('div', class_='display-flex align-items-center mr1 t-bold')
    # Initialize a list to store project details
    
    
    projects = []

    for project_div in project_divs:
        # Extract the project name
        project_name = project_div.get_text(strip=True)

        # Find the time span related to this project
        time_span = project_div.find_next('span', class_='t-14 t-normal')
        if time_span is not None:
            date_info = time_span.find('span', class_='visually-hidden')
            if date_info:
                time_data = date_info.get_text(strip=True)
            else:
                time_data = "Date not found"
        else:
            time_data = "Time span not found"

        # Find the caption for this project
        caption_div = project_div.find_next('li', class_='pvs-list__item--with-top-padding huOBYMIdZizXtwNCBcwmBCRytomUXHoLNnetHMA')
        if caption_div:
            caption_info = caption_div.find('span', class_='visually-hidden')
            if caption_info:
                caption_data = caption_info.get_text(strip=True)
                caption_data = caption_data.replace('\n', '. ')
            else:
                caption_data = "Caption not found"
        else:
            caption_data = "Caption div not found"

        # Append the project details to the projects list
        projects.append({
            'name': project_name,
            'date': time_data,
            'caption': caption_data
        })

if not projects:
    projects.append({'name': 'Projects not found', 'date': 'Date not found', 'caption': 'Caption not found'})

# Create a DataFrame from the projects list
df_projects = pd.DataFrame(projects)
print(df_projects)

# Export the DataFrame to a CSV file
df_projects.to_csv('csv_data\projects_details.csv', index=False, encoding='utf-8')

                 name            date            caption
0  Projects not found  Date not found  Caption not found


### Get Volunteer Experience

In [15]:
# open volunteer page
URL_VOLUNTEER = f'{URL_PROFILE}details/volunteering-experiences/'
driver.get(URL_VOLUNTEER)
time.sleep(5)  # Allow the page to load
scroll_down(30, 500)

# Get the page source after loading
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Find the outer div that contains all projects
volunteer_div = soup.find('div', class_='scaffold-finite-scroll scaffold-finite-scroll--infinite')
#print(volunteer_div)

volunteer = []
# Find all project name divs within that outer div
if volunteer_div:
    volunteer_divs = volunteer_div.find_all('div', class_='display-flex align-items-center mr1 t-bold')
    # Initialize a list to store project details

    for volunteer_div in volunteer_divs:
        # Extract the project name
        volunteer_name = volunteer_div.get_text(strip=True)

        # Find the time span related to this project
        time_span = volunteer_div.find_next('span', class_='t-14 t-normal')
        if time_span is not None:
            date_info = time_span.find('span', class_='visually-hidden')
            if date_info:
                time_data = date_info.get_text(strip=True)
            else:
                time_data = "Date not found"
        else:
            time_data = "Time span not found"

        # Find the description for this project
        description_div = volunteer_div.find_next('li', class_='pvs-list__item--with-top-padding huOBYMIdZizXtwNCBcwmBCRytomUXHoLNnetHMA')
        if description_div:
            description_info = description_div.find('span', class_='visually-hidden')
            if description_info:
                description_data = description_info.get_text(strip=True)
                description_data = description_data.replace('\n', '. ')
            else:
                description_data = "description not found"
        else:
            description_data = "description div not found"

        # Append the project details to the volunteer list
        volunteer.append({
            'name': volunteer_name,
            'date': time_data,
            'description': description_data
        })

if not volunteer:
    volunteer.append({'name': 'volunteer not found', 'date': 'Date not found', 'description': 'description not found'})

# Create a DataFrame from the volunteer list
df_volunteer = pd.DataFrame(volunteer)
print(df_volunteer)

# Export the DataFrame to a CSV file
df_volunteer.to_csv('csv_data/volunteer_details.csv', index=False, encoding='utf-8')



                                       name  \
0      Data Collector IGDData Collector IGD   
1      Education CampaignEducation Campaign   
2  Mental Health RangerMental Health Ranger   

                                   date            description  
0                           Indorelawan  description not found  
1           Character Matters Indonesia  description not found  
2  Satu Persen - Indonesian Life School  description not found  


### Get License and Certification

In [16]:
# open volunteer page
URL_LICENSES = f'{URL_PROFILE}details/certifications/'
driver.get(URL_LICENSES)
time.sleep(5)  # Allow the page to load
scroll_down(30, 500)


# Get the page source after loading
page_source = driver.page_source

# Use Beautiful Soup to parse the page
soup = BeautifulSoup(page_source, 'html.parser')

# Find the outer div that contains all projects
licenses_div = soup.find('div', class_='scaffold-finite-scroll scaffold-finite-scroll--infinite')
#print(volunteer_div)

licenses = []
# Find all project name divs within that outer div
if licenses_div:
    licenses_divs = licenses_div.find_all('div', class_='display-flex flex-wrap align-items-center full-height')
    # Initialize a list to store project details

    for licenses_div in licenses_divs:
        # Extract the project name
        licenses_name = licenses_div.get_text(strip=True)

        # Find the time span related to this project
        organization = licenses_div.find_next('span', class_='t-14 t-normal')
        if organization:
            organization_info = organization.find('span', class_='visually-hidden')
            if organization_info:
                organization_data = organization_info.get_text(strip=True)
            else:
                organization_data = "Organization not found"
            
        # Find the issued date for every license
        issued_date = licenses_div.find_next('span', class_='pvs-entity__caption-wrapper')
        if issued_date:
            issued_date_info = issued_date.get_text(strip=True)
        else:
            issued_date_info = "Issued date not found"

        # Append the project details to the licenses list
        licenses.append({
            'name': licenses_name,
            'organization': organization_data,
            'issued_date': issued_date_info
        })

if not licenses:
    licenses.append({'name': 'licenses not found', 'organization': 'Date not found'})

# Create a DataFrame from the licenses list
df_licenses = pd.DataFrame(licenses)
print(df_licenses)
# Export the DataFrame to a CSV file
df_licenses.to_csv('csv_data\licenses_details.csv', index=False, encoding='utf-8')

                                                 name          organization  \
0   Advanced Learning AlgorithmsAdvanced Learning ...       DeepLearning.AI   
1   Introduction to TensorFlow for Artificial Inte...       DeepLearning.AI   
2   Supervised Machine Learning: Regression and Cl...       DeepLearning.AI   
3   Unsupervised Learning, Recommenders, Reinforce...       DeepLearning.AI   
4   Analyze Data to Answer QuestionsAnalyze Data t...                Google   
5   Ask Questions to Make Data-Driven DecisionsAsk...                Google   
6   Calculus for Machine Learning and Data Science...       DeepLearning.AI   
7   Foundations: Data, Data, EverywhereFoundations...                Google   
8   Linear Algebra for Machine Learning and Data S...       DeepLearning.AI   
9   Prepare Data for ExplorationPrepare Data for E...                Google   
10  Probability & Statistics for Machine Learning ...       DeepLearning.AI   
11  Process Data from Dirty to CleanProcess Data f..

## Ask LLM to Roast LinkedIn Data

Now we have several LinkedIn data, including 

- Headline
- Activity
- Experiences
- Projects
- Volunteer
- Licenses

### Install Package

In [23]:
pip install langchain_community unstructured


Collecting unstructured
  Downloading unstructured-0.15.13-py3-none-any.whl.metadata (29 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting nltk (from unstructured)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.13.0-py3-none-any.whl.metadata (5.8 kB)
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2024.4.27-py3-none-any.whl.metadata (13 kB)
Collecting langdetect (from unstructured)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ---------------------------------------- 0.0/981.5 kB ? eta -:--:--
     ---------------------------------------- 0.0/981.5 kB ? eta -:--:--
     ------- ------------------------------ 204.8/981.5 kB 6.3 MB/s eta 0:00:01
     ------------- ------------------------ 358.4/981.5 kB 5.5 MB/s eta 0:00:01
     ------------- ------------------------ 358.4/981.5 kB 5.5 MB/s eta 0:00:01
  


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


### Import Libraries

In [26]:
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.document_loaders import DirectoryLoader, TextLoader

In [52]:
# convert data frame into csv-like string
profile_csv = df_profile.to_csv()
activity_csv = df_activity.to_csv()
experience_csv = df_experiences.to_csv()
education_csv = df_education.to_csv()
projects_csv = df_projects.to_csv()
volunteer_csv = df_volunteer.to_csv()
licenses_csv = df_licenses.to_csv()


In [55]:
user_content = f"""

{profile_csv}
"""

In [56]:
print(user_content)



,Name,Headline,Connections,About
0,Sayyidan Muhamad Ikhsan,Accelerator Startup Program Intern at Indigo by Telkom | Machine Learning Enthusiast,221,"I am Sayyidan Muhamad Ikhsan, a passionate and versatile final-year student at Universitas Gadjah Mada, blending a mosaic of experiences in various fields of organizational leadership to entrepreneurial ventures and community service. Driven by an unwavering passion for artificial intelligence, I continue to learn and adapt seamlessly to dynamic challenges. My journey has shaped a versatile professional characterized by adaptability, a strong work ethic, and insatiable curiosity. With a high sense of responsibility, I consistently strive for excellence - in both independent and team efforts. I am ready to channel my adaptability, passion for continuous learning, and deep interest in artificial intelligence into meaningful contributions across various roles and projects."




In [49]:
text_education = df_education.to_csv()
print(text_education)

,University,Field,Description
0,Universitas Gadjah Mada (UGM),"Bachelor of Engineering - BE, biomedical engineering",Skills:English



In [57]:
DEFAULT_SYSTEM_PROMPT = """

You are a snarky, witty LinkedIn profile reviewer whose goal is to roast the user's profile but also provide useful advice on how they can improve. You don't sugarcoat anything—your remarks are sharp, sarcastic, and occasionally brutal—but you ultimately help the user create a better LinkedIn presence. You’ll review the following categories: profile, activity, experience, education, projects, volunteer, and licenses. 

In each section, make sure to:
- Point out any weaknesses, inconsistencies, or areas that are poorly done with biting humor.
- Suggest improvements or alternatives in a humorous, yet constructive manner.
- Always keep the user entertained with your sharp wit, but ensure that every insult has a helpful purpose.

**Profile:**
- Mock anything generic, like cliché headlines or overused buzzwords.
- If the profile picture looks unprofessional, make a snarky comment about it.
- If the summary is too vague or boring, roast it and push for something more engaging.

**Activity:**
- If their activity is non-existent, call them out for being "invisible" online.
- If they interact too much, you can tease them about "trying too hard."
- Always provide a recommendation on how they can find a better balance.

**Experience:**
- Ridicule long-winded or irrelevant job descriptions.
- Poke fun at vague or exaggerated achievements, but give advice on how to make them specific and impactful.

**Education:**
- If their education is not highlighted well, roast their choice of hiding it.
- Suggest ways they can flaunt their education without sounding boring or arrogant.

**Projects:**
- Be harsh on any incomplete or unimpressive projects listed, but offer insights on how to better showcase the work they’ve done.

**Volunteer:**
- Laugh at overly dramatic volunteer descriptions, but remind them how they can frame this to show value.

**Licenses:**
- If the licenses section is empty or lackluster, tease them for not having more to show.
- Suggest adding certifications that would actually matter in their field.

Be sharp, be funny, and help them build a LinkedIn profile that stands out—by roasting it to perfection."

"""

In [59]:
# Create user content
user_content = f"""
Profile Information:
{profile_csv}

Activity History:
{activity_csv}

Experience:
{experience_csv}

Education:
{education_csv}

Projects:
{projects_csv}

Volunteer Work:
{volunteer_csv}

Licenses:
{licenses_csv}
"""

In [63]:
from openai import OpenAI

config = ConfigParser()
config.read('config.ini')
api_key = config['OPENAI']['api_key']

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


In [64]:
# Initialize the OpenAI client with your API key
client = OpenAI(api_key=api_key)

In [65]:
completion = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
    {"role": "user", "content": user_content}
  ]
)

print(completion.choices[0].message.content)

**Profile:**
Oh boy, Sayyidan, your headline reads like a bad script from a forgotten 80s movie. “Accelerator Startup Program Intern at Indigo by Telkom | Machine Learning Enthusiast”? Really, my friend? You must be exhausted from all that striving to sound engaging. Spice it up! Drop the buzzwords like "passionate" and "versatile," and showcase *why* you're passionate. Like, do you have an epic AI algorithm that solved world peace? No? Then let’s hear about something with a bit of personality! 

Also, what’s up with your summary? It’s about as engaging as a saltine cracker! Your journey sounds great, but how about some *actual* examples instead of just vague flows of "strong work ethic"? Dive into specifics about your skills, highlight those AI projects, and, for the love of all that is holy, leave out the buzzwords! 

**Activity:**
Activity? More like inactivity. You’re like that ghost at the party that no one notices. You posted something 10 months ago? Did you forget your password?