In [1]:
import os
import json
import logging

from linkedin_api import Linkedin
from urllib.parse import urlparse, unquote

from openai import OpenAI
from dotenv import load_dotenv

import PyPDF2
import re

import fitz 


# Raise the "httpx" logger's threshold to WARNING so its INFO-level records are
# dropped (presumably the per-request log lines of the HTTP client used by the
# OpenAI SDK — TODO confirm).
logging.getLogger("httpx").setLevel(logging.WARNING)



In [2]:
# Load variables from a local .env file (if present) into the process
# environment so that no secret has to be written into the notebook itself.
load_dotenv()

# Authenticate with LinkedIn account credentials.
# os.environ[...] is used on purpose: a missing variable raises KeyError right
# here instead of surfacing later as an opaque authentication failure.
# Expected variables: LINKEDIN_EMAIL, LINKEDIN_PASSWORD, OPENAI_API_KEY.
api = Linkedin(os.environ["LINKEDIN_EMAIL"], os.environ["LINKEDIN_PASSWORD"])

# Authenticate with OpenAI API credentials.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [3]:
%run '/Users/leozinho.air/Desktop/Ironhack/Project 04 - LinkedMetrics/linkedin_data_extractor_functions.ipynb'

'''
### Functions:

## linkedin_profile_extractor() -> dict with LinkedIn profile information
## linkedin_job_description_extractor() -> dict with a LinkedIn job description
## linkedin_company_info_extractor() -> dict with the company description
## linkedin_job_company_extractor() -> dict with job description and company information

'''


Exception: File `'/Users/leozinho.air/Desktop/Ironhack/Project 04 - LinkedMetrics/linkedin_data_extractor_functions.ipynb'` not found.

## Prompt Engineering

### Profile analysis

In [None]:
# Returns the model's JSON reply parsed into a Python object (not JSON text).
def analyze_linkedin_profile(profile_dict):
    """
    Analyzes the given LinkedIn profile dictionary using OpenAI.

    The profile is serialized to JSON, embedded in an analysis prompt and sent
    to the chat-completions endpoint with the JSON response format requested.

    :param profile_dict: A dictionary containing LinkedIn profile information.
        It must be serializable by ``json.dumps``.
    :return: The model's reply parsed with ``json.loads``.
    :raises json.JSONDecodeError: If the reply is not valid JSON (presumably
        possible when the reply is cut off by the ``max_tokens`` limit).
    """

    # Convert the LinkedIn profile dictionary into a textual prompt.
    # ensure_ascii=False keeps non-English text readable instead of turning it
    # into \uXXXX escapes; the prompt asks the model to translate such text and
    # to preserve named entities, so it should see the real characters.
    profile_info = json.dumps(profile_dict, indent=2, ensure_ascii=False)

    prompt = f'''
    Given a LinkedIn profile in a structured dictionary format, translate any non-English text to English while preserving Named Entities in their original language. Then, generate a detailed analysis that provides a clear and structured overview. Focus specifically on highlighting the individual's key strengths, areas for improvement, and actionable suggestions for enhancing their profile. Emphasize skills listed in the 'skills' section and those implied in the headline, summary, and project descriptions.

    **LinkedIn Profile Data Structure:**
    {profile_info}

    **Analysis should include:**

    1. **Full Name:** Clearly state the individual's full name.

    2. **Location:** Mention the location if provided, highlighting the city and country.

    3. **Brief Overview:** Offer a concise summary based on the individual's headline, summary, experience, education, and projects, to portray a comprehensive narrative of their professional journey.

    4. **Degrees:** Inspect the education section to identify the highest academic degrees obtained. Focus on the highest level of academic achievement, considering both the degree level (e.g., Doctorate, Master's, Bachelor's) and the recency of completion. Present the degree type (translate in English), field of study (translate in English), institution (in real language), and graduation date. If multiple degrees of the same level exist, prioritize the most recent based on completion date.

    5. **Hard Skills List:** Compile a list of hard skills, drawing from the 'skills' section and inferences from the headline, summary, and projects.

    6. **Soft Skills List:** Gather a list of soft skills found throughout the profile, prefacing this section with a brief introduction.

    7. **Strengths:** Highlight the most prominent strengths of the profile, based on an in-depth analysis of the experience, education, and project sections.

    8. **Weaknesses:** Identify potential areas for improvement, considering any gaps or missing elements in the profile.

    9. **Suggestions for Improvement:** Offer specific, actionable advice to enhance the profile, focusing on increasing the individual's marketability and presentation.

    10. **Career Suggestion:** Propose specific job recommendations or career advice to help the individual advance their professional path. Just prompt a list of jobs

    The analysis should be concise, formatted for ease of reading, and maintain a professional tone throughout. Use bullet points for clarity and separate sections clearly to ensure a comprehensive and useful profile summary.

    '''

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful HR analytics assistant designed to output JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.5,
        max_tokens=1024,
        seed=42,
    )
    return json.loads(response.choices[0].message.content)



### Resume analysis

In [None]:
def pdf_to_resume_dict(pdf_path):
    """
    Extracts the text of a PDF resume and converts it into a structured format using OpenAI.

    The text of every page is concatenated, embedded in a prompt together with
    a template of the expected structure, and sent to the chat-completions
    endpoint with the JSON response format requested.

    :param pdf_path: The file path to the PDF document.
    :return: The model's reply parsed with ``json.loads``; if the reply cannot
        be parsed, the error is printed and the raw reply is returned as-is.
    """
    # Extract text from PDF. The context manager closes the document even when
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)

    prompt = f'''Given the following resume text, extract and organize the information into a structured format as shown in the template. Ensure that all relevant details such as full name, headline, summary, industry name, location, experience, education, languages, projects, and skills are accurately captured. Follow the template structure closely, adjusting for any additional categories or missing information as necessary.

                Resume Text:
                {text}

                Template for Structured Format:
                {{
                  "fullName": "[Full Name]",
                  "headline": "[Position | Specialization]",
                  "summary": "[Brief summary including skills, experiences, and objectives]",
                  "industryName": "[Industry]",
                  "locationName": "[City, Country]",
                  "geoCountryName": "[Country]",
                  "geoLocationName": "[City, City]",
                  "experience": [
                    {{
                      "locationName": "[City, Country]",
                      "geoLocationName": "[City, Country]",
                      "companyName": "[Company Name]",
                      "timePeriod": {{"startDate": {{"month": [Month], "year": [Year]}}, "endDate": {{"month": [Month], "year": [Year]}}}},
                      "industries": ["[Industry]"],
                      "title": "[Job Title]"
                    }},
                    # Add more experience entries as needed
                  ],
                  "education": [
                    {{
                      "schoolName": "[Institution Name]",
                      "timePeriod": {{"startDate": {{"year": [Year]}}, "endDate": {{"year": [Year]}}}},
                      "degreeName": "[Degree]",
                      "fieldOfStudy": "[Field of Study]"
                    }},
                    # Add more education entries as needed
                  ],
                  "languages": [
                    {{"name": "[Language]", "proficiency": "[Proficiency Level]"}},
                    # Add more languages as needed
                  ],
                  "projects": [
                    # Optional: Include any relevant projects
                  ],
                  "skills": ["[Skill 1]", "[Skill 2]"]
                }}

                Please structure the information from the resume text accordingly.
                '''
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful HR analytics assistant designed to output JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.5,
        max_tokens=1024,
        seed=42,
    )

    raw_reply = response.choices[0].message.content
    try:
        return json.loads(raw_reply)
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError (malformed reply); TypeError
        # covers a reply that is not a string. Best effort: report the problem
        # and hand the unparsed reply back to the caller.
        print(f"Error parsing response to JSON: {e}")
        return raw_reply
# Example usage
#if __name__ == "__main__":
    #pdf_path = '/Users/leozinho.air/Desktop/Ironhack/Project 04 - LinkedMetrics/leonardo_pagliacci_resume.pdf'
    #resume_dict = pdf_to_resume_dict(pdf_path)


### Job description analysis

In [None]:
# Returns the model's JSON reply parsed into a Python object (not JSON text).

def analyze_linkedin_jd(jd_dict):
    """
    Analyzes the given LinkedIn Job Description dictionary using OpenAI.

    The job description is serialized to JSON, embedded in an analysis prompt
    and sent to the chat-completions endpoint with the JSON response format
    requested.

    :param jd_dict: A dictionary containing LinkedIn Job Description information.
        It must be serializable by ``json.dumps``.
    :return: The model's reply parsed with ``json.loads``.
    :raises json.JSONDecodeError: If the reply is not valid JSON (presumably
        possible when the reply is cut off by the ``max_tokens`` limit).
    """

    # Convert the LinkedIn JD dictionary into a textual prompt.
    # ensure_ascii=False keeps non-English text readable instead of turning it
    # into \uXXXX escapes; the prompt asks the model to translate such text and
    # to preserve named entities, so it should see the real characters.
    jd_info = json.dumps(jd_dict, indent=2, ensure_ascii=False)

    prompt = f'''
    Given a job description as outlined below, translate any non-English text to English while preserving Named Entities in their original language. Then, generate a detailed analysis that provides a clear and structured overview. Focus specifically on extracting nuanced details such as the job title, concise job responsibilities, company information including URL and overview, location, required experience and academic degrees, skills needed, language requirements, and key responsibilities. Where explicit information is not provided, infer from context or explicitly state it's not mentioned. The analysis should be easily readable, employing bullet points and clear section separations.

    **Job Description Data Structure:**
    {jd_info}

    **Analysis should include:**

    1. **Job Title:** Clearly state the job title.
    
    2. **Location:** Identify and mention the job location based on the description or company's primary location if not explicitly stated.

    3. **Job Description Overview:** Summarize key job responsibilities and technologies involved, aiming for concise clarity.

    4. **Company Information:** 
       - **Company Name:** Mention the company name.
       - **LinkedIn URL:** Provide the LinkedIn URL.
       - **Company Overview:** Give a summary of the company's mission, vision, and what sets it apart.
       
    5. **Company Specialties:** Enumerate the company's specialties or areas of expertise.

    6. **Experience Level and Years Required:** Detail the experience level (e.g., entry-level, mid-senior level).If the job description does not explicitly mention an experience level or years of experience, infer the level (e.g., entry-level, mid-senior, senior) based on the responsibilities and skills mentioned. Assume mid-senior/senior level for roles requiring specific technical expertise or leadership responsibilities unless indicated otherwise.

    7. **Academic Degree Requested:** When a specific academic degree is not mentioned, analyze the descriptionText for keywords related to academic qualifications (e.g., "Bachelor's", "Master's", "Ph.D.", "degree in computer science") and infer the necessary degree level. If technical skills such as machine learning or natural language processing are highlighted, suggest relevant fields of study (e.g., computer science, data science) that typically prepare candidates for such roles.

    8. **Skills Requested:**
        - **Hard Skills:** Compile a list of hard skills required for the position, based on direct mentions or inference.
        - **Soft Skills:** Similarly, gather a list of soft skills important for the role.

    9. **Language Requirements:** If the job description does not specify language requirements, consider the language of the job description as the primary language needed for the role. Additionally, assess the company's location and mention any language that might be beneficial based on the geographical and cultural context.

    10. **Key Responsibilities:** Highlight the 10 major responsibilities associated with the position, formatted as bullet points for easy reading.
    '''

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful HR analytics assistant designed to output JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.5,
        max_tokens=1024,
        seed=42,
    )
    return json.loads(response.choices[0].message.content)


### Matching Profile and JD

In [None]:

def job_matching_system(profile_json, jd_json):
    """
    Evaluates the compatibility between a LinkedIn profile and a job description based on specified criteria and weights.

    Both inputs are serialized to JSON and embedded in a prompt that asks the model to score six weighted criteria:
    skill depth and specialization (25%), experience progression and relevance (25%), educational achievements (15%),
    cultural alignment and soft skills (15%), language fluency and international exposure (10%), and growth potential
    and learning agility (10%). The prompt requests an overall score from 0 to 100, a per-criterion breakdown with
    improvement suggestions, and a short summary.

    Parameters:
    - profile_json (dict): A dictionary containing LinkedIn profile data; must be serializable by ``json.dumps``.
    - jd_json (dict): A dictionary containing job description data; must be serializable by ``json.dumps``.

    Returns:
    - The model's reply parsed with ``json.loads``. The prompt asks for the keys "Overall Compatibility Score",
      "Details" and "Summary"; the function itself does not validate that they are present.

    Raises:
    - json.JSONDecodeError: If the reply is not valid JSON (presumably possible when the reply is cut off by the
      ``max_tokens`` limit).
    """

    # ensure_ascii=False keeps non-English text readable in the prompt instead
    # of turning it into \uXXXX escapes.
    profile_block = json.dumps(profile_json, indent=4, ensure_ascii=False)
    jd_block = json.dumps(jd_json, indent=4, ensure_ascii=False)

    # Define the prompt for comparison and scoring
    prompt = f"""
    Given the detailed LinkedIn profile and job description, perform a comprehensive compatibility analysis. Consider advanced criteria with their respective weights for an overall compatibility score from 0 to 100. Include qualitative and quantitative analysis on:

    1. Skill Depth and Specialization (25% weight): Evaluate not only the presence of required skills but the depth, expertise, and specialization levels demonstrated.
    2. Experience Progression and Relevance (25% weight): Assess the career progression, relevance of the experiences to the job, and achievements in past roles.
    3. Educational Achievements and Specializations (15% weight): Consider the level, relevance, and prestige of educational qualifications.
    4. Cultural Alignment and Soft Skills (15% weight): Evaluate cultural fit, leadership, communication, and teamwork abilities.
    5. Language Fluency and International Exposure (10% weight): Assess language skills and experience in international or diverse settings.
    6. Growth Potential and Learning Agility (10% weight): Estimate the candidate's potential for growth and ability to learn new skills.


    The analysis output should be a structured JSON object containing:
    - "Overall Compatibility Score"(key): An aggregate score based on the criteria weights.
    - "Details"(key): A dictionary with each criterion as a key. For each key, provide:
        - "Match Status": "Match" or "Partial Match" or "No Match"
        - "Percentage Match": A numerical score from 0 to 100
        - "Matched Skills": (Only for Skill Matching) A list of matched skills.
        - "Suggestions": Text on how the profile can be improved to better match the job description.
    - "Summary"(key): Brief text that summarizes the results

    LinkedIn Profile Summary:
    {profile_block}

    Job Description Summary:
    {jd_block}

    Format your response as a JSON object following the structure provided above.
    """

    response = client.chat.completions.create(
        model="gpt-4-0125-preview",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful HR analytics assistant designed to output JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
        max_tokens=1024,
        seed=42,
    )

    return json.loads(response.choices[0].message.content)


