In [1]:
import os
import pdfplumber
import json
import openai
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
API_KEY=os.getenv("API_KEY")
openAI_API_KEY=os.getenv("openAI_API_KEY")

## CV

In [9]:

# STEP 1: Load PDF and extract text
def extract_text_from_pdf(file_path):
    with pdfplumber.open(file_path) as pdf:
        return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])

# STEP 2: Define JSON schema and prompt
def build_prompt(resume_text):
    json_schema = {
        "name": "",
        "email": "",
        "phone": "",
        "country": "",
        "city": "",
        "summary": "",
        "skills": [
            {
                'specialized skill': "",
                'common skill': ""
            }
        ],
        "experience": [
            {
                "job_title": "",
                "company": "",
                "start_date": "",
                "end_date": "",
                "description": ""
            }
        ],
        "education": [
            {
                "degree": "",
                "institution": "",
                "start_year": "",
                "end_year": ""
            }
        ],
       "enrichment parameters": [
            {
                "Employment Pattern & Progression": "",
                "Company Type & Sector": "",
                "Education Quality & Ranking": "",
                "Skill Demand & Market Relevance": "",
                "Leadership Experience": "",
                "Budget & Project Management": "",
                "International Experience & Mobility": "",
                "Soft Skills from Sales Calls": "",
                "Personality & Behavioral Traits": [
                    {
                    "Openness": "",
                    "Conscientiousness": "",
                    "Extraversion": "",
                    "Agreeableness": "",
                    "Neuroticism": ""
                    }
                ],
                "Future Career Goals (Sales-Inferred)": "",
                "Salary Expectations (Sales-Inferred)": "",
                "JD Enrichment with Implied Preferences": "",
                "Cultural Fit Indicators": ""
            }
        ]
    }

    prompt = f"""
You are an expert resume parser. Convert the resume text below into this JSON format. Fill in all the relevant fields. Leave the enrichment_parameters field empty.
The JSON schema is as follows:

{json.dumps(json_schema, indent=2)}

Resume:
\"\"\"
{resume_text}
\"\"\"
"""
    return prompt

# STEP 3: Call OpenAI API
def call_openai(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return response.choices[0].message.content

# STEP 4: Main function to process all PDFs
def main():
    input_folder = "./sample_CVs"
    output_folder = "./sentiment_CVs"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".pdf"):  # Ignore non-PDF files
            pdf_path = os.path.join(input_folder, file_name)
            print(f"Processing: {file_name}")

            # Extract text from PDF
            resume_text = extract_text_from_pdf(pdf_path)

            # Build prompt and call OpenAI
            prompt = build_prompt(resume_text)
            parsed_cv = call_openai(prompt)

            # Save the parsed CV to the output folder
            output_file = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}_parsed.json")
            with open(output_file, "w") as f:
                f.write(parsed_cv)

            print(f"Saved parsed CV to: {output_file}")

if __name__ == "__main__":
    main()

Processing: Lisa_Green.pdf
Saved parsed CV to: ./sentiment_CVs/Lisa_Green_parsed.json
Processing: Mark_Reynolds.pdf
Saved parsed CV to: ./sentiment_CVs/Mark_Reynolds_parsed.json
Processing: John_Doe_CV.pdf
Saved parsed CV to: ./sentiment_CVs/John_Doe_CV_parsed.json
Processing: Kevin_Adams.pdf
Saved parsed CV to: ./sentiment_CVs/Kevin_Adams_parsed.json
Processing: Michael_Johnson_CV.pdf
Saved parsed CV to: ./sentiment_CVs/Michael_Johnson_CV_parsed.json
Processing: Sophia_Martinez.pdf
Saved parsed CV to: ./sentiment_CVs/Sophia_Martinez_parsed.json
Processing: Sarah_Lee_CV.pdf
Saved parsed CV to: ./sentiment_CVs/Sarah_Lee_CV_parsed.json
Processing: Daniel_Carter.pdf
Saved parsed CV to: ./sentiment_CVs/Daniel_Carter_parsed.json
Processing: Jane_Smith_CV.pdf
Saved parsed CV to: ./sentiment_CVs/Jane_Smith_CV_parsed.json
Processing: Robert_Brown_CV.pdf
Saved parsed CV to: ./sentiment_CVs/Robert_Brown_CV_parsed.json


In [12]:


# STEP 1: Build enrichment prompt
def build_enrichment_prompt(cv_data):
    prompt = f"""
You are an expert in CV enrichment. Analyze the provided CV data and infer the following enrichment parameters:
- Employment Pattern & Progression: Describe the career trajectory and progression.
- Company Type & Sector: Identify the type and sector of companies worked for.
- Education Quality & Ranking: Assess the quality and ranking of educational institutions.
- Skill Demand & Market Relevance: Evaluate the relevance of skills in the current market.
- Leadership Experience: Highlight leadership roles and responsibilities.
- Budget & Project Management: Detail experience in managing budgets and projects.
- International Experience & Mobility: Indicate international exposure and mobility.
- Soft Skills from Sales Calls: Infer soft skills demonstrated in sales or communication.
- Personality & Behavioral Traits: Deduce personality traits and behaviors.
- Future Career Goals (Sales-Inferred): Predict future career aspirations based on sales roles.
- Salary Expectations (Sales-Inferred): Estimate salary expectations based on experience.
- JD Enrichment with Implied Preferences: Enrich job descriptions with implied preferences.
- Cultural Fit Indicators: Suggest cultural fit indicators for potential roles.

Also, analyze the candidate's Personality & Behavioral Traits according to the Big Five (OCEAN) model. Use the resume's tone, accomplishments, language, career path and the above inferred enrichment parameters to estimate the following traits:
Personality & Behavioral Traits: 
    "Openness": "How open is the candidate to new experiences and ideas?"
    "Conscientiousness": "How organized and dependable is the candidate?"
    "Extraversion": "How outgoing and energetic is the candidate?"
    "Agreeableness": "How friendly and compassionate is the candidate?"
    "Neuroticism": "How emotionally stable is the candidate?"

For each Personality and Behavioral Trait, provide a rating (High, Moderate, or Low).

Here is the CV data:
{json.dumps(cv_data, indent=2)}

Please analyze and fill in the enrichment parameters. Return the enriched CV data in JSON format. Please respond ONLY with raw JSON. Do not include explanations, markdown, or code block formatting.

"""
    return prompt

# STEP 2: Call OpenAI API for enrichment
def call_openai_for_enrichment(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    enriched_cv_content = response.choices[0].message.content
    return json.loads(enriched_cv_content)  # Assuming the response is valid JSON

# STEP 3: Main function to process all parsed CVs
def main():
    input_folder = "./gpt_parsed_CVs"
    output_folder = "./sentiment_CVs"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".json"):
                    input_file_path = os.path.join(input_folder, file_name)
                    print(f"Processing: {file_name}")

                    # Load the parsed CV data
                    with open(input_file_path, "r") as f:
                        cv_data = json.load(f)

                    # Build enrichment prompt and call OpenAI
                    prompt = build_enrichment_prompt(cv_data)
                    enriched_cv = call_openai_for_enrichment(prompt)

                    # Merge enrichment parameters into the original CV data
                    cv_data["enrichment parameters"] = enriched_cv.get("enrichment parameters", {})

                    # Save the enriched CV to the output folder
                    output_file_path = os.path.join(output_folder, file_name)
                    with open(output_file_path, "w") as f:
                        json.dump(cv_data, f, indent=4)

                    print(f"Saved enriched CV to: {output_file_path}")

if __name__ == "__main__":
    main()









Processing: Kevin_Adams_parsed.json
Saved enriched CV to: ./sentiment_CVs/Kevin_Adams_parsed.json
Processing: Sarah_Lee_CV_parsed.json
Saved enriched CV to: ./sentiment_CVs/Sarah_Lee_CV_parsed.json
Processing: Jane_Smith_CV_parsed.json
Saved enriched CV to: ./sentiment_CVs/Jane_Smith_CV_parsed.json
Processing: Lisa_Green_parsed.json
Saved enriched CV to: ./sentiment_CVs/Lisa_Green_parsed.json
Processing: John_Doe_CV_parsed.json
Saved enriched CV to: ./sentiment_CVs/John_Doe_CV_parsed.json
Processing: Michael_Johnson_CV_parsed.json
Saved enriched CV to: ./sentiment_CVs/Michael_Johnson_CV_parsed.json
Processing: Daniel_Carter_parsed.json
Saved enriched CV to: ./sentiment_CVs/Daniel_Carter_parsed.json
Processing: Robert_Brown_CV_parsed.json
Saved enriched CV to: ./sentiment_CVs/Robert_Brown_CV_parsed.json
Processing: Sophia_Martinez_parsed.json
Saved enriched CV to: ./sentiment_CVs/Sophia_Martinez_parsed.json
Processing: Mark_Reynolds_parsed.json
Saved enriched CV to: ./sentiment_CVs/Mark

## JD

Made changes to the the prompts. Shifted perspective towards the company rather than the candidate to enhance responses.
Disclaimer: the variable names are unchanged for jd and are the same as cv_data. 

In [None]:

# STEP 1: Load PDF and extract text
def extract_text_from_pdf(file_path):
    with pdfplumber.open(file_path) as pdf:
        return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])

# STEP 2: Define JSON schema and prompt
def build_prompt(resume_text):
    json_schema = {
        "country": "",
        "city": "",
        "summary": "",
        "skills": [
            {
                'specialized skill': "",
                'common skill': ""
            }
        ],
        "experience": [
            {
                "job_title": "",
                "company": "",
                "start_date": "",
                "end_date": "",
                "description": ""
            }
        ],
        "education": [
            {
                "degree": "",
                "institution": "",
                "start_year": "",
                "end_year": ""
            }
        ],
       "enrichment parameters": [
            {
                "Employment Pattern & Progression": "",
                "Company Type & Sector": "",
                "Education Quality & Ranking": "",
                "Skill Demand & Market Relevance": "",
                "Leadership Experience": "",
                "Budget & Project Management": "",
                "International Experience & Mobility": "",
                "Soft Skills from Sales Calls": "",
                "Future Career Goals (Sales-Inferred)": "",
                "Salary Expectations (Sales-Inferred)": "",
                "JD Enrichment with Implied Preferences": "",
                "Cultural Fit Indicators": ""
            }
        ]
    }

    prompt = f"""
You are an expert Job description parser. Convert the job description text below into this JSON format. Fill in all the relevant fields. Leave the enrichment parameters field empty.
Note that the json schema resembles a resume schema. 
This is because the end goal is to match the resume with the job description. 
However, keep in mind that the schema is to be filled with the job description data.
Again , the enrichment parameters field should be left empty.
The JSON schema is as follows:

{json.dumps(json_schema, indent=2)}

Job Description:
\"\"\"
{resume_text}
\"\"\"
"""
    return prompt

# STEP 3: Call OpenAI API
def call_openai(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return response.choices[0].message.content

# STEP 4: Main function to process all PDFs
def main():
    input_folder = "./JD_pdfs"
    output_folder = "./Jd_parsed"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".pdf"):  # Ignore non-PDF files
            pdf_path = os.path.join(input_folder, file_name)
            print(f"Processing: {file_name}")

            # Extract text from PDF
            resume_text = extract_text_from_pdf(pdf_path)

            # Build prompt and call OpenAI
            prompt = build_prompt(resume_text)
            parsed_cv = call_openai(prompt)

            # Save the parsed CV to the output folder
            output_file = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}_parsed.json")
            with open(output_file, "w") as f:
                f.write(parsed_cv)

            print(f"Saved parsed CV to: {output_file}")

if __name__ == "__main__":
    main()

In [17]:


# STEP 1: Build enrichment prompt
def build_enrichment_prompt(cv_data):
    prompt = f"""
You are an expert in Job Description enrichment. Analyze the provided Job description data and infer the following enrichment parameters:
- Employment Pattern & Progression: Describe the required career trajectory and progression for an ideal candidate.
- Company Type & Sector: Identify the type and sector of company.
- Education Quality & Ranking: potential quality and ranking of educational institutions of the candidate.
- Skill Demand & Market Relevance: Evaluate the relevance of skills in the current market.
- Leadership Experience: Highlight leadership roles and responsibilities for a potential candidate.
- Budget & Project Management: Detail experience in managing budgets and projects.
- International Experience & Mobility: Indicate international exposure and mobility for a potential candidate.
- Soft Skills from Sales Calls: Infer soft skills demonstrated in sales or communication.
- Personality & Behavioral Traits: Deduce personality traits and behaviors for this role.
- Future Career Goals (Sales-Inferred): Predict future career aspirations based on sales roles.
- Salary Expectations (Sales-Inferred): Estimate salary expectations for this role.
- JD Enrichment with Implied Preferences: leave this empty
- Cultural Fit Indicators: Suggest cultural fit indicators for potential candidates.

Also, analyze a potential candidate's Personality & Behavioral Traits according to the Big Five (OCEAN) model. Use the job description's tone, requiremets, language and the above inferred enrichment parameters to estimate the following traits:
Personality & Behavioral Traits: 
    "Openness": "How open is the candidate to new experiences and ideas?"
    "Conscientiousness": "How organized and dependable is the candidate?"
    "Extraversion": "How outgoing and energetic is the candidate?"
    "Agreeableness": "How friendly and compassionate is the candidate?"
    "Neuroticism": "How emotionally stable is the candidate?"

For each Personality and Behavioral Trait, provide a rating (High, Moderate, or Low).

Here is the job description data:
{json.dumps(cv_data, indent=2)}

Please analyze and fill in the enrichment parameters. Return the enriched job description data in JSON format. Please respond ONLY with raw JSON. Do not include explanations, markdown, or code block formatting.

"""
    return prompt

# STEP 2: Call OpenAI API for enrichment
def call_openai_for_enrichment(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    enriched_cv_content = response.choices[0].message.content
    return json.loads(enriched_cv_content)  # Assuming the response is valid JSON

# STEP 3: Main function to process all parsed CVs
def main():
    input_folder = "./JD_parsed"
    output_folder = "./JD_enriched"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".json"):
                    input_file_path = os.path.join(input_folder, file_name)
                    print(f"Processing: {file_name}")

                    # Load the parsed CV data
                    with open(input_file_path, "r") as f:
                        cv_data = json.load(f)

                    # Build enrichment prompt and call OpenAI
                    prompt = build_enrichment_prompt(cv_data)
                    enriched_cv = call_openai_for_enrichment(prompt)

                    # Merge enrichment parameters into the original CV data
                    cv_data["enrichment parameters"] = enriched_cv.get("enrichment parameters", {})

                    # Save the enriched CV to the output folder
                    output_file_path = os.path.join(output_folder, file_name)
                    with open(output_file_path, "w") as f:
                        json.dump(cv_data, f, indent=4)

                    print(f"Saved enriched CV to: {output_file_path}")

if __name__ == "__main__":
    main()









Processing: job_description_6_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_6_parsed.json
Processing: job_description_3_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_3_parsed.json
Processing: job_description_5_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_5_parsed.json
Processing: job_description_2_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_2_parsed.json
Processing: job_description_4_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_4_parsed.json
Processing: job_description_1_parsed.json
Saved enriched CV to: ./JD_enriched/job_description_1_parsed.json
